ai-llm: tighten gpu placement and resources

This commit is contained in:
Brad Stein 2026-01-26 11:44:28 -03:00
parent fec7713049
commit 2c3ffdbf95

View File

@@ -21,8 +21,8 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
-        ai.bstein.dev/restartedAt: "2026-01-25T19:10:00Z"
+        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
         nodeAffinity:
@@ -32,8 +32,6 @@ spec:
               - key: kubernetes.io/hostname
                 operator: In
                 values:
-                  - titan-20
-                  - titan-21
                   - titan-22
                   - titan-24
       runtimeClassName: nvidia
@@ -69,8 +67,8 @@ spec:
             mountPath: /root/.ollama
           resources:
             requests:
-              cpu: 250m
-              memory: 1Gi
+              cpu: 500m
+              memory: 2Gi
               nvidia.com/gpu.shared: 1
             limits:
               nvidia.com/gpu.shared: 1
@@ -97,10 +95,10 @@ spec:
             mountPath: /root/.ollama
           resources:
             requests:
-              cpu: "2"
-              memory: 8Gi
+              cpu: "4"
+              memory: 16Gi
               nvidia.com/gpu.shared: 1
             limits:
-              cpu: "4"
-              memory: 12Gi
+              cpu: "8"
+              memory: 24Gi
               nvidia.com/gpu.shared: 1