ai-llm: tighten gpu placement and resources

Brad Stein 2026-01-26 11:44:28 -03:00
parent fec7713049
commit 2c3ffdbf95


@@ -21,8 +21,8 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
-        ai.bstein.dev/restartedAt: "2026-01-25T19:10:00Z"
+        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
         nodeAffinity:
@@ -32,8 +32,6 @@ spec:
               - key: kubernetes.io/hostname
                 operator: In
                 values:
-                - titan-20
-                - titan-21
                 - titan-22
                 - titan-24
       runtimeClassName: nvidia
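
For reference, the node-affinity stanza after this change reads roughly as follows; this is a sketch assuming the usual requiredDuringSchedulingIgnoredDuringExecution form, with only the hostname key, operator, and values taken from the hunk above.

affinity:
  nodeAffinity:
    # Hard scheduling requirement: the pod may only land on the two remaining GPU nodes.
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
      - matchExpressions:
        - key: kubernetes.io/hostname
          operator: In
          values:
          - titan-22
          - titan-24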
@@ -69,8 +67,8 @@ spec:
           mountPath: /root/.ollama
         resources:
           requests:
-            cpu: 250m
-            memory: 1Gi
+            cpu: 500m
+            memory: 2Gi
             nvidia.com/gpu.shared: 1
           limits:
             nvidia.com/gpu.shared: 1
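
The nvidia.com/gpu.shared resource name is what the NVIDIA device plugin advertises when GPU time-slicing is enabled with renameByDefault, so each container here consumes one time-sliced share rather than a whole GPU. A minimal sketch of that plugin configuration is shown below; the ConfigMap name, namespace, and replica count are assumptions and not taken from this commit.

apiVersion: v1
kind: ConfigMap
metadata:
  name: nvidia-device-plugin-config   # hypothetical name
  namespace: kube-system
data:
  config.yaml: |
    version: v1
    sharing:
      timeSlicing:
        # Advertise nvidia.com/gpu.shared instead of nvidia.com/gpu.
        renameByDefault: true
        resources:
        - name: nvidia.com/gpu
          replicas: 4   # assumed number of slices per physical GPU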
@@ -97,10 +95,10 @@ spec:
           mountPath: /root/.ollama
         resources:
           requests:
-            cpu: "2"
-            memory: 8Gi
+            cpu: "4"
+            memory: 16Gi
             nvidia.com/gpu.shared: 1
           limits:
-            cpu: "4"
-            memory: 12Gi
+            cpu: "8"
+            memory: 24Gi
             nvidia.com/gpu.shared: 1
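
Put together, the second container's resources block now reads as below; the indentation and field grouping are reconstructed from the hunk above. Extended resources such as nvidia.com/gpu.shared cannot be overcommitted, so the request and limit for that resource must stay equal, as they do here.

resources:
  requests:
    cpu: "4"
    memory: 16Gi
    nvidia.com/gpu.shared: 1
  limits:
    cpu: "8"
    memory: 24Gi
    nvidia.com/gpu.shared: 1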