ai-llm: tighten gpu placement and resources
This commit is contained in:
parent fec7713049
commit 2c3ffdbf95
@@ -21,8 +21,8 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
-        ai.bstein.dev/restartedAt: "2026-01-25T19:10:00Z"
+        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
         nodeAffinity:
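Note: these annotations sit on the Deployment's pod template, so bumping ai.bstein.dev/restartedAt (together with the narrowed GPU-pool annotation) triggers a fresh rollout. A minimal sketch of where the block lives in the manifest; the Deployment name, namespace, and surrounding fields are assumptions, not copied from the repo:

    # Sketch only: placement of the pod-template annotations after this change.
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: ollama          # assumed name
      namespace: ai-llm     # assumed from the commit prefix
    spec:
      template:
        metadata:
          labels:
            app: ollama
          annotations:
            ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
            ai.bstein.dev/gpu: GPU pool (titan-22/24)
            ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"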
@@ -32,8 +32,6 @@ spec:
               - key: kubernetes.io/hostname
                 operator: In
                 values:
-                - titan-20
-                - titan-21
                 - titan-22
                 - titan-24
       runtimeClassName: nvidia
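For context, dropping titan-20 and titan-21 from the hostname list means the scheduler can only place these pods on titan-22 or titan-24. A sketch of the full affinity block implied by the hunk above, assuming a required (hard) nodeAffinity rule; indentation and enclosing field names are assumptions:

    # Sketch, not copied from the repo: the complete placement block after this change.
    spec:
      template:
        spec:
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                    - titan-22
                    - titan-24
          runtimeClassName: nvidia   # NVIDIA container runtime for GPU access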
@@ -69,8 +67,8 @@ spec:
           mountPath: /root/.ollama
         resources:
           requests:
-            cpu: 250m
-            memory: 1Gi
+            cpu: 500m
+            memory: 2Gi
             nvidia.com/gpu.shared: 1
           limits:
             nvidia.com/gpu.shared: 1
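The nvidia.com/gpu.shared resource name is not a Kubernetes built-in; it is what the NVIDIA device plugin advertises when GPU time-slicing is enabled with renameByDefault. A sketch of such a plugin config, assuming the cluster runs the upstream k8s-device-plugin with time-slicing; the replica count is an assumption:

    # Sketch of a device-plugin time-slicing config that would expose
    # nvidia.com/gpu.shared instead of nvidia.com/gpu. Values are assumptions.
    version: v1
    sharing:
      timeSlicing:
        renameByDefault: true   # advertise <resource>.shared instead of <resource>
        resources:
        - name: nvidia.com/gpu
          replicas: 4           # assumed: each physical GPU shared four ways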
@@ -97,10 +95,10 @@ spec:
           mountPath: /root/.ollama
         resources:
           requests:
-            cpu: "2"
-            memory: 8Gi
+            cpu: "4"
+            memory: 16Gi
             nvidia.com/gpu.shared: 1
           limits:
-            cpu: "4"
-            memory: 12Gi
+            cpu: "8"
+            memory: 24Gi
             nvidia.com/gpu.shared: 1
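After this change the main container's CPU and memory requests stay below its limits (Burstable QoS), while the shared GPU resource is pinned to 1 in both, since Kubernetes does not allow extended-resource requests and limits to differ. The assembled block as it likely reads now; the container name is an assumption:

    # Sketch of the main Ollama container's resources after this commit.
    containers:
    - name: ollama            # assumed container name
      resources:
        requests:
          cpu: "4"
          memory: 16Gi
          nvidia.com/gpu.shared: 1
        limits:
          cpu: "8"
          memory: 24Gi
          nvidia.com/gpu.shared: 1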