ai-llm: tighten gpu placement and resources
This commit is contained in:
parent
fec7713049
commit
2c3ffdbf95
@ -21,8 +21,8 @@ spec:
|
|||||||
app: ollama
|
app: ollama
|
||||||
annotations:
|
annotations:
|
||||||
ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
|
ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
|
||||||
ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
|
ai.bstein.dev/gpu: GPU pool (titan-22/24)
|
||||||
ai.bstein.dev/restartedAt: "2026-01-25T19:10:00Z"
|
ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
|
||||||
spec:
|
spec:
|
||||||
affinity:
|
affinity:
|
||||||
nodeAffinity:
|
nodeAffinity:
|
||||||
@ -32,8 +32,6 @@ spec:
|
|||||||
- key: kubernetes.io/hostname
|
- key: kubernetes.io/hostname
|
||||||
operator: In
|
operator: In
|
||||||
values:
|
values:
|
||||||
- titan-20
|
|
||||||
- titan-21
|
|
||||||
- titan-22
|
- titan-22
|
||||||
- titan-24
|
- titan-24
|
||||||
runtimeClassName: nvidia
|
runtimeClassName: nvidia
|
||||||
@ -69,8 +67,8 @@ spec:
|
|||||||
mountPath: /root/.ollama
|
mountPath: /root/.ollama
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 250m
|
cpu: 500m
|
||||||
memory: 1Gi
|
memory: 2Gi
|
||||||
nvidia.com/gpu.shared: 1
|
nvidia.com/gpu.shared: 1
|
||||||
limits:
|
limits:
|
||||||
nvidia.com/gpu.shared: 1
|
nvidia.com/gpu.shared: 1
|
||||||
@ -97,10 +95,10 @@ spec:
|
|||||||
mountPath: /root/.ollama
|
mountPath: /root/.ollama
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: "2"
|
cpu: "4"
|
||||||
memory: 8Gi
|
memory: 16Gi
|
||||||
nvidia.com/gpu.shared: 1
|
nvidia.com/gpu.shared: 1
|
||||||
limits:
|
limits:
|
||||||
cpu: "4"
|
cpu: "8"
|
||||||
memory: 12Gi
|
memory: 24Gi
|
||||||
nvidia.com/gpu.shared: 1
|
nvidia.com/gpu.shared: 1
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user