diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml
index bf012c0b..f413c0e3 100644
--- a/services/ai-llm/deployment.yaml
+++ b/services/ai-llm/deployment.yaml
@@ -21,7 +21,7 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5:14b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/gpu: GPU pool (titan-20/21)
         ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
@@ -32,13 +32,13 @@ spec:
               - key: kubernetes.io/hostname
                 operator: In
                 values:
-                - titan-22
-                - titan-24
+                - titan-20
+                - titan-21
       runtimeClassName: nvidia
       volumes:
         - name: models
           persistentVolumeClaim:
-            claimName: ollama-models
+            claimName: ollama-models-asteria
       initContainers:
         - name: warm-model
           image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
diff --git a/services/ai-llm/pvc.yaml b/services/ai-llm/pvc.yaml
index 51c0384c..6c57a34d 100644
--- a/services/ai-llm/pvc.yaml
+++ b/services/ai-llm/pvc.yaml
@@ -2,12 +2,12 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: ollama-models
+  name: ollama-models-asteria
   namespace: ai
 spec:
   accessModes:
-    - ReadWriteOnce
+    - ReadWriteMany
   resources:
     requests:
       storage: 30Gi
-  storageClassName: astreae
+  storageClassName: asteria