From 9162f5789f7c134ee38e59bc5ea8f829a0e3bda5 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Sat, 20 Dec 2025 15:19:03 -0300
Subject: [PATCH] ai-llm: GPU qwen2.5-coder on titan-24; add chat.ai host

---
Reviewer notes (ignored by `git am`; not part of the commit message):
- ingress.yaml: `spec:` stays at column 0 as an unchanged context line.
  The earlier revision of this patch re-added it with leading whitespace,
  which takes `spec` out of the top level of the Ingress manifest.
- The diffstat below reflects that fix. The post-image blob id on the
  ingress `index` line was not recomputed and may be stale.
- NOTE(review): this patch was restored from a whitespace-collapsed copy.
  Indentation of context and added lines assumes the usual 2-space
  Kubernetes layout with indented sequences - confirm against the target
  files before applying.

 services/ai-llm/deployment.yaml       |  7 ++++++-
 services/bstein-dev-home/ingress.yaml | 17 ++++++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml
index 25a7f2f..d78315b 100644
--- a/services/ai-llm/deployment.yaml
+++ b/services/ai-llm/deployment.yaml
@@ -35,7 +35,7 @@ spec:
             - name: OLLAMA_MODELS
               value: /root/.ollama
             - name: OLLAMA_MODEL
-              value: phi3:mini
+              value: qwen2.5-coder:7b-instruct-q4_0
           command:
             - /bin/sh
             - -c
@@ -52,6 +52,9 @@ spec:
             requests:
               cpu: 250m
               memory: 1Gi
+              nvidia.com/gpu: 1
+            limits:
+              nvidia.com/gpu: 1
       containers:
         - name: ollama
           image: ollama/ollama:latest
@@ -77,6 +80,8 @@ spec:
             requests:
               cpu: "2"
               memory: 8Gi
+              nvidia.com/gpu: 1
             limits:
               cpu: "4"
               memory: 12Gi
+              nvidia.com/gpu: 1
diff --git a/services/bstein-dev-home/ingress.yaml b/services/bstein-dev-home/ingress.yaml
index 471f1bc..7e92941 100644
--- a/services/bstein-dev-home/ingress.yaml
+++ b/services/bstein-dev-home/ingress.yaml
@@ -9,9 +9,9 @@ metadata:
     traefik.ingress.kubernetes.io/router.entrypoints: websecure
     traefik.ingress.kubernetes.io/router.tls: "true"
     cert-manager.io/cluster-issuer: letsencrypt
 spec:
   tls:
-    - hosts: [ "bstein.dev" ]
+    - hosts: [ "bstein.dev", "chat.ai.bstein.dev" ]
       secretName: bstein-dev-home-tls
   rules:
     - host: bstein.dev
@@ -29,3 +29,18 @@ spec:
               service:
                 name: bstein-dev-home-frontend
                 port: { number: 80 }
+    - host: chat.ai.bstein.dev
+      http:
+        paths:
+          - path: /api
+            pathType: Prefix
+            backend:
+              service:
+                name: bstein-dev-home-backend
+                port: { number: 80 }
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: bstein-dev-home-frontend
+                port: { number: 80 }