ai-llm: GPU qwen2.5-coder on titan-24; add chat.ai host

Author: Brad Stein
Date:   2025-12-20 15:19:03 -03:00
Commit: 9162f5789f (parent: 39a914effd)

2 changed files with 23 additions and 3 deletions

File 1 of 2: the Ollama Deployment manifest

@@ -35,7 +35,7 @@ spec:
         - name: OLLAMA_MODELS
           value: /root/.ollama
         - name: OLLAMA_MODEL
-          value: phi3:mini
+          value: qwen2.5-coder:7b-instruct-q4_0
         command:
         - /bin/sh
         - -c
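
Note: the /bin/sh -c body that consumes OLLAMA_MODEL sits below this hunk and is not part of the diff. For readers of the change, a minimal sketch of the usual pull-before-serve wrapper such a command contains, assuming the common pattern (the backgrounded server, the sleep, and the kill are illustrative, not the file's actual script):

    # Hypothetical sketch of the elided command body, not the committed script.
    command:
    - /bin/sh
    - -c
    - |
      ollama serve &                # temporary server so "ollama pull" has an API to reach
      sleep 5                       # crude wait for the listener on :11434
      ollama pull "$OLLAMA_MODEL"   # fetches qwen2.5-coder:7b-instruct-q4_0 into /root/.ollama
      kill %1                       # stop the temporary server once the pull lands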
@@ -52,6 +52,9 @@ spec:
           requests:
             cpu: 250m
             memory: 1Gi
+            nvidia.com/gpu: 1
+          limits:
+            nvidia.com/gpu: 1
       containers:
       - name: ollama
         image: ollama/ollama:latest
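
nvidia.com/gpu is an extended resource, so Kubernetes requires the request to equal the limit, which is why both appear with the same value (setting only the limit would also work, since the request then defaults to it). Nothing in this hunk pins the pod to titan-24, so the scheduling constraint presumably lives elsewhere in the file; a minimal sketch of how such a pin usually looks, with the hostname label and runtime class as assumptions:

    # Hypothetical scheduling pin; this commit shows no nodeSelector of its own.
    spec:
      nodeSelector:
        kubernetes.io/hostname: titan-24   # or any label the GPU node carries
      runtimeClassName: nvidia             # only if the cluster routes GPU pods via a runtime class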
@@ -77,6 +80,8 @@ spec:
           requests:
             cpu: "2"
             memory: 8Gi
+            nvidia.com/gpu: 1
           limits:
             cpu: "4"
             memory: 12Gi
+            nvidia.com/gpu: 1
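
With the q4_0 quantization, a 7B model's weights are on the order of 4-5 GiB, so one GPU plus the 8-12Gi memory band is a plausible fit, though the first load onto the GPU can take a while. A readiness probe against Ollama's HTTP API keeps traffic away until the server answers; a sketch, assuming the default port 11434 and that the elided parts of the file do not already define one:

    # Hypothetical probe; the manifest may already carry one outside these hunks.
    readinessProbe:
      httpGet:
        path: /api/tags    # lightweight Ollama endpoint that lists local models
        port: 11434
      initialDelaySeconds: 10
      periodSeconds: 10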

File 2 of 2: the bstein.dev Ingress manifest

@@ -9,9 +9,9 @@ metadata:
     traefik.ingress.kubernetes.io/router.entrypoints: websecure
     traefik.ingress.kubernetes.io/router.tls: "true"
     cert-manager.io/cluster-issuer: letsencrypt
-spec:
+spec:
   tls:
-  - hosts: [ "bstein.dev" ]
+  - hosts: [ "bstein.dev", "chat.ai.bstein.dev" ]
     secretName: bstein-dev-home-tls
   rules:
   - host: bstein.dev
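
(The spec:/spec: pair renders identically because the change there is apparently whitespace-only.) Adding chat.ai.bstein.dev to the existing tls entry makes cert-manager's ingress-shim reissue bstein-dev-home-tls as a multi-SAN certificate via the letsencrypt ClusterIssuer; for HTTP-01 the new name must resolve publicly before issuance succeeds. Roughly the Certificate it derives (a sketch; cert-manager generates this automatically and defaults may differ):

    apiVersion: cert-manager.io/v1
    kind: Certificate
    metadata:
      name: bstein-dev-home-tls     # ingress-shim names the Certificate after the secret
    spec:
      secretName: bstein-dev-home-tls
      dnsNames:
      - bstein.dev
      - chat.ai.bstein.dev
      issuerRef:
        name: letsencrypt
        kind: ClusterIssuer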
@@ -29,3 +29,18 @@ spec:
           service:
             name: bstein-dev-home-frontend
             port: { number: 80 }
+  - host: chat.ai.bstein.dev
+    http:
+      paths:
+      - path: /api
+        pathType: Prefix
+        backend:
+          service:
+            name: bstein-dev-home-backend
+            port: { number: 80 }
+      - path: /
+        pathType: Prefix
+        backend:
+          service:
+            name: bstein-dev-home-frontend
+            port: { number: 80 }
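
The two Prefix paths split the new host so that /api and everything under it reaches the backend while all other paths fall through to the frontend (Traefik prefers the longer prefix), letting the chat UI call its API same-origin with no CORS setup. The ingress only implies that both Services exist and listen on port 80; a sketch of the backend Service shape it expects, with the selector and targetPort as assumptions:

    # Hypothetical Service shape; the ingress implies only the name and port 80.
    apiVersion: v1
    kind: Service
    metadata:
      name: bstein-dev-home-backend
    spec:
      selector:
        app: bstein-dev-home-backend   # assumed pod label
      ports:
      - port: 80                       # the port the ingress targets
        targetPort: 8000               # assumed container port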