titan-iac/services/openclaw/ollama-deployment.yaml

# services/openclaw/ollama-deployment.yaml
# Deployment for the Ollama server in the "openclaw" namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: openclaw
  name: openclaw-ollama
  labels:
    app: openclaw-ollama
spec:
  # Exactly one pod; Recreate stops the old pod before the new one is created.
  replicas: 1
  strategy:
    type: Recreate
  # Keep only the two most recent ReplicaSets for rollbacks.
  revisionHistoryLimit: 2
  selector:
    matchLabels:
      app: openclaw-ollama
  template:
    metadata:
      # Annotations naming the model tag and the GPU node pool for this pod.
      annotations:
        ai.bstein.dev/gpu: "Jetson pool (titan-20/21)"
        ai.bstein.dev/model: "qwen2.5:1.5b-instruct-q4_0"
      # Must match spec.selector.matchLabels above.
      labels:
        app: openclaw-ollama
    spec:
      # Hard scheduling requirement: only the two listed hosts are eligible.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values: [titan-20, titan-21]
      # Run the pod's containers under the "nvidia" RuntimeClass.
      runtimeClassName: nvidia
      volumes:
        # Pod-lifetime scratch storage; its contents are lost when the pod
        # is removed from the node.
        - emptyDir: {}
          name: models
      initContainers:
        # Pulls OLLAMA_MODEL into the "models" volume before the main container
        # starts, by running a temporary Ollama server inside this container.
        - name: warm-model
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          imagePullPolicy: IfNotPresent
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            # Same path as the "models" volume mount below, so pulled model
            # files land on the volume rather than the container filesystem.
            - name: OLLAMA_MODELS
              value: /root/.ollama
            # Model tag consumed by the pull command in the script below.
            - name: OLLAMA_MODEL
              value: qwen2.5:1.5b-instruct-q4_0
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          command:
            - /bin/sh
            - -c
            - |
              set -e
              ollama serve >/tmp/ollama.log 2>&1 &
              server_pid=$!

              # Poll until the server answers instead of sleeping a fixed
              # time: on a slow start a fixed delay lets the pull run before
              # the server is listening, which fails the init container.
              attempts=0
              until ollama list >/dev/null 2>&1; do
                attempts=$((attempts + 1))
                # Give up early if the server process has already exited.
                if ! kill -0 "${server_pid}" 2>/dev/null; then
                  echo "ollama server exited during startup" >&2
                  cat /tmp/ollama.log >&2 || true
                  exit 1
                fi
                if [ "${attempts}" -ge 60 ]; then
                  echo "ollama server not ready after ${attempts} attempts" >&2
                  cat /tmp/ollama.log >&2 || true
                  exit 1
                fi
                sleep 1
              done

              ollama pull "${OLLAMA_MODEL}"

              # Stop the server started above by PID and reap it; failures
              # here are ignored because the model is already on the volume.
              kill "${server_pid}" 2>/dev/null || true
              wait "${server_pid}" 2>/dev/null || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "1"
              memory: 4Gi
              nvidia.com/gpu.shared: 1
            limits:
              cpu: "4"
              memory: 10Gi
              nvidia.com/gpu.shared: 1
      containers:
        # Long-running Ollama server; reads models from the "models" volume.
        - name: ollama
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          imagePullPolicy: IfNotPresent
          env:
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            - name: OLLAMA_KEEP_ALIVE
              value: "6h"
            # Same path as the "models" volume mount below.
            - name: OLLAMA_MODELS
              value: "/root/.ollama"
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
          ports:
            - containerPort: 11434
              name: http
          resources:
            limits:
              cpu: "6"
              memory: 12Gi
              nvidia.com/gpu.shared: "1"
            requests:
              cpu: "2"
              memory: 8Gi
              nvidia.com/gpu.shared: "1"
          # Ready once GET /api/tags on port 11434 succeeds; first check runs
          # 15s after container start, then every 10s with a 5s timeout.
          readinessProbe:
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 5
            httpGet:
              port: 11434
              path: /api/tags
          volumeMounts:
            - mountPath: /root/.ollama
              name: models