openclaw: add testing triage workspace
This commit is contained in:
parent
b7caf4cfec
commit
1bc58e10c0
@ -26,6 +26,7 @@ resources:
|
||||
- mailu/kustomization.yaml
|
||||
- jenkins/kustomization.yaml
|
||||
- ai-llm/kustomization.yaml
|
||||
- openclaw/kustomization.yaml
|
||||
- typhon/kustomization.yaml
|
||||
- nextcloud/kustomization.yaml
|
||||
- nextcloud-mail-sync/kustomization.yaml
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
# clusters/atlas/flux-system/applications/openclaw/kustomization.yaml
# Flux Kustomization that reconciles ./services/openclaw from the flux-system
# GitRepository into the openclaw namespace, after its dependencies are ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: openclaw
  namespace: flux-system
  annotations:
    # NOTE(review): IfNotPresent tells the applying Kustomization to create
    # this object only when it is missing, so later edits to this file are
    # not applied to an existing object - confirm that is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  path: ./services/openclaw
  targetNamespace: openclaw
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # wait: true health-checks every reconciled resource, which already covers
  # the openclaw and openclaw-ollama Deployments. Flux ignores
  # .spec.healthChecks while wait is enabled, so no explicit list is kept.
  wait: true
  # Upper bound for apply + health checks.
  timeout: 30m
  dependsOn:
    - name: cert-manager
    - name: core
    - name: longhorn
    - name: traefik
|
||||
|
||||
111
services/openclaw/configmap.yaml
Normal file
111
services/openclaw/configmap.yaml
Normal file
@ -0,0 +1,111 @@
|
||||
# services/openclaw/configmap.yaml
# Static configuration for the OpenClaw gateway. Both keys are copied into the
# persistent home directory by the init-config container of the openclaw
# Deployment:
#   openclaw.json -> /home/node/.openclaw/openclaw.json
#   AGENTS.md     -> /home/node/.openclaw/workspace/AGENTS.md
apiVersion: v1
kind: ConfigMap
metadata:
  name: openclaw-config
  namespace: openclaw
  labels:
    app: openclaw
data:
  # Gateway settings: one "testing-triage" agent, token auth read from the
  # OPENCLAW_GATEWAY_TOKEN environment variable, and a single model provider
  # pointing at the in-namespace openclaw-ollama Service.
  openclaw.json: |
    {
      "agents": {
        "defaults": {
          "workspace": "/home/node/.openclaw/workspace",
          "model": {
            "primary": "ollama-cluster/qwen2.5:7b-instruct-q4_0"
          },
          "models": {
            "ollama-cluster/qwen2.5:7b-instruct-q4_0": {}
          }
        },
        "list": [
          {
            "id": "testing-triage",
            "name": "Titan Testing Triage",
            "workspace": "/home/node/.openclaw/workspace"
          }
        ]
      },
      "gateway": {
        "mode": "local",
        "auth": {
          "mode": "token",
          "token": {
            "source": "env",
            "provider": "default",
            "id": "OPENCLAW_GATEWAY_TOKEN"
          }
        },
        "port": 18789,
        "bind": "lan",
        "controlUi": {
          "enabled": true
        },
        "tailscale": {
          "mode": "off",
          "resetOnExit": false
        }
      },
      "session": {
        "dmScope": "per-channel-peer"
      },
      "tools": {
        "profile": "coding"
      },
      "models": {
        "mode": "merge",
        "providers": {
          "ollama-cluster": {
            "baseUrl": "http://openclaw-ollama.openclaw.svc.cluster.local:11434/v1",
            "api": "openai-completions",
            "apiKey": "ollama",
            "models": [
              {
                "id": "qwen2.5:7b-instruct-q4_0",
                "name": "qwen2.5:7b-instruct-q4_0 (Titan local)",
                "contextWindow": 32768,
                "maxTokens": 4096,
                "input": ["text"],
                "cost": {
                  "input": 0,
                  "output": 0,
                  "cacheRead": 0,
                  "cacheWrite": 0
                },
                "reasoning": false
              }
            ]
          }
        }
      }
    }
  # Standing instructions for the triage agent (read-only investigation only).
  AGENTS.md: |
    # Titan Testing Triage

    You are OpenClaw running inside the Titan Kubernetes cluster as a read-only
    testing and operations triage assistant.

    Your job is to explain failing or suspicious test runs without mutating the
    cluster. Prefer concise incident summaries with:

    - affected suite, namespace, pod, build, or node
    - likely root cause
    - exact evidence gathered
    - the smallest suggested Flux/IaC change
    - commands a human can run to verify the conclusion

    Useful read-only commands:

    - `kubectl get nodes -o wide`
    - `kubectl get pods -A -o wide`
    - `kubectl get events -A --sort-by=.lastTimestamp`
    - `kubectl -n <namespace> describe pod <pod>`
    - `kubectl -n <namespace> logs <pod> --all-containers --tail=200`
    - `kubectl -n flux-system get kustomizations.kustomize.toolkit.fluxcd.io`
    - `curl -sS "$VICTORIA_METRICS_URL/api/v1/query?query=up"`

    Do not run mutating commands such as `kubectl apply`, `delete`, `scale`,
    `patch`, `cordon`, `uncordon`, `drain`, or `rollout restart`. Do not read
    Kubernetes Secret values. Draft repo changes or operator steps instead.
|
||||
|
||||
175
services/openclaw/deployment.yaml
Normal file
175
services/openclaw/deployment.yaml
Normal file
@ -0,0 +1,175 @@
|
||||
# services/openclaw/deployment.yaml
# OpenClaw gateway for the testing-triage agent.
# Two init containers prepare the pod before the gateway starts:
#   init-config     copies openclaw.json and AGENTS.md from the ConfigMap into
#                   the persistent home volume
#   install-kubectl copies a kubectl binary into a shared emptyDir that the
#                   gateway mounts at /home/node/.local/bin (first on its PATH)
apiVersion: apps/v1
kind: Deployment
metadata:
  name: openclaw
  namespace: openclaw
  labels:
    app: openclaw
spec:
  replicas: 1
  revisionHistoryLimit: 2
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: openclaw
  template:
    metadata:
      labels:
        app: openclaw
      annotations:
        ai.bstein.dev/role: testing-triage
        ai.bstein.dev/placement: Jetson pool (titan-20/21)
    spec:
      # The service account token is mounted so the staged kubectl can
      # authenticate as openclaw-triage.
      serviceAccountName: openclaw-triage
      automountServiceAccountToken: true
      securityContext:
        fsGroup: 1000
        seccompProfile:
          type: RuntimeDefault
      # Hard pin to the two nodes named in the placement annotation.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                      - titan-20
                      - titan-21
      initContainers:
        - name: init-config
          image: busybox:1.37
          imagePullPolicy: IfNotPresent
          command:
            - sh
            - -c
            - |
              set -e
              cp /config/openclaw.json /home/node/.openclaw/openclaw.json
              mkdir -p /home/node/.openclaw/workspace
              cp /config/AGENTS.md /home/node/.openclaw/workspace/AGENTS.md
          securityContext:
            runAsUser: 1000
            runAsGroup: 1000
            # Same hardening as the gateway container; copying files needs no
            # capabilities or privilege escalation.
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          volumeMounts:
            - name: home
              mountPath: /home/node/.openclaw
            - name: config
              mountPath: /config
          resources:
            requests:
              cpu: 25m
              memory: 32Mi
            limits:
              cpu: 100m
              memory: 64Mi
        - name: install-kubectl
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          imagePullPolicy: IfNotPresent
          command:
            - /bin/sh
            - -c
            - |
              set -e
              cp "$(command -v kubectl)" /tools/kubectl
              chmod 0755 /tools/kubectl
          # Same hardening as the gateway container; the image's own user is
          # left unchanged.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          volumeMounts:
            - name: tools
              mountPath: /tools
          resources:
            requests:
              cpu: 25m
              memory: 32Mi
            limits:
              cpu: 100m
              memory: 64Mi
      containers:
        - name: gateway
          image: ghcr.io/openclaw/openclaw:slim@sha256:ac2c41d7122194d32258d1ec61b33079dbc498767ecadcd50849782ad5fcb057
          imagePullPolicy: IfNotPresent
          command:
            - node
            - /app/dist/index.js
            - gateway
            - run
          ports:
            - name: gateway
              containerPort: 18789
              protocol: TCP
          env:
            - name: HOME
              value: /home/node
            - name: OPENCLAW_CONFIG_DIR
              value: /home/node/.openclaw
            - name: NODE_ENV
              value: production
            # /home/node/.local/bin comes first so the staged kubectl is found.
            - name: PATH
              value: /home/node/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
            - name: VICTORIA_METRICS_URL
              value: http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428
            - name: JENKINS_BASE_URL
              value: http://jenkins.jenkins.svc.cluster.local:8080
            - name: GITEA_BASE_URL
              value: https://scm.bstein.dev
            - name: GRAFANA_BASE_URL
              value: https://metrics.bstein.dev
            # Referenced by gateway.auth.token in openclaw.json.
            - name: OPENCLAW_GATEWAY_TOKEN
              valueFrom:
                secretKeyRef:
                  name: openclaw-secrets
                  key: OPENCLAW_GATEWAY_TOKEN
          volumeMounts:
            - name: home
              mountPath: /home/node/.openclaw
            - name: tmp
              mountPath: /tmp
            - name: tools
              mountPath: /home/node/.local/bin
          # Both probes run a short Node script inside the container that
          # requests the endpoint on 127.0.0.1 and exits 0 for any status < 400.
          readinessProbe:
            exec:
              command:
                - node
                - -e
                - "require('http').get('http://127.0.0.1:18789/readyz', r => process.exit(r.statusCode < 400 ? 0 : 1)).on('error', () => process.exit(1))"
            initialDelaySeconds: 20
            periodSeconds: 10
            timeoutSeconds: 5
          livenessProbe:
            exec:
              command:
                - node
                - -e
                - "require('http').get('http://127.0.0.1:18789/healthz', r => process.exit(r.statusCode < 400 ? 0 : 1)).on('error', () => process.exit(1))"
            initialDelaySeconds: 60
            periodSeconds: 30
            timeoutSeconds: 10
          securityContext:
            runAsNonRoot: true
            runAsUser: 1000
            runAsGroup: 1000
            allowPrivilegeEscalation: false
            # Writable paths are limited to the mounted volumes below.
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          resources:
            requests:
              cpu: 250m
              memory: 512Mi
            limits:
              cpu: "1"
              memory: 2Gi
      volumes:
        - name: home
          persistentVolumeClaim:
            claimName: openclaw-home
        - name: config
          configMap:
            name: openclaw-config
        - name: tmp
          emptyDir: {}
        - name: tools
          emptyDir: {}
|
||||
|
||||
28
services/openclaw/ingress.yaml
Normal file
28
services/openclaw/ingress.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
# services/openclaw/ingress.yaml
# Publishes the gateway at https://openclaw.bstein.dev through Traefik's
# websecure entrypoint. The TLS certificate is requested from the
# "letsencrypt" ClusterIssuer and stored in the openclaw-tls Secret.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: openclaw
  namespace: openclaw
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
spec:
  ingressClassName: traefik
  tls:
    - secretName: openclaw-tls
      hosts:
        - openclaw.bstein.dev
  rules:
    - host: openclaw.bstein.dev
      http:
        paths:
          # Every path on the host goes to the gateway Service.
          - path: /
            pathType: Prefix
            backend:
              service:
                name: openclaw
                port:
                  number: 18789
|
||||
|
||||
14
services/openclaw/kustomization.yaml
Normal file
14
services/openclaw/kustomization.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
# services/openclaw/kustomization.yaml
# Kustomize entry point for the OpenClaw stack; sets the openclaw namespace
# on the listed manifests.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: openclaw
resources:
  # Namespace, storage, configuration and access control.
  - namespace.yaml
  - pvc.yaml
  - configmap.yaml
  - rbac.yaml
  # Workloads.
  - ollama-deployment.yaml
  - deployment.yaml
  # Networking.
  - service.yaml
  - ingress.yaml
|
||||
|
||||
6
services/openclaw/namespace.yaml
Normal file
6
services/openclaw/namespace.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
# services/openclaw/namespace.yaml
# Namespace that holds every namespaced OpenClaw resource.
apiVersion: v1
kind: Namespace
metadata:
  name: openclaw
|
||||
|
||||
113
services/openclaw/ollama-deployment.yaml
Normal file
113
services/openclaw/ollama-deployment.yaml
Normal file
@ -0,0 +1,113 @@
|
||||
# services/openclaw/ollama-deployment.yaml
# Ollama server that backs the "ollama-cluster" model provider of the OpenClaw
# gateway. The warm-model init container pulls the model into the persistent
# models volume so the main container starts with it already on disk.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: openclaw-ollama
  namespace: openclaw
  labels:
    app: openclaw-ollama
spec:
  replicas: 1
  revisionHistoryLimit: 2
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: openclaw-ollama
  template:
    metadata:
      labels:
        app: openclaw-ollama
      annotations:
        ai.bstein.dev/model: qwen2.5:7b-instruct-q4_0
        ai.bstein.dev/gpu: Jetson pool (titan-20/21)
    spec:
      runtimeClassName: nvidia
      # Hard pin to the two nodes named in the gpu annotation.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                      - titan-20
                      - titan-21
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: openclaw-ollama-models
      initContainers:
        - name: warm-model
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          imagePullPolicy: IfNotPresent
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: OLLAMA_MODEL
              value: qwen2.5:7b-instruct-q4_0
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          command:
            - /bin/sh
            - -c
            - |
              set -e
              ollama serve >/tmp/ollama.log 2>&1 &
              # Poll until the temporary server answers instead of relying on
              # a fixed sleep; give up after about 60 seconds and show its log.
              attempts=0
              until ollama list >/dev/null 2>&1; do
                attempts=$((attempts + 1))
                if [ "${attempts}" -ge 60 ]; then
                  echo "ollama serve did not become ready in time" >&2
                  cat /tmp/ollama.log >&2 || true
                  exit 1
                fi
                sleep 1
              done
              ollama pull "${OLLAMA_MODEL}"
              # Stop the temporary server so the init container can exit.
              pkill ollama || true
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          resources:
            requests:
              cpu: "1"
              memory: 4Gi
              nvidia.com/gpu.shared: 1
            limits:
              cpu: "4"
              memory: 10Gi
              nvidia.com/gpu.shared: 1
      containers:
        - name: ollama
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 11434
          env:
            - name: OLLAMA_HOST
              value: 0.0.0.0
            - name: OLLAMA_KEEP_ALIVE
              value: 6h
            - name: OLLAMA_MODELS
              value: /root/.ollama
            - name: NVIDIA_VISIBLE_DEVICES
              value: all
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: compute,utility
          volumeMounts:
            - name: models
              mountPath: /root/.ollama
          # Ready once the HTTP API answers /api/tags.
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              cpu: "2"
              memory: 8Gi
              nvidia.com/gpu.shared: 1
            limits:
              cpu: "6"
              memory: 12Gi
              nvidia.com/gpu.shared: 1
|
||||
|
||||
27
services/openclaw/pvc.yaml
Normal file
27
services/openclaw/pvc.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
# services/openclaw/pvc.yaml
# Persistent storage for the OpenClaw stack, both on the "asteria" class.

# Gateway home directory; mounted at /home/node/.openclaw by the openclaw
# Deployment.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: openclaw-home
  namespace: openclaw
spec:
  storageClassName: asteria
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
---
# Ollama model store; mounted at /root/.ollama by the openclaw-ollama
# Deployment.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: openclaw-ollama-models
  namespace: openclaw
spec:
  storageClassName: asteria
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 16Gi
|
||||
|
||||
70
services/openclaw/rbac.yaml
Normal file
70
services/openclaw/rbac.yaml
Normal file
@ -0,0 +1,70 @@
|
||||
# services/openclaw/rbac.yaml
# Identity and cluster-wide read-only access for the triage agent.
# Every rule grants only get/list/watch, and Secrets are not listed.

# Service account used by the openclaw Deployment.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: openclaw-triage
  namespace: openclaw
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: openclaw-triage-readonly
rules:
  # Core API group.
  - apiGroups:
      - ""
    resources:
      - configmaps
      - endpoints
      - events
      - namespaces
      - nodes
      - persistentvolumeclaims
      - persistentvolumes
      - pods
      - pods/log
      - replicationcontrollers
      - services
    verbs:
      - get
      - list
      - watch
  # Workload controllers.
  - apiGroups:
      - apps
    resources:
      - daemonsets
      - deployments
      - replicasets
      - statefulsets
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - batch
    resources:
      - cronjobs
      - jobs
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - networking.k8s.io
    resources:
      - ingresses
      - networkpolicies
    verbs:
      - get
      - list
      - watch
  # Flux custom resources.
  - apiGroups:
      - helm.toolkit.fluxcd.io
    resources:
      - helmreleases
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - kustomize.toolkit.fluxcd.io
    resources:
      - kustomizations
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - source.toolkit.fluxcd.io
    resources:
      - gitrepositories
      - helmrepositories
    verbs:
      - get
      - list
      - watch
---
# Binds the read-only ClusterRole to the triage service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openclaw-triage-readonly
subjects:
  - kind: ServiceAccount
    name: openclaw-triage
    namespace: openclaw
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: openclaw-triage-readonly
|
||||
|
||||
35
services/openclaw/service.yaml
Normal file
35
services/openclaw/service.yaml
Normal file
@ -0,0 +1,35 @@
|
||||
# services/openclaw/service.yaml
# In-cluster Services for the OpenClaw stack. Each targetPort refers to the
# named container port of the matching Deployment.

# Gateway; this is the backend of the openclaw Ingress.
apiVersion: v1
kind: Service
metadata:
  name: openclaw
  namespace: openclaw
  labels:
    app: openclaw
spec:
  type: ClusterIP
  selector:
    app: openclaw
  ports:
    - name: gateway
      protocol: TCP
      port: 18789
      targetPort: gateway
---
# Ollama API; the gateway reaches it at
# openclaw-ollama.openclaw.svc.cluster.local:11434.
apiVersion: v1
kind: Service
metadata:
  name: openclaw-ollama
  namespace: openclaw
  labels:
    app: openclaw-ollama
spec:
  type: ClusterIP
  selector:
    app: openclaw-ollama
  ports:
    - name: http
      protocol: TCP
      port: 11434
      targetPort: http
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user