diff --git a/infrastructure/resource-guardrails/limitranges.yaml b/infrastructure/resource-guardrails/limitranges.yaml index ec13e258..facc6dfa 100644 --- a/infrastructure/resource-guardrails/limitranges.yaml +++ b/infrastructure/resource-guardrails/limitranges.yaml @@ -132,6 +132,12 @@ items: name: atlas-default-compute namespace: outline spec: *defaultCompute + - apiVersion: v1 + kind: LimitRange + metadata: + name: atlas-default-compute + namespace: openclaw + spec: *defaultCompute - apiVersion: v1 kind: LimitRange metadata: diff --git a/services/maintenance/ariadne-deployment.yaml b/services/maintenance/ariadne-deployment.yaml index e3d643f5..b8a0de62 100644 --- a/services/maintenance/ariadne-deployment.yaml +++ b/services/maintenance/ariadne-deployment.yaml @@ -404,6 +404,8 @@ spec: value: "10" - name: ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP value: "45 */6 * * *" + - name: ARIADNE_SCHEDULE_TESTING_TRIAGE + value: "*/15 * * * *" - name: JENKINS_WORKSPACE_NAMESPACE value: jenkins - name: JENKINS_WORKSPACE_PVC_PREFIX diff --git a/services/maintenance/ariadne-rbac.yaml b/services/maintenance/ariadne-rbac.yaml index 009d9c8a..5f184e52 100644 --- a/services/maintenance/ariadne-rbac.yaml +++ b/services/maintenance/ariadne-rbac.yaml @@ -15,7 +15,18 @@ rules: - create - apiGroups: [""] resources: + - events - pods + - pods/log + verbs: + - get + - list + - watch + - apiGroups: ["apps"] + resources: + - daemonsets + - deployments + - statefulsets verbs: - get - list diff --git a/services/openclaw/configmap.yaml b/services/openclaw/configmap.yaml index e8c7da05..77b529a1 100644 --- a/services/openclaw/configmap.yaml +++ b/services/openclaw/configmap.yaml @@ -86,6 +86,17 @@ data: You are OpenClaw running inside the Titan Kubernetes cluster as a read-only testing and operations triage assistant. + Ariadne owns deterministic evidence collection. Start every testing triage + by reading: + + - `curl -sS "$ARIADNE_BASE_URL/api/internal/testing/triage/latest"` + - if that is missing or stale, ask a human to run: + `curl -sS -X POST "$ARIADNE_BASE_URL/api/internal/testing/triage/collect"` + + Treat the Ariadne bundle as the source of truth. Your job is to explain the + evidence and propose small Flux/IaC changes, not to rediscover everything + from raw shell commands. + Your job is to explain failing or suspicious test runs without mutating the cluster. Prefer concise incident summaries with: @@ -97,6 +108,7 @@ data: Useful read-only commands: + - `curl -sS "$ARIADNE_BASE_URL/api/internal/testing/triage/latest"` - `kubectl get nodes -o wide` - `kubectl get pods -A -o wide` - `kubectl get pods -A --field-selector status.phase!=Running,status.phase!=Succeeded -o wide` diff --git a/services/openclaw/deployment.yaml b/services/openclaw/deployment.yaml index fae9f884..e232a28e 100644 --- a/services/openclaw/deployment.yaml +++ b/services/openclaw/deployment.yaml @@ -22,7 +22,7 @@ spec: ai.bstein.dev/model: qwen2.5:7b-instruct-q4_0 ai.bstein.dev/instructions: kubectl-field-selectors ai.bstein.dev/role: testing-triage - ai.bstein.dev/placement: gateway lane (titan-20) + ai.bstein.dev/placement: arm64 gateway lane (jetson preferred) spec: serviceAccountName: openclaw-triage automountServiceAccountToken: true @@ -35,10 +35,41 @@ spec: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: kubernetes.io/hostname + - key: kubernetes.io/arch operator: In values: - - titan-20 + - arm64 + - key: node-role.kubernetes.io/worker + operator: In + values: + - "true" + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: atlas.bstein.dev/spillover + operator: DoesNotExist + - weight: 95 + preference: + matchExpressions: + - key: jetson + operator: In + values: + - "true" + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - weight: 35 + preference: + matchExpressions: + - key: hardware + operator: In + values: + - rpi4 initContainers: - name: init-permissions image: busybox:1.37 @@ -136,6 +167,8 @@ spec: value: /home/node/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - name: VICTORIA_METRICS_URL value: http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428 + - name: ARIADNE_BASE_URL + value: http://ariadne.maintenance.svc.cluster.local - name: JENKINS_BASE_URL value: http://jenkins.jenkins.svc.cluster.local:8080 - name: GITEA_BASE_URL