Compare commits
3 Commits
6c3c1342cd
...
e4a074f53e
| Author | SHA1 | Date | |
|---|---|---|---|
| e4a074f53e | |||
| b56222f40b | |||
| 30c677e6ed |
@ -1295,10 +1295,9 @@ def build_overview():
|
||||
},
|
||||
}
|
||||
)
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
test_success = timeseries_panel(
|
||||
42,
|
||||
"Ariadne + Metis Test Success Rate",
|
||||
"Platform Test Success Rate",
|
||||
TEST_SUCCESS_RATE,
|
||||
{"h": 6, "w": 6, "x": 12, "y": 14},
|
||||
unit="percent",
|
||||
@ -1306,11 +1305,14 @@ def build_overview():
|
||||
legend=None,
|
||||
legend_display="list",
|
||||
)
|
||||
test_success["description"] = (
|
||||
"Atlas Overview mirrors the Atlas Jobs internal dashboard for automation test health. "
|
||||
"Add new test series there first so they roll up here."
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
panels.append(test_success)
|
||||
test_failures = bargauge_panel(
|
||||
43,
|
||||
"Ariadne + Metis Tests with Failures (24h)",
|
||||
"Platform Tests with Failures (24h)",
|
||||
TEST_FAILURES_24H,
|
||||
{"h": 6, "w": 6, "x": 18, "y": 14},
|
||||
unit="none",
|
||||
@ -1336,7 +1338,10 @@ def build_overview():
|
||||
],
|
||||
},
|
||||
)
|
||||
test_failures["description"] = (
|
||||
"This summary is sourced from the Atlas Jobs internal dashboard rather than a separate overview-only query."
|
||||
)
|
||||
panels.append(test_failures)
|
||||
|
||||
cpu_scope = "$namespace_scope_cpu"
|
||||
gpu_scope = "$namespace_scope_gpu"
|
||||
@ -2654,10 +2659,9 @@ def build_jobs_dashboard():
|
||||
legend="{{status}}",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
coverage_panel = stat_panel(
|
||||
17,
|
||||
"Ariadne + Metis CI Coverage (%)",
|
||||
"Platform CI Coverage (%)",
|
||||
TEST_CI_COVERAGE,
|
||||
{"h": 6, "w": 4, "x": 8, "y": 11},
|
||||
unit="percent",
|
||||
@ -2665,18 +2669,21 @@ def build_jobs_dashboard():
|
||||
instant=True,
|
||||
legend="{{branch}}",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
coverage_panel["description"] = "Internal source panel for Atlas Overview automation test rollups."
|
||||
panels.append(coverage_panel)
|
||||
tests_panel = table_panel(
|
||||
18,
|
||||
"Ariadne + Metis CI Tests (latest)",
|
||||
"Platform CI Tests (latest)",
|
||||
TEST_CI_TESTS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 11},
|
||||
unit="none",
|
||||
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||
instant=True,
|
||||
)
|
||||
tests_panel["description"] = (
|
||||
"Atlas Overview test panels depend on these internal repo-tagged CI series."
|
||||
)
|
||||
panels.append(tests_panel)
|
||||
|
||||
return {
|
||||
"uid": "atlas-jobs",
|
||||
|
||||
@ -437,8 +437,7 @@ spec:
|
||||
- $patch: replace
|
||||
- name: VAULT_ENV_FILE
|
||||
value: /vault/secrets/harbor-jobservice-env.sh
|
||||
envFrom:
|
||||
- $patch: replace
|
||||
envFrom: []
|
||||
- configMapRef:
|
||||
name: harbor-jobservice-env
|
||||
volumeMounts:
|
||||
|
||||
@ -167,6 +167,58 @@ data:
|
||||
}
|
||||
}
|
||||
}
|
||||
pipelineJob('metis') {
|
||||
properties {
|
||||
pipelineTriggers {
|
||||
triggers {
|
||||
scmTrigger {
|
||||
scmpoll_spec('H/2 * * * *')
|
||||
ignorePostCommitHooks(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
definition {
|
||||
cpsScm {
|
||||
scm {
|
||||
git {
|
||||
remote {
|
||||
url('https://scm.bstein.dev/bstein/metis.git')
|
||||
credentials('gitea-pat')
|
||||
}
|
||||
branches('*/master')
|
||||
}
|
||||
}
|
||||
scriptPath('Jenkinsfile')
|
||||
}
|
||||
}
|
||||
}
|
||||
pipelineJob('metis') {
|
||||
properties {
|
||||
pipelineTriggers {
|
||||
triggers {
|
||||
scmTrigger {
|
||||
scmpoll_spec('H/5 * * * *')
|
||||
ignorePostCommitHooks(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
definition {
|
||||
cpsScm {
|
||||
scm {
|
||||
git {
|
||||
remote {
|
||||
url('https://scm.bstein.dev/bstein/metis.git')
|
||||
credentials('gitea-pat')
|
||||
}
|
||||
branches('*/master')
|
||||
}
|
||||
}
|
||||
scriptPath('Jenkinsfile')
|
||||
}
|
||||
}
|
||||
}
|
||||
pipelineJob('atlasbot') {
|
||||
properties {
|
||||
pipelineTriggers {
|
||||
|
||||
@ -302,11 +302,11 @@ spec:
|
||||
- name: ARIADNE_SCHEDULE_FIREFLY_CRON
|
||||
value: "0 3 * * *"
|
||||
- name: ARIADNE_SCHEDULE_POD_CLEANER
|
||||
value: "0 * * * *"
|
||||
value: "*/30 * * * *"
|
||||
- name: ARIADNE_SCHEDULE_OPENSEARCH_PRUNE
|
||||
value: "23 3 * * *"
|
||||
- name: ARIADNE_SCHEDULE_IMAGE_SWEEPER
|
||||
value: "30 4 * * *"
|
||||
value: "0 */4 * * *"
|
||||
- name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH
|
||||
value: "*/15 * * * *"
|
||||
- name: ARIADNE_SCHEDULE_VAULT_OIDC
|
||||
@ -320,9 +320,9 @@ spec:
|
||||
- name: ARIADNE_SCHEDULE_COMMS_SEED_ROOM
|
||||
value: "*/10 * * * *"
|
||||
- name: ARIADNE_SCHEDULE_CLUSTER_STATE
|
||||
value: "*/15 * * * *"
|
||||
value: "*/10 * * * *"
|
||||
- name: ARIADNE_CLUSTER_STATE_KEEP
|
||||
value: "168"
|
||||
value: "720"
|
||||
- name: WELCOME_EMAIL_ENABLED
|
||||
value: "true"
|
||||
- name: K8S_API_TIMEOUT_SEC
|
||||
@ -339,6 +339,12 @@ spec:
|
||||
value: "1099511627776"
|
||||
- name: OPENSEARCH_INDEX_PATTERNS
|
||||
value: kube-*,journald-*,trace-analytics-*
|
||||
- name: METIS_BASE_URL
|
||||
value: http://metis.maintenance.svc.cluster.local
|
||||
- name: METIS_TIMEOUT_SEC
|
||||
value: "15"
|
||||
- name: ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH
|
||||
value: "*/30 * * * *"
|
||||
- name: METRICS_PATH
|
||||
value: "/metrics"
|
||||
resources:
|
||||
|
||||
@ -24,6 +24,52 @@ spec:
|
||||
---
|
||||
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||
kind: ImageRepository
|
||||
metadata:
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
spec:
|
||||
image: registry.bstein.dev/bstein/metis
|
||||
interval: 1m0s
|
||||
secretRef:
|
||||
name: harbor-regcred
|
||||
---
|
||||
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||
kind: ImagePolicy
|
||||
metadata:
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
spec:
|
||||
imageRepositoryRef:
|
||||
name: metis
|
||||
policy:
|
||||
semver:
|
||||
range: ">=0.1.0-0"
|
||||
---
|
||||
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||
kind: ImageRepository
|
||||
metadata:
|
||||
name: metis-sentinel
|
||||
namespace: maintenance
|
||||
spec:
|
||||
image: registry.bstein.dev/bstein/metis-sentinel
|
||||
interval: 1m0s
|
||||
secretRef:
|
||||
name: harbor-regcred
|
||||
---
|
||||
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||
kind: ImagePolicy
|
||||
metadata:
|
||||
name: metis-sentinel
|
||||
namespace: maintenance
|
||||
spec:
|
||||
imageRepositoryRef:
|
||||
name: metis-sentinel
|
||||
policy:
|
||||
semver:
|
||||
range: ">=0.1.0-0"
|
||||
---
|
||||
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||
kind: ImageRepository
|
||||
metadata:
|
||||
name: soteria
|
||||
namespace: maintenance
|
||||
|
||||
@ -6,32 +6,47 @@ resources:
|
||||
- image.yaml
|
||||
- secretproviderclass.yaml
|
||||
- soteria-configmap.yaml
|
||||
- metis-configmap.yaml
|
||||
- metis-data-pvc.yaml
|
||||
- vault-serviceaccount.yaml
|
||||
- vault-sync-deployment.yaml
|
||||
- ariadne-serviceaccount.yaml
|
||||
- ariadne-rbac.yaml
|
||||
- disable-k3s-traefik-serviceaccount.yaml
|
||||
- k3s-traefik-cleanup-rbac.yaml
|
||||
- metis-serviceaccount.yaml
|
||||
- metis-rbac.yaml
|
||||
- metis-token-sync-serviceaccount.yaml
|
||||
- metis-token-sync-rbac.yaml
|
||||
- node-nofile-serviceaccount.yaml
|
||||
- pod-cleaner-rbac.yaml
|
||||
- soteria-serviceaccount.yaml
|
||||
- soteria-rbac.yaml
|
||||
- ariadne-deployment.yaml
|
||||
- metis-deployment.yaml
|
||||
- oneoffs/ariadne-migrate-job.yaml
|
||||
- ariadne-service.yaml
|
||||
- soteria-deployment.yaml
|
||||
- disable-k3s-traefik-daemonset.yaml
|
||||
- oneoffs/k3s-traefik-cleanup-job.yaml
|
||||
- node-nofile-daemonset.yaml
|
||||
- metis-sentinel-daemonset.yaml
|
||||
- metis-k3s-token-sync-cronjob.yaml
|
||||
- k3s-agent-restart-daemonset.yaml
|
||||
- pod-cleaner-cronjob.yaml
|
||||
- node-image-sweeper-serviceaccount.yaml
|
||||
- node-image-sweeper-daemonset.yaml
|
||||
- image-sweeper-cronjob.yaml
|
||||
- metis-service.yaml
|
||||
- metis-ingress.yaml
|
||||
- soteria-service.yaml
|
||||
images:
|
||||
- name: registry.bstein.dev/bstein/ariadne
|
||||
newTag: 0.1.0-22 # {"$imagepolicy": "maintenance:ariadne:tag"}
|
||||
- name: registry.bstein.dev/bstein/metis
|
||||
newTag: 0.1.0-0 # {"$imagepolicy": "maintenance:metis:tag"}
|
||||
- name: registry.bstein.dev/bstein/metis-sentinel
|
||||
newTag: 0.1.0-0 # {"$imagepolicy": "maintenance:metis-sentinel:tag"}
|
||||
- name: registry.bstein.dev/bstein/soteria
|
||||
newTag: 0.1.0-11 # {"$imagepolicy": "maintenance:soteria:tag"}
|
||||
configMapGenerator:
|
||||
|
||||
20
services/maintenance/metis-configmap.yaml
Normal file
20
services/maintenance/metis-configmap.yaml
Normal file
@ -0,0 +1,20 @@
|
||||
# services/maintenance/metis-configmap.yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
data:
|
||||
METIS_BIND_ADDR: :8080
|
||||
METIS_INVENTORY_PATH: /app/inventory.titan-rpi4.yaml
|
||||
METIS_DATA_DIR: /var/lib/metis
|
||||
METIS_DEFAULT_FLASH_HOST: titan-22
|
||||
METIS_FLASH_HOSTS: titan-22
|
||||
METIS_LOCAL_HOST: titan-22
|
||||
METIS_ALLOWED_GROUPS: admin,maintainer
|
||||
METIS_MAX_DEVICE_BYTES: "300000000000"
|
||||
METIS_SENTINEL_PUSH_URL: http://metis.maintenance.svc.cluster.local/internal/sentinel/snapshot
|
||||
METIS_SENTINEL_INTERVAL_SEC: "1800"
|
||||
METIS_SENTINEL_NSENTER: "1"
|
||||
METIS_IMAGE_RPI4_ARMBIAN_LONGHORN: https://armbian.chi.auroradev.org/dl/rpi4b/archive/Armbian_26.2.1_Rpi4b_noble_current_6.18.9_minimal.img.xz
|
||||
METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256: sha256:c450687adf4cc6a59725c43aefd58baf42ec71bdd379227d403cdde281768e46
|
||||
13
services/maintenance/metis-data-pvc.yaml
Normal file
13
services/maintenance/metis-data-pvc.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
# services/maintenance/metis-data-pvc.yaml
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: metis-data
|
||||
namespace: maintenance
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 40Gi
|
||||
storageClassName: local-path
|
||||
47
services/maintenance/metis-deployment.yaml
Normal file
47
services/maintenance/metis-deployment.yaml
Normal file
@ -0,0 +1,47 @@
|
||||
# services/maintenance/metis-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
spec:
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: metis
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: metis
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
spec:
|
||||
serviceAccountName: metis
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: titan-22
|
||||
kubernetes.io/arch: amd64
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: metis
|
||||
image: registry.bstein.dev/bstein/metis:latest
|
||||
imagePullPolicy: Always
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: metis
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
27
services/maintenance/metis-ingress.yaml
Normal file
27
services/maintenance/metis-ingress.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
# services/maintenance/metis-ingress.yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: traefik
|
||||
cert-manager.io/cluster-issuer: letsencrypt
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-forward-auth@kubernetescrd
|
||||
spec:
|
||||
tls:
|
||||
- hosts: ["metis.bstein.dev"]
|
||||
secretName: metis-tls
|
||||
rules:
|
||||
- host: metis.bstein.dev
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: metis
|
||||
port:
|
||||
number: 80
|
||||
51
services/maintenance/metis-k3s-token-sync-cronjob.yaml
Normal file
51
services/maintenance/metis-k3s-token-sync-cronjob.yaml
Normal file
@ -0,0 +1,51 @@
|
||||
# services/maintenance/metis-k3s-token-sync-cronjob.yaml
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: metis-k3s-token-sync
|
||||
namespace: maintenance
|
||||
spec:
|
||||
schedule: "11 */6 * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
successfulJobsHistoryLimit: 1
|
||||
failedJobsHistoryLimit: 2
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
serviceAccountName: metis-token-sync
|
||||
restartPolicy: OnFailure
|
||||
nodeSelector:
|
||||
kubernetes.io/arch: arm64
|
||||
node-role.kubernetes.io/control-plane: "true"
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: sync
|
||||
image: registry.bstein.dev/bstein/kubectl:1.35.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
args:
|
||||
- |
|
||||
set -euo pipefail
|
||||
token="$(tr -d '\n' < /host/var/lib/rancher/k3s/server/node-token)"
|
||||
kubectl -n maintenance create secret generic metis-runtime \
|
||||
--from-literal=k3s_token="${token}" \
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
volumeMounts:
|
||||
- name: k3s-server
|
||||
mountPath: /host/var/lib/rancher/k3s/server
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: k3s-server
|
||||
hostPath:
|
||||
path: /var/lib/rancher/k3s/server
|
||||
27
services/maintenance/metis-rbac.yaml
Normal file
27
services/maintenance/metis-rbac.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
# services/maintenance/metis-rbac.yaml
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: metis-node-manager
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- nodes
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- delete
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: metis-node-manager
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: metis-node-manager
|
||||
133
services/maintenance/metis-sentinel-daemonset.yaml
Normal file
133
services/maintenance/metis-sentinel-daemonset.yaml
Normal file
@ -0,0 +1,133 @@
|
||||
# services/maintenance/metis-sentinel-daemonset.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: metis-sentinel
|
||||
namespace: maintenance
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: metis-sentinel
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: metis-sentinel
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
spec:
|
||||
serviceAccountName: metis
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: metis-sentinel
|
||||
image: registry.bstein.dev/bstein/metis-sentinel:latest
|
||||
imagePullPolicy: Always
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
args:
|
||||
- |
|
||||
set -eu
|
||||
out_dir="${METIS_SENTINEL_OUT:-/var/run/metis-sentinel}"
|
||||
interval="${METIS_SENTINEL_INTERVAL_SEC:-120}"
|
||||
mkdir -p "${out_dir}"
|
||||
while true; do
|
||||
ts="$(date -u +%Y%m%dT%H%M%SZ)"
|
||||
node="${METIS_SENTINEL_NODE:-unknown}"
|
||||
tmp="${out_dir}/${node}-${ts}.json.tmp"
|
||||
out="${out_dir}/${node}-${ts}.json"
|
||||
if metis-sentinel > "${tmp}"; then
|
||||
mv "${tmp}" "${out}"
|
||||
else
|
||||
rm -f "${tmp}" || true
|
||||
fi
|
||||
sleep "${interval}"
|
||||
done
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: metis
|
||||
env:
|
||||
- name: METIS_SENTINEL_NODE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
volumeMounts:
|
||||
- name: sentinel-output
|
||||
mountPath: /var/run/metis-sentinel
|
||||
resources:
|
||||
requests:
|
||||
cpu: 25m
|
||||
memory: 64Mi
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 256Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
runAsUser: 0
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
- name: sentinel-pusher
|
||||
image: curlimages/curl:8.12.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
args:
|
||||
- |
|
||||
set -eu
|
||||
out_dir="${METIS_SENTINEL_OUT:-/var/run/metis-sentinel}"
|
||||
push_url="${METIS_SENTINEL_PUSH_URL:-}"
|
||||
interval="${METIS_SENTINEL_PUSH_INTERVAL_SEC:-120}"
|
||||
timeout="${METIS_SENTINEL_PUSH_TIMEOUT_SEC:-10}"
|
||||
mkdir -p "${out_dir}"
|
||||
while true; do
|
||||
for snapshot in "${out_dir}"/*.json; do
|
||||
[ -f "${snapshot}" ] || continue
|
||||
if [ -z "${push_url}" ]; then
|
||||
break
|
||||
fi
|
||||
if curl -fsS --connect-timeout "${timeout}" --max-time "${timeout}" \
|
||||
-X POST \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Metis-Node: ${METIS_SENTINEL_NODE:-unknown}" \
|
||||
--data-binary "@${snapshot}" \
|
||||
"${push_url}"; then
|
||||
rm -f "${snapshot}"
|
||||
fi
|
||||
done
|
||||
sleep "${interval}"
|
||||
done
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: metis
|
||||
env:
|
||||
- name: METIS_SENTINEL_NODE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
volumeMounts:
|
||||
- name: sentinel-output
|
||||
mountPath: /var/run/metis-sentinel
|
||||
resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 32Mi
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
runAsUser: 0
|
||||
capabilities:
|
||||
drop: ["ALL"]
|
||||
volumes:
|
||||
- name: sentinel-output
|
||||
emptyDir: {}
|
||||
18
services/maintenance/metis-service.yaml
Normal file
18
services/maintenance/metis-service.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
# services/maintenance/metis-service.yaml
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "80"
|
||||
prometheus.io/path: "/metrics"
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: metis
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: http
|
||||
6
services/maintenance/metis-serviceaccount.yaml
Normal file
6
services/maintenance/metis-serviceaccount.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
# services/maintenance/metis-serviceaccount.yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: metis
|
||||
namespace: maintenance
|
||||
30
services/maintenance/metis-token-sync-rbac.yaml
Normal file
30
services/maintenance/metis-token-sync-rbac.yaml
Normal file
@ -0,0 +1,30 @@
|
||||
# services/maintenance/metis-token-sync-rbac.yaml
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: metis-token-sync
|
||||
namespace: maintenance
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- secrets
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: metis-token-sync
|
||||
namespace: maintenance
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: metis-token-sync
|
||||
namespace: maintenance
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: metis-token-sync
|
||||
@ -0,0 +1,6 @@
|
||||
# services/maintenance/metis-token-sync-serviceaccount.yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: metis-token-sync
|
||||
namespace: maintenance
|
||||
@ -1125,7 +1125,7 @@
|
||||
{
|
||||
"id": 17,
|
||||
"type": "stat",
|
||||
"title": "Ariadne + Metis CI Coverage (%)",
|
||||
"title": "Platform CI Coverage (%)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1183,12 +1183,13 @@
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
}
|
||||
},
|
||||
"description": "Internal source panel for Atlas Overview automation test rollups."
|
||||
},
|
||||
{
|
||||
"id": 18,
|
||||
"type": "table",
|
||||
"title": "Ariadne + Metis CI Tests (latest)",
|
||||
"title": "Platform CI Tests (latest)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1233,7 +1234,8 @@
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
],
|
||||
"description": "Atlas Overview test panels depend on these internal repo-tagged CI series."
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
|
||||
@ -1677,7 +1677,7 @@
|
||||
{
|
||||
"id": 42,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne + Metis Test Success Rate",
|
||||
"title": "Platform Test Success Rate",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1709,12 +1709,13 @@
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Atlas Overview mirrors the Atlas Jobs internal dashboard for automation test health. Add new test series there first so they roll up here."
|
||||
},
|
||||
{
|
||||
"id": 43,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne + Metis Tests with Failures (24h)",
|
||||
"title": "Platform Tests with Failures (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1814,7 +1815,8 @@
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
],
|
||||
"description": "This summary is sourced from the Atlas Jobs internal dashboard rather than a separate overview-only query."
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
|
||||
@ -1134,7 +1134,7 @@ data:
|
||||
{
|
||||
"id": 17,
|
||||
"type": "stat",
|
||||
"title": "Ariadne + Metis CI Coverage (%)",
|
||||
"title": "Platform CI Coverage (%)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1192,12 +1192,13 @@ data:
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
}
|
||||
},
|
||||
"description": "Internal source panel for Atlas Overview automation test rollups."
|
||||
},
|
||||
{
|
||||
"id": 18,
|
||||
"type": "table",
|
||||
"title": "Ariadne + Metis CI Tests (latest)",
|
||||
"title": "Platform CI Tests (latest)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1242,7 +1243,8 @@ data:
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
],
|
||||
"description": "Atlas Overview test panels depend on these internal repo-tagged CI series."
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
|
||||
@ -1686,7 +1686,7 @@ data:
|
||||
{
|
||||
"id": 42,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne + Metis Test Success Rate",
|
||||
"title": "Platform Test Success Rate",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1718,12 +1718,13 @@ data:
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
}
|
||||
},
|
||||
"description": "Atlas Overview mirrors the Atlas Jobs internal dashboard for automation test health. Add new test series there first so they roll up here."
|
||||
},
|
||||
{
|
||||
"id": 43,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne + Metis Tests with Failures (24h)",
|
||||
"title": "Platform Tests with Failures (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -1823,7 +1824,8 @@ data:
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
],
|
||||
"description": "This summary is sourced from the Atlas Jobs internal dashboard rather than a separate overview-only query."
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user