Compare commits
No commits in common. "6c3c1342cdc09e1ef7c9e3609022372e7e22972f" and "e93aa6e33b7544dd71eec1e7ad92ca1ce0f36161" have entirely different histories.
6c3c1342cd
...
e93aa6e33b
@ -423,17 +423,16 @@ ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
|
|||||||
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
|
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
|
||||||
)
|
)
|
||||||
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||||
TEST_REPO_SELECTOR = 'repo=~"ariadne|metis"'
|
ARIADNE_CI_COVERAGE = 'ariadne_ci_coverage_percent{repo="ariadne"}'
|
||||||
TEST_CI_COVERAGE = f'ariadne_ci_coverage_percent{{{TEST_REPO_SELECTOR}}}'
|
ARIADNE_CI_TESTS = 'ariadne_ci_tests_total{repo="ariadne"}'
|
||||||
TEST_CI_TESTS = f'ariadne_ci_tests_total{{{TEST_REPO_SELECTOR}}}'
|
ARIADNE_TEST_SUCCESS_RATE = (
|
||||||
TEST_SUCCESS_RATE = (
|
|
||||||
"100 * "
|
"100 * "
|
||||||
f'sum(max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result="passed"}}[30d])) '
|
'sum(max_over_time(ariadne_ci_tests_total{repo="ariadne",result="passed"}[30d])) '
|
||||||
"/ clamp_min("
|
"/ clamp_min("
|
||||||
f'sum(max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result=~"passed|failed|error"}}[30d])), 1)'
|
'sum(max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"passed|failed|error"}[30d])), 1)'
|
||||||
)
|
)
|
||||||
TEST_FAILURES_24H = (
|
ARIADNE_TEST_FAILURES_24H = (
|
||||||
f'sum by (result) (max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result=~"failed|error"}}[24h]))'
|
'sum by (result) (max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"failed|error"}[24h]))'
|
||||||
)
|
)
|
||||||
POSTGRES_CONN_USED = (
|
POSTGRES_CONN_USED = (
|
||||||
'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") '
|
'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") '
|
||||||
@ -1298,8 +1297,8 @@ def build_overview():
|
|||||||
panels.append(
|
panels.append(
|
||||||
timeseries_panel(
|
timeseries_panel(
|
||||||
42,
|
42,
|
||||||
"Ariadne + Metis Test Success Rate",
|
"Ariadne Test Success Rate",
|
||||||
TEST_SUCCESS_RATE,
|
ARIADNE_TEST_SUCCESS_RATE,
|
||||||
{"h": 6, "w": 6, "x": 12, "y": 14},
|
{"h": 6, "w": 6, "x": 12, "y": 14},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
max_value=100,
|
max_value=100,
|
||||||
@ -1310,8 +1309,8 @@ def build_overview():
|
|||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
43,
|
43,
|
||||||
"Ariadne + Metis Tests with Failures (24h)",
|
"Tests with Failures (24h)",
|
||||||
TEST_FAILURES_24H,
|
ARIADNE_TEST_FAILURES_24H,
|
||||||
{"h": 6, "w": 6, "x": 18, "y": 14},
|
{"h": 6, "w": 6, "x": 18, "y": 14},
|
||||||
unit="none",
|
unit="none",
|
||||||
instant=True,
|
instant=True,
|
||||||
@ -2657,8 +2656,8 @@ def build_jobs_dashboard():
|
|||||||
panels.append(
|
panels.append(
|
||||||
stat_panel(
|
stat_panel(
|
||||||
17,
|
17,
|
||||||
"Ariadne + Metis CI Coverage (%)",
|
"Ariadne CI Coverage (%)",
|
||||||
TEST_CI_COVERAGE,
|
ARIADNE_CI_COVERAGE,
|
||||||
{"h": 6, "w": 4, "x": 8, "y": 11},
|
{"h": 6, "w": 4, "x": 8, "y": 11},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
decimals=1,
|
decimals=1,
|
||||||
@ -2669,8 +2668,8 @@ def build_jobs_dashboard():
|
|||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
table_panel(
|
||||||
18,
|
18,
|
||||||
"Ariadne + Metis CI Tests (latest)",
|
"Ariadne CI Tests (latest)",
|
||||||
TEST_CI_TESTS,
|
ARIADNE_CI_TESTS,
|
||||||
{"h": 6, "w": 12, "x": 12, "y": 11},
|
{"h": 6, "w": 12, "x": 12, "y": 11},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||||
|
|||||||
@ -10,8 +10,6 @@ spec:
|
|||||||
app: node-image-sweeper
|
app: node-image-sweeper
|
||||||
updateStrategy:
|
updateStrategy:
|
||||||
type: RollingUpdate
|
type: RollingUpdate
|
||||||
rollingUpdate:
|
|
||||||
maxUnavailable: 100%
|
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
@ -31,21 +29,6 @@ spec:
|
|||||||
- name: node-image-sweeper
|
- name: node-image-sweeper
|
||||||
image: python:3.12.9-alpine3.20
|
image: python:3.12.9-alpine3.20
|
||||||
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
|
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
|
||||||
env:
|
|
||||||
- name: SWEEP_INTERVAL_SEC
|
|
||||||
value: "21600"
|
|
||||||
- name: HIGH_USAGE_PERCENT
|
|
||||||
value: "70"
|
|
||||||
- name: EMERGENCY_USAGE_PERCENT
|
|
||||||
value: "80"
|
|
||||||
- name: BASE_THRESHOLD_DAYS
|
|
||||||
value: "14"
|
|
||||||
- name: HIGH_USAGE_THRESHOLD_DAYS
|
|
||||||
value: "3"
|
|
||||||
- name: LOG_RETENTION_DAYS
|
|
||||||
value: "7"
|
|
||||||
- name: JOURNAL_MAX_SIZE
|
|
||||||
value: "200M"
|
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
runAsUser: 0
|
runAsUser: 0
|
||||||
|
|||||||
@ -2,39 +2,26 @@
|
|||||||
set -eu
|
set -eu
|
||||||
|
|
||||||
ONE_SHOT=${ONE_SHOT:-false}
|
ONE_SHOT=${ONE_SHOT:-false}
|
||||||
SWEEP_INTERVAL_SEC=${SWEEP_INTERVAL_SEC:-21600}
|
THRESHOLD_DAYS=14
|
||||||
BASE_THRESHOLD_DAYS=${BASE_THRESHOLD_DAYS:-14}
|
|
||||||
HIGH_USAGE_THRESHOLD_DAYS=${HIGH_USAGE_THRESHOLD_DAYS:-3}
|
|
||||||
HIGH_USAGE_PERCENT=${HIGH_USAGE_PERCENT:-70}
|
|
||||||
EMERGENCY_USAGE_PERCENT=${EMERGENCY_USAGE_PERCENT:-85}
|
|
||||||
LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-7}
|
|
||||||
JOURNAL_MAX_SIZE=${JOURNAL_MAX_SIZE:-200M}
|
|
||||||
SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause"
|
|
||||||
|
|
||||||
sweep_once() {
|
usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
|
||||||
usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
|
if [ -n "${usage}" ] && [ "${usage}" -ge 70 ]; then
|
||||||
threshold_days="${BASE_THRESHOLD_DAYS}"
|
THRESHOLD_DAYS=3
|
||||||
if [ -n "${usage}" ] && [ "${usage}" -ge "${HIGH_USAGE_PERCENT}" ]; then
|
fi
|
||||||
threshold_days="${HIGH_USAGE_THRESHOLD_DAYS}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
cutoff=$(THRESHOLD_DAYS="${threshold_days}" python3 - <<'PY'
|
cutoff=$(python3 - <<'PY'
|
||||||
import os
|
import time, os
|
||||||
import time
|
print(int(time.time()) - int(os.environ.get("THRESHOLD_DAYS", "14")) * 86400)
|
||||||
|
|
||||||
days = int(os.environ.get("THRESHOLD_DAYS", "14"))
|
|
||||||
print(int(time.time()) - days * 86400)
|
|
||||||
PY
|
PY
|
||||||
)
|
)
|
||||||
|
|
||||||
RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ')
|
RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ')
|
||||||
IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}')
|
IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}')
|
||||||
|
|
||||||
prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY'
|
SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause"
|
||||||
import json
|
|
||||||
import os
|
prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY'
|
||||||
import sys
|
import json, os, sys, time
|
||||||
import time
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = json.load(sys.stdin)
|
data = json.load(sys.stdin)
|
||||||
@ -87,33 +74,19 @@ for p in prune:
|
|||||||
PY
|
PY
|
||||||
)
|
)
|
||||||
|
|
||||||
if [ -n "${prune_list}" ]; then
|
if [ -n "${prune_list}" ]; then
|
||||||
printf "%s" "${prune_list}" | while read -r image_id; do
|
printf "%s" "${prune_list}" | while read -r image_id; do
|
||||||
if [ -n "${image_id}" ]; then
|
if [ -n "${image_id}" ]; then
|
||||||
chroot /host /bin/sh -c "crictl rmi --prune ${image_id}" || true
|
chroot /host /bin/sh -c "crictl rmi --prune ${image_id}" || true
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
|
|
||||||
find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true
|
find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true
|
||||||
find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true
|
find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true
|
||||||
|
|
||||||
if [ -n "${usage}" ] && [ "${usage}" -ge "${EMERGENCY_USAGE_PERCENT}" ]; then
|
|
||||||
# Emergency pass for rootfs pressure on SD-backed nodes.
|
|
||||||
chroot /host /bin/sh -c "journalctl --vacuum-size='${JOURNAL_MAX_SIZE}' >/dev/null 2>&1 || true"
|
|
||||||
find /host/var/log -type f -name "*.gz" -mtime +"${LOG_RETENTION_DAYS}" -print -delete 2>/dev/null || true
|
|
||||||
find /host/var/log/pods -type f -name "*.log" -mtime +"${LOG_RETENTION_DAYS}" -print -delete 2>/dev/null || true
|
|
||||||
chroot /host /bin/sh -c "if command -v apt-get >/dev/null 2>&1; then apt-get clean >/dev/null 2>&1 || true; fi"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
sweep_once
|
|
||||||
|
|
||||||
if [ "${ONE_SHOT}" = "true" ]; then
|
if [ "${ONE_SHOT}" = "true" ]; then
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
while true; do
|
sleep infinity
|
||||||
sleep "${SWEEP_INTERVAL_SEC}"
|
|
||||||
sweep_once
|
|
||||||
done
|
|
||||||
|
|||||||
@ -1125,7 +1125,7 @@
|
|||||||
{
|
{
|
||||||
"id": 17,
|
"id": 17,
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"title": "Ariadne + Metis CI Coverage (%)",
|
"title": "Ariadne CI Coverage (%)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1138,7 +1138,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_coverage_percent{repo=~\"ariadne|metis\"}",
|
"expr": "ariadne_ci_coverage_percent{repo=\"ariadne\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{branch}}",
|
"legendFormat": "{{branch}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1188,7 +1188,7 @@
|
|||||||
{
|
{
|
||||||
"id": 18,
|
"id": 18,
|
||||||
"type": "table",
|
"type": "table",
|
||||||
"title": "Ariadne + Metis CI Tests (latest)",
|
"title": "Ariadne CI Tests (latest)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1201,7 +1201,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_tests_total{repo=~\"ariadne|metis\"}",
|
"expr": "ariadne_ci_tests_total{repo=\"ariadne\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1677,7 +1677,7 @@
|
|||||||
{
|
{
|
||||||
"id": 42,
|
"id": 42,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne + Metis Test Success Rate",
|
"title": "Ariadne Test Success Rate",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1690,7 +1690,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"passed|failed|error\"}[30d])), 1)",
|
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"passed|failed|error\"}[30d])), 1)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1714,7 +1714,7 @@
|
|||||||
{
|
{
|
||||||
"id": 43,
|
"id": 43,
|
||||||
"type": "bargauge",
|
"type": "bargauge",
|
||||||
"title": "Ariadne + Metis Tests with Failures (24h)",
|
"title": "Tests with Failures (24h)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1727,7 +1727,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"failed|error\"}[24h])))",
|
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{result}}",
|
"legendFormat": "{{result}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
@ -22,24 +22,7 @@ data:
|
|||||||
- orgId: 1
|
- orgId: 1
|
||||||
receiver: email-admins
|
receiver: email-admins
|
||||||
group_by:
|
group_by:
|
||||||
- grafana_folder
|
|
||||||
- alertname
|
- alertname
|
||||||
group_wait: 1m
|
|
||||||
group_interval: 30m
|
|
||||||
repeat_interval: 12h
|
|
||||||
routes:
|
|
||||||
- receiver: email-admins
|
|
||||||
object_matchers:
|
|
||||||
- [severity, "=", "critical"]
|
|
||||||
group_wait: 30s
|
|
||||||
group_interval: 5m
|
|
||||||
repeat_interval: 2h
|
|
||||||
- receiver: email-admins
|
|
||||||
object_matchers:
|
|
||||||
- [severity, "=", "warning"]
|
|
||||||
group_wait: 5m
|
|
||||||
group_interval: 2h
|
|
||||||
repeat_interval: 24h
|
|
||||||
rules.yaml: |
|
rules.yaml: |
|
||||||
apiVersion: 1
|
apiVersion: 1
|
||||||
groups:
|
groups:
|
||||||
@ -49,7 +32,7 @@ data:
|
|||||||
interval: 1m
|
interval: 1m
|
||||||
rules:
|
rules:
|
||||||
- uid: disk-pressure-root
|
- uid: disk-pressure-root
|
||||||
title: "Node rootfs high (>85%)"
|
title: "Node rootfs high (>80%)"
|
||||||
condition: C
|
condition: C
|
||||||
for: "10m"
|
for: "10m"
|
||||||
data:
|
data:
|
||||||
@ -83,7 +66,7 @@ data:
|
|||||||
type: threshold
|
type: threshold
|
||||||
conditions:
|
conditions:
|
||||||
- evaluator:
|
- evaluator:
|
||||||
params: [85]
|
params: [80]
|
||||||
type: gt
|
type: gt
|
||||||
operator:
|
operator:
|
||||||
type: and
|
type: and
|
||||||
@ -93,7 +76,7 @@ data:
|
|||||||
noDataState: NoData
|
noDataState: NoData
|
||||||
execErrState: Error
|
execErrState: Error
|
||||||
annotations:
|
annotations:
|
||||||
summary: "{{ $labels.node }} rootfs >85% for 10m"
|
summary: "{{ $labels.node }} rootfs >80% for 10m"
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- uid: disk-growth-1h
|
- uid: disk-growth-1h
|
||||||
@ -518,7 +501,7 @@ data:
|
|||||||
model:
|
model:
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
expr: max(postmark_outbound_bounce_rate{window="1d"}) or on() vector(0)
|
expr: postmark_outbound_bounce_rate{window="1d"}
|
||||||
legendFormat: bounce 1d
|
legendFormat: bounce 1d
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
@ -547,7 +530,7 @@ data:
|
|||||||
reducer:
|
reducer:
|
||||||
type: last
|
type: last
|
||||||
type: query
|
type: query
|
||||||
noDataState: OK
|
noDataState: NoData
|
||||||
execErrState: Error
|
execErrState: Error
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Postmark 1d bounce rate >5%"
|
summary: "Postmark 1d bounce rate >5%"
|
||||||
@ -566,7 +549,7 @@ data:
|
|||||||
model:
|
model:
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
expr: max(postmark_api_up) or on() vector(0)
|
expr: min_over_time(max by (instance) (postmark_api_up)[5m])
|
||||||
legendFormat: api up
|
legendFormat: api up
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
@ -595,7 +578,7 @@ data:
|
|||||||
reducer:
|
reducer:
|
||||||
type: last
|
type: last
|
||||||
type: query
|
type: query
|
||||||
noDataState: OK
|
noDataState: NoData
|
||||||
execErrState: Error
|
execErrState: Error
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Postmark exporter reports API down"
|
summary: "Postmark exporter reports API down"
|
||||||
|
|||||||
@ -1134,7 +1134,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 17,
|
"id": 17,
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"title": "Ariadne + Metis CI Coverage (%)",
|
"title": "Ariadne CI Coverage (%)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1147,7 +1147,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_coverage_percent{repo=~\"ariadne|metis\"}",
|
"expr": "ariadne_ci_coverage_percent{repo=\"ariadne\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{branch}}",
|
"legendFormat": "{{branch}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1197,7 +1197,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 18,
|
"id": 18,
|
||||||
"type": "table",
|
"type": "table",
|
||||||
"title": "Ariadne + Metis CI Tests (latest)",
|
"title": "Ariadne CI Tests (latest)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1210,7 +1210,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_tests_total{repo=~\"ariadne|metis\"}",
|
"expr": "ariadne_ci_tests_total{repo=\"ariadne\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1686,7 +1686,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 42,
|
"id": 42,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne + Metis Test Success Rate",
|
"title": "Ariadne Test Success Rate",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1699,7 +1699,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"passed|failed|error\"}[30d])), 1)",
|
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"passed|failed|error\"}[30d])), 1)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1723,7 +1723,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 43,
|
"id": 43,
|
||||||
"type": "bargauge",
|
"type": "bargauge",
|
||||||
"title": "Ariadne + Metis Tests with Failures (24h)",
|
"title": "Tests with Failures (24h)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1736,7 +1736,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"failed|error\"}[24h])))",
|
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{result}}",
|
"legendFormat": "{{result}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
@ -286,7 +286,7 @@ spec:
|
|||||||
podAnnotations:
|
podAnnotations:
|
||||||
vault.hashicorp.com/agent-inject: "true"
|
vault.hashicorp.com/agent-inject: "true"
|
||||||
vault.hashicorp.com/role: "monitoring"
|
vault.hashicorp.com/role: "monitoring"
|
||||||
monitoring.bstein.dev/restart-rev: "6"
|
monitoring.bstein.dev/restart-rev: "4"
|
||||||
vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin"
|
vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin"
|
||||||
vault.hashicorp.com/agent-inject-template-grafana-env.sh: |
|
vault.hashicorp.com/agent-inject-template-grafana-env.sh: |
|
||||||
{{ with secret "kv/data/atlas/monitoring/grafana-admin" }}
|
{{ with secret "kv/data/atlas/monitoring/grafana-admin" }}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user