Compare commits
6 Commits
e93aa6e33b
...
6c3c1342cd
| Author | SHA1 | Date | |
|---|---|---|---|
| 6c3c1342cd | |||
| 7b43043838 | |||
| af74172b2d | |||
| df5ba74ab7 | |||
| 9e88b3fc88 | |||
| ca273c7337 |
@ -423,16 +423,17 @@ ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
|
|||||||
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
|
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
|
||||||
)
|
)
|
||||||
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||||
ARIADNE_CI_COVERAGE = 'ariadne_ci_coverage_percent{repo="ariadne"}'
|
TEST_REPO_SELECTOR = 'repo=~"ariadne|metis"'
|
||||||
ARIADNE_CI_TESTS = 'ariadne_ci_tests_total{repo="ariadne"}'
|
TEST_CI_COVERAGE = f'ariadne_ci_coverage_percent{{{TEST_REPO_SELECTOR}}}'
|
||||||
ARIADNE_TEST_SUCCESS_RATE = (
|
TEST_CI_TESTS = f'ariadne_ci_tests_total{{{TEST_REPO_SELECTOR}}}'
|
||||||
|
TEST_SUCCESS_RATE = (
|
||||||
"100 * "
|
"100 * "
|
||||||
'sum(max_over_time(ariadne_ci_tests_total{repo="ariadne",result="passed"}[30d])) '
|
f'sum(max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result="passed"}}[30d])) '
|
||||||
"/ clamp_min("
|
"/ clamp_min("
|
||||||
'sum(max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"passed|failed|error"}[30d])), 1)'
|
f'sum(max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result=~"passed|failed|error"}}[30d])), 1)'
|
||||||
)
|
)
|
||||||
ARIADNE_TEST_FAILURES_24H = (
|
TEST_FAILURES_24H = (
|
||||||
'sum by (result) (max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"failed|error"}[24h]))'
|
f'sum by (result) (max_over_time(ariadne_ci_tests_total{{{TEST_REPO_SELECTOR},result=~"failed|error"}}[24h]))'
|
||||||
)
|
)
|
||||||
POSTGRES_CONN_USED = (
|
POSTGRES_CONN_USED = (
|
||||||
'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") '
|
'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") '
|
||||||
@ -1297,8 +1298,8 @@ def build_overview():
|
|||||||
panels.append(
|
panels.append(
|
||||||
timeseries_panel(
|
timeseries_panel(
|
||||||
42,
|
42,
|
||||||
"Ariadne Test Success Rate",
|
"Ariadne + Metis Test Success Rate",
|
||||||
ARIADNE_TEST_SUCCESS_RATE,
|
TEST_SUCCESS_RATE,
|
||||||
{"h": 6, "w": 6, "x": 12, "y": 14},
|
{"h": 6, "w": 6, "x": 12, "y": 14},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
max_value=100,
|
max_value=100,
|
||||||
@ -1309,8 +1310,8 @@ def build_overview():
|
|||||||
panels.append(
|
panels.append(
|
||||||
bargauge_panel(
|
bargauge_panel(
|
||||||
43,
|
43,
|
||||||
"Tests with Failures (24h)",
|
"Ariadne + Metis Tests with Failures (24h)",
|
||||||
ARIADNE_TEST_FAILURES_24H,
|
TEST_FAILURES_24H,
|
||||||
{"h": 6, "w": 6, "x": 18, "y": 14},
|
{"h": 6, "w": 6, "x": 18, "y": 14},
|
||||||
unit="none",
|
unit="none",
|
||||||
instant=True,
|
instant=True,
|
||||||
@ -2656,8 +2657,8 @@ def build_jobs_dashboard():
|
|||||||
panels.append(
|
panels.append(
|
||||||
stat_panel(
|
stat_panel(
|
||||||
17,
|
17,
|
||||||
"Ariadne CI Coverage (%)",
|
"Ariadne + Metis CI Coverage (%)",
|
||||||
ARIADNE_CI_COVERAGE,
|
TEST_CI_COVERAGE,
|
||||||
{"h": 6, "w": 4, "x": 8, "y": 11},
|
{"h": 6, "w": 4, "x": 8, "y": 11},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
decimals=1,
|
decimals=1,
|
||||||
@ -2668,8 +2669,8 @@ def build_jobs_dashboard():
|
|||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
table_panel(
|
||||||
18,
|
18,
|
||||||
"Ariadne CI Tests (latest)",
|
"Ariadne + Metis CI Tests (latest)",
|
||||||
ARIADNE_CI_TESTS,
|
TEST_CI_TESTS,
|
||||||
{"h": 6, "w": 12, "x": 12, "y": 11},
|
{"h": 6, "w": 12, "x": 12, "y": 11},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||||
|
|||||||
@ -10,6 +10,8 @@ spec:
|
|||||||
app: node-image-sweeper
|
app: node-image-sweeper
|
||||||
updateStrategy:
|
updateStrategy:
|
||||||
type: RollingUpdate
|
type: RollingUpdate
|
||||||
|
rollingUpdate:
|
||||||
|
maxUnavailable: 100%
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
@ -29,6 +31,21 @@ spec:
|
|||||||
- name: node-image-sweeper
|
- name: node-image-sweeper
|
||||||
image: python:3.12.9-alpine3.20
|
image: python:3.12.9-alpine3.20
|
||||||
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
|
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
|
||||||
|
env:
|
||||||
|
- name: SWEEP_INTERVAL_SEC
|
||||||
|
value: "21600"
|
||||||
|
- name: HIGH_USAGE_PERCENT
|
||||||
|
value: "70"
|
||||||
|
- name: EMERGENCY_USAGE_PERCENT
|
||||||
|
value: "80"
|
||||||
|
- name: BASE_THRESHOLD_DAYS
|
||||||
|
value: "14"
|
||||||
|
- name: HIGH_USAGE_THRESHOLD_DAYS
|
||||||
|
value: "3"
|
||||||
|
- name: LOG_RETENTION_DAYS
|
||||||
|
value: "7"
|
||||||
|
- name: JOURNAL_MAX_SIZE
|
||||||
|
value: "200M"
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
runAsUser: 0
|
runAsUser: 0
|
||||||
|
|||||||
@ -2,26 +2,39 @@
|
|||||||
set -eu
|
set -eu
|
||||||
|
|
||||||
ONE_SHOT=${ONE_SHOT:-false}
|
ONE_SHOT=${ONE_SHOT:-false}
|
||||||
THRESHOLD_DAYS=14
|
SWEEP_INTERVAL_SEC=${SWEEP_INTERVAL_SEC:-21600}
|
||||||
|
BASE_THRESHOLD_DAYS=${BASE_THRESHOLD_DAYS:-14}
|
||||||
|
HIGH_USAGE_THRESHOLD_DAYS=${HIGH_USAGE_THRESHOLD_DAYS:-3}
|
||||||
|
HIGH_USAGE_PERCENT=${HIGH_USAGE_PERCENT:-70}
|
||||||
|
EMERGENCY_USAGE_PERCENT=${EMERGENCY_USAGE_PERCENT:-85}
|
||||||
|
LOG_RETENTION_DAYS=${LOG_RETENTION_DAYS:-7}
|
||||||
|
JOURNAL_MAX_SIZE=${JOURNAL_MAX_SIZE:-200M}
|
||||||
|
SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause"
|
||||||
|
|
||||||
|
sweep_once() {
|
||||||
usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
|
usage=$(df -P /host | awk 'NR==2 {gsub(/%/,"",$5); print $5}') || usage=""
|
||||||
if [ -n "${usage}" ] && [ "${usage}" -ge 70 ]; then
|
threshold_days="${BASE_THRESHOLD_DAYS}"
|
||||||
THRESHOLD_DAYS=3
|
if [ -n "${usage}" ] && [ "${usage}" -ge "${HIGH_USAGE_PERCENT}" ]; then
|
||||||
|
threshold_days="${HIGH_USAGE_THRESHOLD_DAYS}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
cutoff=$(python3 - <<'PY'
|
cutoff=$(THRESHOLD_DAYS="${threshold_days}" python3 - <<'PY'
|
||||||
import time, os
|
import os
|
||||||
print(int(time.time()) - int(os.environ.get("THRESHOLD_DAYS", "14")) * 86400)
|
import time
|
||||||
|
|
||||||
|
days = int(os.environ.get("THRESHOLD_DAYS", "14"))
|
||||||
|
print(int(time.time()) - days * 86400)
|
||||||
PY
|
PY
|
||||||
)
|
)
|
||||||
|
|
||||||
RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ')
|
RUNNING=$(chroot /host /bin/sh -c "crictl ps -a --quiet 2>/dev/null" | tr -s ' ' '\n' | sort -u | tr '\n' ' ')
|
||||||
IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}')
|
IMAGES_JSON=$(chroot /host /bin/sh -c "crictl images -o json 2>/dev/null" || echo '{}')
|
||||||
|
|
||||||
SKIP="registry.k8s.io/pause k8s.gcr.io/pause rancher/mirrored-pause"
|
|
||||||
|
|
||||||
prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY'
|
prune_list=$(printf "%s" "${IMAGES_JSON}" | CUTOFF="${cutoff}" RUNNING="${RUNNING}" SKIP="${SKIP}" python3 - <<'PY'
|
||||||
import json, os, sys, time
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = json.load(sys.stdin)
|
data = json.load(sys.stdin)
|
||||||
@ -85,8 +98,22 @@ fi
|
|||||||
find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true
|
find /host/var/lib/rancher/k3s/agent/images -type f -name "*.tar" -mtime +7 -print -delete 2>/dev/null || true
|
||||||
find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true
|
find /host/var/lib/rancher/k3s/agent/containerd -maxdepth 1 -type f -mtime +7 -print -delete 2>/dev/null || true
|
||||||
|
|
||||||
|
if [ -n "${usage}" ] && [ "${usage}" -ge "${EMERGENCY_USAGE_PERCENT}" ]; then
|
||||||
|
# Emergency pass for rootfs pressure on SD-backed nodes.
|
||||||
|
chroot /host /bin/sh -c "journalctl --vacuum-size='${JOURNAL_MAX_SIZE}' >/dev/null 2>&1 || true"
|
||||||
|
find /host/var/log -type f -name "*.gz" -mtime +"${LOG_RETENTION_DAYS}" -print -delete 2>/dev/null || true
|
||||||
|
find /host/var/log/pods -type f -name "*.log" -mtime +"${LOG_RETENTION_DAYS}" -print -delete 2>/dev/null || true
|
||||||
|
chroot /host /bin/sh -c "if command -v apt-get >/dev/null 2>&1; then apt-get clean >/dev/null 2>&1 || true; fi"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
sweep_once
|
||||||
|
|
||||||
if [ "${ONE_SHOT}" = "true" ]; then
|
if [ "${ONE_SHOT}" = "true" ]; then
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
sleep infinity
|
while true; do
|
||||||
|
sleep "${SWEEP_INTERVAL_SEC}"
|
||||||
|
sweep_once
|
||||||
|
done
|
||||||
|
|||||||
@ -1125,7 +1125,7 @@
|
|||||||
{
|
{
|
||||||
"id": 17,
|
"id": 17,
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"title": "Ariadne CI Coverage (%)",
|
"title": "Ariadne + Metis CI Coverage (%)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1138,7 +1138,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_coverage_percent{repo=\"ariadne\"}",
|
"expr": "ariadne_ci_coverage_percent{repo=~\"ariadne|metis\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{branch}}",
|
"legendFormat": "{{branch}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1188,7 +1188,7 @@
|
|||||||
{
|
{
|
||||||
"id": 18,
|
"id": 18,
|
||||||
"type": "table",
|
"type": "table",
|
||||||
"title": "Ariadne CI Tests (latest)",
|
"title": "Ariadne + Metis CI Tests (latest)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1201,7 +1201,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_tests_total{repo=\"ariadne\"}",
|
"expr": "ariadne_ci_tests_total{repo=~\"ariadne|metis\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1677,7 +1677,7 @@
|
|||||||
{
|
{
|
||||||
"id": 42,
|
"id": 42,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Test Success Rate",
|
"title": "Ariadne + Metis Test Success Rate",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1690,7 +1690,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"passed|failed|error\"}[30d])), 1)",
|
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"passed|failed|error\"}[30d])), 1)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1714,7 +1714,7 @@
|
|||||||
{
|
{
|
||||||
"id": 43,
|
"id": 43,
|
||||||
"type": "bargauge",
|
"type": "bargauge",
|
||||||
"title": "Tests with Failures (24h)",
|
"title": "Ariadne + Metis Tests with Failures (24h)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1727,7 +1727,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h])))",
|
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"failed|error\"}[24h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{result}}",
|
"legendFormat": "{{result}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
@ -22,7 +22,24 @@ data:
|
|||||||
- orgId: 1
|
- orgId: 1
|
||||||
receiver: email-admins
|
receiver: email-admins
|
||||||
group_by:
|
group_by:
|
||||||
|
- grafana_folder
|
||||||
- alertname
|
- alertname
|
||||||
|
group_wait: 1m
|
||||||
|
group_interval: 30m
|
||||||
|
repeat_interval: 12h
|
||||||
|
routes:
|
||||||
|
- receiver: email-admins
|
||||||
|
object_matchers:
|
||||||
|
- [severity, "=", "critical"]
|
||||||
|
group_wait: 30s
|
||||||
|
group_interval: 5m
|
||||||
|
repeat_interval: 2h
|
||||||
|
- receiver: email-admins
|
||||||
|
object_matchers:
|
||||||
|
- [severity, "=", "warning"]
|
||||||
|
group_wait: 5m
|
||||||
|
group_interval: 2h
|
||||||
|
repeat_interval: 24h
|
||||||
rules.yaml: |
|
rules.yaml: |
|
||||||
apiVersion: 1
|
apiVersion: 1
|
||||||
groups:
|
groups:
|
||||||
@ -32,7 +49,7 @@ data:
|
|||||||
interval: 1m
|
interval: 1m
|
||||||
rules:
|
rules:
|
||||||
- uid: disk-pressure-root
|
- uid: disk-pressure-root
|
||||||
title: "Node rootfs high (>80%)"
|
title: "Node rootfs high (>85%)"
|
||||||
condition: C
|
condition: C
|
||||||
for: "10m"
|
for: "10m"
|
||||||
data:
|
data:
|
||||||
@ -66,7 +83,7 @@ data:
|
|||||||
type: threshold
|
type: threshold
|
||||||
conditions:
|
conditions:
|
||||||
- evaluator:
|
- evaluator:
|
||||||
params: [80]
|
params: [85]
|
||||||
type: gt
|
type: gt
|
||||||
operator:
|
operator:
|
||||||
type: and
|
type: and
|
||||||
@ -76,7 +93,7 @@ data:
|
|||||||
noDataState: NoData
|
noDataState: NoData
|
||||||
execErrState: Error
|
execErrState: Error
|
||||||
annotations:
|
annotations:
|
||||||
summary: "{{ $labels.node }} rootfs >80% for 10m"
|
summary: "{{ $labels.node }} rootfs >85% for 10m"
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- uid: disk-growth-1h
|
- uid: disk-growth-1h
|
||||||
@ -501,7 +518,7 @@ data:
|
|||||||
model:
|
model:
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
expr: postmark_outbound_bounce_rate{window="1d"}
|
expr: max(postmark_outbound_bounce_rate{window="1d"}) or on() vector(0)
|
||||||
legendFormat: bounce 1d
|
legendFormat: bounce 1d
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
@ -530,7 +547,7 @@ data:
|
|||||||
reducer:
|
reducer:
|
||||||
type: last
|
type: last
|
||||||
type: query
|
type: query
|
||||||
noDataState: NoData
|
noDataState: OK
|
||||||
execErrState: Error
|
execErrState: Error
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Postmark 1d bounce rate >5%"
|
summary: "Postmark 1d bounce rate >5%"
|
||||||
@ -549,7 +566,7 @@ data:
|
|||||||
model:
|
model:
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
expr: min_over_time(max by (instance) (postmark_api_up)[5m])
|
expr: max(postmark_api_up) or on() vector(0)
|
||||||
legendFormat: api up
|
legendFormat: api up
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
@ -578,7 +595,7 @@ data:
|
|||||||
reducer:
|
reducer:
|
||||||
type: last
|
type: last
|
||||||
type: query
|
type: query
|
||||||
noDataState: NoData
|
noDataState: OK
|
||||||
execErrState: Error
|
execErrState: Error
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Postmark exporter reports API down"
|
summary: "Postmark exporter reports API down"
|
||||||
|
|||||||
@ -1134,7 +1134,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 17,
|
"id": 17,
|
||||||
"type": "stat",
|
"type": "stat",
|
||||||
"title": "Ariadne CI Coverage (%)",
|
"title": "Ariadne + Metis CI Coverage (%)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1147,7 +1147,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_coverage_percent{repo=\"ariadne\"}",
|
"expr": "ariadne_ci_coverage_percent{repo=~\"ariadne|metis\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{branch}}",
|
"legendFormat": "{{branch}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
@ -1197,7 +1197,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 18,
|
"id": 18,
|
||||||
"type": "table",
|
"type": "table",
|
||||||
"title": "Ariadne CI Tests (latest)",
|
"title": "Ariadne + Metis CI Tests (latest)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1210,7 +1210,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "ariadne_ci_tests_total{repo=\"ariadne\"}",
|
"expr": "ariadne_ci_tests_total{repo=~\"ariadne|metis\"}",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"instant": true
|
"instant": true
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1686,7 +1686,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 42,
|
"id": 42,
|
||||||
"type": "timeseries",
|
"type": "timeseries",
|
||||||
"title": "Ariadne Test Success Rate",
|
"title": "Ariadne + Metis Test Success Rate",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1699,7 +1699,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"passed|failed|error\"}[30d])), 1)",
|
"expr": "100 * sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=\"passed\"}[30d])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"passed|failed|error\"}[30d])), 1)",
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -1723,7 +1723,7 @@ data:
|
|||||||
{
|
{
|
||||||
"id": 43,
|
"id": 43,
|
||||||
"type": "bargauge",
|
"type": "bargauge",
|
||||||
"title": "Tests with Failures (24h)",
|
"title": "Ariadne + Metis Tests with Failures (24h)",
|
||||||
"datasource": {
|
"datasource": {
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "atlas-vm"
|
"uid": "atlas-vm"
|
||||||
@ -1736,7 +1736,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h])))",
|
"expr": "sort_desc(sum by (result) (max_over_time(ariadne_ci_tests_total{repo=~\"ariadne|metis\",result=~\"failed|error\"}[24h])))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{result}}",
|
"legendFormat": "{{result}}",
|
||||||
"instant": true
|
"instant": true
|
||||||
|
|||||||
@ -286,7 +286,7 @@ spec:
|
|||||||
podAnnotations:
|
podAnnotations:
|
||||||
vault.hashicorp.com/agent-inject: "true"
|
vault.hashicorp.com/agent-inject: "true"
|
||||||
vault.hashicorp.com/role: "monitoring"
|
vault.hashicorp.com/role: "monitoring"
|
||||||
monitoring.bstein.dev/restart-rev: "4"
|
monitoring.bstein.dev/restart-rev: "6"
|
||||||
vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin"
|
vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin"
|
||||||
vault.hashicorp.com/agent-inject-template-grafana-env.sh: |
|
vault.hashicorp.com/agent-inject-template-grafana-env.sh: |
|
||||||
{{ with secret "kv/data/atlas/monitoring/grafana-admin" }}
|
{{ with secret "kv/data/atlas/monitoring/grafana-admin" }}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user