monitoring: add glue dashboard and tag cronjobs

This commit is contained in:
Brad Stein 2026-01-18 02:50:07 -03:00
parent a6ac0c363e
commit 343d41ecc7
26 changed files with 1095 additions and 113 deletions

View File

@ -85,19 +85,17 @@ WORKER_TOTAL = len(WORKER_NODES)
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
# Namespaces considered infrastructure (excluded from workload counts)
INFRA_NAMESPACES = [
"kube-system",
"longhorn-system",
"metallb-system",
INFRA_PATTERNS = [
"kube-.*",
".*-system",
"traefik",
"monitoring",
"logging",
"cert-manager",
"flux-system",
"traefik",
"maintenance",
"postgres",
]
INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$"
INFRA_REGEX = f"^({'|'.join(INFRA_PATTERNS)})$"
# Namespaces allowed on control plane without counting as workloads
CP_ALLOWED_NS = INFRA_REGEX
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
@ -319,6 +317,21 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
# --- "Glue" CronJob health expressions (PromQL fragments) -------------------
# Every selector below is restricted to series carrying this label matcher.
# NOTE(review): kube-state-metrics normally attaches label_* labels only to
# kube_cronjob_labels; confirm the status/spec series really carry this label
# (e.g. via relabeling), otherwise those selectors match nothing — TODO verify.
GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"'
GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}"
GLUE_LAST_SUCCESS = f"kube_cronjob_status_last_successful_time{{{GLUE_LABEL}}}"
GLUE_LAST_SCHEDULE = f"kube_cronjob_status_last_schedule_time{{{GLUE_LABEL}}}"
GLUE_SUSPENDED = f"kube_cronjob_spec_suspend{{{GLUE_LABEL}}} == 1"
# Seconds (and hours) elapsed since the last success / last schedule timestamp.
GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})"
GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})"
GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600"
GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600"
# A job is stale once its last success is older than this window (36 hours).
GLUE_STALE_WINDOW_SEC = 36 * 3600
# "> bool" yields 1/0 per job instead of filtering, so the series can be summed.
GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
# Jobs that have no last-successful-time series at all. Match on the identifying
# labels only: a bare "unless" compares the full label set, which only works if
# both metrics happen to expose exactly the same labels.
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) {GLUE_LAST_SUCCESS})"
# Suspended jobs are excluded from both the stale and the missing sets.
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
# sum()/count() over an empty vector return no sample, and "+" with an empty
# operand yields nothing, so each side falls back to 0; otherwise the stat
# shows "No data" whenever there are no missing (or no tracked) jobs.
GLUE_STALE_COUNT = (
    f"((sum({GLUE_STALE_ACTIVE}) or on() vector(0))"
    f" + (count({GLUE_MISSING_ACTIVE}) or on() vector(0)))"
)
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
GPU_NODE_REGEX = "|".join(GPU_NODES)
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
@ -965,7 +978,7 @@ def build_overview():
30,
"Mail Sent (1d)",
'max(postmark_outbound_sent{window="1d"})',
{"h": 2, "w": 6, "x": 0, "y": 8},
{"h": 2, "w": 5, "x": 0, "y": 8},
unit="none",
links=link_to("atlas-mail"),
)
@ -976,7 +989,7 @@ def build_overview():
"type": "stat",
"title": "Mail Bounces (1d)",
"datasource": PROM_DS,
"gridPos": {"h": 2, "w": 6, "x": 12, "y": 8},
"gridPos": {"h": 2, "w": 5, "x": 10, "y": 8},
"targets": [
{
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
@ -1022,7 +1035,7 @@ def build_overview():
32,
"Mail Success Rate (1d)",
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
{"h": 2, "w": 6, "x": 6, "y": 8},
{"h": 2, "w": 5, "x": 5, "y": 8},
unit="percent",
thresholds=mail_success_thresholds,
decimals=1,
@ -1034,13 +1047,24 @@ def build_overview():
33,
"Mail Limit Used (30d)",
"max(postmark_sending_limit_used_percent)",
{"h": 2, "w": 6, "x": 18, "y": 8},
{"h": 2, "w": 5, "x": 15, "y": 8},
unit="percent",
thresholds=mail_limit_thresholds,
decimals=1,
links=link_to("atlas-mail"),
)
)
panels.append(
stat_panel(
34,
"Glue Jobs Stale",
GLUE_STALE_COUNT,
{"h": 2, "w": 4, "x": 20, "y": 8},
unit="none",
thresholds=count_thresholds,
links=link_to("atlas-glue"),
)
)
storage_panels = [
(23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
@ -1072,7 +1096,7 @@ def build_overview():
namespace_cpu_share_expr(cpu_scope),
{"h": 9, "w": 8, "x": 0, "y": 16},
links=namespace_scope_links("namespace_scope_cpu"),
description="Values are normalized within the selected scope; use panel links to switch scope.",
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
)
panels.append(
@ -1082,7 +1106,7 @@ def build_overview():
namespace_gpu_share_expr(gpu_scope),
{"h": 9, "w": 8, "x": 8, "y": 16},
links=namespace_scope_links("namespace_scope_gpu"),
description="Values are normalized within the selected scope; use panel links to switch scope.",
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
)
panels.append(
@ -1092,7 +1116,7 @@ def build_overview():
namespace_ram_share_expr(ram_scope),
{"h": 9, "w": 8, "x": 16, "y": 16},
links=namespace_scope_links("namespace_scope_ram"),
description="Values are normalized within the selected scope; use panel links to switch scope.",
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
)
@ -2136,6 +2160,98 @@ def build_mail_dashboard():
}
def build_glue_dashboard():
    """Assemble the "Atlas Glue" dashboard definition.

    Layout: one stat panel (stale-job count) plus three tables on the top
    row, followed by two half-width tables for the time since the last
    success and the last schedule.

    Returns:
        The dashboard as a plain dict (uid ``atlas-glue``).
    """
    # Applied to every table panel: labelsToFields, then sort on Value, descending.
    sort_desc = [
        {"id": "labelsToFields", "options": {}},
        {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
    ]
    stale_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "green", "value": None},
            {"color": "yellow", "value": 1},
            {"color": "orange", "value": 2},
            {"color": "red", "value": 3},
        ],
    }

    panels = [
        stat_panel(
            1,
            "Glue Jobs Stale (>36h)",
            GLUE_STALE_COUNT,
            {"h": 4, "w": 6, "x": 0, "y": 0},
            unit="none",
            thresholds=stale_thresholds,
        )
    ]

    # (panel id, title, expression, grid position, unit) for each table, in display order.
    table_specs = (
        (2, "Glue Jobs Missing Success", GLUE_MISSING,
         {"h": 4, "w": 6, "x": 6, "y": 0}, "none"),
        (3, "Glue Jobs Suspended", f"kube_cronjob_spec_suspend{{{GLUE_LABEL}}} == 1",
         {"h": 4, "w": 6, "x": 12, "y": 0}, "none"),
        (4, "Glue Jobs Active Runs", f"kube_cronjob_status_active{{{GLUE_LABEL}}}",
         {"h": 4, "w": 6, "x": 18, "y": 0}, "none"),
        (5, "Glue Jobs Last Success (hours ago)", GLUE_LAST_SUCCESS_AGE_HOURS,
         {"h": 8, "w": 12, "x": 0, "y": 4}, "h"),
        (6, "Glue Jobs Last Schedule (hours ago)", GLUE_LAST_SCHEDULE_AGE_HOURS,
         {"h": 8, "w": 12, "x": 12, "y": 4}, "h"),
    )
    for panel_id, title, expr, grid, unit in table_specs:
        panels.append(
            table_panel(
                panel_id,
                title,
                expr,
                grid,
                unit=unit,
                transformations=sort_desc,
                instant=True,
            )
        )

    dashboard = {
        "uid": "atlas-glue",
        "title": "Atlas Glue",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-7d", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "glue"],
    }
    return dashboard
def build_gpu_dashboard():
panels = []
gpu_scope = "$namespace_scope_gpu"
@ -2146,7 +2262,7 @@ def build_gpu_dashboard():
namespace_gpu_share_expr(gpu_scope),
{"h": 8, "w": 12, "x": 0, "y": 0},
links=namespace_scope_links("namespace_scope_gpu"),
description="Values are normalized within the selected scope; use panel links to switch scope.",
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
)
)
panels.append(
@ -2229,6 +2345,10 @@ DASHBOARDS = {
"builder": build_mail_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml",
},
"atlas-glue": {
"builder": build_glue_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-glue.yaml",
},
"atlas-gpu": {
"builder": build_gpu_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: vaultwarden-cred-sync
namespace: bstein-dev-home
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/15 * * * *"
concurrencyPolicy: Forbid

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: guest-name-randomizer
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/1 * * * *"
suspend: false

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: pin-othrys-invite
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/30 * * * *"
suspend: true
@ -164,4 +166,4 @@ spec:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
defaultMode: 0555

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: othrys-room-reset
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 0 1 1 *"
suspend: true
@ -307,4 +309,4 @@ spec:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
defaultMode: 0555

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: seed-othrys-room
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/10 * * * *"
suspend: true
@ -180,4 +182,4 @@ spec:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
defaultMode: 0555

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: firefly-user-sync
namespace: finance
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 6 * * *"
suspend: true

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: wger-admin-ensure
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "15 3 * * *"
concurrencyPolicy: Forbid

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: wger-user-sync
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 5 * * *"
suspend: true

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: mailu-sync-nightly
namespace: mailu-mailserver
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "30 4 * * *"
concurrencyPolicy: Forbid
@ -79,4 +81,4 @@ spec:
- name: vault-scripts
configMap:
name: mailu-vault-env
defaultMode: 0555
defaultMode: 0555

View File

@ -0,0 +1,339 @@
{
"uid": "atlas-glue",
"title": "Atlas Glue",
"folderUid": "atlas-internal",
"editable": true,
"panels": [
{
"id": 1,
"type": "stat",
"title": "Glue Jobs Stale (>36h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 0
},
"targets": [
{
"expr": "(sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)))",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
}
},
{
"id": 2,
"type": "table",
"title": "Glue Jobs Missing Success",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 0
},
"targets": [
{
"expr": "(kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 3,
"type": "table",
"title": "Glue Jobs Suspended",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 0
},
"targets": [
{
"expr": "kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 4,
"type": "table",
"title": "Glue Jobs Active Runs",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 0
},
"targets": [
{
"expr": "kube_cronjob_status_active{label_atlas_bstein_dev_glue=\"true\"}",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 5,
"type": "table",
"title": "Glue Jobs Last Success (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 4
},
"targets": [
{
"expr": "((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 6,
"type": "table",
"title": "Glue Jobs Last Schedule (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 4
},
"targets": [
{
"expr": "((time() - kube_cronjob_status_last_schedule_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
}
],
"time": {
"from": "now-7d",
"to": "now"
},
"annotations": {
"list": []
},
"schemaVersion": 39,
"style": "dark",
"tags": [
"atlas",
"glue"
]
}

View File

@ -57,7 +57,7 @@
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
},
{
@ -67,11 +67,11 @@
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 2,
@ -207,16 +207,16 @@
"name": "namespace_scope_cpu",
"label": "CPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -226,7 +226,7 @@
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -241,16 +241,16 @@
"name": "namespace_scope_gpu",
"label": "GPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -260,7 +260,7 @@
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -275,16 +275,16 @@
"name": "namespace_scope_ram",
"label": "RAM namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -294,7 +294,7 @@
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],

View File

@ -142,7 +142,7 @@
},
"targets": [
{
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
"refId": "A"
}
],

View File

@ -76,7 +76,7 @@
},
"targets": [
{
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)",
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)",
"refId": "A"
}
],
@ -796,7 +796,7 @@
},
"gridPos": {
"h": 2,
"w": 6,
"w": 5,
"x": 0,
"y": 8
},
@ -863,8 +863,8 @@
},
"gridPos": {
"h": 2,
"w": 6,
"x": 12,
"w": 5,
"x": 10,
"y": 8
},
"targets": [
@ -968,8 +968,8 @@
},
"gridPos": {
"h": 2,
"w": 6,
"x": 6,
"w": 5,
"x": 5,
"y": 8
},
"targets": [
@ -1044,8 +1044,8 @@
},
"gridPos": {
"h": 2,
"w": 6,
"x": 18,
"w": 5,
"x": 15,
"y": 8
},
"targets": [
@ -1110,6 +1110,81 @@
}
]
},
{
"id": 34,
"type": "stat",
"title": "Glue Jobs Stale",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 2,
"w": 4,
"x": 20,
"y": 8
},
"targets": [
{
"expr": "(sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)))",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
},
"links": [
{
"title": "Open atlas-glue dashboard",
"url": "/d/atlas-glue",
"targetBlank": true
}
]
},
{
"id": 23,
"type": "stat",
@ -1447,7 +1522,7 @@
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
},
{
@ -1457,11 +1532,11 @@
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 12,
@ -1516,7 +1591,7 @@
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
},
{
@ -1526,11 +1601,11 @@
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 13,
@ -1585,7 +1660,7 @@
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
"targetBlank": false
},
{
@ -1595,11 +1670,11 @@
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 14,
@ -2174,16 +2249,16 @@
"name": "namespace_scope_cpu",
"label": "CPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -2193,7 +2268,7 @@
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -2208,16 +2283,16 @@
"name": "namespace_scope_gpu",
"label": "GPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -2227,7 +2302,7 @@
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -2242,16 +2317,16 @@
"name": "namespace_scope_ram",
"label": "RAM namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -2261,7 +2336,7 @@
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],

View File

@ -200,7 +200,7 @@
},
"targets": [
{
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
"refId": "A"
}
],

View File

@ -0,0 +1,348 @@
# services/monitoring/grafana-dashboard-glue.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-glue
labels:
grafana_dashboard: "1"
data:
atlas-glue.json: |
{
"uid": "atlas-glue",
"title": "Atlas Glue",
"folderUid": "atlas-internal",
"editable": true,
"panels": [
{
"id": 1,
"type": "stat",
"title": "Glue Jobs Stale (>36h)",
"description": "Non-suspended glue CronJobs whose last successful run is older than 36h (129600s), plus non-suspended glue CronJobs with no recorded successful run. Each term falls back to 0 so the panel still renders when one of them has no series.",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 0
},
"targets": [
{
"expr": "((sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) or on() vector(0)) + (count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) or on() vector(0)))",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
}
},
{
"id": 2,
"type": "table",
"title": "Glue Jobs Missing Success",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 0
},
"targets": [
{
"expr": "(kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 3,
"type": "table",
"title": "Glue Jobs Suspended",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 0
},
"targets": [
{
"expr": "kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 4,
"type": "table",
"title": "Glue Jobs Active Runs",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 0
},
"targets": [
{
"expr": "kube_cronjob_status_active{label_atlas_bstein_dev_glue=\"true\"}",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 5,
"type": "table",
"title": "Glue Jobs Last Success (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 4
},
"targets": [
{
"expr": "((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 6,
"type": "table",
"title": "Glue Jobs Last Schedule (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 4
},
"targets": [
{
"expr": "((time() - kube_cronjob_status_last_schedule_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "h",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
}
],
"time": {
"from": "now-7d",
"to": "now"
},
"annotations": {
"list": []
},
"schemaVersion": 39,
"style": "dark",
"tags": [
"atlas",
"glue"
]
}

View File

@ -66,7 +66,7 @@ data:
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
},
{
@ -76,11 +76,11 @@ data:
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 2,
@ -216,16 +216,16 @@ data:
"name": "namespace_scope_cpu",
"label": "CPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -235,7 +235,7 @@ data:
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -250,16 +250,16 @@ data:
"name": "namespace_scope_gpu",
"label": "GPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -269,7 +269,7 @@ data:
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -284,16 +284,16 @@ data:
"name": "namespace_scope_ram",
"label": "RAM namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -303,7 +303,7 @@ data:
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],

View File

@ -151,7 +151,7 @@ data:
},
"targets": [
{
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
"refId": "A"
}
],

View File

@ -85,7 +85,7 @@ data:
},
"targets": [
{
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)",
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)",
"refId": "A"
}
],
@ -805,7 +805,7 @@ data:
},
"gridPos": {
"h": 2,
"w": 6,
"w": 5,
"x": 0,
"y": 8
},
@ -872,8 +872,8 @@ data:
},
"gridPos": {
"h": 2,
"w": 6,
"x": 12,
"w": 5,
"x": 10,
"y": 8
},
"targets": [
@ -977,8 +977,8 @@ data:
},
"gridPos": {
"h": 2,
"w": 6,
"x": 6,
"w": 5,
"x": 5,
"y": 8
},
"targets": [
@ -1053,8 +1053,8 @@ data:
},
"gridPos": {
"h": 2,
"w": 6,
"x": 18,
"w": 5,
"x": 15,
"y": 8
},
"targets": [
@ -1119,6 +1119,81 @@ data:
}
]
},
{
"id": 34,
"type": "stat",
"title": "Glue Jobs Stale",
"description": "Non-suspended glue CronJobs whose last successful run is older than 36h (129600s), plus non-suspended glue CronJobs with no recorded successful run. Each term falls back to 0 so the panel still renders when one of them has no series.",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 2,
"w": 4,
"x": 20,
"y": 8
},
"targets": [
{
"expr": "((sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) or on() vector(0)) + (count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) or on() vector(0)))",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
},
"links": [
{
"title": "Open atlas-glue dashboard",
"url": "/d/atlas-glue",
"targetBlank": true
}
]
},
{
"id": 23,
"type": "stat",
@ -1456,7 +1531,7 @@ data:
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
},
{
@ -1466,11 +1541,11 @@ data:
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 12,
@ -1525,7 +1600,7 @@ data:
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
},
{
@ -1535,11 +1610,11 @@ data:
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 13,
@ -1594,7 +1669,7 @@ data:
"links": [
{
"title": "Workload namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
"targetBlank": false
},
{
@ -1604,11 +1679,11 @@ data:
},
{
"title": "Infrastructure namespaces only",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
"targetBlank": false
}
],
"description": "Values are normalized within the selected scope; use panel links to switch scope."
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
},
{
"id": 14,
@ -2183,16 +2258,16 @@ data:
"name": "namespace_scope_cpu",
"label": "CPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -2202,7 +2277,7 @@ data:
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -2217,16 +2292,16 @@ data:
"name": "namespace_scope_gpu",
"label": "GPU namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -2236,7 +2311,7 @@ data:
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],
@ -2251,16 +2326,16 @@ data:
"name": "namespace_scope_ram",
"label": "RAM namespace filter",
"type": "custom",
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"current": {
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
"options": [
{
"text": "workload namespaces only",
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": true
},
{
@ -2270,7 +2345,7 @@ data:
},
{
"text": "infrastructure namespaces only",
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
"selected": false
}
],

View File

@ -209,7 +209,7 @@ data:
},
"targets": [
{
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
"refId": "A"
}
],

View File

@ -14,6 +14,7 @@ resources:
- grafana-dashboard-network.yaml
- grafana-dashboard-gpu.yaml
- grafana-dashboard-mail.yaml
- grafana-dashboard-glue.yaml
- dcgm-exporter.yaml
- jetson-tegrastats-exporter.yaml
- postmark-exporter-service.yaml

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: nextcloud-mail-sync
namespace: nextcloud
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 5 * * *"
concurrencyPolicy: Forbid

View File

@ -41,9 +41,9 @@ spec:
export OIDC_USERINFO_URI="{{ .Data.data.OIDC_USERINFO_URI }}"
{{ end }}
{{ with secret "kv/data/atlas/outline/outline-smtp" }}
export SMTP_FROM_EMAIL="{{ .Data.data.SMTP_FROM_EMAIL }}"
export SMTP_HOST="{{ .Data.data.SMTP_HOST }}"
{{ end }}
export SMTP_FROM_EMAIL="no-reply-outline@bstein.dev"
{{ with secret "kv/data/atlas/shared/postmark-relay" }}
export SMTP_USERNAME="{{ index .Data.data "relay-username" }}"
export SMTP_PASSWORD="{{ index .Data.data "relay-password" }}"

View File

@ -41,12 +41,12 @@ spec:
export OIDC_USE_OAUTH_CALLBACK="{{ .Data.data.OIDC_USE_OAUTH_CALLBACK }}"
{{ end }}
{{ with secret "kv/data/atlas/planka/planka-smtp" }}
export SMTP_FROM="{{ .Data.data.SMTP_FROM }}"
export SMTP_HOST="{{ .Data.data.SMTP_HOST }}"
export SMTP_PORT="{{ .Data.data.SMTP_PORT }}"
export SMTP_SECURE="{{ .Data.data.SMTP_SECURE }}"
export SMTP_TLS_REJECT_UNAUTHORIZED="{{ .Data.data.SMTP_TLS_REJECT_UNAUTHORIZED }}"
{{ end }}
export SMTP_FROM="no-reply-planka@bstein.dev"
{{ with secret "kv/data/atlas/shared/postmark-relay" }}
export SMTP_USER="{{ index .Data.data "relay-username" }}"
export SMTP_PASSWORD="{{ index .Data.data "relay-password" }}"

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: vault-k8s-auth-config
namespace: vault
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/15 * * * *"
concurrencyPolicy: Forbid

View File

@ -4,6 +4,8 @@ kind: CronJob
metadata:
name: vault-oidc-config
namespace: vault
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/15 * * * *"
concurrencyPolicy: Forbid