monitoring: add glue dashboard and tag cronjobs
This commit is contained in:
parent
a6ac0c363e
commit
343d41ecc7
@ -85,19 +85,17 @@ WORKER_TOTAL = len(WORKER_NODES)
|
||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||
# Namespaces considered infrastructure (excluded from workload counts)
|
||||
INFRA_NAMESPACES = [
|
||||
"kube-system",
|
||||
"longhorn-system",
|
||||
"metallb-system",
|
||||
INFRA_PATTERNS = [
|
||||
"kube-.*",
|
||||
".*-system",
|
||||
"traefik",
|
||||
"monitoring",
|
||||
"logging",
|
||||
"cert-manager",
|
||||
"flux-system",
|
||||
"traefik",
|
||||
"maintenance",
|
||||
"postgres",
|
||||
]
|
||||
INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$"
|
||||
INFRA_REGEX = f"^({'|'.join(INFRA_PATTERNS)})$"
|
||||
# Namespaces allowed on control plane without counting as workloads
|
||||
CP_ALLOWED_NS = INFRA_REGEX
|
||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||
@ -319,6 +317,21 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
|
||||
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
|
||||
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
|
||||
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
|
||||
# --- Glue cronjob health (PromQL building blocks) ---------------------------
# "Glue" cronjobs are the CronJobs carrying the atlas.bstein.dev/glue="true"
# label; kube-state-metrics exposes that as label_atlas_bstein_dev_glue.
# NOTE(review): stock kube-state-metrics only attaches label_* labels to
# kube_cronjob_labels. The selectors below assume the label is also present on
# the status/spec series -- confirm against the scrape/relabel configuration.
GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"'
GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}"
GLUE_LAST_SUCCESS = f"kube_cronjob_status_last_successful_time{{{GLUE_LABEL}}}"
GLUE_LAST_SCHEDULE = f"kube_cronjob_status_last_schedule_time{{{GLUE_LABEL}}}"
# Series only survive the comparison when the cronjob is suspended.
GLUE_SUSPENDED = f"kube_cronjob_spec_suspend{{{GLUE_LABEL}}} == 1"

# Seconds (and hours) elapsed since the last success / last schedule.
GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})"
GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})"
GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600"
GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600"

# A job is "stale" once its last success is older than this window.
GLUE_STALE_WINDOW_SEC = 36 * 3600
# `> bool` keeps every series and yields 1 (stale) or 0 (fresh), so sum() counts.
GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
# Jobs that have never reported a success. Match on the identifying labels only:
# a bare `unless` compares full label sets, and the two metric families are not
# guaranteed to carry identical labels.
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) {GLUE_LAST_SUCCESS})"
# Suspended jobs are expected to be idle, so drop them from both signals.
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
# Aggregating an empty vector yields an empty vector, and `empty + x` is empty.
# Without the vector(0) fallbacks the panel shows "No data" whenever either
# side has no series (e.g. no job is missing a success), hiding stale jobs.
GLUE_STALE_COUNT = (
    f"((sum({GLUE_STALE_ACTIVE}) or on() vector(0))"
    f" + (count({GLUE_MISSING_ACTIVE}) or on() vector(0)))"
)
|
||||
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
||||
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
||||
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
||||
@ -965,7 +978,7 @@ def build_overview():
|
||||
30,
|
||||
"Mail Sent (1d)",
|
||||
'max(postmark_outbound_sent{window="1d"})',
|
||||
{"h": 2, "w": 6, "x": 0, "y": 8},
|
||||
{"h": 2, "w": 5, "x": 0, "y": 8},
|
||||
unit="none",
|
||||
links=link_to("atlas-mail"),
|
||||
)
|
||||
@ -976,7 +989,7 @@ def build_overview():
|
||||
"type": "stat",
|
||||
"title": "Mail Bounces (1d)",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 2, "w": 6, "x": 12, "y": 8},
|
||||
"gridPos": {"h": 2, "w": 5, "x": 10, "y": 8},
|
||||
"targets": [
|
||||
{
|
||||
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
|
||||
@ -1022,7 +1035,7 @@ def build_overview():
|
||||
32,
|
||||
"Mail Success Rate (1d)",
|
||||
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
|
||||
{"h": 2, "w": 6, "x": 6, "y": 8},
|
||||
{"h": 2, "w": 5, "x": 5, "y": 8},
|
||||
unit="percent",
|
||||
thresholds=mail_success_thresholds,
|
||||
decimals=1,
|
||||
@ -1034,13 +1047,24 @@ def build_overview():
|
||||
33,
|
||||
"Mail Limit Used (30d)",
|
||||
"max(postmark_sending_limit_used_percent)",
|
||||
{"h": 2, "w": 6, "x": 18, "y": 8},
|
||||
{"h": 2, "w": 5, "x": 15, "y": 8},
|
||||
unit="percent",
|
||||
thresholds=mail_limit_thresholds,
|
||||
decimals=1,
|
||||
links=link_to("atlas-mail"),
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
34,
|
||||
"Glue Jobs Stale",
|
||||
GLUE_STALE_COUNT,
|
||||
{"h": 2, "w": 4, "x": 20, "y": 8},
|
||||
unit="none",
|
||||
thresholds=count_thresholds,
|
||||
links=link_to("atlas-glue"),
|
||||
)
|
||||
)
|
||||
|
||||
storage_panels = [
|
||||
(23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
|
||||
@ -1072,7 +1096,7 @@ def build_overview():
|
||||
namespace_cpu_share_expr(cpu_scope),
|
||||
{"h": 9, "w": 8, "x": 0, "y": 16},
|
||||
links=namespace_scope_links("namespace_scope_cpu"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -1082,7 +1106,7 @@ def build_overview():
|
||||
namespace_gpu_share_expr(gpu_scope),
|
||||
{"h": 9, "w": 8, "x": 8, "y": 16},
|
||||
links=namespace_scope_links("namespace_scope_gpu"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -1092,7 +1116,7 @@ def build_overview():
|
||||
namespace_ram_share_expr(ram_scope),
|
||||
{"h": 9, "w": 8, "x": 16, "y": 16},
|
||||
links=namespace_scope_links("namespace_scope_ram"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
|
||||
@ -2136,6 +2160,98 @@ def build_mail_dashboard():
|
||||
}
|
||||
|
||||
|
||||
def build_glue_dashboard():
    """Build the "Atlas Glue" dashboard model.

    The dashboard has one stat panel counting stale glue cronjobs, followed by
    five instant-query tables (missing success, suspended, active runs, last
    success age, last schedule age).

    Returns:
        dict: dashboard definition with uid ``atlas-glue``, placed in
        ``PRIVATE_FOLDER``, defaulting to a 7-day time range.
    """
    # Applied to every table: expand labels into columns, largest value first.
    sort_desc = [
        {"id": "labelsToFields", "options": {}},
        {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
    ]
    stale_thresholds = {
        "mode": "absolute",
        "steps": [
            {"color": "green", "value": None},
            {"color": "yellow", "value": 1},
            {"color": "orange", "value": 2},
            {"color": "red", "value": 3},
        ],
    }
    # Keep the title in step with the staleness window used by the query.
    stale_hours = GLUE_STALE_WINDOW_SEC // 3600

    panels = [
        stat_panel(
            1,
            f"Glue Jobs Stale (>{stale_hours}h)",
            GLUE_STALE_COUNT,
            {"h": 4, "w": 6, "x": 0, "y": 0},
            unit="none",
            thresholds=stale_thresholds,
        )
    ]

    # (panel id, title, PromQL expression, grid position, unit)
    table_specs = [
        (2, "Glue Jobs Missing Success", GLUE_MISSING,
         {"h": 4, "w": 6, "x": 6, "y": 0}, "none"),
        # Reuse the shared expression instead of spelling it out a second time.
        (3, "Glue Jobs Suspended", GLUE_SUSPENDED,
         {"h": 4, "w": 6, "x": 12, "y": 0}, "none"),
        (4, "Glue Jobs Active Runs", f"kube_cronjob_status_active{{{GLUE_LABEL}}}",
         {"h": 4, "w": 6, "x": 18, "y": 0}, "none"),
        (5, "Glue Jobs Last Success (hours ago)", GLUE_LAST_SUCCESS_AGE_HOURS,
         {"h": 8, "w": 12, "x": 0, "y": 4}, "h"),
        (6, "Glue Jobs Last Schedule (hours ago)", GLUE_LAST_SCHEDULE_AGE_HOURS,
         {"h": 8, "w": 12, "x": 12, "y": 4}, "h"),
    ]
    for panel_id, title, expr, grid_pos, unit in table_specs:
        panels.append(
            table_panel(
                panel_id,
                title,
                expr,
                grid_pos,
                unit=unit,
                transformations=sort_desc,
                instant=True,
            )
        )

    return {
        "uid": "atlas-glue",
        "title": "Atlas Glue",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-7d", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "glue"],
    }
|
||||
|
||||
|
||||
def build_gpu_dashboard():
|
||||
panels = []
|
||||
gpu_scope = "$namespace_scope_gpu"
|
||||
@ -2146,7 +2262,7 @@ def build_gpu_dashboard():
|
||||
namespace_gpu_share_expr(gpu_scope),
|
||||
{"h": 8, "w": 12, "x": 0, "y": 0},
|
||||
links=namespace_scope_links("namespace_scope_gpu"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -2229,6 +2345,10 @@ DASHBOARDS = {
|
||||
"builder": build_mail_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml",
|
||||
},
|
||||
"atlas-glue": {
|
||||
"builder": build_glue_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-glue.yaml",
|
||||
},
|
||||
"atlas-gpu": {
|
||||
"builder": build_gpu_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: vaultwarden-cred-sync
|
||||
namespace: bstein-dev-home
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "*/15 * * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: guest-name-randomizer
|
||||
namespace: comms
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "*/1 * * * *"
|
||||
suspend: false
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: pin-othrys-invite
|
||||
namespace: comms
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "*/30 * * * *"
|
||||
suspend: true
|
||||
@ -164,4 +166,4 @@ spec:
|
||||
- name: vault-scripts
|
||||
configMap:
|
||||
name: comms-vault-env
|
||||
defaultMode: 0555
|
||||
defaultMode: 0555
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: othrys-room-reset
|
||||
namespace: comms
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "0 0 1 1 *"
|
||||
suspend: true
|
||||
@ -307,4 +309,4 @@ spec:
|
||||
- name: vault-scripts
|
||||
configMap:
|
||||
name: comms-vault-env
|
||||
defaultMode: 0555
|
||||
defaultMode: 0555
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: seed-othrys-room
|
||||
namespace: comms
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "*/10 * * * *"
|
||||
suspend: true
|
||||
@ -180,4 +182,4 @@ spec:
|
||||
- name: vault-scripts
|
||||
configMap:
|
||||
name: comms-vault-env
|
||||
defaultMode: 0555
|
||||
defaultMode: 0555
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: firefly-user-sync
|
||||
namespace: finance
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "0 6 * * *"
|
||||
suspend: true
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: wger-admin-ensure
|
||||
namespace: health
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "15 3 * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: wger-user-sync
|
||||
namespace: health
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "0 5 * * *"
|
||||
suspend: true
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: mailu-sync-nightly
|
||||
namespace: mailu-mailserver
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "30 4 * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
@ -79,4 +81,4 @@ spec:
|
||||
- name: vault-scripts
|
||||
configMap:
|
||||
name: mailu-vault-env
|
||||
defaultMode: 0555
|
||||
defaultMode: 0555
|
||||
|
||||
339
services/monitoring/dashboards/atlas-glue.json
Normal file
339
services/monitoring/dashboards/atlas-glue.json
Normal file
@ -0,0 +1,339 @@
|
||||
{
|
||||
"uid": "atlas-glue",
|
||||
"title": "Atlas Glue",
|
||||
"folderUid": "atlas-internal",
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Glue Jobs Stale (>36h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 3
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Missing Success",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Suspended",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Active Runs",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "kube_cronjob_status_active{label_atlas_bstein_dev_glue=\"true\"}",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Last Success (hours ago)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "h",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Last Schedule (hours ago)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 4
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - kube_cronjob_status_last_schedule_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "h",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now-7d",
|
||||
"to": "now"
|
||||
},
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"atlas",
|
||||
"glue"
|
||||
]
|
||||
}
|
||||
@ -57,7 +57,7 @@
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -67,11 +67,11 @@
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
@ -207,16 +207,16 @@
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -226,7 +226,7 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -241,16 +241,16 @@
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -260,7 +260,7 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -275,16 +275,16 @@
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -294,7 +294,7 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
|
||||
@ -142,7 +142,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -76,7 +76,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -796,7 +796,7 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"w": 5,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
@ -863,8 +863,8 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"w": 5,
|
||||
"x": 10,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -968,8 +968,8 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"w": 5,
|
||||
"x": 5,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1044,8 +1044,8 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"w": 5,
|
||||
"x": 15,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1110,6 +1110,81 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 34,
|
||||
"type": "stat",
|
||||
"title": "Glue Jobs Stale",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 3
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Open atlas-glue dashboard",
|
||||
"url": "/d/atlas-glue",
|
||||
"targetBlank": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"type": "stat",
|
||||
@ -1447,7 +1522,7 @@
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -1457,11 +1532,11 @@
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
@ -1516,7 +1591,7 @@
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -1526,11 +1601,11 @@
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
@ -1585,7 +1660,7 @@
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -1595,11 +1670,11 @@
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
@ -2174,16 +2249,16 @@
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -2193,7 +2268,7 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -2208,16 +2283,16 @@
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -2227,7 +2302,7 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -2242,16 +2317,16 @@
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -2261,7 +2336,7 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
|
||||
@ -200,7 +200,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
348
services/monitoring/grafana-dashboard-glue.yaml
Normal file
348
services/monitoring/grafana-dashboard-glue.yaml
Normal file
@ -0,0 +1,348 @@
|
||||
# services/monitoring/grafana-dashboard-glue.yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-glue
|
||||
labels:
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
atlas-glue.json: |
|
||||
{
|
||||
"uid": "atlas-glue",
|
||||
"title": "Atlas Glue",
|
||||
"folderUid": "atlas-internal",
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Glue Jobs Stale (>36h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 3
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Missing Success",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Suspended",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Active Runs",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "kube_cronjob_status_active{label_atlas_bstein_dev_glue=\"true\"}",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Last Success (hours ago)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "h",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "table",
|
||||
"title": "Glue Jobs Last Schedule (hours ago)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 4
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "((time() - kube_cronjob_status_last_schedule_time{label_atlas_bstein_dev_glue=\"true\"})) / 3600",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "h",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now-7d",
|
||||
"to": "now"
|
||||
},
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"atlas",
|
||||
"glue"
|
||||
]
|
||||
}
|
||||
@ -66,7 +66,7 @@ data:
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -76,11 +76,11 @@ data:
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
@ -216,16 +216,16 @@ data:
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -235,7 +235,7 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -250,16 +250,16 @@ data:
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -269,7 +269,7 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -284,16 +284,16 @@ data:
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -303,7 +303,7 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
|
||||
@ -151,7 +151,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -85,7 +85,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -805,7 +805,7 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"w": 5,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
@ -872,8 +872,8 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"w": 5,
|
||||
"x": 10,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -977,8 +977,8 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"w": 5,
|
||||
"x": 5,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1053,8 +1053,8 @@ data:
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"w": 5,
|
||||
"x": 15,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
@ -1119,6 +1119,81 @@ data:
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 34,
|
||||
"type": "stat",
|
||||
"title": "Glue Jobs Stale",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 2,
|
||||
"w": 4,
|
||||
"x": 20,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum((((time() - kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) > bool 129600) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless kube_cronjob_status_last_successful_time{label_atlas_bstein_dev_glue=\"true\"}) unless on(namespace,cronjob) kube_cronjob_spec_suspend{label_atlas_bstein_dev_glue=\"true\"} == 1)))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 3
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Open atlas-glue dashboard",
|
||||
"url": "/d/atlas-glue",
|
||||
"targetBlank": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"type": "stat",
|
||||
@ -1456,7 +1531,7 @@ data:
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -1466,11 +1541,11 @@ data:
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
@ -1525,7 +1600,7 @@ data:
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -1535,11 +1610,11 @@ data:
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
@ -1594,7 +1669,7 @@ data:
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
@ -1604,11 +1679,11 @@ data:
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Values are normalized within the selected scope; use panel links to switch scope."
|
||||
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
@ -2183,16 +2258,16 @@ data:
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -2202,7 +2277,7 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -2217,16 +2292,16 @@ data:
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -2236,7 +2311,7 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
@ -2251,16 +2326,16 @@ data:
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -2270,7 +2345,7 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"",
|
||||
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
|
||||
@ -209,7 +209,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -14,6 +14,7 @@ resources:
|
||||
- grafana-dashboard-network.yaml
|
||||
- grafana-dashboard-gpu.yaml
|
||||
- grafana-dashboard-mail.yaml
|
||||
- grafana-dashboard-glue.yaml
|
||||
- dcgm-exporter.yaml
|
||||
- jetson-tegrastats-exporter.yaml
|
||||
- postmark-exporter-service.yaml
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: nextcloud-mail-sync
|
||||
namespace: nextcloud
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "0 5 * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
|
||||
@ -41,9 +41,9 @@ spec:
|
||||
export OIDC_USERINFO_URI="{{ .Data.data.OIDC_USERINFO_URI }}"
|
||||
{{ end }}
|
||||
{{ with secret "kv/data/atlas/outline/outline-smtp" }}
|
||||
export SMTP_FROM_EMAIL="{{ .Data.data.SMTP_FROM_EMAIL }}"
|
||||
export SMTP_HOST="{{ .Data.data.SMTP_HOST }}"
|
||||
{{ end }}
|
||||
export SMTP_FROM_EMAIL="no-reply-outline@bstein.dev"
|
||||
{{ with secret "kv/data/atlas/shared/postmark-relay" }}
|
||||
export SMTP_USERNAME="{{ index .Data.data "relay-username" }}"
|
||||
export SMTP_PASSWORD="{{ index .Data.data "relay-password" }}"
|
||||
|
||||
@ -41,12 +41,12 @@ spec:
|
||||
export OIDC_USE_OAUTH_CALLBACK="{{ .Data.data.OIDC_USE_OAUTH_CALLBACK }}"
|
||||
{{ end }}
|
||||
{{ with secret "kv/data/atlas/planka/planka-smtp" }}
|
||||
export SMTP_FROM="{{ .Data.data.SMTP_FROM }}"
|
||||
export SMTP_HOST="{{ .Data.data.SMTP_HOST }}"
|
||||
export SMTP_PORT="{{ .Data.data.SMTP_PORT }}"
|
||||
export SMTP_SECURE="{{ .Data.data.SMTP_SECURE }}"
|
||||
export SMTP_TLS_REJECT_UNAUTHORIZED="{{ .Data.data.SMTP_TLS_REJECT_UNAUTHORIZED }}"
|
||||
{{ end }}
|
||||
export SMTP_FROM="no-reply-planka@bstein.dev"
|
||||
{{ with secret "kv/data/atlas/shared/postmark-relay" }}
|
||||
export SMTP_USER="{{ index .Data.data "relay-username" }}"
|
||||
export SMTP_PASSWORD="{{ index .Data.data "relay-password" }}"
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: vault-k8s-auth-config
|
||||
namespace: vault
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "*/15 * * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
|
||||
@ -4,6 +4,8 @@ kind: CronJob
|
||||
metadata:
|
||||
name: vault-oidc-config
|
||||
namespace: vault
|
||||
labels:
|
||||
atlas.bstein.dev/glue: "true"
|
||||
spec:
|
||||
schedule: "*/15 * * * *"
|
||||
concurrencyPolicy: Forbid
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user