Compare commits
383 Commits
main
...
feature/at
| Author | SHA1 | Date | |
|---|---|---|---|
| fb5064fa17 | |||
| e29b31ff42 | |||
| ab33af3401 | |||
| f275764b15 | |||
|
|
d082bee3bc | ||
| b1ecc3eef8 | |||
|
|
6e6beb071b | ||
| c9a9c801ec | |||
| 4edc888246 | |||
| 917ee077ad | |||
| 8c931f6a58 | |||
| 7ec1b812d6 | |||
| 62376138dd | |||
| 0bab0deedf | |||
| 411ad0e4ba | |||
| 7c419748b7 | |||
| c901a0a0cb | |||
| 6df05c9adc | |||
| 578ccd97e5 | |||
| 5e0c5b200c | |||
| 28278d6c67 | |||
| e3ab256336 | |||
|
|
dd02a49626 | ||
| d430a480f0 | |||
| 0d09492984 | |||
| 45b2c79c72 | |||
|
|
aad8e11b37 | ||
| cfebff5f08 | |||
| e463674ca9 | |||
|
|
a6ceaa4cf1 | ||
|
|
64b70bf391 | ||
| 543880d06f | |||
|
|
873f392b88 | ||
|
|
286925857a | ||
| 36311b877b | |||
| cd2e2dff17 | |||
| b5a357d477 | |||
| bda5871035 | |||
| 89490d5aa5 | |||
|
|
97de9b6d18 | ||
|
|
e5ceb234c3 | ||
|
|
a714c9994a | ||
| ced6d511ff | |||
|
|
e0600baa4b | ||
|
|
557ccb7bbd | ||
| c16113088e | |||
|
|
18524a0065 | ||
|
|
0a7e05a735 | ||
|
|
14d90298e8 | ||
|
|
2523ebee2a | ||
|
|
76f27b7eed | ||
|
|
4abf16687b | ||
|
|
b20922b3ec | ||
|
|
6007050545 | ||
|
|
1b9c78166e | ||
|
|
b8c5f547aa | ||
|
|
8bc999a7f2 | ||
|
|
93cb39cd23 | ||
|
|
6c84d63500 | ||
|
|
9b341a865d | ||
|
|
66541c29ca | ||
|
|
00c8be0dd8 | ||
|
|
57d672c264 | ||
|
|
3252409a7b | ||
|
|
e656120be9 | ||
|
|
49151ad13e | ||
|
|
9b7778f193 | ||
|
|
56cd01f4d1 | ||
|
|
c6c1ec9129 | ||
|
|
3a39e0972e | ||
|
|
5d87aefc4b | ||
|
|
cf36ed6279 | ||
|
|
d88648bdf8 | ||
|
|
d15779e6dc | ||
|
|
2992b8c581 | ||
|
|
0cc49081ff | ||
|
|
e6b8e4d39e | ||
|
|
cb42182358 | ||
|
|
12b81b2f0d | ||
|
|
454017d7ea | ||
|
|
4ebf2ad742 | ||
|
|
999be05fd9 | ||
|
|
cdb94ee7a4 | ||
|
|
e259ab8a8d | ||
|
|
8630e626fe | ||
|
|
3d655dda4f | ||
|
|
0f935f7a78 | ||
|
|
e4629ec198 | ||
|
|
30b024dfc1 | ||
|
|
194404619b | ||
|
|
77919cbf20 | ||
|
|
51db4e0612 | ||
|
|
42adbe98c0 | ||
|
|
65e3947f5a | ||
|
|
2b52d07f95 | ||
|
|
ce020e06c0 | ||
|
|
f5437db369 | ||
|
|
455a58b982 | ||
|
|
3e044ed3fc | ||
|
|
68d794c909 | ||
|
|
709ec5d039 | ||
|
|
33b9d678c1 | ||
|
|
c2ffad3937 | ||
|
|
ed73f69d60 | ||
|
|
773a9526dc | ||
| 80d7c585e1 | |||
|
|
1272357177 | ||
|
|
47e2d706c4 | ||
| ca5393bf4c | |||
| dc83ead648 | |||
| 72d3dffd1e | |||
| a2833f3c26 | |||
|
|
98c5981869 | ||
|
|
53088cc82d | ||
| 0ae534e387 | |||
|
|
e5fbc8f6ed | ||
|
|
791a14a9e5 | ||
|
|
9f29205201 | ||
|
|
bae9b1bfc2 | ||
| 44f376b492 | |||
|
|
e0410cfa33 | ||
|
|
45b86a3478 | ||
| 8523f7bc91 | |||
| f2b8f79a7a | |||
| d6a4c7f888 | |||
| 7bd069cb3b | |||
|
|
80e94c7d67 | ||
| 157c93f2a9 | |||
|
|
c259c5abe4 | ||
|
|
f1d628682b | ||
|
|
d375d8a680 | ||
|
|
9c297f6609 | ||
| 97b2385aa2 | |||
| 3cdde19de0 | |||
|
|
af99e0e315 | ||
|
|
7905da3b9a | ||
|
|
0bff5b0835 | ||
| 69744225bb | |||
|
|
b15c9a6a63 | ||
|
|
23a67f0ddf | ||
| f8c04770a3 | |||
|
|
e489ffca7c | ||
| 80cb818fa2 | |||
|
|
d295eec276 | ||
| 896b4c4890 | |||
|
|
82f9147c7f | ||
| 1f476b8541 | |||
|
|
d80e6bb6b6 | ||
| 9fe547aa09 | |||
| 090a22a0b5 | |||
| 05d6ee9d6e | |||
| a7c1774044 | |||
| b3b4cbecdd | |||
| 13f59fb5e7 | |||
| fa8777d056 | |||
| 4f2ae810a5 | |||
|
|
382ccfe0f1 | ||
|
|
af61d2109d | ||
|
|
c98b69e368 | ||
| ccd92f6014 | |||
|
|
ab672773dd | ||
|
|
d1a490a80a | ||
|
|
a9d235695a | ||
| 1f19ae46f5 | |||
|
|
f833b61a76 | ||
| b9d660fc9a | |||
| 300b13f995 | |||
| 17d8ca3b2a | |||
|
|
27c7aade1c | ||
| dae28077b5 | |||
| b202dacfb1 | |||
| d72d21268b | |||
| 58dc219452 | |||
| 7c6a731c7c | |||
| f0259086fd | |||
|
|
c3cf0e7900 | ||
|
|
a0e52401fc | ||
|
|
51fc85587a | ||
| 5d9526af73 | |||
|
|
b537a7def8 | ||
| 2f2a6f9132 | |||
|
|
01d0a16210 | ||
| d1ac654e99 | |||
|
|
42a8c48426 | ||
| 0cc155cfcc | |||
| c7189bff8c | |||
| 3ca0df9529 | |||
|
|
1c9259b5b4 | ||
|
|
387bfadc76 | ||
| 50bdd18c56 | |||
| 19d22abd0f | |||
| fd8396730c | |||
|
|
dcabfb2ebb | ||
|
|
5d5b5c031e | ||
|
|
d79cce72af | ||
| 9147d4107f | |||
|
|
a21e58ad2c | ||
|
|
10db6b4973 | ||
| 7c6a91d758 | |||
|
|
ced41aa633 | ||
| cb2026b511 | |||
|
|
38d826e4ea | ||
|
|
4a30c5c706 | ||
| c43efe20dc | |||
| a80047060f | |||
|
|
00b84f3b89 | ||
|
|
06a559c7ea | ||
| eae548bdd1 | |||
|
|
8024db9d11 | ||
|
|
5dcbef6be3 | ||
|
|
f3420b79bf | ||
|
|
e7588ac06f | ||
|
|
9272dcf0e6 | ||
| 76e44e949c | |||
| 0915ed2205 | |||
|
|
187c0c2c88 | ||
|
|
ae12b18d6f | ||
| 080e583cf9 | |||
|
|
a1b4c878c2 | ||
| 22bfb15bd6 | |||
|
|
8948523474 | ||
| 303defd745 | |||
|
|
5fb9e77634 | ||
| 454a759cda | |||
|
|
26ff18e983 | ||
| 0b77c3bc2c | |||
|
|
58eecc72bd | ||
| 2e0ac03458 | |||
|
|
8e74a4c0b3 | ||
| 0aed7fe3ee | |||
|
|
867172600f | ||
|
|
d915a2a9d7 | ||
| 4e92168abf | |||
| ddf51be7aa | |||
|
|
20447bb33b | ||
| 5863e09ec8 | |||
| 0cf33797ae | |||
| 71fbd8a6c1 | |||
| d1f06aeb1b | |||
|
|
ce39b3ae98 | ||
| 6021f2f283 | |||
|
|
4629548f99 | ||
| 1e0a2628b6 | |||
|
|
8594b736ec | ||
|
|
9df8ae2d38 | ||
|
|
d890b09dcd | ||
| 080b94aa8b | |||
|
|
d7070647aa | ||
|
|
3b321c5320 | ||
| 3e2699758f | |||
| 2c4ad486a5 | |||
|
|
85f749a481 | ||
| 75d3946276 | |||
| e95ea959bb | |||
|
|
63bf109acb | ||
| 982b401a8c | |||
| f4684092be | |||
|
|
97f0f0b508 | ||
| f1f0543885 | |||
|
|
241c305034 | ||
|
|
444cd3a04b | ||
|
|
f11329ad02 | ||
| af055bacb4 | |||
| da08a2687d | |||
| e733ee64da | |||
| 3ef7eed9a9 | |||
| 6a11b0fcfb | |||
| c4f404df23 | |||
|
|
ae2f7cadeb | ||
| 3c63722a4b | |||
|
|
86c859bc14 | ||
|
|
7d0537f2a2 | ||
|
|
a8424251aa | ||
|
|
180f814520 | ||
|
|
0c3d2f918d | ||
|
|
20b3193cb8 | ||
| a851e184ca | |||
|
|
a5d4c63cd3 | ||
|
|
27a8a701ef | ||
|
|
0ee2b8b059 | ||
|
|
beefc5ab31 | ||
| 92d557ed98 | |||
|
|
ed9a84aaa6 | ||
| f5b6c7ed97 | |||
|
|
22c90e301e | ||
| fadf071b31 | |||
|
|
5c69ea62a2 | ||
| 04550a116d | |||
|
|
05005ac676 | ||
| 215edef09d | |||
| f2b0f76d15 | |||
|
|
f3f80ee114 | ||
| 9affb59632 | |||
| 25c0202743 | |||
| bc9ee3138a | |||
| 8ee96e02c4 | |||
| b476f757f4 | |||
| d5cbc823c6 | |||
| 70048e1081 | |||
| 7b5ac0fbb5 | |||
| 96b1ceeb97 | |||
|
|
64782af56c | ||
| ce3332503b | |||
| 8351945510 | |||
| 7152829271 | |||
|
|
b68b778532 | ||
|
|
61354ef81b | ||
|
|
939e5cff89 | ||
|
|
c630a399ac | ||
|
|
79999c959b | ||
|
|
7f7a0e1a52 | ||
|
|
8e572bf910 | ||
| 97779b655d | |||
|
|
046cc509d9 | ||
|
|
254cc446af | ||
|
|
a8636be9a1 | ||
| ad4cf69498 | |||
|
|
c7fc5fd411 | ||
| 2828b19cf7 | |||
|
|
fbb1df323b | ||
|
|
150036626b | ||
| 91e6d5740d | |||
|
|
a108590d7a | ||
|
|
22c53aebdd | ||
|
|
8325797b3c | ||
|
|
5498e7d91b | ||
| bc817a3c6b | |||
|
|
a39bc77344 | ||
|
|
8ff45092cb | ||
|
|
a8677f36c6 | ||
|
|
6391633484 | ||
|
|
cacf4427e9 | ||
|
|
9a5e20bd5c | ||
|
|
aec7fa43d6 | ||
|
|
74cd39d183 | ||
| d286d169a0 | |||
| 454c7fbd20 | |||
| 2eca794ccc | |||
| 9787c19fac | |||
| 1aceaed741 | |||
| 9662d36ad3 | |||
| caa6b73336 | |||
| b2c7ca8cf1 | |||
| 22670e4730 | |||
| 87c6e085a4 | |||
| bb0acd4f60 | |||
| 171356a351 | |||
| 250fe22288 | |||
| 433f3ef3d6 | |||
| 1d248bf91a | |||
| 06cda3f540 | |||
| 55aa8eb0bb | |||
| 5e24ec17c9 | |||
| 5cf843cb6a | |||
|
|
ee6a6fae8d | ||
| baaa5dc79f | |||
| ebf0e4faaa | |||
| a20f3cc0ce | |||
| 78c72a71a2 | |||
| 26e35ffbaf | |||
| fb2feec1b5 | |||
| f94926a387 | |||
|
|
ded56ccf89 | ||
| 44788b3132 | |||
|
|
f913956d08 | ||
| cead8d3561 | |||
| 88a4e93194 | |||
| 976b5994dd | |||
| 1186722bb5 | |||
|
|
1f660f9dd5 | ||
|
|
1c480edb47 | ||
| 55d01ee539 | |||
|
|
d56438fe06 | ||
| cfa2b4c08b | |||
|
|
47bce5483c | ||
| ee3543e70d | |||
|
|
a97b58bf2d | ||
| 780cdd6c1b | |||
|
|
5cec8cbe32 | ||
| d9e34582ec | |||
| 2d0e1fab34 | |||
| 6350a07cc5 | |||
| c4ecc07e58 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,6 +2,7 @@
|
|||||||
!README.md
|
!README.md
|
||||||
!knowledge/**/*.md
|
!knowledge/**/*.md
|
||||||
!services/comms/knowledge/**/*.md
|
!services/comms/knowledge/**/*.md
|
||||||
|
!services/atlasbot/knowledge/**/*.md
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
.pytest_cache
|
.pytest_cache
|
||||||
|
|||||||
@ -0,0 +1,26 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/atlasbot/image-automation.yaml
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1
|
||||||
|
kind: ImageUpdateAutomation
|
||||||
|
metadata:
|
||||||
|
name: atlasbot
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
interval: 1m0s
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
git:
|
||||||
|
checkout:
|
||||||
|
ref:
|
||||||
|
branch: feature/atlasbot
|
||||||
|
commit:
|
||||||
|
author:
|
||||||
|
email: ops@bstein.dev
|
||||||
|
name: flux-bot
|
||||||
|
messageTemplate: "chore(atlasbot): automated image update"
|
||||||
|
push:
|
||||||
|
branch: feature/atlasbot
|
||||||
|
update:
|
||||||
|
strategy: Setters
|
||||||
|
path: services/atlasbot
|
||||||
@ -0,0 +1,17 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/atlasbot/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: atlasbot
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
path: ./services/atlasbot
|
||||||
|
targetNamespace: ai
|
||||||
|
timeout: 2m
|
||||||
|
dependsOn:
|
||||||
|
- name: ai-llm
|
||||||
@ -13,14 +13,14 @@ spec:
|
|||||||
git:
|
git:
|
||||||
checkout:
|
checkout:
|
||||||
ref:
|
ref:
|
||||||
branch: feature/ariadne
|
branch: feature/atlasbot
|
||||||
commit:
|
commit:
|
||||||
author:
|
author:
|
||||||
email: ops@bstein.dev
|
email: ops@bstein.dev
|
||||||
name: flux-bot
|
name: flux-bot
|
||||||
messageTemplate: "chore(bstein-dev-home): automated image update"
|
messageTemplate: "chore(bstein-dev-home): automated image update"
|
||||||
push:
|
push:
|
||||||
branch: feature/ariadne
|
branch: feature/atlasbot
|
||||||
update:
|
update:
|
||||||
strategy: Setters
|
strategy: Setters
|
||||||
path: services/bstein-dev-home
|
path: services/bstein-dev-home
|
||||||
|
|||||||
@ -0,0 +1,26 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/comms/image-automation.yaml
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1
|
||||||
|
kind: ImageUpdateAutomation
|
||||||
|
metadata:
|
||||||
|
name: comms
|
||||||
|
namespace: comms
|
||||||
|
spec:
|
||||||
|
interval: 1m0s
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
git:
|
||||||
|
checkout:
|
||||||
|
ref:
|
||||||
|
branch: feature/atlasbot
|
||||||
|
commit:
|
||||||
|
author:
|
||||||
|
email: ops@bstein.dev
|
||||||
|
name: flux-bot
|
||||||
|
messageTemplate: "chore(comms): automated image update"
|
||||||
|
push:
|
||||||
|
branch: feature/atlasbot
|
||||||
|
update:
|
||||||
|
strategy: Setters
|
||||||
|
path: services/comms
|
||||||
@ -6,6 +6,9 @@ resources:
|
|||||||
- vault/kustomization.yaml
|
- vault/kustomization.yaml
|
||||||
- vaultwarden/kustomization.yaml
|
- vaultwarden/kustomization.yaml
|
||||||
- comms/kustomization.yaml
|
- comms/kustomization.yaml
|
||||||
|
- comms/image-automation.yaml
|
||||||
|
- atlasbot/kustomization.yaml
|
||||||
|
- atlasbot/image-automation.yaml
|
||||||
- crypto/kustomization.yaml
|
- crypto/kustomization.yaml
|
||||||
- monerod/kustomization.yaml
|
- monerod/kustomization.yaml
|
||||||
- pegasus/kustomization.yaml
|
- pegasus/kustomization.yaml
|
||||||
|
|||||||
@ -9,7 +9,7 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
interval: 1m0s
|
interval: 1m0s
|
||||||
ref:
|
ref:
|
||||||
branch: feature/ariadne
|
branch: feature/atlasbot
|
||||||
secretRef:
|
secretRef:
|
||||||
name: flux-system-gitea
|
name: flux-system-gitea
|
||||||
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
||||||
|
|||||||
@ -16,5 +16,6 @@ resources:
|
|||||||
- longhorn/kustomization.yaml
|
- longhorn/kustomization.yaml
|
||||||
- longhorn-ui/kustomization.yaml
|
- longhorn-ui/kustomization.yaml
|
||||||
- postgres/kustomization.yaml
|
- postgres/kustomization.yaml
|
||||||
|
- nats/kustomization.yaml
|
||||||
- ../platform/vault-csi/kustomization.yaml
|
- ../platform/vault-csi/kustomization.yaml
|
||||||
- ../platform/vault-injector/kustomization.yaml
|
- ../platform/vault-injector/kustomization.yaml
|
||||||
|
|||||||
@ -13,14 +13,14 @@ spec:
|
|||||||
git:
|
git:
|
||||||
checkout:
|
checkout:
|
||||||
ref:
|
ref:
|
||||||
branch: feature/ariadne
|
branch: feature/atlasbot
|
||||||
commit:
|
commit:
|
||||||
author:
|
author:
|
||||||
email: ops@bstein.dev
|
email: ops@bstein.dev
|
||||||
name: flux-bot
|
name: flux-bot
|
||||||
messageTemplate: "chore(maintenance): automated image update"
|
messageTemplate: "chore(maintenance): automated image update"
|
||||||
push:
|
push:
|
||||||
branch: feature/ariadne
|
branch: feature/atlasbot
|
||||||
update:
|
update:
|
||||||
strategy: Setters
|
strategy: Setters
|
||||||
path: services/maintenance
|
path: services/maintenance
|
||||||
|
|||||||
21
clusters/atlas/flux-system/platform/nats/kustomization.yaml
Normal file
21
clusters/atlas/flux-system/platform/nats/kustomization.yaml
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# clusters/atlas/flux-system/platform/nats/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: nats
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
path: ./infrastructure/nats
|
||||||
|
prune: true
|
||||||
|
force: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
targetNamespace: nats
|
||||||
|
healthChecks:
|
||||||
|
- apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
name: nats
|
||||||
|
namespace: nats
|
||||||
|
wait: true
|
||||||
3
dockerfiles/Dockerfile.synapse-admin-ensure
Normal file
3
dockerfiles/Dockerfile.synapse-admin-ensure
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir psycopg2-binary bcrypt
|
||||||
@ -6,6 +6,7 @@ resources:
|
|||||||
- ../modules/profiles/atlas-ha
|
- ../modules/profiles/atlas-ha
|
||||||
- coredns-custom.yaml
|
- coredns-custom.yaml
|
||||||
- coredns-deployment.yaml
|
- coredns-deployment.yaml
|
||||||
|
- longhorn-node-taints.yaml
|
||||||
- ntp-sync-daemonset.yaml
|
- ntp-sync-daemonset.yaml
|
||||||
- ../sources/cert-manager/letsencrypt.yaml
|
- ../sources/cert-manager/letsencrypt.yaml
|
||||||
- ../sources/cert-manager/letsencrypt-prod.yaml
|
- ../sources/cert-manager/letsencrypt-prod.yaml
|
||||||
|
|||||||
40
infrastructure/core/longhorn-node-taints.yaml
Normal file
40
infrastructure/core/longhorn-node-taints.yaml
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# infrastructure/core/longhorn-node-taints.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Node
|
||||||
|
metadata:
|
||||||
|
name: titan-13
|
||||||
|
spec:
|
||||||
|
taints:
|
||||||
|
- key: longhorn
|
||||||
|
value: "true"
|
||||||
|
effect: PreferNoSchedule
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Node
|
||||||
|
metadata:
|
||||||
|
name: titan-15
|
||||||
|
spec:
|
||||||
|
taints:
|
||||||
|
- key: longhorn
|
||||||
|
value: "true"
|
||||||
|
effect: PreferNoSchedule
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Node
|
||||||
|
metadata:
|
||||||
|
name: titan-17
|
||||||
|
spec:
|
||||||
|
taints:
|
||||||
|
- key: longhorn
|
||||||
|
value: "true"
|
||||||
|
effect: PreferNoSchedule
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Node
|
||||||
|
metadata:
|
||||||
|
name: titan-19
|
||||||
|
spec:
|
||||||
|
taints:
|
||||||
|
- key: longhorn
|
||||||
|
value: "true"
|
||||||
|
effect: PreferNoSchedule
|
||||||
10
infrastructure/longhorn/core/backup-target.yaml
Normal file
10
infrastructure/longhorn/core/backup-target.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# infrastructure/longhorn/core/backup-target.yaml
|
||||||
|
apiVersion: longhorn.io/v1beta2
|
||||||
|
kind: BackupTarget
|
||||||
|
metadata:
|
||||||
|
name: default
|
||||||
|
namespace: longhorn-system
|
||||||
|
spec:
|
||||||
|
backupTargetURL: "s3://atlas-soteria@us-west-004/"
|
||||||
|
credentialSecret: longhorn-backup-b2
|
||||||
|
pollInterval: 5m0s
|
||||||
@ -6,6 +6,7 @@ resources:
|
|||||||
- vault-serviceaccount.yaml
|
- vault-serviceaccount.yaml
|
||||||
- secretproviderclass.yaml
|
- secretproviderclass.yaml
|
||||||
- vault-sync-deployment.yaml
|
- vault-sync-deployment.yaml
|
||||||
|
- backup-target.yaml
|
||||||
- helmrelease.yaml
|
- helmrelease.yaml
|
||||||
- longhorn-settings-ensure-job.yaml
|
- longhorn-settings-ensure-job.yaml
|
||||||
|
|
||||||
|
|||||||
@ -13,9 +13,27 @@ spec:
|
|||||||
- objectName: "harbor-pull__dockerconfigjson"
|
- objectName: "harbor-pull__dockerconfigjson"
|
||||||
secretPath: "kv/data/atlas/shared/harbor-pull"
|
secretPath: "kv/data/atlas/shared/harbor-pull"
|
||||||
secretKey: "dockerconfigjson"
|
secretKey: "dockerconfigjson"
|
||||||
|
- objectName: "longhorn_backup__AWS_ACCESS_KEY_ID"
|
||||||
|
secretPath: "kv/data/atlas/longhorn/backup-b2"
|
||||||
|
secretKey: "AWS_ACCESS_KEY_ID"
|
||||||
|
- objectName: "longhorn_backup__AWS_SECRET_ACCESS_KEY"
|
||||||
|
secretPath: "kv/data/atlas/longhorn/backup-b2"
|
||||||
|
secretKey: "AWS_SECRET_ACCESS_KEY"
|
||||||
|
- objectName: "longhorn_backup__AWS_ENDPOINTS"
|
||||||
|
secretPath: "kv/data/atlas/longhorn/backup-b2"
|
||||||
|
secretKey: "AWS_ENDPOINTS"
|
||||||
secretObjects:
|
secretObjects:
|
||||||
- secretName: longhorn-registry
|
- secretName: longhorn-registry
|
||||||
type: kubernetes.io/dockerconfigjson
|
type: kubernetes.io/dockerconfigjson
|
||||||
data:
|
data:
|
||||||
- objectName: harbor-pull__dockerconfigjson
|
- objectName: harbor-pull__dockerconfigjson
|
||||||
key: .dockerconfigjson
|
key: .dockerconfigjson
|
||||||
|
- secretName: longhorn-backup-b2
|
||||||
|
type: Opaque
|
||||||
|
data:
|
||||||
|
- objectName: longhorn_backup__AWS_ACCESS_KEY_ID
|
||||||
|
key: AWS_ACCESS_KEY_ID
|
||||||
|
- objectName: longhorn_backup__AWS_SECRET_ACCESS_KEY
|
||||||
|
key: AWS_SECRET_ACCESS_KEY
|
||||||
|
- objectName: longhorn_backup__AWS_ENDPOINTS
|
||||||
|
key: AWS_ENDPOINTS
|
||||||
|
|||||||
17
infrastructure/nats/configmap.yaml
Normal file
17
infrastructure/nats/configmap.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: nats-config
|
||||||
|
namespace: nats
|
||||||
|
labels:
|
||||||
|
app: nats
|
||||||
|
component: config
|
||||||
|
annotations:
|
||||||
|
description: "NATS JetStream configuration"
|
||||||
|
data:
|
||||||
|
nats.conf: |
|
||||||
|
jetstream {
|
||||||
|
store_dir: /data
|
||||||
|
max_mem_store: 128MB
|
||||||
|
max_file_store: 1GB
|
||||||
|
}
|
||||||
7
infrastructure/nats/kustomization.yaml
Normal file
7
infrastructure/nats/kustomization.yaml
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- namespace.yaml
|
||||||
|
- configmap.yaml
|
||||||
|
- service.yaml
|
||||||
|
- statefulset.yaml
|
||||||
4
infrastructure/nats/namespace.yaml
Normal file
4
infrastructure/nats/namespace.yaml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: nats
|
||||||
17
infrastructure/nats/service.yaml
Normal file
17
infrastructure/nats/service.yaml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: nats
|
||||||
|
namespace: nats
|
||||||
|
labels:
|
||||||
|
app: nats
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: nats
|
||||||
|
ports:
|
||||||
|
- name: client
|
||||||
|
port: 4222
|
||||||
|
targetPort: 4222
|
||||||
|
- name: monitoring
|
||||||
|
port: 8222
|
||||||
|
targetPort: 8222
|
||||||
54
infrastructure/nats/statefulset.yaml
Normal file
54
infrastructure/nats/statefulset.yaml
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: nats
|
||||||
|
namespace: nats
|
||||||
|
labels:
|
||||||
|
app: nats
|
||||||
|
spec:
|
||||||
|
serviceName: nats
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: nats
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: nats
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: nats
|
||||||
|
image: nats:2.10.18
|
||||||
|
args:
|
||||||
|
- "-c"
|
||||||
|
- "/etc/nats/nats.conf"
|
||||||
|
ports:
|
||||||
|
- name: client
|
||||||
|
containerPort: 4222
|
||||||
|
- name: monitoring
|
||||||
|
containerPort: 8222
|
||||||
|
volumeMounts:
|
||||||
|
- name: config
|
||||||
|
mountPath: /etc/nats
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 512Mi
|
||||||
|
volumes:
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: nats-config
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: data
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 2Gi
|
||||||
@ -47,6 +47,7 @@ PERCENT_THRESHOLDS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_CPU_WINDOW = "1m"
|
NAMESPACE_CPU_WINDOW = "1m"
|
||||||
|
GPU_RESOURCE_REGEX = r"nvidia[.]com/gpu.*|nvidia_com_gpu.*"
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Cluster metadata
|
# Cluster metadata
|
||||||
@ -235,13 +236,16 @@ def gpu_util_by_hostname():
|
|||||||
|
|
||||||
|
|
||||||
def gpu_node_labels():
|
def gpu_node_labels():
|
||||||
return 'kube_node_labels{label_accelerator=~".+"} or kube_node_labels{label_jetson="true"}'
|
return (
|
||||||
|
f'(max by (node) (kube_node_status_allocatable{{resource=~"{GPU_RESOURCE_REGEX}"}} > bool 0))'
|
||||||
|
' or kube_node_labels{label_jetson="true"}'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def gpu_requests_by_namespace_node(scope_var):
|
def gpu_requests_by_namespace_node(scope_var):
|
||||||
return (
|
return (
|
||||||
"sum by (namespace,node) ("
|
"sum by (namespace,node) ("
|
||||||
f'kube_pod_container_resource_requests{{resource=~"nvidia.com/gpu.*",{scope_var}}} '
|
f'kube_pod_container_resource_requests{{resource=~"{GPU_RESOURCE_REGEX}",{scope_var}}} '
|
||||||
"* on(namespace,pod) group_left(node) kube_pod_info "
|
"* on(namespace,pod) group_left(node) kube_pod_info "
|
||||||
f"* on(node) group_left() ({gpu_node_labels()})"
|
f"* on(node) group_left() ({gpu_node_labels()})"
|
||||||
")"
|
")"
|
||||||
@ -253,7 +257,7 @@ def gpu_usage_by_namespace(scope_var):
|
|||||||
total_by_node = f"sum by (node) ({requests_by_ns})"
|
total_by_node = f"sum by (node) ({requests_by_ns})"
|
||||||
return (
|
return (
|
||||||
"sum by (namespace) ("
|
"sum by (namespace) ("
|
||||||
f"({requests_by_ns}) / clamp_min({total_by_node}, 1) "
|
f"({requests_by_ns}) / on(node) group_left() clamp_min({total_by_node}, 1) "
|
||||||
f"* on(node) group_left() ({gpu_util_by_node()})"
|
f"* on(node) group_left() ({gpu_util_by_node()})"
|
||||||
")"
|
")"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -539,9 +539,9 @@ def main() -> int:
|
|||||||
help="Write generated files (otherwise just print a summary).",
|
help="Write generated files (otherwise just print a summary).",
|
||||||
)
|
)
|
||||||
ap.add_argument(
|
ap.add_argument(
|
||||||
"--sync-comms",
|
"--sync-atlasbot",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Mirror rendered knowledge into services/comms/knowledge for atlasbot.",
|
help="Mirror rendered knowledge into services/atlasbot/knowledge for atlasbot.",
|
||||||
)
|
)
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
|
||||||
@ -632,10 +632,10 @@ def main() -> int:
|
|||||||
print(f"Wrote {runbooks_json_path.relative_to(REPO_ROOT)}")
|
print(f"Wrote {runbooks_json_path.relative_to(REPO_ROOT)}")
|
||||||
print(f"Wrote {metrics_json_path.relative_to(REPO_ROOT)}")
|
print(f"Wrote {metrics_json_path.relative_to(REPO_ROOT)}")
|
||||||
|
|
||||||
if args.sync_comms:
|
if args.sync_atlasbot:
|
||||||
comms_dir = REPO_ROOT / "services" / "comms" / "knowledge"
|
atlasbot_dir = REPO_ROOT / "services" / "atlasbot" / "knowledge"
|
||||||
_sync_tree(out_dir, comms_dir)
|
_sync_tree(out_dir, atlasbot_dir)
|
||||||
print(f"Synced {out_dir.relative_to(REPO_ROOT)} -> {comms_dir.relative_to(REPO_ROOT)}")
|
print(f"Synced {out_dir.relative_to(REPO_ROOT)} -> {atlasbot_dir.relative_to(REPO_ROOT)}")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -3,7 +3,7 @@ apiVersion: apps/v1
|
|||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: atlasbot
|
name: atlasbot
|
||||||
namespace: comms
|
namespace: ai
|
||||||
labels:
|
labels:
|
||||||
app: atlasbot
|
app: atlasbot
|
||||||
spec:
|
spec:
|
||||||
@ -18,7 +18,7 @@ spec:
|
|||||||
annotations:
|
annotations:
|
||||||
checksum/atlasbot-configmap: manual-atlasbot-101
|
checksum/atlasbot-configmap: manual-atlasbot-101
|
||||||
vault.hashicorp.com/agent-inject: "true"
|
vault.hashicorp.com/agent-inject: "true"
|
||||||
vault.hashicorp.com/role: "comms"
|
vault.hashicorp.com/role: "ai"
|
||||||
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
||||||
vault.hashicorp.com/agent-inject-template-turn-secret: |
|
vault.hashicorp.com/agent-inject-template-turn-secret: |
|
||||||
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
|
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
|
||||||
@ -28,6 +28,15 @@ spec:
|
|||||||
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
vault.hashicorp.com/agent-inject-template-bot-pass: |
|
vault.hashicorp.com/agent-inject-template-bot-pass: |
|
||||||
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
|
||||||
|
vault.hashicorp.com/agent-inject-secret-bot-quick-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
|
vault.hashicorp.com/agent-inject-template-bot-quick-pass: |
|
||||||
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-quick-password" }}{{- end -}}
|
||||||
|
vault.hashicorp.com/agent-inject-secret-bot-smart-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
|
vault.hashicorp.com/agent-inject-template-bot-smart-pass: |
|
||||||
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-smart-password" }}{{- end -}}
|
||||||
|
vault.hashicorp.com/agent-inject-secret-bot-genius-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
|
vault.hashicorp.com/agent-inject-template-bot-genius-pass: |
|
||||||
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-genius-password" }}{{- end -}}
|
||||||
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
vault.hashicorp.com/agent-inject-template-seeder-pass: |
|
vault.hashicorp.com/agent-inject-template-seeder-pass: |
|
||||||
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
|
||||||
@ -58,17 +67,17 @@ spec:
|
|||||||
hardware: rpi5
|
hardware: rpi5
|
||||||
containers:
|
containers:
|
||||||
- name: atlasbot
|
- name: atlasbot
|
||||||
image: python:3.11-slim
|
image: registry.bstein.dev/bstein/atlasbot:0.1.0-55
|
||||||
command: ["/bin/sh","-c"]
|
command: ["/bin/sh","-c"]
|
||||||
args:
|
args:
|
||||||
- |
|
- |
|
||||||
. /vault/scripts/comms_vault_env.sh
|
. /vault/scripts/atlasbot_vault_env.sh
|
||||||
exec python /app/bot.py
|
exec python -m atlasbot.main
|
||||||
env:
|
env:
|
||||||
- name: MATRIX_BASE
|
- name: MATRIX_BASE
|
||||||
value: http://othrys-synapse-matrix-synapse:8008
|
value: http://othrys-synapse-matrix-synapse.comms.svc.cluster.local:8008
|
||||||
- name: AUTH_BASE
|
- name: AUTH_BASE
|
||||||
value: http://matrix-authentication-service:8080
|
value: http://matrix-authentication-service.comms.svc.cluster.local:8080
|
||||||
- name: KB_DIR
|
- name: KB_DIR
|
||||||
value: /kb
|
value: /kb
|
||||||
- name: VM_URL
|
- name: VM_URL
|
||||||
@ -76,27 +85,61 @@ spec:
|
|||||||
- name: ARIADNE_STATE_URL
|
- name: ARIADNE_STATE_URL
|
||||||
value: http://ariadne.maintenance.svc.cluster.local/api/internal/cluster/state
|
value: http://ariadne.maintenance.svc.cluster.local/api/internal/cluster/state
|
||||||
- name: BOT_USER
|
- name: BOT_USER
|
||||||
value: atlasbot
|
value: atlas-smart
|
||||||
|
- name: BOT_USER_QUICK
|
||||||
|
value: atlas-quick
|
||||||
|
- name: BOT_USER_SMART
|
||||||
|
value: atlas-smart
|
||||||
|
- name: BOT_USER_GENIUS
|
||||||
|
value: atlas-genius
|
||||||
- name: BOT_MENTIONS
|
- name: BOT_MENTIONS
|
||||||
value: atlasbot,aatlasbot,atlas_quick,atlas_smart
|
value: atlas-quick,atlas-smart,atlas-genius
|
||||||
- name: OLLAMA_URL
|
- name: OLLAMA_URL
|
||||||
value: http://ollama.ai.svc.cluster.local:11434
|
value: http://ollama.ai.svc.cluster.local:11434
|
||||||
- name: OLLAMA_MODEL
|
- name: OLLAMA_MODEL
|
||||||
value: qwen2.5:14b-instruct
|
value: qwen2.5:14b-instruct-q4_0
|
||||||
- name: ATLASBOT_MODEL_FAST
|
- name: ATLASBOT_MODEL_FAST
|
||||||
value: qwen2.5:14b-instruct-q4_0
|
value: qwen2.5:14b-instruct-q4_0
|
||||||
- name: ATLASBOT_MODEL_DEEP
|
- name: ATLASBOT_MODEL_SMART
|
||||||
value: qwen2.5:14b-instruct
|
value: qwen2.5:14b-instruct-q4_0
|
||||||
|
- name: ATLASBOT_MODEL_GENIUS
|
||||||
|
value: qwen2.5:14b-instruct-q4_0
|
||||||
- name: OLLAMA_FALLBACK_MODEL
|
- name: OLLAMA_FALLBACK_MODEL
|
||||||
value: qwen2.5:14b-instruct-q4_0
|
value: qwen2.5:14b-instruct-q4_0
|
||||||
- name: OLLAMA_TIMEOUT_SEC
|
- name: OLLAMA_TIMEOUT_SEC
|
||||||
value: "600"
|
value: "600"
|
||||||
- name: ATLASBOT_THINKING_INTERVAL_SEC
|
- name: ATLASBOT_THINKING_INTERVAL_SEC
|
||||||
value: "120"
|
value: "30"
|
||||||
- name: ATLASBOT_SNAPSHOT_TTL_SEC
|
- name: ATLASBOT_SNAPSHOT_TTL_SEC
|
||||||
value: "30"
|
value: "30"
|
||||||
- name: ATLASBOT_HTTP_PORT
|
- name: ATLASBOT_HTTP_PORT
|
||||||
value: "8090"
|
value: "8090"
|
||||||
|
- name: ATLASBOT_STATE_DB
|
||||||
|
value: /data/atlasbot_state.db
|
||||||
|
- name: ATLASBOT_QUEUE_ENABLED
|
||||||
|
value: "false"
|
||||||
|
- name: ATLASBOT_DEBUG_PIPELINE
|
||||||
|
value: "true"
|
||||||
|
- name: ATLASBOT_NATS_URL
|
||||||
|
value: nats://nats.nats.svc.cluster.local:4222
|
||||||
|
- name: ATLASBOT_NATS_STREAM
|
||||||
|
value: atlasbot
|
||||||
|
- name: ATLASBOT_NATS_SUBJECT
|
||||||
|
value: atlasbot.requests
|
||||||
|
- name: ATLASBOT_FAST_MAX_ANGLES
|
||||||
|
value: "2"
|
||||||
|
- name: ATLASBOT_SMART_MAX_ANGLES
|
||||||
|
value: "5"
|
||||||
|
- name: ATLASBOT_FAST_MAX_CANDIDATES
|
||||||
|
value: "2"
|
||||||
|
- name: ATLASBOT_SMART_MAX_CANDIDATES
|
||||||
|
value: "6"
|
||||||
|
- name: ATLASBOT_FAST_LLM_CALLS_MAX
|
||||||
|
value: "24"
|
||||||
|
- name: ATLASBOT_SMART_LLM_CALLS_MAX
|
||||||
|
value: "48"
|
||||||
|
- name: ATLASBOT_GENIUS_LLM_CALLS_MAX
|
||||||
|
value: "96"
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
containerPort: 8090
|
containerPort: 8090
|
||||||
@ -108,19 +151,15 @@ spec:
|
|||||||
cpu: 500m
|
cpu: 500m
|
||||||
memory: 512Mi
|
memory: 512Mi
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: code
|
|
||||||
mountPath: /app/bot.py
|
|
||||||
subPath: bot.py
|
|
||||||
- name: kb
|
- name: kb
|
||||||
mountPath: /kb
|
mountPath: /kb
|
||||||
readOnly: true
|
readOnly: true
|
||||||
- name: vault-scripts
|
- name: vault-scripts
|
||||||
mountPath: /vault/scripts
|
mountPath: /vault/scripts
|
||||||
readOnly: true
|
readOnly: true
|
||||||
|
- name: atlasbot-state
|
||||||
|
mountPath: /data
|
||||||
volumes:
|
volumes:
|
||||||
- name: code
|
|
||||||
configMap:
|
|
||||||
name: atlasbot
|
|
||||||
- name: kb
|
- name: kb
|
||||||
configMap:
|
configMap:
|
||||||
name: atlas-kb
|
name: atlas-kb
|
||||||
@ -139,5 +178,7 @@ spec:
|
|||||||
path: diagrams/atlas-http.mmd
|
path: diagrams/atlas-http.mmd
|
||||||
- name: vault-scripts
|
- name: vault-scripts
|
||||||
configMap:
|
configMap:
|
||||||
name: comms-vault-env
|
name: atlasbot-vault-env
|
||||||
defaultMode: 0555
|
defaultMode: 0555
|
||||||
|
- name: atlasbot-state
|
||||||
|
emptyDir: {}
|
||||||
@ -3,7 +3,9 @@ apiVersion: v1
|
|||||||
kind: ServiceAccount
|
kind: ServiceAccount
|
||||||
metadata:
|
metadata:
|
||||||
name: atlasbot
|
name: atlasbot
|
||||||
namespace: comms
|
namespace: ai
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: harbor-regcred
|
||||||
---
|
---
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
@ -43,5 +45,4 @@ roleRef:
|
|||||||
subjects:
|
subjects:
|
||||||
- kind: ServiceAccount
|
- kind: ServiceAccount
|
||||||
name: atlasbot
|
name: atlasbot
|
||||||
namespace: comms
|
namespace: ai
|
||||||
|
|
||||||
@ -2,7 +2,7 @@ apiVersion: v1
|
|||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: atlasbot
|
name: atlasbot
|
||||||
namespace: comms
|
namespace: ai
|
||||||
labels:
|
labels:
|
||||||
app: atlasbot
|
app: atlasbot
|
||||||
spec:
|
spec:
|
||||||
26
services/atlasbot/image-automation.yaml
Normal file
26
services/atlasbot/image-automation.yaml
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# services/atlasbot/image-automation.yaml
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1
|
||||||
|
kind: ImageUpdateAutomation
|
||||||
|
metadata:
|
||||||
|
name: atlasbot
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
interval: 1m0s
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
git:
|
||||||
|
checkout:
|
||||||
|
ref:
|
||||||
|
branch: feature/atlasbot
|
||||||
|
commit:
|
||||||
|
author:
|
||||||
|
name: flux-bot
|
||||||
|
email: ops@bstein.dev
|
||||||
|
messageTemplate: "chore(atlasbot): automated image update"
|
||||||
|
push:
|
||||||
|
branch: feature/atlasbot
|
||||||
|
update:
|
||||||
|
path: services/atlasbot
|
||||||
|
strategy: Setters
|
||||||
23
services/atlasbot/image.yaml
Normal file
23
services/atlasbot/image.yaml
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# services/atlasbot/image.yaml
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||||
|
kind: ImageRepository
|
||||||
|
metadata:
|
||||||
|
name: atlasbot
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
image: registry.bstein.dev/bstein/atlasbot
|
||||||
|
interval: 1m0s
|
||||||
|
secretRef:
|
||||||
|
name: harbor-regcred
|
||||||
|
---
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||||
|
kind: ImagePolicy
|
||||||
|
metadata:
|
||||||
|
name: atlasbot
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
imageRepositoryRef:
|
||||||
|
name: atlasbot
|
||||||
|
policy:
|
||||||
|
semver:
|
||||||
|
range: ">=0.1.0-0"
|
||||||
22
services/atlasbot/knowledge/INDEX.md
Normal file
22
services/atlasbot/knowledge/INDEX.md
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
Atlas Knowledge Base (KB)
|
||||||
|
|
||||||
|
This folder is the source-of-truth “memory” for Atlas/Titan assistants (and for humans). It is designed to be:
|
||||||
|
- Accurate (grounded in GitOps + read-only cluster tools)
|
||||||
|
- Maintainable (small docs + deterministic generators)
|
||||||
|
- Safe (no secrets; refer to Secret/Vault paths by name only)
|
||||||
|
|
||||||
|
Layout
|
||||||
|
- `knowledge/runbooks/`: human-written docs (short, chunkable Markdown).
|
||||||
|
- `knowledge/catalog/`: generated machine-readable facts (YAML/JSON).
|
||||||
|
- `knowledge/diagrams/`: generated Mermaid diagrams (`.mmd`) derived from the catalog.
|
||||||
|
|
||||||
|
Regeneration
|
||||||
|
- Update manifests/docs, then regenerate generated artifacts:
|
||||||
|
- `python scripts/knowledge_render_atlas.py --write`
|
||||||
|
|
||||||
|
Authoring rules
|
||||||
|
- Never include secret values. Prefer `secretRef` names or Vault paths like `kv/atlas/...`.
|
||||||
|
- Prefer stable identifiers: Kubernetes `namespace/name`, DNS hostnames, Flux kustomization paths.
|
||||||
|
- Keep each runbook small; one topic per file; use headings.
|
||||||
|
- When in doubt, link to the exact file path in this repo that configures the behavior.
|
||||||
|
|
||||||
8
services/atlasbot/knowledge/catalog/atlas-summary.json
Normal file
8
services/atlasbot/knowledge/catalog/atlas-summary.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"counts": {
|
||||||
|
"helmrelease_host_hints": 19,
|
||||||
|
"http_endpoints": 45,
|
||||||
|
"services": 47,
|
||||||
|
"workloads": 74
|
||||||
|
}
|
||||||
|
}
|
||||||
3445
services/atlasbot/knowledge/catalog/atlas.json
Normal file
3445
services/atlasbot/knowledge/catalog/atlas.json
Normal file
File diff suppressed because it is too large
Load Diff
1880
services/atlasbot/knowledge/catalog/metrics.json
Normal file
1880
services/atlasbot/knowledge/catalog/metrics.json
Normal file
File diff suppressed because it is too large
Load Diff
97
services/atlasbot/knowledge/catalog/runbooks.json
Normal file
97
services/atlasbot/knowledge/catalog/runbooks.json
Normal file
File diff suppressed because one or more lines are too long
234
services/atlasbot/knowledge/diagrams/atlas-http.mmd
Normal file
234
services/atlasbot/knowledge/diagrams/atlas-http.mmd
Normal file
@ -0,0 +1,234 @@
|
|||||||
|
flowchart LR
|
||||||
|
host_auth_bstein_dev["auth.bstein.dev"]
|
||||||
|
svc_sso_oauth2_proxy["sso/oauth2-proxy (Service)"]
|
||||||
|
host_auth_bstein_dev --> svc_sso_oauth2_proxy
|
||||||
|
wl_sso_oauth2_proxy["sso/oauth2-proxy (Deployment)"]
|
||||||
|
svc_sso_oauth2_proxy --> wl_sso_oauth2_proxy
|
||||||
|
host_bstein_dev["bstein.dev"]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Service)"]
|
||||||
|
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"]
|
||||||
|
host_bstein_dev --> svc_comms_matrix_wellknown
|
||||||
|
wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"]
|
||||||
|
svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"]
|
||||||
|
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
host_budget_bstein_dev["budget.bstein.dev"]
|
||||||
|
svc_finance_actual_budget["finance/actual-budget (Service)"]
|
||||||
|
host_budget_bstein_dev --> svc_finance_actual_budget
|
||||||
|
wl_finance_actual_budget["finance/actual-budget (Deployment)"]
|
||||||
|
svc_finance_actual_budget --> wl_finance_actual_budget
|
||||||
|
host_call_live_bstein_dev["call.live.bstein.dev"]
|
||||||
|
svc_comms_element_call["comms/element-call (Service)"]
|
||||||
|
host_call_live_bstein_dev --> svc_comms_element_call
|
||||||
|
wl_comms_element_call["comms/element-call (Deployment)"]
|
||||||
|
svc_comms_element_call --> wl_comms_element_call
|
||||||
|
host_chat_ai_bstein_dev["chat.ai.bstein.dev"]
|
||||||
|
svc_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Service)"]
|
||||||
|
host_chat_ai_bstein_dev --> svc_bstein_dev_home_chat_ai_gateway
|
||||||
|
wl_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Deployment)"]
|
||||||
|
svc_bstein_dev_home_chat_ai_gateway --> wl_bstein_dev_home_chat_ai_gateway
|
||||||
|
host_ci_bstein_dev["ci.bstein.dev"]
|
||||||
|
svc_jenkins_jenkins["jenkins/jenkins (Service)"]
|
||||||
|
host_ci_bstein_dev --> svc_jenkins_jenkins
|
||||||
|
wl_jenkins_jenkins["jenkins/jenkins (Deployment)"]
|
||||||
|
svc_jenkins_jenkins --> wl_jenkins_jenkins
|
||||||
|
host_cloud_bstein_dev["cloud.bstein.dev"]
|
||||||
|
svc_nextcloud_nextcloud["nextcloud/nextcloud (Service)"]
|
||||||
|
host_cloud_bstein_dev --> svc_nextcloud_nextcloud
|
||||||
|
wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"]
|
||||||
|
svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud
|
||||||
|
host_health_bstein_dev["health.bstein.dev"]
|
||||||
|
svc_health_wger["health/wger (Service)"]
|
||||||
|
host_health_bstein_dev --> svc_health_wger
|
||||||
|
wl_health_wger["health/wger (Deployment)"]
|
||||||
|
svc_health_wger --> wl_health_wger
|
||||||
|
host_kit_live_bstein_dev["kit.live.bstein.dev"]
|
||||||
|
svc_comms_livekit_token_service["comms/livekit-token-service (Service)"]
|
||||||
|
host_kit_live_bstein_dev --> svc_comms_livekit_token_service
|
||||||
|
wl_comms_livekit_token_service["comms/livekit-token-service (Deployment)"]
|
||||||
|
svc_comms_livekit_token_service --> wl_comms_livekit_token_service
|
||||||
|
svc_comms_livekit["comms/livekit (Service)"]
|
||||||
|
host_kit_live_bstein_dev --> svc_comms_livekit
|
||||||
|
wl_comms_livekit["comms/livekit (Deployment)"]
|
||||||
|
svc_comms_livekit --> wl_comms_livekit
|
||||||
|
host_live_bstein_dev["live.bstein.dev"]
|
||||||
|
host_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||||
|
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"]
|
||||||
|
host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||||
|
svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"]
|
||||||
|
host_live_bstein_dev --> svc_comms_matrix_guest_register
|
||||||
|
wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"]
|
||||||
|
svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register
|
||||||
|
svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"]
|
||||||
|
host_live_bstein_dev --> svc_comms_matrix_authentication_service
|
||||||
|
wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"]
|
||||||
|
svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service
|
||||||
|
host_logs_bstein_dev["logs.bstein.dev"]
|
||||||
|
svc_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Service)"]
|
||||||
|
host_logs_bstein_dev --> svc_logging_oauth2_proxy_logs
|
||||||
|
wl_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Deployment)"]
|
||||||
|
svc_logging_oauth2_proxy_logs --> wl_logging_oauth2_proxy_logs
|
||||||
|
host_longhorn_bstein_dev["longhorn.bstein.dev"]
|
||||||
|
svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"]
|
||||||
|
host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
wl_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Deployment)"]
|
||||||
|
svc_longhorn_system_oauth2_proxy_longhorn --> wl_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
host_mail_bstein_dev["mail.bstein.dev"]
|
||||||
|
svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"]
|
||||||
|
host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front
|
||||||
|
host_matrix_live_bstein_dev["matrix.live.bstein.dev"]
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register
|
||||||
|
host_monero_bstein_dev["monero.bstein.dev"]
|
||||||
|
svc_crypto_monerod["crypto/monerod (Service)"]
|
||||||
|
host_monero_bstein_dev --> svc_crypto_monerod
|
||||||
|
wl_crypto_monerod["crypto/monerod (Deployment)"]
|
||||||
|
svc_crypto_monerod --> wl_crypto_monerod
|
||||||
|
host_money_bstein_dev["money.bstein.dev"]
|
||||||
|
svc_finance_firefly["finance/firefly (Service)"]
|
||||||
|
host_money_bstein_dev --> svc_finance_firefly
|
||||||
|
wl_finance_firefly["finance/firefly (Deployment)"]
|
||||||
|
svc_finance_firefly --> wl_finance_firefly
|
||||||
|
host_notes_bstein_dev["notes.bstein.dev"]
|
||||||
|
svc_outline_outline["outline/outline (Service)"]
|
||||||
|
host_notes_bstein_dev --> svc_outline_outline
|
||||||
|
wl_outline_outline["outline/outline (Deployment)"]
|
||||||
|
svc_outline_outline --> wl_outline_outline
|
||||||
|
host_office_bstein_dev["office.bstein.dev"]
|
||||||
|
svc_nextcloud_collabora["nextcloud/collabora (Service)"]
|
||||||
|
host_office_bstein_dev --> svc_nextcloud_collabora
|
||||||
|
wl_nextcloud_collabora["nextcloud/collabora (Deployment)"]
|
||||||
|
svc_nextcloud_collabora --> wl_nextcloud_collabora
|
||||||
|
host_pegasus_bstein_dev["pegasus.bstein.dev"]
|
||||||
|
svc_jellyfin_pegasus["jellyfin/pegasus (Service)"]
|
||||||
|
host_pegasus_bstein_dev --> svc_jellyfin_pegasus
|
||||||
|
wl_jellyfin_pegasus["jellyfin/pegasus (Deployment)"]
|
||||||
|
svc_jellyfin_pegasus --> wl_jellyfin_pegasus
|
||||||
|
host_scm_bstein_dev["scm.bstein.dev"]
|
||||||
|
svc_gitea_gitea["gitea/gitea (Service)"]
|
||||||
|
host_scm_bstein_dev --> svc_gitea_gitea
|
||||||
|
wl_gitea_gitea["gitea/gitea (Deployment)"]
|
||||||
|
svc_gitea_gitea --> wl_gitea_gitea
|
||||||
|
host_secret_bstein_dev["secret.bstein.dev"]
|
||||||
|
svc_vault_vault["vault/vault (Service)"]
|
||||||
|
host_secret_bstein_dev --> svc_vault_vault
|
||||||
|
wl_vault_vault["vault/vault (StatefulSet)"]
|
||||||
|
svc_vault_vault --> wl_vault_vault
|
||||||
|
host_sso_bstein_dev["sso.bstein.dev"]
|
||||||
|
svc_sso_keycloak["sso/keycloak (Service)"]
|
||||||
|
host_sso_bstein_dev --> svc_sso_keycloak
|
||||||
|
wl_sso_keycloak["sso/keycloak (Deployment)"]
|
||||||
|
svc_sso_keycloak --> wl_sso_keycloak
|
||||||
|
host_stream_bstein_dev["stream.bstein.dev"]
|
||||||
|
svc_jellyfin_jellyfin["jellyfin/jellyfin (Service)"]
|
||||||
|
host_stream_bstein_dev --> svc_jellyfin_jellyfin
|
||||||
|
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"]
|
||||||
|
svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin
|
||||||
|
host_tasks_bstein_dev["tasks.bstein.dev"]
|
||||||
|
svc_planka_planka["planka/planka (Service)"]
|
||||||
|
host_tasks_bstein_dev --> svc_planka_planka
|
||||||
|
wl_planka_planka["planka/planka (Deployment)"]
|
||||||
|
svc_planka_planka --> wl_planka_planka
|
||||||
|
host_vault_bstein_dev["vault.bstein.dev"]
|
||||||
|
svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"]
|
||||||
|
host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service
|
||||||
|
wl_vaultwarden_vaultwarden["vaultwarden/vaultwarden (Deployment)"]
|
||||||
|
svc_vaultwarden_vaultwarden_service --> wl_vaultwarden_vaultwarden
|
||||||
|
|
||||||
|
subgraph bstein_dev_home[bstein-dev-home]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
svc_bstein_dev_home_chat_ai_gateway
|
||||||
|
wl_bstein_dev_home_chat_ai_gateway
|
||||||
|
end
|
||||||
|
subgraph comms[comms]
|
||||||
|
svc_comms_matrix_wellknown
|
||||||
|
wl_comms_matrix_wellknown
|
||||||
|
svc_comms_element_call
|
||||||
|
wl_comms_element_call
|
||||||
|
svc_comms_livekit_token_service
|
||||||
|
wl_comms_livekit_token_service
|
||||||
|
svc_comms_livekit
|
||||||
|
wl_comms_livekit
|
||||||
|
svc_comms_othrys_synapse_matrix_synapse
|
||||||
|
svc_comms_matrix_guest_register
|
||||||
|
wl_comms_matrix_guest_register
|
||||||
|
svc_comms_matrix_authentication_service
|
||||||
|
wl_comms_matrix_authentication_service
|
||||||
|
end
|
||||||
|
subgraph crypto[crypto]
|
||||||
|
svc_crypto_monerod
|
||||||
|
wl_crypto_monerod
|
||||||
|
end
|
||||||
|
subgraph finance[finance]
|
||||||
|
svc_finance_actual_budget
|
||||||
|
wl_finance_actual_budget
|
||||||
|
svc_finance_firefly
|
||||||
|
wl_finance_firefly
|
||||||
|
end
|
||||||
|
subgraph gitea[gitea]
|
||||||
|
svc_gitea_gitea
|
||||||
|
wl_gitea_gitea
|
||||||
|
end
|
||||||
|
subgraph health[health]
|
||||||
|
svc_health_wger
|
||||||
|
wl_health_wger
|
||||||
|
end
|
||||||
|
subgraph jellyfin[jellyfin]
|
||||||
|
svc_jellyfin_pegasus
|
||||||
|
wl_jellyfin_pegasus
|
||||||
|
svc_jellyfin_jellyfin
|
||||||
|
wl_jellyfin_jellyfin
|
||||||
|
end
|
||||||
|
subgraph jenkins[jenkins]
|
||||||
|
svc_jenkins_jenkins
|
||||||
|
wl_jenkins_jenkins
|
||||||
|
end
|
||||||
|
subgraph logging[logging]
|
||||||
|
svc_logging_oauth2_proxy_logs
|
||||||
|
wl_logging_oauth2_proxy_logs
|
||||||
|
end
|
||||||
|
subgraph longhorn_system[longhorn-system]
|
||||||
|
svc_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
wl_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
end
|
||||||
|
subgraph mailu_mailserver[mailu-mailserver]
|
||||||
|
svc_mailu_mailserver_mailu_front
|
||||||
|
end
|
||||||
|
subgraph nextcloud[nextcloud]
|
||||||
|
svc_nextcloud_nextcloud
|
||||||
|
wl_nextcloud_nextcloud
|
||||||
|
svc_nextcloud_collabora
|
||||||
|
wl_nextcloud_collabora
|
||||||
|
end
|
||||||
|
subgraph outline[outline]
|
||||||
|
svc_outline_outline
|
||||||
|
wl_outline_outline
|
||||||
|
end
|
||||||
|
subgraph planka[planka]
|
||||||
|
svc_planka_planka
|
||||||
|
wl_planka_planka
|
||||||
|
end
|
||||||
|
subgraph sso[sso]
|
||||||
|
svc_sso_oauth2_proxy
|
||||||
|
wl_sso_oauth2_proxy
|
||||||
|
svc_sso_keycloak
|
||||||
|
wl_sso_keycloak
|
||||||
|
end
|
||||||
|
subgraph vault[vault]
|
||||||
|
svc_vault_vault
|
||||||
|
wl_vault_vault
|
||||||
|
end
|
||||||
|
subgraph vaultwarden[vaultwarden]
|
||||||
|
svc_vaultwarden_vaultwarden_service
|
||||||
|
wl_vaultwarden_vaultwarden
|
||||||
|
end
|
||||||
29
services/atlasbot/kustomization.yaml
Normal file
29
services/atlasbot/kustomization.yaml
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# services/atlasbot/kustomization.yaml
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
namespace: ai
|
||||||
|
resources:
|
||||||
|
- atlasbot-deployment.yaml
|
||||||
|
- atlasbot-service.yaml
|
||||||
|
- atlasbot-rbac.yaml
|
||||||
|
- secretproviderclass.yaml
|
||||||
|
- vault-sync-deployment.yaml
|
||||||
|
- image.yaml
|
||||||
|
- image-automation.yaml
|
||||||
|
images:
|
||||||
|
- name: registry.bstein.dev/bstein/atlasbot
|
||||||
|
newTag: 0.1.2-97 # {"$imagepolicy": "ai:atlasbot:tag"}
|
||||||
|
configMapGenerator:
|
||||||
|
- name: atlasbot-vault-env
|
||||||
|
files:
|
||||||
|
- atlasbot_vault_env.sh=scripts/atlasbot_vault_env.sh
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
- name: atlas-kb
|
||||||
|
files:
|
||||||
|
- INDEX.md=knowledge/INDEX.md
|
||||||
|
- atlas.json=knowledge/catalog/atlas.json
|
||||||
|
- atlas-summary.json=knowledge/catalog/atlas-summary.json
|
||||||
|
- metrics.json=knowledge/catalog/metrics.json
|
||||||
|
- runbooks.json=knowledge/catalog/runbooks.json
|
||||||
|
- atlas-http.mmd=knowledge/diagrams/atlas-http.mmd
|
||||||
44
services/atlasbot/scripts/atlasbot_vault_env.sh
Normal file
44
services/atlasbot/scripts/atlasbot_vault_env.sh
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/env sh
|
||||||
|
set -eu
|
||||||
|
|
||||||
|
vault_dir="/vault/secrets"
|
||||||
|
|
||||||
|
read_secret() {
|
||||||
|
tr -d '\r\n' < "${vault_dir}/$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
read_optional() {
|
||||||
|
if [ -f "${vault_dir}/$1" ]; then
|
||||||
|
tr -d '\r\n' < "${vault_dir}/$1"
|
||||||
|
else
|
||||||
|
printf ''
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
export TURN_STATIC_AUTH_SECRET="$(read_secret turn-secret)"
|
||||||
|
export TURN_PASSWORD="${TURN_STATIC_AUTH_SECRET}"
|
||||||
|
|
||||||
|
export LIVEKIT_API_SECRET="$(read_secret livekit-primary)"
|
||||||
|
export LIVEKIT_SECRET="${LIVEKIT_API_SECRET}"
|
||||||
|
|
||||||
|
export BOT_PASS="$(read_secret bot-pass)"
|
||||||
|
export BOT_PASS_QUICK="$(read_optional bot-quick-pass)"
|
||||||
|
export BOT_PASS_SMART="$(read_optional bot-smart-pass)"
|
||||||
|
export BOT_PASS_GENIUS="$(read_optional bot-genius-pass)"
|
||||||
|
if [ -z "${BOT_PASS_SMART}" ]; then
|
||||||
|
export BOT_PASS_SMART="${BOT_PASS}"
|
||||||
|
fi
|
||||||
|
if [ -z "${BOT_PASS_GENIUS}" ]; then
|
||||||
|
export BOT_PASS_GENIUS="${BOT_PASS_SMART}"
|
||||||
|
fi
|
||||||
|
export SEEDER_PASS="$(read_secret seeder-pass)"
|
||||||
|
|
||||||
|
export CHAT_API_KEY="$(read_secret chat-matrix)"
|
||||||
|
export CHAT_API_HOMEPAGE="$(read_secret chat-homepage)"
|
||||||
|
|
||||||
|
export MAS_ADMIN_CLIENT_SECRET_FILE="${vault_dir}/mas-admin-secret"
|
||||||
|
export PGPASSWORD="$(read_secret synapse-db-pass)"
|
||||||
|
|
||||||
|
export MAS_DB_PASSWORD="$(read_secret mas-db-pass)"
|
||||||
|
export MATRIX_SHARED_SECRET="$(read_secret mas-matrix-shared)"
|
||||||
|
export KEYCLOAK_CLIENT_SECRET="$(read_secret mas-kc-secret)"
|
||||||
21
services/atlasbot/secretproviderclass.yaml
Normal file
21
services/atlasbot/secretproviderclass.yaml
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# services/atlasbot/secretproviderclass.yaml
|
||||||
|
apiVersion: secrets-store.csi.x-k8s.io/v1
|
||||||
|
kind: SecretProviderClass
|
||||||
|
metadata:
|
||||||
|
name: atlasbot-vault
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
provider: vault
|
||||||
|
parameters:
|
||||||
|
vaultAddress: "http://vault.vault.svc.cluster.local:8200"
|
||||||
|
roleName: "ai"
|
||||||
|
objects: |
|
||||||
|
- objectName: "harbor-pull__dockerconfigjson"
|
||||||
|
secretPath: "kv/data/atlas/shared/harbor-pull"
|
||||||
|
secretKey: "dockerconfigjson"
|
||||||
|
secretObjects:
|
||||||
|
- secretName: harbor-regcred
|
||||||
|
type: kubernetes.io/dockerconfigjson
|
||||||
|
data:
|
||||||
|
- objectName: harbor-pull__dockerconfigjson
|
||||||
|
key: .dockerconfigjson
|
||||||
34
services/atlasbot/vault-sync-deployment.yaml
Normal file
34
services/atlasbot/vault-sync-deployment.yaml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# services/atlasbot/vault-sync-deployment.yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: atlasbot-vault-sync
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: atlasbot-vault-sync
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: atlasbot-vault-sync
|
||||||
|
spec:
|
||||||
|
serviceAccountName: atlasbot
|
||||||
|
containers:
|
||||||
|
- name: sync
|
||||||
|
image: alpine:3.20
|
||||||
|
command: ["/bin/sh", "-c"]
|
||||||
|
args:
|
||||||
|
- "sleep infinity"
|
||||||
|
volumeMounts:
|
||||||
|
- name: vault-secrets
|
||||||
|
mountPath: /vault/secrets
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: vault-secrets
|
||||||
|
csi:
|
||||||
|
driver: secrets-store.csi.k8s.io
|
||||||
|
readOnly: true
|
||||||
|
volumeAttributes:
|
||||||
|
secretProviderClass: atlasbot-vault
|
||||||
@ -68,7 +68,11 @@ spec:
|
|||||||
- name: AI_CHAT_TIMEOUT_SEC
|
- name: AI_CHAT_TIMEOUT_SEC
|
||||||
value: "480"
|
value: "480"
|
||||||
- name: AI_ATLASBOT_ENDPOINT
|
- name: AI_ATLASBOT_ENDPOINT
|
||||||
value: http://atlasbot.comms.svc.cluster.local:8090/v1/answer
|
value: http://atlasbot.ai.svc.cluster.local:8090/v1/answer
|
||||||
|
- name: AI_ATLASBOT_MODEL_FAST
|
||||||
|
value: qwen2.5:14b-instruct-q4_0
|
||||||
|
- name: AI_ATLASBOT_MODEL_SMART
|
||||||
|
value: qwen2.5:14b-instruct
|
||||||
- name: AI_ATLASBOT_TIMEOUT_SEC
|
- name: AI_ATLASBOT_TIMEOUT_SEC
|
||||||
value: "30"
|
value: "30"
|
||||||
- name: AI_NODE_NAME
|
- name: AI_NODE_NAME
|
||||||
|
|||||||
@ -20,9 +20,9 @@ resources:
|
|||||||
- ingress.yaml
|
- ingress.yaml
|
||||||
images:
|
images:
|
||||||
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend
|
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend
|
||||||
newTag: 0.1.1-162 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
|
newTag: 0.1.1-119 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
|
||||||
- name: registry.bstein.dev/bstein/bstein-dev-home-backend
|
- name: registry.bstein.dev/bstein/bstein-dev-home-backend
|
||||||
newTag: 0.1.1-162 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
|
newTag: 0.1.1-119 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
|
||||||
configMapGenerator:
|
configMapGenerator:
|
||||||
- name: chat-ai-gateway
|
- name: chat-ai-gateway
|
||||||
namespace: bstein-dev-home
|
namespace: bstein-dev-home
|
||||||
|
|||||||
@ -13,10 +13,7 @@ resources:
|
|||||||
- element-call-deployment.yaml
|
- element-call-deployment.yaml
|
||||||
- guest-register-deployment.yaml
|
- guest-register-deployment.yaml
|
||||||
- guest-register-service.yaml
|
- guest-register-service.yaml
|
||||||
- atlasbot-deployment.yaml
|
|
||||||
- atlasbot-service.yaml
|
|
||||||
- wellknown.yaml
|
- wellknown.yaml
|
||||||
- atlasbot-rbac.yaml
|
|
||||||
- mas-secrets-ensure-rbac.yaml
|
- mas-secrets-ensure-rbac.yaml
|
||||||
- comms-secrets-ensure-rbac.yaml
|
- comms-secrets-ensure-rbac.yaml
|
||||||
- mas-db-ensure-rbac.yaml
|
- mas-db-ensure-rbac.yaml
|
||||||
@ -43,7 +40,6 @@ resources:
|
|||||||
- livekit-ingress.yaml
|
- livekit-ingress.yaml
|
||||||
- livekit-middlewares.yaml
|
- livekit-middlewares.yaml
|
||||||
- matrix-ingress.yaml
|
- matrix-ingress.yaml
|
||||||
|
|
||||||
configMapGenerator:
|
configMapGenerator:
|
||||||
- name: comms-vault-env
|
- name: comms-vault-env
|
||||||
files:
|
files:
|
||||||
@ -60,21 +56,8 @@ configMapGenerator:
|
|||||||
- server.py=scripts/guest-register/server.py
|
- server.py=scripts/guest-register/server.py
|
||||||
options:
|
options:
|
||||||
disableNameSuffixHash: true
|
disableNameSuffixHash: true
|
||||||
- name: atlasbot
|
|
||||||
files:
|
|
||||||
- bot.py=scripts/atlasbot/bot.py
|
|
||||||
options:
|
|
||||||
disableNameSuffixHash: true
|
|
||||||
- name: othrys-element-host-config
|
- name: othrys-element-host-config
|
||||||
files:
|
files:
|
||||||
- 20-host-config.sh=scripts/element-host-config.sh
|
- 20-host-config.sh=scripts/element-host-config.sh
|
||||||
options:
|
options:
|
||||||
disableNameSuffixHash: true
|
disableNameSuffixHash: true
|
||||||
- name: atlas-kb
|
|
||||||
files:
|
|
||||||
- INDEX.md=knowledge/INDEX.md
|
|
||||||
- atlas.json=knowledge/catalog/atlas.json
|
|
||||||
- atlas-summary.json=knowledge/catalog/atlas-summary.json
|
|
||||||
- metrics.json=knowledge/catalog/metrics.json
|
|
||||||
- runbooks.json=knowledge/catalog/runbooks.json
|
|
||||||
- atlas-http.mmd=knowledge/diagrams/atlas-http.mmd
|
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
# services/comms/oneoffs/comms-secrets-ensure-job.yaml
|
# services/comms/oneoffs/comms-secrets-ensure-job.yaml
|
||||||
# One-off job for comms/comms-secrets-ensure-7.
|
# One-off job for comms/comms-secrets-ensure-8.
|
||||||
# Purpose: comms secrets ensure 7 (see container args/env in this file).
|
# Purpose: comms secrets ensure 8 (see container args/env in this file).
|
||||||
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
||||||
# Safe to delete the finished Job/pod; it should not run continuously.
|
# Safe to delete the finished Job/pod; it should not run continuously.
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: Job
|
kind: Job
|
||||||
metadata:
|
metadata:
|
||||||
name: comms-secrets-ensure-7
|
name: comms-secrets-ensure-8
|
||||||
namespace: comms
|
namespace: comms
|
||||||
spec:
|
spec:
|
||||||
suspend: true
|
suspend: true
|
||||||
@ -87,6 +87,9 @@ spec:
|
|||||||
ensure_key "comms/synapse-redis" "redis-password" >/dev/null
|
ensure_key "comms/synapse-redis" "redis-password" >/dev/null
|
||||||
ensure_key "comms/synapse-macaroon" "macaroon_secret_key" >/dev/null
|
ensure_key "comms/synapse-macaroon" "macaroon_secret_key" >/dev/null
|
||||||
ensure_key "comms/atlasbot-credentials-runtime" "bot-password" >/dev/null
|
ensure_key "comms/atlasbot-credentials-runtime" "bot-password" >/dev/null
|
||||||
|
ensure_key "comms/atlasbot-credentials-runtime" "bot-quick-password" >/dev/null
|
||||||
|
ensure_key "comms/atlasbot-credentials-runtime" "bot-smart-password" >/dev/null
|
||||||
|
ensure_key "comms/atlasbot-credentials-runtime" "bot-genius-password" >/dev/null
|
||||||
ensure_key "comms/atlasbot-credentials-runtime" "seeder-password" >/dev/null
|
ensure_key "comms/atlasbot-credentials-runtime" "seeder-password" >/dev/null
|
||||||
|
|
||||||
SYN_PASS="$(ensure_key "comms/synapse-db" "POSTGRES_PASSWORD")"
|
SYN_PASS="$(ensure_key "comms/synapse-db" "POSTGRES_PASSWORD")"
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
# services/comms/oneoffs/mas-local-users-ensure-job.yaml
|
# services/comms/oneoffs/mas-local-users-ensure-job.yaml
|
||||||
# One-off job for comms/mas-local-users-ensure-18.
|
# One-off job for comms/mas-local-users-ensure-19.
|
||||||
# Purpose: mas local users ensure 18 (see container args/env in this file).
|
# Purpose: mas local users ensure 19 (see container args/env in this file).
|
||||||
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
||||||
# Safe to delete the finished Job/pod; it should not run continuously.
|
# Safe to delete the finished Job/pod; it should not run continuously.
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: Job
|
kind: Job
|
||||||
metadata:
|
metadata:
|
||||||
name: mas-local-users-ensure-18
|
name: mas-local-users-ensure-19
|
||||||
namespace: comms
|
namespace: comms
|
||||||
spec:
|
spec:
|
||||||
suspend: true
|
suspend: true
|
||||||
@ -27,6 +27,12 @@ spec:
|
|||||||
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
vault.hashicorp.com/agent-inject-template-bot-pass: |
|
vault.hashicorp.com/agent-inject-template-bot-pass: |
|
||||||
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
|
||||||
|
vault.hashicorp.com/agent-inject-secret-bot-quick-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
|
vault.hashicorp.com/agent-inject-template-bot-quick-pass: |
|
||||||
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-quick-password" }}{{- end -}}
|
||||||
|
vault.hashicorp.com/agent-inject-secret-bot-smart-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
|
vault.hashicorp.com/agent-inject-template-bot-smart-pass: |
|
||||||
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-smart-password" }}{{- end -}}
|
||||||
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
|
||||||
vault.hashicorp.com/agent-inject-template-seeder-pass: |
|
vault.hashicorp.com/agent-inject-template-seeder-pass: |
|
||||||
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
|
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
|
||||||
@ -92,7 +98,13 @@ spec:
|
|||||||
- name: SEEDER_USER
|
- name: SEEDER_USER
|
||||||
value: othrys-seeder
|
value: othrys-seeder
|
||||||
- name: BOT_USER
|
- name: BOT_USER
|
||||||
value: atlasbot
|
value: atlas-smart
|
||||||
|
- name: BOT_USER_QUICK
|
||||||
|
value: atlas-quick
|
||||||
|
- name: BOT_USER_SMART
|
||||||
|
value: atlas-smart
|
||||||
|
- name: BOT_USER_GENIUS
|
||||||
|
value: atlas-genius
|
||||||
command:
|
command:
|
||||||
- /bin/sh
|
- /bin/sh
|
||||||
- -c
|
- -c
|
||||||
@ -225,11 +237,27 @@ spec:
|
|||||||
},
|
},
|
||||||
timeout=30,
|
timeout=30,
|
||||||
)
|
)
|
||||||
|
if r.status_code == 429:
|
||||||
|
return False
|
||||||
if r.status_code != 200:
|
if r.status_code != 200:
|
||||||
raise RuntimeError(f"login failed for {username}: {r.status_code} {r.text}")
|
raise RuntimeError(f"login failed for {username}: {r.status_code} {r.text}")
|
||||||
|
return True
|
||||||
|
|
||||||
wait_for_service(MAS_ADMIN_API_BASE)
|
wait_for_service(MAS_ADMIN_API_BASE)
|
||||||
token = admin_token()
|
token = admin_token()
|
||||||
|
bot_quick = os.environ.get("BOT_USER_QUICK", "")
|
||||||
|
bot_smart = os.environ.get("BOT_USER_SMART", "")
|
||||||
|
bot_genius = os.environ.get("BOT_USER_GENIUS", "")
|
||||||
|
bot_quick_pass = os.environ.get("BOT_PASS_QUICK", "")
|
||||||
|
bot_smart_pass = os.environ.get("BOT_PASS_SMART", "")
|
||||||
|
bot_genius_pass = os.environ.get("BOT_PASS_GENIUS", "") or bot_smart_pass
|
||||||
|
|
||||||
ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
|
ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
|
||||||
ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"])
|
ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"])
|
||||||
|
if bot_quick and bot_quick_pass:
|
||||||
|
ensure_user(token, bot_quick, bot_quick_pass)
|
||||||
|
if bot_smart and bot_smart_pass:
|
||||||
|
ensure_user(token, bot_smart, bot_smart_pass)
|
||||||
|
if bot_genius and bot_genius_pass:
|
||||||
|
ensure_user(token, bot_genius, bot_genius_pass)
|
||||||
PY
|
PY
|
||||||
|
|||||||
@ -1,15 +1,15 @@
|
|||||||
# services/comms/oneoffs/synapse-admin-ensure-job.yaml
|
# services/comms/oneoffs/synapse-admin-ensure-job.yaml
|
||||||
# One-off job for comms/synapse-admin-ensure-3.
|
# One-off job for comms/synapse-admin-ensure-15.
|
||||||
# Purpose: synapse admin ensure 3 (see container args/env in this file).
|
# Purpose: synapse admin ensure 15 (see container args/env in this file).
|
||||||
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
||||||
# Safe to delete the finished Job/pod; it should not run continuously.
|
# Safe to delete the finished Job/pod; it should not run continuously.
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: Job
|
kind: Job
|
||||||
metadata:
|
metadata:
|
||||||
name: synapse-admin-ensure-3
|
name: synapse-admin-ensure-15
|
||||||
namespace: comms
|
namespace: comms
|
||||||
spec:
|
spec:
|
||||||
suspend: true
|
suspend: false
|
||||||
backoffLimit: 0
|
backoffLimit: 0
|
||||||
ttlSecondsAfterFinished: 3600
|
ttlSecondsAfterFinished: 3600
|
||||||
template:
|
template:
|
||||||
@ -32,7 +32,8 @@ spec:
|
|||||||
values: ["arm64"]
|
values: ["arm64"]
|
||||||
containers:
|
containers:
|
||||||
- name: ensure
|
- name: ensure
|
||||||
image: python:3.11-slim
|
image: python:3.12-slim
|
||||||
|
imagePullPolicy: Always
|
||||||
env:
|
env:
|
||||||
- name: VAULT_ADDR
|
- name: VAULT_ADDR
|
||||||
value: http://vault.vault.svc.cluster.local:8200
|
value: http://vault.vault.svc.cluster.local:8200
|
||||||
@ -45,22 +46,20 @@ spec:
|
|||||||
- -c
|
- -c
|
||||||
- |
|
- |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
pip install --no-cache-dir psycopg2-binary bcrypt
|
python -m pip install --no-cache-dir psycopg2-binary
|
||||||
python - <<'PY'
|
python - <<'PY'
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import secrets
|
|
||||||
import string
|
|
||||||
import time
|
|
||||||
import urllib.error
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
import bcrypt
|
|
||||||
import psycopg2
|
import psycopg2
|
||||||
|
|
||||||
VAULT_ADDR = os.environ.get("VAULT_ADDR", "http://vault.vault.svc.cluster.local:8200").rstrip("/")
|
VAULT_ADDR = os.environ.get("VAULT_ADDR", "http://vault.vault.svc.cluster.local:8200").rstrip("/")
|
||||||
VAULT_ROLE = os.environ.get("VAULT_ROLE", "comms-secrets")
|
VAULT_ROLE = os.environ.get("VAULT_ROLE", "comms-secrets")
|
||||||
SA_TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"
|
SA_TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"
|
||||||
|
SYNAPSE_ADMIN_URL = os.environ.get("SYNAPSE_ADMIN_URL", "").rstrip("/")
|
||||||
PGHOST = "postgres-service.postgres.svc.cluster.local"
|
PGHOST = "postgres-service.postgres.svc.cluster.local"
|
||||||
PGPORT = 5432
|
PGPORT = 5432
|
||||||
PGDATABASE = "synapse"
|
PGDATABASE = "synapse"
|
||||||
@ -113,48 +112,15 @@ spec:
|
|||||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
resp.read()
|
resp.read()
|
||||||
|
|
||||||
def random_password(length: int = 32) -> str:
|
|
||||||
alphabet = string.ascii_letters + string.digits
|
|
||||||
return "".join(secrets.choice(alphabet) for _ in range(length))
|
|
||||||
|
|
||||||
def ensure_admin_creds(token: str) -> dict:
|
def ensure_admin_creds(token: str) -> dict:
|
||||||
data = vault_get(token, "comms/synapse-admin")
|
data = vault_get(token, "comms/synapse-admin")
|
||||||
username = (data.get("username") or "").strip() or "synapse-admin"
|
username = "othrys-seeder"
|
||||||
password = (data.get("password") or "").strip()
|
if data.get("username") != username:
|
||||||
if not password:
|
|
||||||
password = random_password()
|
|
||||||
data["username"] = username
|
data["username"] = username
|
||||||
data["password"] = password
|
data.pop("access_token", None)
|
||||||
vault_put(token, "comms/synapse-admin", data)
|
vault_put(token, "comms/synapse-admin", data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def ensure_user(cur, cols, user_id, password, admin):
|
|
||||||
now_ms = int(time.time() * 1000)
|
|
||||||
values = {
|
|
||||||
"name": user_id,
|
|
||||||
"password_hash": bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode(),
|
|
||||||
"creation_ts": now_ms,
|
|
||||||
}
|
|
||||||
|
|
||||||
def add_flag(name, flag):
|
|
||||||
if name not in cols:
|
|
||||||
return
|
|
||||||
if cols[name]["type"] in ("smallint", "integer"):
|
|
||||||
values[name] = int(flag)
|
|
||||||
else:
|
|
||||||
values[name] = bool(flag)
|
|
||||||
|
|
||||||
add_flag("admin", admin)
|
|
||||||
add_flag("deactivated", False)
|
|
||||||
add_flag("shadow_banned", False)
|
|
||||||
add_flag("is_guest", False)
|
|
||||||
|
|
||||||
columns = list(values.keys())
|
|
||||||
placeholders = ", ".join(["%s"] * len(columns))
|
|
||||||
updates = ", ".join([f"{col}=EXCLUDED.{col}" for col in columns if col != "name"])
|
|
||||||
query = f"INSERT INTO users ({', '.join(columns)}) VALUES ({placeholders}) ON CONFLICT (name) DO UPDATE SET {updates};"
|
|
||||||
cur.execute(query, [values[c] for c in columns])
|
|
||||||
|
|
||||||
def get_cols(cur):
|
def get_cols(cur):
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""
|
"""
|
||||||
@ -172,30 +138,40 @@ spec:
|
|||||||
}
|
}
|
||||||
return cols
|
return cols
|
||||||
|
|
||||||
def ensure_access_token(cur, user_id, token_value):
|
def admin_token_valid(token: str, user_id: str) -> bool:
|
||||||
cur.execute("SELECT COALESCE(MAX(id), 0) + 1 FROM access_tokens")
|
if not token or not SYNAPSE_ADMIN_URL:
|
||||||
token_id = cur.fetchone()[0]
|
return False
|
||||||
cur.execute(
|
encoded = urllib.parse.quote(user_id, safe="")
|
||||||
"""
|
url = f"{SYNAPSE_ADMIN_URL}/_synapse/admin/v2/users/{encoded}"
|
||||||
INSERT INTO access_tokens (id, user_id, token, device_id, valid_until_ms)
|
req = urllib.request.Request(url, headers={"Authorization": f"Bearer {token}"})
|
||||||
VALUES (%s, %s, %s, %s, NULL)
|
try:
|
||||||
ON CONFLICT (token) DO NOTHING
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
""",
|
resp.read()
|
||||||
(token_id, user_id, token_value, "ariadne-admin"),
|
return True
|
||||||
)
|
except urllib.error.HTTPError as exc:
|
||||||
|
if exc.code == 404:
|
||||||
|
return True
|
||||||
|
if exc.code in (401, 403):
|
||||||
|
return False
|
||||||
|
raise
|
||||||
|
|
||||||
vault_token = vault_login()
|
vault_token = vault_login()
|
||||||
admin_data = ensure_admin_creds(vault_token)
|
admin_data = ensure_admin_creds(vault_token)
|
||||||
if admin_data.get("access_token"):
|
user_id = f"@{admin_data['username']}:live.bstein.dev"
|
||||||
log("synapse admin token already present")
|
existing_token = admin_data.get("access_token")
|
||||||
|
if existing_token and admin_token_valid(existing_token, user_id):
|
||||||
|
log("synapse admin token already present and valid")
|
||||||
raise SystemExit(0)
|
raise SystemExit(0)
|
||||||
|
if existing_token:
|
||||||
|
log("synapse admin token invalid; rotating")
|
||||||
|
admin_data.pop("access_token", None)
|
||||||
|
vault_put(vault_token, "comms/synapse-admin", admin_data)
|
||||||
|
|
||||||
synapse_db = vault_get(vault_token, "comms/synapse-db")
|
synapse_db = vault_get(vault_token, "comms/synapse-db")
|
||||||
pg_password = synapse_db.get("POSTGRES_PASSWORD")
|
pg_password = synapse_db.get("POSTGRES_PASSWORD")
|
||||||
if not pg_password:
|
if not pg_password:
|
||||||
raise RuntimeError("synapse db password missing")
|
raise RuntimeError("synapse db password missing")
|
||||||
|
|
||||||
user_id = f"@{admin_data['username']}:live.bstein.dev"
|
|
||||||
conn = psycopg2.connect(
|
conn = psycopg2.connect(
|
||||||
host=PGHOST,
|
host=PGHOST,
|
||||||
port=PGPORT,
|
port=PGPORT,
|
||||||
@ -203,17 +179,34 @@ spec:
|
|||||||
user=PGUSER,
|
user=PGUSER,
|
||||||
password=pg_password,
|
password=pg_password,
|
||||||
)
|
)
|
||||||
token_value = secrets.token_urlsafe(32)
|
|
||||||
try:
|
try:
|
||||||
with conn:
|
with conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cols = get_cols(cur)
|
cols = get_cols(cur)
|
||||||
ensure_user(cur, cols, user_id, admin_data["password"], True)
|
if "admin" not in cols:
|
||||||
ensure_access_token(cur, user_id, token_value)
|
raise RuntimeError("users.admin column missing")
|
||||||
|
cur.execute(
|
||||||
|
"UPDATE users SET admin = TRUE WHERE name = %s",
|
||||||
|
(user_id,),
|
||||||
|
)
|
||||||
|
cur.execute(
|
||||||
|
"""
|
||||||
|
SELECT token FROM access_tokens
|
||||||
|
WHERE user_id = %s AND valid_until_ms IS NULL
|
||||||
|
ORDER BY id DESC LIMIT 1
|
||||||
|
""",
|
||||||
|
(user_id,),
|
||||||
|
)
|
||||||
|
row = cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
raise RuntimeError(f"no access token found for {user_id}")
|
||||||
|
token_value = row[0]
|
||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
admin_data["access_token"] = token_value
|
admin_data["access_token"] = token_value
|
||||||
vault_put(vault_token, "comms/synapse-admin", admin_data)
|
vault_put(vault_token, "comms/synapse-admin", admin_data)
|
||||||
|
if not admin_token_valid(token_value, user_id):
|
||||||
|
raise RuntimeError("synapse admin token validation failed")
|
||||||
log("synapse admin token stored")
|
log("synapse admin token stored")
|
||||||
PY
|
PY
|
||||||
|
|||||||
@ -82,8 +82,6 @@ spec:
|
|||||||
value: synapse
|
value: synapse
|
||||||
- name: SEEDER_USER
|
- name: SEEDER_USER
|
||||||
value: othrys-seeder
|
value: othrys-seeder
|
||||||
- name: BOT_USER
|
|
||||||
value: atlasbot
|
|
||||||
command:
|
command:
|
||||||
- /bin/sh
|
- /bin/sh
|
||||||
- -c
|
- -c
|
||||||
@ -141,10 +139,8 @@ spec:
|
|||||||
cur.execute(query, [values[c] for c in columns])
|
cur.execute(query, [values[c] for c in columns])
|
||||||
|
|
||||||
seeder_user = os.environ["SEEDER_USER"]
|
seeder_user = os.environ["SEEDER_USER"]
|
||||||
bot_user = os.environ["BOT_USER"]
|
|
||||||
server = "live.bstein.dev"
|
server = "live.bstein.dev"
|
||||||
seeder_id = f"@{seeder_user}:{server}"
|
seeder_id = f"@{seeder_user}:{server}"
|
||||||
bot_id = f"@{bot_user}:{server}"
|
|
||||||
|
|
||||||
conn = psycopg2.connect(
|
conn = psycopg2.connect(
|
||||||
host=os.environ["PGHOST"],
|
host=os.environ["PGHOST"],
|
||||||
@ -158,7 +154,6 @@ spec:
|
|||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cols = get_cols(cur)
|
cols = get_cols(cur)
|
||||||
upsert_user(cur, cols, seeder_id, os.environ["SEEDER_PASS"], True)
|
upsert_user(cur, cols, seeder_id, os.environ["SEEDER_PASS"], True)
|
||||||
upsert_user(cur, cols, bot_id, os.environ["BOT_PASS"], False)
|
|
||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
PY
|
PY
|
||||||
|
|||||||
@ -76,7 +76,7 @@ spec:
|
|||||||
- name: SEEDER_USER
|
- name: SEEDER_USER
|
||||||
value: othrys-seeder
|
value: othrys-seeder
|
||||||
- name: BOT_USER
|
- name: BOT_USER
|
||||||
value: atlasbot
|
value: atlas-smart
|
||||||
command:
|
command:
|
||||||
- /bin/sh
|
- /bin/sh
|
||||||
- -c
|
- -c
|
||||||
|
|||||||
@ -11,8 +11,12 @@ from urllib import error, parse, request
|
|||||||
|
|
||||||
BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008")
|
BASE = os.environ.get("MATRIX_BASE", "http://othrys-synapse-matrix-synapse:8008")
|
||||||
AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080")
|
AUTH_BASE = os.environ.get("AUTH_BASE", "http://matrix-authentication-service:8080")
|
||||||
USER = os.environ["BOT_USER"]
|
BOT_USER = os.environ["BOT_USER"]
|
||||||
PASSWORD = os.environ["BOT_PASS"]
|
BOT_PASS = os.environ["BOT_PASS"]
|
||||||
|
BOT_USER_QUICK = os.environ.get("BOT_USER_QUICK", "").strip()
|
||||||
|
BOT_PASS_QUICK = os.environ.get("BOT_PASS_QUICK", "").strip()
|
||||||
|
BOT_USER_SMART = os.environ.get("BOT_USER_SMART", "").strip()
|
||||||
|
BOT_PASS_SMART = os.environ.get("BOT_PASS_SMART", "").strip()
|
||||||
ROOM_ALIAS = "#othrys:live.bstein.dev"
|
ROOM_ALIAS = "#othrys:live.bstein.dev"
|
||||||
|
|
||||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
|
OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
|
||||||
@ -31,7 +35,7 @@ VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitor
|
|||||||
ARIADNE_STATE_URL = os.environ.get("ARIADNE_STATE_URL", "")
|
ARIADNE_STATE_URL = os.environ.get("ARIADNE_STATE_URL", "")
|
||||||
ARIADNE_STATE_TOKEN = os.environ.get("ARIADNE_STATE_TOKEN", "")
|
ARIADNE_STATE_TOKEN = os.environ.get("ARIADNE_STATE_TOKEN", "")
|
||||||
|
|
||||||
BOT_MENTIONS = os.environ.get("BOT_MENTIONS", f"{USER},atlas")
|
BOT_MENTIONS = os.environ.get("BOT_MENTIONS", f"{BOT_USER},atlas")
|
||||||
SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev")
|
SERVER_NAME = os.environ.get("MATRIX_SERVER_NAME", "live.bstein.dev")
|
||||||
|
|
||||||
MAX_KB_CHARS = int(os.environ.get("ATLASBOT_MAX_KB_CHARS", "2500"))
|
MAX_KB_CHARS = int(os.environ.get("ATLASBOT_MAX_KB_CHARS", "2500"))
|
||||||
@ -393,6 +397,31 @@ def _detect_mode_from_body(body: str, *, default: str = "deep") -> str:
|
|||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_mode(
|
||||||
|
content: dict[str, Any],
|
||||||
|
body: str,
|
||||||
|
*,
|
||||||
|
default: str = "deep",
|
||||||
|
account_user: str = "",
|
||||||
|
) -> str:
|
||||||
|
mode = _detect_mode_from_body(body, default=default)
|
||||||
|
mentions = content.get("m.mentions", {})
|
||||||
|
user_ids = mentions.get("user_ids", [])
|
||||||
|
if isinstance(user_ids, list):
|
||||||
|
normalized = {normalize_user_id(uid).lower() for uid in user_ids if isinstance(uid, str)}
|
||||||
|
if BOT_USER_QUICK and normalize_user_id(BOT_USER_QUICK).lower() in normalized:
|
||||||
|
return "fast"
|
||||||
|
if BOT_USER_SMART and normalize_user_id(BOT_USER_SMART).lower() in normalized:
|
||||||
|
return "deep"
|
||||||
|
if BOT_USER and normalize_user_id(BOT_USER).lower() in normalized:
|
||||||
|
return "deep"
|
||||||
|
if account_user and BOT_USER_QUICK and normalize_user_id(account_user) == normalize_user_id(BOT_USER_QUICK):
|
||||||
|
return "fast"
|
||||||
|
if account_user and BOT_USER_SMART and normalize_user_id(account_user) == normalize_user_id(BOT_USER_SMART):
|
||||||
|
return "deep"
|
||||||
|
return mode
|
||||||
|
|
||||||
|
|
||||||
def _model_for_mode(mode: str) -> str:
|
def _model_for_mode(mode: str) -> str:
|
||||||
if mode == "fast" and MODEL_FAST:
|
if mode == "fast" and MODEL_FAST:
|
||||||
return MODEL_FAST
|
return MODEL_FAST
|
||||||
@ -416,12 +445,12 @@ def req(method: str, path: str, token: str | None = None, body=None, timeout=60,
|
|||||||
raw = resp.read()
|
raw = resp.read()
|
||||||
return json.loads(raw.decode()) if raw else {}
|
return json.loads(raw.decode()) if raw else {}
|
||||||
|
|
||||||
def login() -> str:
|
def login(user: str, password: str) -> str:
|
||||||
login_user = normalize_user_id(USER)
|
login_user = normalize_user_id(user)
|
||||||
payload = {
|
payload = {
|
||||||
"type": "m.login.password",
|
"type": "m.login.password",
|
||||||
"identifier": {"type": "m.id.user", "user": login_user},
|
"identifier": {"type": "m.id.user", "user": login_user},
|
||||||
"password": PASSWORD,
|
"password": password,
|
||||||
}
|
}
|
||||||
res = req("POST", "/_matrix/client/v3/login", body=payload, base=AUTH_BASE)
|
res = req("POST", "/_matrix/client/v3/login", body=payload, base=AUTH_BASE)
|
||||||
return res["access_token"]
|
return res["access_token"]
|
||||||
@ -4820,7 +4849,7 @@ def open_ended_with_thinking(
|
|||||||
thread.join(timeout=1)
|
thread.join(timeout=1)
|
||||||
return result["reply"] or "Model backend is busy. Try again in a moment."
|
return result["reply"] or "Model backend is busy. Try again in a moment."
|
||||||
|
|
||||||
def sync_loop(token: str, room_id: str):
|
def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str):
|
||||||
since = None
|
since = None
|
||||||
try:
|
try:
|
||||||
res = req("GET", "/_matrix/client/v3/sync?timeout=0", token, timeout=10)
|
res = req("GET", "/_matrix/client/v3/sync?timeout=0", token, timeout=10)
|
||||||
@ -4861,7 +4890,7 @@ def sync_loop(token: str, room_id: str):
|
|||||||
if not body:
|
if not body:
|
||||||
continue
|
continue
|
||||||
sender = ev.get("sender", "")
|
sender = ev.get("sender", "")
|
||||||
if sender == f"@{USER}:live.bstein.dev":
|
if account_user and sender == normalize_user_id(account_user):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
mentioned = is_mentioned(content, body)
|
mentioned = is_mentioned(content, body)
|
||||||
@ -4874,7 +4903,12 @@ def sync_loop(token: str, room_id: str):
|
|||||||
|
|
||||||
cleaned_body = _strip_bot_mention(body)
|
cleaned_body = _strip_bot_mention(body)
|
||||||
lower_body = cleaned_body.lower()
|
lower_body = cleaned_body.lower()
|
||||||
mode = _detect_mode_from_body(body, default="deep" if is_dm else "deep")
|
mode = _detect_mode(
|
||||||
|
content,
|
||||||
|
body,
|
||||||
|
default=default_mode if default_mode in ("fast", "deep") else "deep",
|
||||||
|
account_user=account_user,
|
||||||
|
)
|
||||||
|
|
||||||
# Only do live cluster introspection in DMs.
|
# Only do live cluster introspection in DMs.
|
||||||
allow_tools = is_dm
|
allow_tools = is_dm
|
||||||
@ -4951,26 +4985,65 @@ def sync_loop(token: str, room_id: str):
|
|||||||
history[hist_key].append(f"Atlas: {reply}")
|
history[hist_key].append(f"Atlas: {reply}")
|
||||||
history[hist_key] = history[hist_key][-80:]
|
history[hist_key] = history[hist_key][-80:]
|
||||||
|
|
||||||
def login_with_retry():
|
def login_with_retry(user: str, password: str):
|
||||||
last_err = None
|
last_err = None
|
||||||
for attempt in range(10):
|
for attempt in range(10):
|
||||||
try:
|
try:
|
||||||
return login()
|
return login(user, password)
|
||||||
except Exception as exc: # noqa: BLE001
|
except Exception as exc: # noqa: BLE001
|
||||||
last_err = exc
|
last_err = exc
|
||||||
time.sleep(min(30, 2 ** attempt))
|
time.sleep(min(30, 2 ** attempt))
|
||||||
raise last_err
|
raise last_err
|
||||||
|
|
||||||
|
def _bot_accounts() -> list[dict[str, str]]:
|
||||||
|
accounts: list[dict[str, str]] = []
|
||||||
|
|
||||||
|
def add(user: str, password: str, mode: str):
|
||||||
|
if not user or not password:
|
||||||
|
return
|
||||||
|
accounts.append({"user": user, "password": password, "mode": mode})
|
||||||
|
|
||||||
|
add(BOT_USER_SMART or BOT_USER, BOT_PASS_SMART or BOT_PASS, "deep")
|
||||||
|
if BOT_USER_QUICK and BOT_PASS_QUICK:
|
||||||
|
add(BOT_USER_QUICK, BOT_PASS_QUICK, "fast")
|
||||||
|
if BOT_USER and BOT_PASS and all(acc["user"] != BOT_USER for acc in accounts):
|
||||||
|
add(BOT_USER, BOT_PASS, "deep")
|
||||||
|
|
||||||
|
seen: set[str] = set()
|
||||||
|
unique: list[dict[str, str]] = []
|
||||||
|
for acc in accounts:
|
||||||
|
uid = normalize_user_id(acc["user"]).lower()
|
||||||
|
if uid in seen:
|
||||||
|
continue
|
||||||
|
seen.add(uid)
|
||||||
|
unique.append(acc)
|
||||||
|
return unique
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
load_kb()
|
load_kb()
|
||||||
_start_http_server()
|
_start_http_server()
|
||||||
token = login_with_retry()
|
accounts = _bot_accounts()
|
||||||
|
threads: list[threading.Thread] = []
|
||||||
|
for acc in accounts:
|
||||||
|
token = login_with_retry(acc["user"], acc["password"])
|
||||||
try:
|
try:
|
||||||
room_id = resolve_alias(token, ROOM_ALIAS)
|
room_id = resolve_alias(token, ROOM_ALIAS)
|
||||||
join_room(token, room_id)
|
join_room(token, room_id)
|
||||||
except Exception:
|
except Exception:
|
||||||
room_id = None
|
room_id = None
|
||||||
sync_loop(token, room_id)
|
thread = threading.Thread(
|
||||||
|
target=sync_loop,
|
||||||
|
args=(token, room_id),
|
||||||
|
kwargs={
|
||||||
|
"account_user": acc["user"],
|
||||||
|
"default_mode": acc["mode"],
|
||||||
|
},
|
||||||
|
daemon=True,
|
||||||
|
)
|
||||||
|
thread.start()
|
||||||
|
threads.append(thread)
|
||||||
|
for thread in threads:
|
||||||
|
thread.join()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@ -7,6 +7,14 @@ read_secret() {
|
|||||||
tr -d '\r\n' < "${vault_dir}/$1"
|
tr -d '\r\n' < "${vault_dir}/$1"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
read_optional() {
|
||||||
|
if [ -f "${vault_dir}/$1" ]; then
|
||||||
|
tr -d '\r\n' < "${vault_dir}/$1"
|
||||||
|
else
|
||||||
|
printf ''
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
export TURN_STATIC_AUTH_SECRET="$(read_secret turn-secret)"
|
export TURN_STATIC_AUTH_SECRET="$(read_secret turn-secret)"
|
||||||
export TURN_PASSWORD="${TURN_STATIC_AUTH_SECRET}"
|
export TURN_PASSWORD="${TURN_STATIC_AUTH_SECRET}"
|
||||||
|
|
||||||
@ -14,6 +22,15 @@ export LIVEKIT_API_SECRET="$(read_secret livekit-primary)"
|
|||||||
export LIVEKIT_SECRET="${LIVEKIT_API_SECRET}"
|
export LIVEKIT_SECRET="${LIVEKIT_API_SECRET}"
|
||||||
|
|
||||||
export BOT_PASS="$(read_secret bot-pass)"
|
export BOT_PASS="$(read_secret bot-pass)"
|
||||||
|
export BOT_PASS_QUICK="$(read_optional bot-quick-pass)"
|
||||||
|
export BOT_PASS_SMART="$(read_optional bot-smart-pass)"
|
||||||
|
export BOT_PASS_GENIUS="$(read_optional bot-genius-pass)"
|
||||||
|
if [ -z "${BOT_PASS_SMART}" ]; then
|
||||||
|
export BOT_PASS_SMART="${BOT_PASS}"
|
||||||
|
fi
|
||||||
|
if [ -z "${BOT_PASS_GENIUS}" ]; then
|
||||||
|
export BOT_PASS_GENIUS="${BOT_PASS_SMART}"
|
||||||
|
fi
|
||||||
export SEEDER_PASS="$(read_secret seeder-pass)"
|
export SEEDER_PASS="$(read_secret seeder-pass)"
|
||||||
|
|
||||||
export CHAT_API_KEY="$(read_secret chat-matrix)"
|
export CHAT_API_KEY="$(read_secret chat-matrix)"
|
||||||
|
|||||||
@ -66,7 +66,7 @@ spec:
|
|||||||
- name: SEEDER_USER
|
- name: SEEDER_USER
|
||||||
value: othrys-seeder
|
value: othrys-seeder
|
||||||
- name: BOT_USER
|
- name: BOT_USER
|
||||||
value: atlasbot
|
value: atlas-smart
|
||||||
command:
|
command:
|
||||||
- /bin/sh
|
- /bin/sh
|
||||||
- -c
|
- -c
|
||||||
|
|||||||
@ -29,12 +29,18 @@ spec:
|
|||||||
operator: In
|
operator: In
|
||||||
values: ["rpi4","rpi5"]
|
values: ["rpi4","rpi5"]
|
||||||
preferredDuringSchedulingIgnoredDuringExecution:
|
preferredDuringSchedulingIgnoredDuringExecution:
|
||||||
- weight: 50
|
- weight: 80
|
||||||
preference:
|
preference:
|
||||||
matchExpressions:
|
matchExpressions:
|
||||||
- key: hardware
|
- key: hardware
|
||||||
operator: In
|
operator: In
|
||||||
values: ["rpi4"]
|
values: ["rpi5"]
|
||||||
|
- weight: 60
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: kubernetes.io/hostname
|
||||||
|
operator: NotIn
|
||||||
|
values: ["titan-12","titan-13","titan-15","titan-17","titan-19"]
|
||||||
containers:
|
containers:
|
||||||
- name: monerod
|
- name: monerod
|
||||||
image: registry.bstein.dev/crypto/monerod:0.18.4.1
|
image: registry.bstein.dev/crypto/monerod:0.18.4.1
|
||||||
|
|||||||
@ -23,7 +23,7 @@ spec:
|
|||||||
- matchExpressions:
|
- matchExpressions:
|
||||||
- key: hardware
|
- key: hardware
|
||||||
operator: In
|
operator: In
|
||||||
values: ["rpi4","rpi5"]
|
values: ["rpi5"]
|
||||||
containers:
|
containers:
|
||||||
- name: xmrig
|
- name: xmrig
|
||||||
image: ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9
|
image: ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9
|
||||||
|
|||||||
@ -123,13 +123,22 @@ spec:
|
|||||||
- key: hardware
|
- key: hardware
|
||||||
operator: In
|
operator: In
|
||||||
values: ["rpi4","rpi5"]
|
values: ["rpi4","rpi5"]
|
||||||
|
- key: longhorn
|
||||||
|
operator: NotIn
|
||||||
|
values: ["true"]
|
||||||
preferredDuringSchedulingIgnoredDuringExecution:
|
preferredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
- weight: 100
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: kubernetes.io/hostname
|
||||||
|
operator: NotIn
|
||||||
|
values: ["titan-13","titan-15","titan-17","titan-19"]
|
||||||
- weight: 50
|
- weight: 50
|
||||||
preference:
|
preference:
|
||||||
matchExpressions:
|
matchExpressions:
|
||||||
- key: hardware
|
- key: hardware
|
||||||
operator: In
|
operator: In
|
||||||
values: ["rpi4"]
|
values: ["rpi5"]
|
||||||
containers:
|
containers:
|
||||||
- name: gitea
|
- name: gitea
|
||||||
image: gitea/gitea:1.23
|
image: gitea/gitea:1.23
|
||||||
|
|||||||
@ -245,6 +245,17 @@ spec:
|
|||||||
image:
|
image:
|
||||||
repository: registry.bstein.dev/infra/harbor-registry
|
repository: registry.bstein.dev/infra/harbor-registry
|
||||||
tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-registry:tag"}
|
tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-registry:tag"}
|
||||||
|
extraEnvVars:
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_NAME
|
||||||
|
value: harbor-core
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_URL
|
||||||
|
value: http://harbor-registry:8080/service/notifications
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_TIMEOUT
|
||||||
|
value: 5s
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_THRESHOLD
|
||||||
|
value: "5"
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_BACKOFF
|
||||||
|
value: 1s
|
||||||
controller:
|
controller:
|
||||||
image:
|
image:
|
||||||
repository: registry.bstein.dev/infra/harbor-registryctl
|
repository: registry.bstein.dev/infra/harbor-registryctl
|
||||||
@ -263,6 +274,10 @@ spec:
|
|||||||
export REGISTRY_HTTP_SECRET="{{ .Data.data.REGISTRY_HTTP_SECRET }}"
|
export REGISTRY_HTTP_SECRET="{{ .Data.data.REGISTRY_HTTP_SECRET }}"
|
||||||
export REGISTRY_REDIS_PASSWORD="{{ .Data.data.REGISTRY_REDIS_PASSWORD }}"
|
export REGISTRY_REDIS_PASSWORD="{{ .Data.data.REGISTRY_REDIS_PASSWORD }}"
|
||||||
{{ end }}
|
{{ end }}
|
||||||
|
{{ with secret "kv/data/atlas/harbor/harbor-jobservice" }}
|
||||||
|
export JOBSERVICE_SECRET="{{ .Data.data.JOBSERVICE_SECRET }}"
|
||||||
|
export REGISTRY_NOTIFICATIONS_ENDPOINTS_0_HEADERS_Authorization="Harbor-Secret ${JOBSERVICE_SECRET}"
|
||||||
|
{{ end }}
|
||||||
vault.hashicorp.com/agent-inject-secret-harbor-registryctl-env.sh: "kv/data/atlas/harbor/harbor-registry"
|
vault.hashicorp.com/agent-inject-secret-harbor-registryctl-env.sh: "kv/data/atlas/harbor/harbor-registry"
|
||||||
vault.hashicorp.com/agent-inject-template-harbor-registryctl-env.sh: |
|
vault.hashicorp.com/agent-inject-template-harbor-registryctl-env.sh: |
|
||||||
{{ with secret "kv/data/atlas/harbor/harbor-core" }}
|
{{ with secret "kv/data/atlas/harbor/harbor-core" }}
|
||||||
@ -397,10 +412,10 @@ spec:
|
|||||||
patch: |-
|
patch: |-
|
||||||
- op: replace
|
- op: replace
|
||||||
path: /spec/rules/0/http/paths/2/backend/service/name
|
path: /spec/rules/0/http/paths/2/backend/service/name
|
||||||
value: harbor-registry
|
value: harbor-core
|
||||||
- op: replace
|
- op: replace
|
||||||
path: /spec/rules/0/http/paths/2/backend/service/port/number
|
path: /spec/rules/0/http/paths/2/backend/service/port/number
|
||||||
value: 5000
|
value: 80
|
||||||
- target:
|
- target:
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
name: harbor-jobservice
|
name: harbor-jobservice
|
||||||
@ -464,6 +479,16 @@ spec:
|
|||||||
value: /vault/secrets/harbor-registry-env.sh
|
value: /vault/secrets/harbor-registry-env.sh
|
||||||
- name: VAULT_COPY_FILES
|
- name: VAULT_COPY_FILES
|
||||||
value: /vault/secrets/harbor-registry-htpasswd:/etc/registry/passwd
|
value: /vault/secrets/harbor-registry-htpasswd:/etc/registry/passwd
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_NAME
|
||||||
|
value: harbor-core
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_URL
|
||||||
|
value: http://harbor-registry:8080/service/notifications
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_TIMEOUT
|
||||||
|
value: 5s
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_THRESHOLD
|
||||||
|
value: "5"
|
||||||
|
- name: REGISTRY_NOTIFICATIONS_ENDPOINTS_0_BACKOFF
|
||||||
|
value: 1s
|
||||||
envFrom:
|
envFrom:
|
||||||
- $patch: replace
|
- $patch: replace
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
|
|||||||
@ -67,7 +67,7 @@ data:
|
|||||||
url('https://scm.bstein.dev/bstein/harbor-arm-build.git')
|
url('https://scm.bstein.dev/bstein/harbor-arm-build.git')
|
||||||
credentials('gitea-pat')
|
credentials('gitea-pat')
|
||||||
}
|
}
|
||||||
branches('*/master')
|
branches('*/main')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -108,7 +108,7 @@ data:
|
|||||||
url('https://scm.bstein.dev/bstein/ci-demo.git')
|
url('https://scm.bstein.dev/bstein/ci-demo.git')
|
||||||
credentials('gitea-pat')
|
credentials('gitea-pat')
|
||||||
}
|
}
|
||||||
branches('*/master')
|
branches('*/main')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
scriptPath('Jenkinsfile')
|
scriptPath('Jenkinsfile')
|
||||||
@ -167,6 +167,58 @@ data:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pipelineJob('atlasbot') {
|
||||||
|
properties {
|
||||||
|
pipelineTriggers {
|
||||||
|
triggers {
|
||||||
|
scmTrigger {
|
||||||
|
scmpoll_spec('H/2 * * * *')
|
||||||
|
ignorePostCommitHooks(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
definition {
|
||||||
|
cpsScm {
|
||||||
|
scm {
|
||||||
|
git {
|
||||||
|
remote {
|
||||||
|
url('https://scm.bstein.dev/bstein/atlasbot.git')
|
||||||
|
credentials('gitea-pat')
|
||||||
|
}
|
||||||
|
branches('*/main')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scriptPath('Jenkinsfile')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pipelineJob('Soteria') {
|
||||||
|
properties {
|
||||||
|
pipelineTriggers {
|
||||||
|
triggers {
|
||||||
|
scmTrigger {
|
||||||
|
scmpoll_spec('H/5 * * * *')
|
||||||
|
ignorePostCommitHooks(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
definition {
|
||||||
|
cpsScm {
|
||||||
|
scm {
|
||||||
|
git {
|
||||||
|
remote {
|
||||||
|
url('https://scm.bstein.dev/bstein/soteria.git')
|
||||||
|
credentials('gitea-pat')
|
||||||
|
}
|
||||||
|
branches('*/main')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
scriptPath('Jenkinsfile')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
pipelineJob('data-prepper') {
|
pipelineJob('data-prepper') {
|
||||||
properties {
|
properties {
|
||||||
pipelineTriggers {
|
pipelineTriggers {
|
||||||
|
|||||||
@ -48,7 +48,7 @@ spec:
|
|||||||
TITAN_IAC_WEBHOOK_TOKEN={{ .Data.data.titan_iac_quality_gate }}
|
TITAN_IAC_WEBHOOK_TOKEN={{ .Data.data.titan_iac_quality_gate }}
|
||||||
GIT_NOTIFY_TOKEN_BSTEIN_DEV_HOME={{ .Data.data.git_notify_bstein_dev_home }}
|
GIT_NOTIFY_TOKEN_BSTEIN_DEV_HOME={{ .Data.data.git_notify_bstein_dev_home }}
|
||||||
{{ end }}
|
{{ end }}
|
||||||
bstein.dev/restarted-at: "2026-01-20T14:52:41Z"
|
bstein.dev/restarted-at: "2026-02-02T15:10:33Z"
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: jenkins
|
serviceAccountName: jenkins
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
|
|||||||
13
services/jenkins/dind-pvc.yaml
Normal file
13
services/jenkins/dind-pvc.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# services/jenkins/dind-pvc.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: jenkins-dind-cache
|
||||||
|
namespace: jenkins
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 30Gi
|
||||||
|
storageClassName: astreae
|
||||||
@ -8,6 +8,7 @@ resources:
|
|||||||
- vault-serviceaccount.yaml
|
- vault-serviceaccount.yaml
|
||||||
- pvc.yaml
|
- pvc.yaml
|
||||||
- cache-pvc.yaml
|
- cache-pvc.yaml
|
||||||
|
- dind-pvc.yaml
|
||||||
- plugins-pvc.yaml
|
- plugins-pvc.yaml
|
||||||
- configmap-jcasc.yaml
|
- configmap-jcasc.yaml
|
||||||
- configmap-plugins.yaml
|
- configmap-plugins.yaml
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
# services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml
|
# services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml
|
||||||
# One-off job for sso/keycloak-portal-e2e-execute-actions-email-14.
|
# One-off job for sso/keycloak-portal-e2e-execute-actions-email-18.
|
||||||
# Purpose: keycloak portal e2e execute actions email 14 (see container args/env in this file).
|
# Purpose: keycloak portal e2e execute actions email 18 (see container args/env in this file).
|
||||||
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
||||||
# Safe to delete the finished Job/pod; it should not run continuously.
|
# Safe to delete the finished Job/pod; it should not run continuously.
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: Job
|
kind: Job
|
||||||
metadata:
|
metadata:
|
||||||
name: keycloak-portal-e2e-execute-actions-email-14
|
name: keycloak-portal-e2e-execute-actions-email-18
|
||||||
namespace: sso
|
namespace: sso
|
||||||
spec:
|
spec:
|
||||||
suspend: true
|
suspend: true
|
||||||
@ -70,7 +70,7 @@ spec:
|
|||||||
- name: E2E_PROBE_USERNAME
|
- name: E2E_PROBE_USERNAME
|
||||||
value: robotuser
|
value: robotuser
|
||||||
- name: E2E_PROBE_EMAIL
|
- name: E2E_PROBE_EMAIL
|
||||||
value: robotuser@bstein.dev
|
value: brad.stein+robot@gmail.com
|
||||||
- name: EXECUTE_ACTIONS_CLIENT_ID
|
- name: EXECUTE_ACTIONS_CLIENT_ID
|
||||||
value: bstein-dev-home
|
value: bstein-dev-home
|
||||||
- name: EXECUTE_ACTIONS_REDIRECT_URI
|
- name: EXECUTE_ACTIONS_REDIRECT_URI
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
# services/keycloak/oneoffs/realm-settings-job.yaml
|
# services/keycloak/oneoffs/realm-settings-job.yaml
|
||||||
# One-off job for sso/keycloak-realm-settings-36.
|
# One-off job for sso/keycloak-realm-settings-38.
|
||||||
# Purpose: keycloak realm settings 36 (see container args/env in this file).
|
# Purpose: keycloak realm settings 38 (see container args/env in this file).
|
||||||
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
# Run by setting spec.suspend to false, reconcile, then set it back to true.
|
||||||
# Safe to delete the finished Job/pod; it should not run continuously.
|
# Safe to delete the finished Job/pod; it should not run continuously.
|
||||||
apiVersion: batch/v1
|
apiVersion: batch/v1
|
||||||
kind: Job
|
kind: Job
|
||||||
metadata:
|
metadata:
|
||||||
name: keycloak-realm-settings-36
|
name: keycloak-realm-settings-38
|
||||||
namespace: sso
|
namespace: sso
|
||||||
spec:
|
spec:
|
||||||
suspend: true
|
suspend: true
|
||||||
@ -64,7 +64,7 @@ spec:
|
|||||||
- name: KEYCLOAK_REALM
|
- name: KEYCLOAK_REALM
|
||||||
value: atlas
|
value: atlas
|
||||||
- name: KEYCLOAK_SMTP_HOST
|
- name: KEYCLOAK_SMTP_HOST
|
||||||
value: mail.bstein.dev
|
value: smtp.postmarkapp.com
|
||||||
- name: KEYCLOAK_SMTP_PORT
|
- name: KEYCLOAK_SMTP_PORT
|
||||||
value: "587"
|
value: "587"
|
||||||
- name: KEYCLOAK_SMTP_FROM
|
- name: KEYCLOAK_SMTP_FROM
|
||||||
|
|||||||
@ -18,6 +18,7 @@ spec:
|
|||||||
prometheus.io/scrape: "true"
|
prometheus.io/scrape: "true"
|
||||||
prometheus.io/port: "8080"
|
prometheus.io/port: "8080"
|
||||||
prometheus.io/path: "/metrics"
|
prometheus.io/path: "/metrics"
|
||||||
|
maintenance.bstein.dev/restart-rev: "20260207-2"
|
||||||
vault.hashicorp.com/agent-inject: "true"
|
vault.hashicorp.com/agent-inject: "true"
|
||||||
vault.hashicorp.com/role: "maintenance"
|
vault.hashicorp.com/role: "maintenance"
|
||||||
vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/maintenance/ariadne-db"
|
vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/maintenance/ariadne-db"
|
||||||
@ -105,7 +106,7 @@ spec:
|
|||||||
node-role.kubernetes.io/worker: "true"
|
node-role.kubernetes.io/worker: "true"
|
||||||
containers:
|
containers:
|
||||||
- name: ariadne
|
- name: ariadne
|
||||||
image: registry.bstein.dev/bstein/ariadne:0.1.0-0
|
image: registry.bstein.dev/bstein/ariadne:latest
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
command: ["/bin/sh", "-c"]
|
command: ["/bin/sh", "-c"]
|
||||||
args:
|
args:
|
||||||
@ -285,7 +286,7 @@ spec:
|
|||||||
- name: ARIADNE_SCHEDULE_MAILU_SYNC
|
- name: ARIADNE_SCHEDULE_MAILU_SYNC
|
||||||
value: "30 4 * * *"
|
value: "30 4 * * *"
|
||||||
- name: ARIADNE_SCHEDULE_NEXTCLOUD_SYNC
|
- name: ARIADNE_SCHEDULE_NEXTCLOUD_SYNC
|
||||||
value: "0 5 * * *"
|
value: "*/15 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_NEXTCLOUD_CRON
|
- name: ARIADNE_SCHEDULE_NEXTCLOUD_CRON
|
||||||
value: "*/5 * * * *"
|
value: "*/5 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_NEXTCLOUD_MAINTENANCE
|
- name: ARIADNE_SCHEDULE_NEXTCLOUD_MAINTENANCE
|
||||||
@ -293,11 +294,11 @@ spec:
|
|||||||
- name: ARIADNE_SCHEDULE_VAULTWARDEN_SYNC
|
- name: ARIADNE_SCHEDULE_VAULTWARDEN_SYNC
|
||||||
value: "0 * * * *"
|
value: "0 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_WGER_USER_SYNC
|
- name: ARIADNE_SCHEDULE_WGER_USER_SYNC
|
||||||
value: "0 5 * * *"
|
value: "*/15 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_WGER_ADMIN
|
- name: ARIADNE_SCHEDULE_WGER_ADMIN
|
||||||
value: "15 3 * * *"
|
value: "15 3 * * *"
|
||||||
- name: ARIADNE_SCHEDULE_FIREFLY_USER_SYNC
|
- name: ARIADNE_SCHEDULE_FIREFLY_USER_SYNC
|
||||||
value: "0 6 * * *"
|
value: "*/15 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_FIREFLY_CRON
|
- name: ARIADNE_SCHEDULE_FIREFLY_CRON
|
||||||
value: "0 3 * * *"
|
value: "0 3 * * *"
|
||||||
- name: ARIADNE_SCHEDULE_POD_CLEANER
|
- name: ARIADNE_SCHEDULE_POD_CLEANER
|
||||||
@ -305,11 +306,11 @@ spec:
|
|||||||
- name: ARIADNE_SCHEDULE_OPENSEARCH_PRUNE
|
- name: ARIADNE_SCHEDULE_OPENSEARCH_PRUNE
|
||||||
value: "23 3 * * *"
|
value: "23 3 * * *"
|
||||||
- name: ARIADNE_SCHEDULE_IMAGE_SWEEPER
|
- name: ARIADNE_SCHEDULE_IMAGE_SWEEPER
|
||||||
value: "30 4 * * 0"
|
value: "30 4 * * *"
|
||||||
- name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH
|
- name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH
|
||||||
value: "0 * * * *"
|
value: "*/15 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_VAULT_OIDC
|
- name: ARIADNE_SCHEDULE_VAULT_OIDC
|
||||||
value: "0 * * * *"
|
value: "*/15 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_COMMS_GUEST_NAME
|
- name: ARIADNE_SCHEDULE_COMMS_GUEST_NAME
|
||||||
value: "*/5 * * * *"
|
value: "*/5 * * * *"
|
||||||
- name: ARIADNE_SCHEDULE_COMMS_PIN_INVITE
|
- name: ARIADNE_SCHEDULE_COMMS_PIN_INVITE
|
||||||
@ -330,6 +331,8 @@ spec:
|
|||||||
value: http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428
|
value: http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428
|
||||||
- name: ARIADNE_CLUSTER_STATE_VM_TIMEOUT_SEC
|
- name: ARIADNE_CLUSTER_STATE_VM_TIMEOUT_SEC
|
||||||
value: "5"
|
value: "5"
|
||||||
|
- name: ARIADNE_ALERTMANAGER_URL
|
||||||
|
value: http://alertmanager.monitoring.svc.cluster.local
|
||||||
- name: OPENSEARCH_URL
|
- name: OPENSEARCH_URL
|
||||||
value: http://opensearch-master.logging.svc.cluster.local:9200
|
value: http://opensearch-master.logging.svc.cluster.local:9200
|
||||||
- name: OPENSEARCH_LIMIT_BYTES
|
- name: OPENSEARCH_LIMIT_BYTES
|
||||||
|
|||||||
@ -29,6 +29,29 @@ rules:
|
|||||||
- get
|
- get
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
|
- apiGroups: ["apps"]
|
||||||
|
resources:
|
||||||
|
- deployments
|
||||||
|
- statefulsets
|
||||||
|
- daemonsets
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups: ["longhorn.io"]
|
||||||
|
resources:
|
||||||
|
- volumes
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources:
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
- apiGroups: [""]
|
- apiGroups: [""]
|
||||||
resources:
|
resources:
|
||||||
- pods/exec
|
- pods/exec
|
||||||
@ -56,3 +79,17 @@ roleRef:
|
|||||||
apiGroup: rbac.authorization.k8s.io
|
apiGroup: rbac.authorization.k8s.io
|
||||||
kind: ClusterRole
|
kind: ClusterRole
|
||||||
name: ariadne-job-spawner
|
name: ariadne-job-spawner
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: ariadne-auth-delegator
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: ariadne
|
||||||
|
namespace: maintenance
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: system:auth-delegator
|
||||||
|
|||||||
@ -21,3 +21,26 @@ spec:
|
|||||||
policy:
|
policy:
|
||||||
semver:
|
semver:
|
||||||
range: ">=0.1.0-0"
|
range: ">=0.1.0-0"
|
||||||
|
---
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||||
|
kind: ImageRepository
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
namespace: maintenance
|
||||||
|
spec:
|
||||||
|
image: registry.bstein.dev/bstein/soteria
|
||||||
|
interval: 1m0s
|
||||||
|
secretRef:
|
||||||
|
name: harbor-regcred
|
||||||
|
---
|
||||||
|
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||||
|
kind: ImagePolicy
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
namespace: maintenance
|
||||||
|
spec:
|
||||||
|
imageRepositoryRef:
|
||||||
|
name: soteria
|
||||||
|
policy:
|
||||||
|
semver:
|
||||||
|
range: ">=0.1.0-0"
|
||||||
|
|||||||
@ -5,6 +5,7 @@ resources:
|
|||||||
- namespace.yaml
|
- namespace.yaml
|
||||||
- image.yaml
|
- image.yaml
|
||||||
- secretproviderclass.yaml
|
- secretproviderclass.yaml
|
||||||
|
- soteria-configmap.yaml
|
||||||
- vault-serviceaccount.yaml
|
- vault-serviceaccount.yaml
|
||||||
- vault-sync-deployment.yaml
|
- vault-sync-deployment.yaml
|
||||||
- ariadne-serviceaccount.yaml
|
- ariadne-serviceaccount.yaml
|
||||||
@ -13,9 +14,12 @@ resources:
|
|||||||
- k3s-traefik-cleanup-rbac.yaml
|
- k3s-traefik-cleanup-rbac.yaml
|
||||||
- node-nofile-serviceaccount.yaml
|
- node-nofile-serviceaccount.yaml
|
||||||
- pod-cleaner-rbac.yaml
|
- pod-cleaner-rbac.yaml
|
||||||
|
- soteria-serviceaccount.yaml
|
||||||
|
- soteria-rbac.yaml
|
||||||
- ariadne-deployment.yaml
|
- ariadne-deployment.yaml
|
||||||
- oneoffs/ariadne-migrate-job.yaml
|
- oneoffs/ariadne-migrate-job.yaml
|
||||||
- ariadne-service.yaml
|
- ariadne-service.yaml
|
||||||
|
- soteria-deployment.yaml
|
||||||
- disable-k3s-traefik-daemonset.yaml
|
- disable-k3s-traefik-daemonset.yaml
|
||||||
- oneoffs/k3s-traefik-cleanup-job.yaml
|
- oneoffs/k3s-traefik-cleanup-job.yaml
|
||||||
- node-nofile-daemonset.yaml
|
- node-nofile-daemonset.yaml
|
||||||
@ -24,9 +28,12 @@ resources:
|
|||||||
- node-image-sweeper-serviceaccount.yaml
|
- node-image-sweeper-serviceaccount.yaml
|
||||||
- node-image-sweeper-daemonset.yaml
|
- node-image-sweeper-daemonset.yaml
|
||||||
- image-sweeper-cronjob.yaml
|
- image-sweeper-cronjob.yaml
|
||||||
|
- soteria-service.yaml
|
||||||
images:
|
images:
|
||||||
- name: registry.bstein.dev/bstein/ariadne
|
- name: registry.bstein.dev/bstein/ariadne
|
||||||
newTag: 0.1.0-59 # {"$imagepolicy": "maintenance:ariadne:tag"}
|
newTag: 0.1.0-22 # {"$imagepolicy": "maintenance:ariadne:tag"}
|
||||||
|
- name: registry.bstein.dev/bstein/soteria
|
||||||
|
newTag: 0.1.0-11 # {"$imagepolicy": "maintenance:soteria:tag"}
|
||||||
configMapGenerator:
|
configMapGenerator:
|
||||||
- name: disable-k3s-traefik-script
|
- name: disable-k3s-traefik-script
|
||||||
namespace: maintenance
|
namespace: maintenance
|
||||||
|
|||||||
10
services/maintenance/soteria-configmap.yaml
Normal file
10
services/maintenance/soteria-configmap.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# services/maintenance/soteria-configmap.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
namespace: maintenance
|
||||||
|
data:
|
||||||
|
SOTERIA_BACKUP_DRIVER: "longhorn"
|
||||||
|
SOTERIA_LONGHORN_URL: "http://longhorn-backend.longhorn-system.svc:9500"
|
||||||
|
SOTERIA_LONGHORN_BACKUP_MODE: "incremental"
|
||||||
73
services/maintenance/soteria-deployment.yaml
Normal file
73
services/maintenance/soteria-deployment.yaml
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# services/maintenance/soteria-deployment.yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
namespace: maintenance
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: soteria
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: soteria
|
||||||
|
spec:
|
||||||
|
serviceAccountName: soteria
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/arch: arm64
|
||||||
|
node-role.kubernetes.io/worker: "true"
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
preferredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
- weight: 90
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: hardware
|
||||||
|
operator: In
|
||||||
|
values: ["rpi5"]
|
||||||
|
- weight: 50
|
||||||
|
preference:
|
||||||
|
matchExpressions:
|
||||||
|
- key: hardware
|
||||||
|
operator: In
|
||||||
|
values: ["rpi4"]
|
||||||
|
containers:
|
||||||
|
- name: soteria
|
||||||
|
image: registry.bstein.dev/bstein/soteria:latest
|
||||||
|
imagePullPolicy: Always
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 8080
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: soteria
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
timeoutSeconds: 2
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /readyz
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 2
|
||||||
|
periodSeconds: 5
|
||||||
|
timeoutSeconds: 2
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 50m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 256Mi
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 65532
|
||||||
|
capabilities:
|
||||||
|
drop: ["ALL"]
|
||||||
22
services/maintenance/soteria-rbac.yaml
Normal file
22
services/maintenance/soteria-rbac.yaml
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# services/maintenance/soteria-rbac.yaml
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["persistentvolumeclaims", "persistentvolumes"]
|
||||||
|
verbs: ["get", "list"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: soteria
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: soteria
|
||||||
|
namespace: maintenance
|
||||||
14
services/maintenance/soteria-service.yaml
Normal file
14
services/maintenance/soteria-service.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# services/maintenance/soteria-service.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
namespace: maintenance
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: soteria
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: http
|
||||||
8
services/maintenance/soteria-serviceaccount.yaml
Normal file
8
services/maintenance/soteria-serviceaccount.yaml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# services/maintenance/soteria-serviceaccount.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: soteria
|
||||||
|
namespace: maintenance
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: harbor-regcred
|
||||||
@ -20,7 +20,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by 
(namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) 
(label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}"
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
@ -89,7 +89,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))",
|
"expr": "sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}"
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1901,7 +1901,7 @@
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by 
(namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) 
(label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}"
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -145,7 +145,7 @@ data:
|
|||||||
model:
|
model:
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
expr: avg_over_time((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100)[10m:1m] * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")
|
expr: avg_over_time((1 - avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100)[10m:1m] * on(instance) group_left(node) label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)")
|
||||||
legendFormat: '{{instance}}'
|
legendFormat: '{{instance}}'
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
@ -286,8 +286,8 @@ data:
|
|||||||
summary: "node-image-sweeper not fully ready"
|
summary: "node-image-sweeper not fully ready"
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- uid: maint-cron-stale
|
- uid: maint-ariadne-image-sweeper-stale
|
||||||
title: "Maintenance CronJobs stale (>3h since success)"
|
title: "Ariadne image sweeper stale (schedule >8d)"
|
||||||
condition: C
|
condition: C
|
||||||
for: "5m"
|
for: "5m"
|
||||||
data:
|
data:
|
||||||
@ -297,10 +297,10 @@ data:
|
|||||||
to: 0
|
to: 0
|
||||||
datasourceUid: atlas-vm
|
datasourceUid: atlas-vm
|
||||||
model:
|
model:
|
||||||
expr: time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob="image-sweeper"}) and on(cronjob) (kube_cronjob_spec_suspend{namespace="maintenance",cronjob="image-sweeper"} == 0)
|
expr: time() - ariadne_schedule_last_success_timestamp_seconds{task="schedule.image_sweeper"}
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
legendFormat: '{{cronjob}}'
|
legendFormat: '{{task}}'
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
uid: atlas-vm
|
uid: atlas-vm
|
||||||
@ -321,17 +321,166 @@ data:
|
|||||||
type: threshold
|
type: threshold
|
||||||
conditions:
|
conditions:
|
||||||
- evaluator:
|
- evaluator:
|
||||||
params: [10800]
|
params: [691200]
|
||||||
type: gt
|
type: gt
|
||||||
operator:
|
operator:
|
||||||
type: and
|
type: and
|
||||||
reducer:
|
reducer:
|
||||||
type: last
|
type: last
|
||||||
type: query
|
type: query
|
||||||
noDataState: NoData
|
noDataState: OK
|
||||||
execErrState: Error
|
execErrState: Error
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Maintenance cronjob stale >3h since last success"
|
summary: "Ariadne image sweeper stale >8d since last success"
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- uid: maint-cron-stale
|
||||||
|
title: "Maintenance CronJobs stale (legacy disabled)"
|
||||||
|
condition: C
|
||||||
|
for: "5m"
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange:
|
||||||
|
from: 300
|
||||||
|
to: 0
|
||||||
|
datasourceUid: atlas-vm
|
||||||
|
model:
|
||||||
|
expr: vector(0)
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
legendFormat: legacy
|
||||||
|
datasource:
|
||||||
|
type: prometheus
|
||||||
|
uid: atlas-vm
|
||||||
|
- refId: B
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model:
|
||||||
|
expression: A
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
reducer: last
|
||||||
|
type: reduce
|
||||||
|
- refId: C
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model:
|
||||||
|
expression: B
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
type: threshold
|
||||||
|
conditions:
|
||||||
|
- evaluator:
|
||||||
|
params: [1]
|
||||||
|
type: gt
|
||||||
|
operator:
|
||||||
|
type: and
|
||||||
|
reducer:
|
||||||
|
type: last
|
||||||
|
type: query
|
||||||
|
noDataState: OK
|
||||||
|
execErrState: OK
|
||||||
|
annotations:
|
||||||
|
summary: "Legacy cronjob alert disabled"
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
- orgId: 1
|
||||||
|
name: ariadne
|
||||||
|
folder: Alerts
|
||||||
|
interval: 1m
|
||||||
|
rules:
|
||||||
|
- uid: ariadne-schedule-error
|
||||||
|
title: "Ariadne schedule task failed"
|
||||||
|
condition: C
|
||||||
|
for: "10m"
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange:
|
||||||
|
from: 300
|
||||||
|
to: 0
|
||||||
|
datasourceUid: atlas-vm
|
||||||
|
model:
|
||||||
|
expr: max by (task) (ariadne_schedule_last_status{task=~"schedule\\..+"})
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
legendFormat: '{{task}}'
|
||||||
|
datasource:
|
||||||
|
type: prometheus
|
||||||
|
uid: atlas-vm
|
||||||
|
- refId: B
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model:
|
||||||
|
expression: A
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
reducer: last
|
||||||
|
type: reduce
|
||||||
|
- refId: C
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model:
|
||||||
|
expression: B
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
type: threshold
|
||||||
|
conditions:
|
||||||
|
- evaluator:
|
||||||
|
params: [1]
|
||||||
|
type: lt
|
||||||
|
operator:
|
||||||
|
type: and
|
||||||
|
reducer:
|
||||||
|
type: last
|
||||||
|
type: query
|
||||||
|
noDataState: OK
|
||||||
|
execErrState: Error
|
||||||
|
annotations:
|
||||||
|
summary: "Ariadne schedule failed ({{ $labels.task }})"
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
- uid: ariadne-scheduler-stalled
|
||||||
|
title: "Ariadne scheduler behind (>15m)"
|
||||||
|
condition: C
|
||||||
|
for: "10m"
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange:
|
||||||
|
from: 300
|
||||||
|
to: 0
|
||||||
|
datasourceUid: atlas-vm
|
||||||
|
model:
|
||||||
|
expr: time() - ariadne_schedule_next_run_timestamp_seconds{task=~"schedule\\..+"}
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
legendFormat: '{{task}}'
|
||||||
|
datasource:
|
||||||
|
type: prometheus
|
||||||
|
uid: atlas-vm
|
||||||
|
- refId: B
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model:
|
||||||
|
expression: A
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
reducer: last
|
||||||
|
type: reduce
|
||||||
|
- refId: C
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model:
|
||||||
|
expression: B
|
||||||
|
intervalMs: 60000
|
||||||
|
maxDataPoints: 43200
|
||||||
|
type: threshold
|
||||||
|
conditions:
|
||||||
|
- evaluator:
|
||||||
|
params: [900]
|
||||||
|
type: gt
|
||||||
|
operator:
|
||||||
|
type: and
|
||||||
|
reducer:
|
||||||
|
type: last
|
||||||
|
type: query
|
||||||
|
noDataState: OK
|
||||||
|
execErrState: Error
|
||||||
|
annotations:
|
||||||
|
summary: "Ariadne scheduler behind for {{ $labels.task }}"
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
- orgId: 1
|
- orgId: 1
|
||||||
@ -352,7 +501,7 @@ data:
|
|||||||
model:
|
model:
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
expr: POSTMARK_OUTBOUND_BOUNCE_RATE{window="1d"}
|
expr: postmark_outbound_bounce_rate{window="1d"}
|
||||||
legendFormat: bounce 1d
|
legendFormat: bounce 1d
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
@ -400,7 +549,7 @@ data:
|
|||||||
model:
|
model:
|
||||||
intervalMs: 60000
|
intervalMs: 60000
|
||||||
maxDataPoints: 43200
|
maxDataPoints: 43200
|
||||||
expr: POSTMARK_API_UP
|
expr: min_over_time(max by (instance) (postmark_api_up)[5m])
|
||||||
legendFormat: api up
|
legendFormat: api up
|
||||||
datasource:
|
datasource:
|
||||||
type: prometheus
|
type: prometheus
|
||||||
|
|||||||
@ -29,7 +29,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by 
(namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) 
(label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}"
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
@ -98,7 +98,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))",
|
"expr": "sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}"
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1910,7 +1910,7 @@ data:
|
|||||||
},
|
},
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by 
(namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) 
(label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))) / on(node) group_left() clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() ((max by (node) (kube_node_status_allocatable{resource=~\"nvidia[.]com/gpu.*|nvidia_com_gpu.*\"} > bool 0)) or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"legendFormat": "{{namespace}}"
|
"legendFormat": "{{namespace}}"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -286,7 +286,7 @@ spec:
|
|||||||
podAnnotations:
|
podAnnotations:
|
||||||
vault.hashicorp.com/agent-inject: "true"
|
vault.hashicorp.com/agent-inject: "true"
|
||||||
vault.hashicorp.com/role: "monitoring"
|
vault.hashicorp.com/role: "monitoring"
|
||||||
monitoring.bstein.dev/restart-rev: "1"
|
monitoring.bstein.dev/restart-rev: "4"
|
||||||
vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin"
|
vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin"
|
||||||
vault.hashicorp.com/agent-inject-template-grafana-env.sh: |
|
vault.hashicorp.com/agent-inject-template-grafana-env.sh: |
|
||||||
{{ with secret "kv/data/atlas/monitoring/grafana-admin" }}
|
{{ with secret "kv/data/atlas/monitoring/grafana-admin" }}
|
||||||
|
|||||||
@ -43,6 +43,12 @@ spec:
|
|||||||
value: /var/run/secrets/vault-token-reviewer/token
|
value: /var/run/secrets/vault-token-reviewer/token
|
||||||
- name: VAULT_K8S_ROLE_TTL
|
- name: VAULT_K8S_ROLE_TTL
|
||||||
value: 1h
|
value: 1h
|
||||||
|
- name: VAULT_K8S_BOUND_AUDIENCES
|
||||||
|
value: "https://kubernetes.default.svc,https://kubernetes.default.svc.cluster.local,k3s"
|
||||||
|
- name: VAULT_K8S_ISSUER
|
||||||
|
value: https://kubernetes.default.svc.cluster.local
|
||||||
|
- name: VAULT_K8S_DISABLE_ISS_VALIDATION
|
||||||
|
value: "false"
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: k8s-auth-config-script
|
- name: k8s-auth-config-script
|
||||||
mountPath: /scripts
|
mountPath: /scripts
|
||||||
|
|||||||
@ -53,6 +53,8 @@ ensure_token
|
|||||||
k8s_host="https://${KUBERNETES_SERVICE_HOST}:443"
|
k8s_host="https://${KUBERNETES_SERVICE_HOST}:443"
|
||||||
k8s_ca="$(cat /var/run/secrets/kubernetes.io/serviceaccount/ca.crt)"
|
k8s_ca="$(cat /var/run/secrets/kubernetes.io/serviceaccount/ca.crt)"
|
||||||
k8s_token="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
|
k8s_token="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
|
||||||
|
k8s_issuer="${VAULT_K8S_ISSUER:-}"
|
||||||
|
disable_iss_validation="${VAULT_K8S_DISABLE_ISS_VALIDATION:-true}"
|
||||||
role_ttl="${VAULT_K8S_ROLE_TTL:-1h}"
|
role_ttl="${VAULT_K8S_ROLE_TTL:-1h}"
|
||||||
token_reviewer_jwt="${VAULT_K8S_TOKEN_REVIEWER_JWT:-}"
|
token_reviewer_jwt="${VAULT_K8S_TOKEN_REVIEWER_JWT:-}"
|
||||||
|
|
||||||
@ -68,11 +70,36 @@ if ! vault_cmd auth list -format=json | grep -q '"kubernetes/"'; then
|
|||||||
vault_cmd auth enable kubernetes
|
vault_cmd auth enable kubernetes
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
ensure_default_policy_login() {
|
||||||
|
default_policy="$(vault_cmd policy read default)"
|
||||||
|
if printf '%s' "${default_policy}" | grep -q 'auth/kubernetes/login'; then
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
log "updating default policy to allow kubernetes login"
|
||||||
|
default_policy="${default_policy}
|
||||||
|
path \"auth/kubernetes/login\" {
|
||||||
|
capabilities = [\"create\", \"update\"]
|
||||||
|
}
|
||||||
|
"
|
||||||
|
printf '%s\n' "${default_policy}" | vault_cmd policy write default -
|
||||||
|
}
|
||||||
|
|
||||||
log "configuring kubernetes auth"
|
log "configuring kubernetes auth"
|
||||||
vault_cmd write auth/kubernetes/config \
|
if [ -n "${k8s_issuer}" ]; then
|
||||||
|
vault_cmd write auth/kubernetes/config \
|
||||||
|
token_reviewer_jwt="${token_reviewer_jwt}" \
|
||||||
|
kubernetes_host="${k8s_host}" \
|
||||||
|
kubernetes_ca_cert="${k8s_ca}" \
|
||||||
|
issuer="${k8s_issuer}" \
|
||||||
|
disable_iss_validation="${disable_iss_validation}"
|
||||||
|
else
|
||||||
|
vault_cmd write auth/kubernetes/config \
|
||||||
token_reviewer_jwt="${token_reviewer_jwt}" \
|
token_reviewer_jwt="${token_reviewer_jwt}" \
|
||||||
kubernetes_host="${k8s_host}" \
|
kubernetes_host="${k8s_host}" \
|
||||||
kubernetes_ca_cert="${k8s_ca}"
|
kubernetes_ca_cert="${k8s_ca}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
ensure_default_policy_login
|
||||||
|
|
||||||
write_raw_policy() {
|
write_raw_policy() {
|
||||||
name="$1"
|
name="$1"
|
||||||
@ -87,6 +114,7 @@ write_policy_and_role() {
|
|||||||
service_accounts="$3"
|
service_accounts="$3"
|
||||||
read_paths="$4"
|
read_paths="$4"
|
||||||
write_paths="$5"
|
write_paths="$5"
|
||||||
|
audiences="${VAULT_K8S_BOUND_AUDIENCES:-}"
|
||||||
|
|
||||||
policy_body=""
|
policy_body=""
|
||||||
for path in ${read_paths}; do
|
for path in ${read_paths}; do
|
||||||
@ -109,11 +137,42 @@ path \"kv/metadata/atlas/${path}\" {
|
|||||||
}
|
}
|
||||||
"
|
"
|
||||||
done
|
done
|
||||||
|
if [ "${role}" = "maintenance" ]; then
|
||||||
|
policy_body="${policy_body}
|
||||||
|
path \"sys/auth\" {
|
||||||
|
capabilities = [\"read\"]
|
||||||
|
}
|
||||||
|
path \"sys/auth/*\" {
|
||||||
|
capabilities = [\"create\", \"update\", \"read\", \"sudo\"]
|
||||||
|
}
|
||||||
|
path \"auth/kubernetes/*\" {
|
||||||
|
capabilities = [\"create\", \"update\", \"read\"]
|
||||||
|
}
|
||||||
|
path \"auth/oidc/*\" {
|
||||||
|
capabilities = [\"create\", \"update\", \"read\"]
|
||||||
|
}
|
||||||
|
path \"sys/policies/acl\" {
|
||||||
|
capabilities = [\"list\"]
|
||||||
|
}
|
||||||
|
path \"sys/policies/acl/*\" {
|
||||||
|
capabilities = [\"create\", \"update\", \"read\"]
|
||||||
|
}
|
||||||
|
"
|
||||||
|
fi
|
||||||
|
|
||||||
log "writing policy ${role}"
|
log "writing policy ${role}"
|
||||||
printf '%s\n' "${policy_body}" | vault_cmd policy write "${role}" -
|
printf '%s\n' "${policy_body}" | vault_cmd policy write "${role}" -
|
||||||
|
|
||||||
log "writing role ${role}"
|
log "writing role ${role}"
|
||||||
|
if [ -n "${audiences}" ]; then
|
||||||
|
vault_cmd write "auth/kubernetes/role/${role}" \
|
||||||
|
bound_service_account_audiences="${audiences}" \
|
||||||
|
bound_service_account_names="${service_accounts}" \
|
||||||
|
bound_service_account_namespaces="${namespace}" \
|
||||||
|
policies="${role}" \
|
||||||
|
ttl="${role_ttl}"
|
||||||
|
return
|
||||||
|
fi
|
||||||
vault_cmd write "auth/kubernetes/role/${role}" \
|
vault_cmd write "auth/kubernetes/role/${role}" \
|
||||||
bound_service_account_names="${service_accounts}" \
|
bound_service_account_names="${service_accounts}" \
|
||||||
bound_service_account_namespaces="${namespace}" \
|
bound_service_account_namespaces="${namespace}" \
|
||||||
@ -218,6 +277,8 @@ write_policy_and_role "nextcloud" "nextcloud" "nextcloud-vault" \
|
|||||||
"nextcloud/* shared/keycloak-admin shared/postmark-relay" ""
|
"nextcloud/* shared/keycloak-admin shared/postmark-relay" ""
|
||||||
write_policy_and_role "comms" "comms" "comms-vault,atlasbot" \
|
write_policy_and_role "comms" "comms" "comms-vault,atlasbot" \
|
||||||
"comms/* shared/chat-ai-keys-runtime shared/harbor-pull" ""
|
"comms/* shared/chat-ai-keys-runtime shared/harbor-pull" ""
|
||||||
|
write_policy_and_role "ai" "ai" "atlasbot" \
|
||||||
|
"comms/* shared/chat-ai-keys-runtime shared/harbor-pull" ""
|
||||||
write_policy_and_role "jenkins" "jenkins" "jenkins,jenkins-vault-sync" \
|
write_policy_and_role "jenkins" "jenkins" "jenkins,jenkins-vault-sync" \
|
||||||
"jenkins/* shared/harbor-pull" ""
|
"jenkins/* shared/harbor-pull" ""
|
||||||
write_policy_and_role "monitoring" "monitoring" "monitoring-vault-sync" \
|
write_policy_and_role "monitoring" "monitoring" "monitoring-vault-sync" \
|
||||||
@ -231,7 +292,7 @@ write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \
|
|||||||
write_policy_and_role "health" "health" "health-vault-sync" \
|
write_policy_and_role "health" "health" "health-vault-sync" \
|
||||||
"health/*" ""
|
"health/*" ""
|
||||||
write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync" \
|
write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync" \
|
||||||
"maintenance/ariadne-db portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull" ""
|
"maintenance/ariadne-db maintenance/soteria-restic portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull" ""
|
||||||
write_policy_and_role "finance" "finance" "finance-vault" \
|
write_policy_and_role "finance" "finance" "finance-vault" \
|
||||||
"finance/* shared/postmark-relay" ""
|
"finance/* shared/postmark-relay" ""
|
||||||
write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \
|
write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user