Compare commits

...

1075 Commits

Author SHA1 Message Date
07fde43749 platform: move postgres to infrastructure 2026-01-13 17:53:04 -03:00
730b9775a3 Merge pull request 'feature/sso-hardening' (#9) from feature/sso-hardening into main
Reviewed-on: #9
2026-01-13 20:23:24 +00:00
flux-bot
b09100e787 chore(bstein-dev-home): automated image update 2026-01-13 15:57:24 +00:00
eefaf7df2e merge main into sso-hardening 2026-01-13 12:56:21 -03:00
073b44e0c3 gitea: auto-link oidc accounts 2026-01-13 12:47:41 -03:00
5aeec67bfb postgres: add flux + vault csi 2026-01-13 12:35:59 -03:00
3fc9f7bbdb iac: localize configmap scripts 2026-01-13 12:07:03 -03:00
6da576a707 iac: externalize ConfigMap scripts 2026-01-13 10:00:19 -03:00
flux-bot
17b733c65e chore(bstein-dev-home): automated image update 2026-01-13 12:48:56 +00:00
flux-bot
6d213e5b25 chore(bstein-dev-home): automated image update 2026-01-13 12:47:56 +00:00
flux-bot
b01ac8da25 chore(bstein-dev-home): automated image update 2026-01-13 12:00:52 +00:00
flux-bot
27460f8dc3 chore(bstein-dev-home): automated image update 2026-01-13 11:59:53 +00:00
flux-bot
4d884bfcb1 chore(bstein-dev-home): automated image update 2026-01-13 02:38:08 +00:00
flux-bot
606718459e chore(bstein-dev-home): automated image update 2026-01-13 02:37:08 +00:00
4d6d0b89b2 planka: default users to project owners 2026-01-12 23:24:09 -03:00
35a19a2f7b outline: move to local storage 2026-01-12 23:14:17 -03:00
1a50f51115 planka: enable project owners via oidc 2026-01-12 23:14:17 -03:00
flux-bot
ed9a41bd70 chore(bstein-dev-home): automated image update 2026-01-13 01:58:04 +00:00
flux-bot
e12d020c51 chore(bstein-dev-home): automated image update 2026-01-13 01:57:04 +00:00
5a5766c9b5 planka: avoid mounting over assets 2026-01-12 22:47:23 -03:00
7a49e99e62 planka: fix init permissions 2026-01-12 22:02:07 -03:00
6376beebb1 services: fix outline pg ssl and planka init 2026-01-12 21:45:00 -03:00
d673493f89 minio: rerun bucket bootstrap job 2026-01-12 21:40:43 -03:00
flux-bot
d87d584992 chore(bstein-dev-home): automated image update 2026-01-13 00:30:57 +00:00
flux-bot
9df1eb85c3 chore(bstein-dev-home): automated image update 2026-01-13 00:29:57 +00:00
29192b9e7f services: add minio, outline, planka 2026-01-12 21:22:54 -03:00
5f4d9b498e chore: remove ci-demo workload 2026-01-12 00:46:16 -03:00
13df82e07a monitoring: treat cert-manager as infrastructure 2026-01-12 00:26:46 -03:00
fb2c7b22d5 monitoring: regenerate dashboards with expanded infra namespaces 2026-01-11 23:55:43 -03:00
21b9129abf monitoring: classify logging/postgres/maintenance as infra 2026-01-11 23:52:40 -03:00
fcc0a49369 monitoring: fix infra scopes and add jetson metrics 2026-01-11 23:46:24 -03:00
3a798ae3b1 mailu: use postmark token for relay auth 2026-01-11 19:01:31 -03:00
a8e6b575af knowledge: record k3s versions across node classes 2026-01-11 10:15:55 -03:00
35dca13026 knowledge: add control-plane storage details 2026-01-11 10:06:35 -03:00
0c0b328a1a knowledge: add titan-db and titan-jh details 2026-01-11 09:54:11 -03:00
c8a2e8caf4 knowledge: add jetson (titan-20/21) details 2026-01-11 09:44:40 -03:00
c13b161171 knowledge: relocate metis doc; monitoring: add cpu high alert 2026-01-11 08:59:51 -03:00
cac8506929 knowledge: add metis recovery notes 2026-01-11 02:32:20 -03:00
54358df569 monitoring: maintenance panels, extra alerts, update overview 2026-01-11 02:28:39 -03:00
33b89c7dc2 monitoring: remove titan-16 and add titan-20/21 to worker dashboards 2026-01-11 02:20:47 -03:00
734a537a28 monitoring: add alert rules and include titan-20/21 in dashboards 2026-01-11 02:02:47 -03:00
f533443c42 Fix Jetson device plugin args 2026-01-11 01:57:20 -03:00
1ffcb28be5 monitoring: fix grafana alerting root policy 2026-01-11 01:40:07 -03:00
95c3e2de37 monitoring: allow smtp sync to get target secret 2026-01-11 00:32:41 -03:00
abb15ddbb4 monitoring: fix smtp sync image reference 2026-01-11 00:30:45 -03:00
b53c7d4a1c monitoring: wire grafana smtp sync and alerting provisioning 2026-01-11 00:29:20 -03:00
1517dec30b maintenance: run image sweeper on all nodes 2026-01-10 23:57:26 -03:00
b7e5a04265 maintenance: fix image sweeper script indentation 2026-01-10 20:26:46 -03:00
99a6b4c054 maintenance: sweep unused images on arm workers 2026-01-10 20:20:54 -03:00
e2efeeacba logging: tune rpi4 image gc and rpi5 prune 2026-01-10 06:57:07 -03:00
6f8696eb0d logging: tune kubelet image GC on rpi5 2026-01-10 06:22:56 -03:00
753cc5900a logging: extend fluent-bit helm timeout 2026-01-10 05:55:45 -03:00
63f1d902b6 logging: add data-prepper pull secret 2026-01-10 05:52:16 -03:00
76f3b3f4ea logging: force data-prepper repo override 2026-01-10 05:42:39 -03:00
5e245caf45 logging: use streaming repo for data-prepper 2026-01-10 05:28:03 -03:00
de8709bc2e logging: use kaniko debug image 2026-01-10 05:22:27 -03:00
1595898947 logging: drop timestamps option from data-prepper job 2026-01-10 05:15:19 -03:00
f4b1519527 logging: add rpi5 log retention tuning 2026-01-10 05:06:34 -03:00
c6c7259a71 logging: add Jenkins build for data-prepper 2026-01-10 05:01:17 -03:00
a870aa6916 logging: pin otel collector image 2026-01-10 00:16:41 -03:00
af9ab30849 logging: add trace analytics ingestion 2026-01-10 00:13:59 -03:00
flux-bot
67415e665c chore(bstein-dev-home): automated image update 2026-01-10 03:05:43 +00:00
flux-bot
c60f405846 chore(bstein-dev-home): automated image update 2026-01-10 03:03:44 +00:00
b3e03623bd logging: seed OpenSearch observability 2026-01-09 23:58:12 -03:00
flux-bot
c531c206c5 chore(bstein-dev-home): automated image update 2026-01-10 02:05:39 +00:00
flux-bot
5072cd0b5a chore(bstein-dev-home): automated image update 2026-01-10 02:04:39 +00:00
dd705aeb4a logging: expand OpenSearch dashboards 2026-01-09 22:55:39 -03:00
a25ddd8082 logging: add OpenSearch dashboards generator 2026-01-09 22:20:36 -03:00
64ddd73b50 logging: force dark theme in dashboards 2026-01-09 21:17:08 -03:00
087026bd23 logging: throttle fluent-bit backfill 2026-01-09 18:18:58 -03:00
b40a995225 logging: force opensearch replicas to 0 2026-01-09 18:17:02 -03:00
ecf28580b9 logging: manage opensearch pvc size 2026-01-09 18:11:32 -03:00
0b78ec663d logging: remove loki and backfill to opensearch 2026-01-09 18:08:39 -03:00
456677cfbb logging: extend dashboards helm timeout 2026-01-09 09:07:40 -03:00
0239f57a84 logging: fix opensearch ism job yaml 2026-01-09 09:01:15 -03:00
8e94038858 logging: pin opensearch to rpi5 2026-01-09 09:00:25 -03:00
b668e2d29e logging: pin opensearch ISM job to rpi 2026-01-09 08:58:48 -03:00
7a9cf1df98 keycloak: fix logs oauth2 cookie secret 2026-01-09 08:57:13 -03:00
b9383c9709 logging: fix dashboards cpu limits 2026-01-09 08:55:39 -03:00
cac71e4a41 logging: add opensearch dashboards ui 2026-01-09 08:54:07 -03:00
719f16c4e3 logging: route oauth2-proxy via loki gateway 2026-01-09 08:07:46 -03:00
afb7eb80f2 logging: keep loki canary on rpi5 workers 2026-01-09 07:26:12 -03:00
5004bbd8ec logging: pin loki canary to rpi5 nodes 2026-01-09 07:19:59 -03:00
0b8caa4c7c logging: shrink loki caches for rpi nodes 2026-01-09 07:16:10 -03:00
9e496cb8d6 logging: fix oauth2 scope and pin loki to rpi 2026-01-09 07:12:40 -03:00
3694b8f76e logging: point systemd input at /var/log/journal 2026-01-08 23:58:42 -03:00
c4980b975c logging: set systemd journal path 2026-01-08 23:54:04 -03:00
f8fad8d599 logging: fix fluent-bit loki labels 2026-01-08 23:47:52 -03:00
19f1060b87 logging: disable fluent-bit inotify watcher 2026-01-08 23:44:48 -03:00
e305d312b1 logging: add loki delete_request_store 2026-01-08 23:22:24 -03:00
c87a34a0f9 logging: trim loki compactor config 2026-01-08 23:11:00 -03:00
19d236ab43 logging: keep loki defaults for canary and gateway 2026-01-08 23:02:34 -03:00
0463c2bf60 logging: drop fluent-bit label_keys 2026-01-08 22:41:07 -03:00
e5d04f2bcf logging: fix loki config and fluent-bit output 2026-01-08 22:40:02 -03:00
1fd4a426b4 logging: fix loki single-binary mode 2026-01-08 22:33:27 -03:00
1027fe5ce5 logging: add loki and fluent-bit 2026-01-08 22:31:45 -03:00
a57448f074 comms: kick numeric members from Othrys 2026-01-08 12:44:00 -03:00
a272a219a4 comms: serialize guest renamer inserts 2026-01-08 12:15:59 -03:00
41a762d6a6 comms: update numeric guest rename logic 2026-01-08 12:12:08 -03:00
1cce304872 comms: include full_user_id when renaming 2026-01-08 12:07:46 -03:00
d8c3bb2f1b comms: fix guest renamer db sql quoting 2026-01-08 12:03:53 -03:00
831f368493 comms: rename numeric guests via db 2026-01-08 11:59:51 -03:00
59305ca27c comms: mint guest tokens via MAS login 2026-01-08 11:56:35 -03:00
b86800cd6d comms: skip synapse admin list on 403 2026-01-08 06:14:32 -03:00
70a707872e comms: rerun MAS local user ensure (v5) 2026-01-08 06:11:47 -03:00
ffddd71116 comms: make room reset a suspended cronjob 2026-01-08 06:09:34 -03:00
d870e97b38 comms: use full user IDs for MAS logins 2026-01-08 06:05:20 -03:00
4eb82811b5 comms: set MAS user passwords via set-password 2026-01-08 06:01:45 -03:00
835146bd5b comms: rerun MAS local user ensure 2026-01-08 05:51:43 -03:00
c909d45fda comms: make guest renamer MAS-only 2026-01-08 05:47:21 -03:00
0fc4b299da keycloak: re-run mas secrets ensure 2026-01-08 05:43:33 -03:00
d3c6ddeead comms: re-run signing key and synapse oidc 2026-01-08 05:40:28 -03:00
2a6f0a8db3 comms: tidy stack and guest naming 2026-01-08 05:34:03 -03:00
94c1395c8c comms: verify mas bot logins 2026-01-08 05:21:30 -03:00
fa6566ffc8 comms: rerun othrys room reset 2026-01-08 05:18:20 -03:00
7bea022311 comms: add mas bot users and revert synapse auth 2026-01-08 05:12:14 -03:00
acedad02c0 comms: bind synapse to ipv4 2026-01-08 05:03:43 -03:00
c05cb414aa comms: fix synapse seed booleans 2026-01-08 05:00:58 -03:00
28bcf716d0 comms: seed synapse bot users 2026-01-08 04:55:52 -03:00
fce33f02ff comms: route othrys reset via mas 2026-01-08 04:51:13 -03:00
a1f1c9ada0 comms: retry othrys reset login 2026-01-08 04:45:01 -03:00
0b09f46bb1 comms: accept missing rooms in cleanup 2026-01-08 04:42:19 -03:00
6b5deb886f comms: use mas proxy for leave job 2026-01-08 04:37:33 -03:00
7860003f15 comms: retry room leave actions 2026-01-08 04:32:05 -03:00
31ca499c04 comms: retry mas token for room cleanup 2026-01-08 04:29:29 -03:00
52df8094f5 comms: rerun bstein room cleanup 2026-01-08 04:26:48 -03:00
97e7c69244 comms: retry atlasbot login 2026-01-08 04:22:21 -03:00
ac7217a32c comms: switch bot auth back to synapse 2026-01-08 04:19:20 -03:00
9172f1e140 comms: enable synapse password login 2026-01-08 04:16:40 -03:00
12ab281528 comms: revert bot auth to mas 2026-01-08 04:11:20 -03:00
bfe623892a comms: bump othrys reset job 2026-01-08 04:07:57 -03:00
99ed78ea7f comms: fix auth env indentation 2026-01-08 04:05:03 -03:00
85dce4f975 comms: use synapse auth for bot jobs 2026-01-08 04:00:27 -03:00
5a23514a30 sso: install kubectl in synapse oidc job 2026-01-08 03:57:35 -03:00
220cc1f31a sso: run synapse oidc job with kubectl 2026-01-08 03:56:18 -03:00
76deb9a160 comms: ensure core secrets and synapse oidc 2026-01-08 03:53:49 -03:00
aa30a34828 comms: restart mas after secret cleanup 2026-01-08 03:46:02 -03:00
d3c3db612d sso: recheck mas encryption bytes 2026-01-08 03:44:54 -03:00
8d1284412f sso: validate mas encryption length 2026-01-08 03:43:06 -03:00
f8d172c5a2 comms: restart mas after secret regen 2026-01-08 03:39:46 -03:00
04817691c6 sso: strip mas secret newlines 2026-01-08 03:38:51 -03:00
c1e74c1001 comms: restart mas after encryption fix 2026-01-08 03:36:33 -03:00
072af083bc sso: fix mas encryption secret 2026-01-08 03:35:40 -03:00
39d8c9e687 comms: restart mas after secret fix 2026-01-08 03:33:14 -03:00
4db5ff68eb comms: let mas db secret be job-owned 2026-01-08 03:31:19 -03:00
bebb87fcf8 comms: restart mas after db sync 2026-01-08 03:28:22 -03:00
4f462b8fa7 comms: verify mas db login 2026-01-08 03:26:14 -03:00
05c2d245b9 comms: ensure mas password is url-safe 2026-01-08 03:23:09 -03:00
e384a9e417 comms: avoid psql vars for mas 2026-01-08 03:20:28 -03:00
898a33d8ee comms: simplify mas db creation 2026-01-08 03:18:03 -03:00
3d2f04d672 comms: fix mas db psql exec 2026-01-08 03:15:25 -03:00
df5a5127f1 comms: add mas db secret stub 2026-01-08 03:12:16 -03:00
8950306c53 comms: keep mas db job logs on failure 2026-01-08 03:09:27 -03:00
e18accc099 comms: allow postgres exec for mas db 2026-01-08 03:06:34 -03:00
0250de8636 comms: ensure mas db via postgres exec 2026-01-08 03:04:33 -03:00
72d4766d68 comms: stabilize mas db job 2026-01-08 03:00:19 -03:00
ef064ed2bb comms: bootstrap mas db secret 2026-01-08 02:53:53 -03:00
c8fc1dd10a comms: fix mas db ensure rbac 2026-01-08 02:47:47 -03:00
0e55dbeaa9 comms: ensure mas db secret 2026-01-08 02:45:00 -03:00
b95683da2a comms: restart MAS after secret bootstrap 2026-01-08 02:35:09 -03:00
6e0b3c43bd keycloak: rerun MAS secrets bootstrap 2026-01-08 02:32:31 -03:00
9a544010fb comms: grant MAS secret bootstrap cluster role 2026-01-08 02:31:54 -03:00
9d3b27e567 keycloak: rerun MAS secrets bootstrap 2026-01-08 02:25:55 -03:00
1c9efd6808 comms: allow MAS secrets create 2026-01-08 02:25:19 -03:00
6e7118c14d keycloak: use create for MAS secrets 2026-01-08 02:23:40 -03:00
6c99eb452e keycloak: make MAS secret job idempotent 2026-01-08 02:21:37 -03:00
e9fb11af40 keycloak: allow MAS secret apply read access 2026-01-08 02:19:21 -03:00
afce04b9b2 keycloak: rerun MAS secrets bootstrap 2026-01-08 02:17:04 -03:00
2aea7e3601 keycloak: retry MAS secret bootstrap 2026-01-08 02:12:40 -03:00
05848223eb comms: ensure MAS secrets via keycloak admin job 2026-01-08 02:09:23 -03:00
3aa36e87b8 comms: retry guest rename when MAS restarts 2026-01-08 02:00:52 -03:00
9a76680cc4 comms: track local knowledge markdown 2026-01-08 01:58:17 -03:00
660b49bc5d comms: consolidate stack manifests 2026-01-08 01:55:58 -03:00
d3ac4726e2 comms: rename guests via MAS admin sessions 2026-01-08 00:26:20 -03:00
57e414adc6 comms: rerun synapse admin seeder job 2026-01-08 00:20:55 -03:00
ca49c84086 comms: fix guest randomizer syntax 2026-01-08 00:15:41 -03:00
47f0ff7c01 comms: fix guest rename job with MAS admin sessions 2026-01-08 00:13:40 -03:00
e44ee3ab2d comms: fix guest registration via MAS admin API 2026-01-07 20:02:03 -03:00
70e40b281f comms: issue guest tokens via MAS 2026-01-07 19:51:33 -03:00
cd4b963db4 comms: serve register flows for guest UI 2026-01-07 19:09:13 -03:00
695e1ec322 comms: set guest displayname at registration 2026-01-07 11:23:53 -03:00
c950c32e93 comms: re-enable guest name randomizer 2026-01-07 11:17:33 -03:00
658e434e65 comms: return 405 for GET /register 2026-01-07 11:14:28 -03:00
49ec3d1be8 comms: restart synapse + guest proxy 2026-01-07 10:46:33 -03:00
eb1cb8cb00 comms: move guest register module endpoint 2026-01-07 10:42:11 -03:00
44404aa2f2 comms: restore Element guest registration 2026-01-07 10:34:52 -03:00
949995a8a0 comms: add guest register module scaffolding 2026-01-07 10:25:10 -03:00
c111f773b7 nextcloud: reset storage claims 2026-01-07 10:13:09 -03:00
376cbf6d70 comms: mint guest sessions via MAS 2026-01-07 10:12:37 -03:00
7ba578ed21 comms: restore Synapse guest join 2026-01-07 09:54:41 -03:00
4a55b39b0d comms: add Synapse guest appservice secret job 2026-01-07 09:49:08 -03:00
9bb90053a1 nextcloud: persist web root in pvc 2026-01-07 09:40:25 -03:00
a711c450d3 comms: implement MAS-backed guest register 2026-01-07 09:36:45 -03:00
1bcb9baba2 comms: ensure seeder is Synapse admin 2026-01-07 09:31:46 -03:00
9d5ba6adfe nextcloud: preserve config merge and stop db reset 2026-01-07 09:20:22 -03:00
ff395f7cf2 comms: restore Matrix guest join 2026-01-07 09:17:45 -03:00
6850f7b2fc nextcloud: avoid forcing installed flag 2026-01-07 09:14:20 -03:00
c928b7805c nextcloud: install oidc app from release tarball 2026-01-07 09:02:22 -03:00
77ce04c562 nextcloud: reset external app config and force reinstall 2026-01-07 08:58:50 -03:00
59b719da54 nextcloud: install oidc login via app store 2026-01-07 08:51:07 -03:00
52295538a0 nextcloud: fix db reset command 2026-01-07 08:46:57 -03:00
3db0661a48 nextcloud: reset storage mounts and restore office 2026-01-07 08:43:45 -03:00
cb7429a6a1 nextcloud: stabilize install guardrails 2026-01-07 04:49:55 -03:00
5a92e99c8d nextcloud-mail-sync: align data mount 2026-01-07 04:43:13 -03:00
7506919394 nextcloud: align app/data mounts 2026-01-07 04:41:00 -03:00
46c0a4e290 nextcloud: restore single data volume mount 2026-01-07 03:52:14 -03:00
da81946771 nextcloud: rebind user data pvc to restore data 2026-01-07 03:43:57 -03:00
428c2b5435 nextcloud: restore app and user-data volumes 2026-01-07 03:39:59 -03:00
ef0dfab20c mailu: harden postfix relay restrictions 2026-01-07 02:47:12 -03:00
8749d8a884 nextcloud: rebind data pvc to prior volume 2026-01-07 01:10:24 -03:00
58bc646621 nextcloud: allow OIDC auto user creation 2026-01-07 00:12:21 -03:00
16dc0e16f1 nextcloud: enforce OIDC-only config 2026-01-07 00:03:57 -03:00
9d9aa5b64b nextcloud: force OIDC login 2026-01-06 23:54:33 -03:00
2d6883eb67 nextcloud: restore mimetype defaults for external app 2026-01-06 22:16:51 -03:00
a15a2ce923 nextcloud: reinstall custom apps with compatible mail 2026-01-06 22:09:16 -03:00
f1e94717ed nextcloud: pin mail/external app versions for 29 2026-01-06 22:03:01 -03:00
99e56fe1b4 nextcloud: register custom apps path 2026-01-06 21:51:19 -03:00
93b219e571 nextcloud: pin app download URLs 2026-01-06 21:43:36 -03:00
7a7433f824 nextcloud: install oidc/mail/external apps from releases 2026-01-06 21:39:55 -03:00
5fe584cc5f nextcloud: ensure oidc/mail/external apps installed 2026-01-06 21:35:31 -03:00
39d57613db nextcloud: remove db reset job 2026-01-06 21:27:06 -03:00
36552e425f nextcloud: fix su command quoting 2026-01-06 21:24:36 -03:00
e5cb4571d8 nextcloud: fix install command quoting 2026-01-06 21:22:12 -03:00
b9d75d279c nextcloud: reinstall when config not installed 2026-01-06 21:18:16 -03:00
c954fb7546 nextcloud: add one-time db reset job 2026-01-06 21:15:52 -03:00
45563f74b3 nextcloud: run install occ as www-data 2026-01-06 21:07:33 -03:00
221fda50a6 atlasbot: add PromQL + cluster snapshot 2026-01-06 14:58:29 -03:00
b313569e2f atlasbot: fix kb loader import 2026-01-06 14:55:19 -03:00
4a5f3d4c92 nextcloud: install without runuser 2026-01-06 14:53:58 -03:00
0a8e8e27da knowledge: add runbooks skeleton 2026-01-06 14:53:19 -03:00
91d4ecf451 nextcloud: run install init as root 2026-01-06 14:52:25 -03:00
6728b4f4ae atlasbot: add KB + read-only tools 2026-01-06 14:46:36 -03:00
7283a740e6 nextcloud: install when config missing 2026-01-06 14:46:16 -03:00
46884bdd0c nextcloud: ensure data dir and perms 2026-01-06 14:43:18 -03:00
d4f1d01b9c nextcloud: reset empty config on boot 2026-01-06 14:40:29 -03:00
556b714e50 nextcloud/monitoring: fix perms and mail panels 2026-01-06 14:38:10 -03:00
37e8e691e2 nextcloud: restore app files for maintenance job 2026-01-06 14:22:26 -03:00
8a12c8cdbd nextcloud: call occ via absolute path 2026-01-06 14:16:47 -03:00
bf358bcdfd flux: track nextcloud app 2026-01-06 14:14:38 -03:00
e8cf4070b5 nextcloud: set theming via app config 2026-01-06 14:11:24 -03:00
4d92263871 mailu: enable smtpd sasl auth 2026-01-06 14:06:55 -03:00
c693e695b4 mailu: harden relay + fix postmark exporter 2026-01-06 14:00:14 -03:00
6a4b7f4431 titan-jh: enable node exporter 2026-01-06 12:47:34 -03:00
109c17dd95 nextcloud: default mail html 2026-01-06 10:02:50 -03:00
a14726350c monitoring: add titan-jh control plane node 2026-01-06 09:50:40 -03:00
7d64f0d1d9 mailu: harden relay restrictions 2026-01-06 09:03:28 -03:00
5fcff4fc8a monitoring: refine mail overview panels 2026-01-06 02:34:52 -03:00
d5d2fc66b9 monitoring: refine mail stats and add send-limit usage 2026-01-06 02:06:20 -03:00
1fb56bae70 monitoring: restart postmark exporter 2026-01-05 22:07:52 -03:00
12b579d951 monitoring: add Postmark today window 2026-01-05 22:06:24 -03:00
9be25e16fe monitoring: add Postmark mail dashboard 2026-01-05 21:55:59 -03:00
d132917d9e monitoring: add Postmark bounce exporter 2026-01-05 21:44:29 -03:00
ec208fe0f6 mailu: remove pod network relay 2026-01-05 21:27:19 -03:00
6195005206 mailu: disable unauthenticated pod relay 2026-01-05 21:21:47 -03:00
a4105c68db scripts: add vaultwarden test cleanup 2026-01-05 13:51:25 -03:00
28a5d53c98 monitoring(dashboards): tune namespace share metrics 2026-01-05 13:30:51 -03:00
89d47cba79 scripts: harden atlas cleanup script 2026-01-05 13:30:51 -03:00
flux-bot
55b25fbfd6 chore(bstein-dev-home): automated image update 2026-01-05 06:20:19 +00:00
flux-bot
5877611b4f chore(bstein-dev-home): automated image update 2026-01-05 06:19:15 +00:00
flux-bot
ad7ac5b38d chore(bstein-dev-home): automated image update 2026-01-05 06:00:18 +00:00
flux-bot
3e2a90e377 chore(bstein-dev-home): automated image update 2026-01-05 05:59:13 +00:00
flux-bot
9d9c2830f7 chore(bstein-dev-home): automated image update 2026-01-05 05:48:17 +00:00
flux-bot
5358559787 chore(bstein-dev-home): automated image update 2026-01-05 05:47:12 +00:00
flux-bot
86c6c5a0f8 chore(bstein-dev-home): automated image update 2026-01-05 05:34:16 +00:00
flux-bot
089e8155ae chore(bstein-dev-home): automated image update 2026-01-05 05:32:52 +00:00
08c54d3d01 scripts: add atlas test cleanup 2026-01-05 00:25:39 -03:00
flux-bot
38eceaadfa chore(bstein-dev-home): automated image update 2026-01-05 03:11:58 +00:00
flux-bot
3ede688676 chore(bstein-dev-home): automated image update 2026-01-05 03:10:47 +00:00
flux-bot
5470002e3e chore(bstein-dev-home): automated image update 2026-01-05 02:39:56 +00:00
flux-bot
010a0b5e22 chore(bstein-dev-home): automated image update 2026-01-05 02:38:45 +00:00
0805dbc5e9 test(portal): tolerate slow approval endpoint 2026-01-04 23:04:50 -03:00
2e52956155 test(portal): align onboarding E2E with vaultwarden-first flow 2026-01-04 23:01:01 -03:00
flux-bot
eff9bfb761 chore(bstein-dev-home): automated image update 2026-01-05 01:55:52 +00:00
flux-bot
4c59fccedf chore(bstein-dev-home): automated image update 2026-01-05 01:54:42 +00:00
flux-bot
70ed083d96 chore(bstein-dev-home): automated image update 2026-01-05 01:04:49 +00:00
flux-bot
cf0e5bfc89 chore(bstein-dev-home): automated image update 2026-01-05 01:03:38 +00:00
flux-bot
211504d47a chore(bstein-dev-home): automated image update 2026-01-04 16:16:11 +00:00
flux-bot
f735dba10d chore(bstein-dev-home): automated image update 2026-01-04 16:15:00 +00:00
flux-bot
513dce99b6 chore(bstein-dev-home): automated image update 2026-01-04 16:06:10 +00:00
flux-bot
64cdcec364 chore(bstein-dev-home): automated image update 2026-01-04 16:04:59 +00:00
flux-bot
291073884a chore(bstein-dev-home): automated image update 2026-01-04 15:36:08 +00:00
flux-bot
58f36edf92 chore(bstein-dev-home): automated image update 2026-01-04 15:34:57 +00:00
e66e782e4a portal: add test user cleanup tool 2026-01-04 09:39:26 -03:00
flux-bot
9ff8fc9e72 chore(bstein-dev-home): automated image update 2026-01-04 12:22:54 +00:00
flux-bot
31994f9243 chore(bstein-dev-home): automated image update 2026-01-04 12:21:44 +00:00
flux-bot
b28fdece0a chore(bstein-dev-home): automated image update 2026-01-04 11:50:52 +00:00
flux-bot
b82e2b99db chore(bstein-dev-home): automated image update 2026-01-04 11:49:41 +00:00
6eeff1271c test(portal): stop requiring totp 2026-01-04 08:35:49 -03:00
flux-bot
20332f7029 chore(bstein-dev-home): automated image update 2026-01-04 11:28:50 +00:00
flux-bot
1a8b3ce304 chore(bstein-dev-home): automated image update 2026-01-04 11:27:40 +00:00
flux-bot
333481bd67 chore(bstein-dev-home): automated image update 2026-01-04 10:36:47 +00:00
flux-bot
6f784c94a4 chore(bstein-dev-home): automated image update 2026-01-04 10:35:36 +00:00
55606e5b70 fix(portal): pin kubectl image digest 2026-01-04 03:40:13 -03:00
17a9a7e245 test(portal): sync e2e client secret 2026-01-04 03:35:26 -03:00
c53d310c59 test(portal): use external Keycloak URL 2026-01-04 03:27:32 -03:00
b9d2fa8277 test(portal): improve e2e auth errors 2026-01-04 03:01:56 -03:00
c298946ce0 test(portal): approve requests via admin API 2026-01-04 02:58:44 -03:00
0b96894e7a tests(portal): rerun onboarding e2e job (8) 2026-01-04 02:26:42 -03:00
4a841a1660 fix(bstein-dev-home): harden backend gunicorn 2026-01-04 02:25:40 -03:00
bbb15a6532 tests(portal): rerun onboarding e2e job (7) 2026-01-04 02:09:59 -03:00
4b77f909af tests(portal): refresh keycloak token during e2e 2026-01-04 02:09:36 -03:00
d0e088e50a tests(portal): rerun onboarding e2e job 2026-01-04 01:57:53 -03:00
flux-bot
d2fa996b8a chore(bstein-dev-home): automated image update 2026-01-04 04:55:22 +00:00
flux-bot
d7c44e65a6 chore(bstein-dev-home): automated image update 2026-01-04 04:53:11 +00:00
04b730dbab tests(portal): verify access requests via email 2026-01-04 01:48:46 -03:00
a7f68ddddb test: ensure smtp probe user has email 2026-01-04 01:08:17 -03:00
38b4935e1d test: send execute-actions-email to existing mailbox 2026-01-04 01:06:05 -03:00
7cbbb7e193 test: fix keycloak execute-actions-email probe 2026-01-04 00:59:24 -03:00
eb11eaff4e keycloak: allow e2e client execute-actions-email 2026-01-04 00:58:02 -03:00
cadb0daba0 tests: add Keycloak email probe 2026-01-04 00:53:13 -03:00
flux-bot
d21f18d920 chore(bstein-dev-home): automated image update 2026-01-04 03:46:18 +00:00
flux-bot
7407d42f98 chore(bstein-dev-home): automated image update 2026-01-04 03:45:07 +00:00
300873f743 bstein-dev-home: relax health probe timeouts 2026-01-03 22:34:39 -03:00
6bda606760 test: stabilize portal onboarding e2e 2026-01-03 22:27:33 -03:00
8cdd5fa1ba bstein-dev-home: fix onboarding e2e job url 2026-01-03 22:11:57 -03:00
f628d2768b bstein-dev-home: add onboarding e2e job 2026-01-03 21:53:45 -03:00
flux-bot
4b52203532 chore(bstein-dev-home): automated image update 2026-01-04 00:53:05 +00:00
flux-bot
d2d4b601f3 chore(bstein-dev-home): automated image update 2026-01-04 00:51:54 +00:00
flux-bot
eb560a38fa chore(bstein-dev-home): automated image update 2026-01-03 23:42:00 +00:00
flux-bot
aad5a29986 chore(bstein-dev-home): automated image update 2026-01-03 23:40:49 +00:00
762164aed4 bstein-dev-home: reduce lab status probe timeout 2026-01-03 20:02:53 -03:00
flux-bot
dd473b8a8c chore(bstein-dev-home): automated image update 2026-01-03 22:56:57 +00:00
flux-bot
558cab9a0b chore(bstein-dev-home): automated image update 2026-01-03 22:55:46 +00:00
c5fa1b5a38 vaultwarden: backfill synced_at 2026-01-03 18:43:25 -03:00
b63b724b52 keycloak: rerun realm settings job 2026-01-03 18:27:29 -03:00
ab658fa064 keycloak: allow vaultwarden user attributes 2026-01-03 18:25:48 -03:00
e8fab60d89 vaultwarden: skip reinvite when status set 2026-01-03 18:21:04 -03:00
51a733096f vaultwarden: make cred sync idempotent 2026-01-03 18:18:31 -03:00
12348258fa vaultwarden: allow internal SMTP TLS 2026-01-03 17:54:27 -03:00
b7c8b4693d vaultwarden: enable SMTP via Mailu 2026-01-03 17:44:24 -03:00
flux-bot
148bba0fd6 chore(bstein-dev-home): automated image update 2026-01-03 20:29:46 +00:00
flux-bot
dbd14fac8b chore(bstein-dev-home): automated image update 2026-01-03 20:28:35 +00:00
e1deeb1853 vaultwarden: avoid RWO multi-attach rollout 2026-01-03 17:12:46 -03:00
c11a663d05 vaultwarden: use Recreate strategy 2026-01-03 17:07:48 -03:00
2ee8f7da88 flux: resume vaultwarden 2026-01-03 17:00:19 -03:00
flux-bot
6be16eed1d chore(bstein-dev-home): automated image update 2026-01-03 19:59:44 +00:00
flux-bot
db27242ce1 chore(bstein-dev-home): automated image update 2026-01-03 19:58:33 +00:00
c386ff7c7a vaultwarden: disable signups and sync invites 2026-01-03 16:55:02 -03:00
70980a2ca9 keycloak: add token exchange E2E smoke test 2026-01-03 15:58:44 -03:00
e73baa6ecd keycloak: robust policy lookup for token exchange job 2026-01-03 15:50:43 -03:00
3f19d01d00 keycloak: make token exchange permissions job idempotent 2026-01-03 15:48:40 -03:00
cb37756f5f keycloak: fix token exchange permission patching 2026-01-03 15:46:26 -03:00
1f2bddc7fe keycloak: retry token exchange permissions job 2026-01-03 15:45:04 -03:00
df959ee17d keycloak: enable fine-grained token exchange authz 2026-01-03 15:43:07 -03:00
b21a79dad7 keycloak: allow token exchange to portal 2026-01-03 14:48:28 -03:00
e09589ec35 keycloak: add portal e2e client 2026-01-03 14:35:23 -03:00
f1d1e1bd7d keycloak: enable token exchange 2026-01-03 14:29:28 -03:00
c8f9b59e4a keycloak: allow nextcloud mail profile attrs 2026-01-03 12:36:23 -03:00
flux-bot
73728bcc09 chore(bstein-dev-home): automated image update 2026-01-03 15:23:24 +00:00
flux-bot
db17c95ee0 chore(bstein-dev-home): automated image update 2026-01-03 15:23:13 +00:00
565fad4522 nextcloud-mail-sync: portal RBAC 2026-01-03 12:22:41 -03:00
91106ee298 nextcloud: per-user mail sync + portal RBAC 2026-01-03 12:18:29 -03:00
51b0a88a62 nextcloud: delegate mail sync to separate kustomization 2026-01-03 07:44:24 -03:00
caa23e6f1c fix(nextcloud-mail-sync): fix bash syntax 2026-01-03 07:39:45 -03:00
c7c2e03ea2 fix(nextcloud-mail-sync): mawk-compatible email regex 2026-01-03 07:18:50 -03:00
6cd63b067d fix(nextcloud-mail-sync): capture occ export output reliably 2026-01-03 07:13:58 -03:00
c165087eda fix(nextcloud-mail-sync): portable email parsing 2026-01-03 07:06:30 -03:00
a76d944433 nextcloud-mail-sync: manage CronJob via Flux 2026-01-03 07:03:43 -03:00
flux-bot
b06dcb2263 chore(bstein-dev-home): automated image update 2026-01-03 09:54:01 +00:00
51f94194be fix(nextcloud): dedupe + update mail accounts 2026-01-03 06:53:23 -03:00
flux-bot
9d8c113850 chore(bstein-dev-home): automated image update 2026-01-03 09:52:50 +00:00
flux-bot
e82be4955b chore(bstein-dev-home): automated image update 2026-01-03 09:29:59 +00:00
flux-bot
2d17d03b3d chore(bstein-dev-home): automated image update 2026-01-03 09:28:48 +00:00
747b6aacb6 keycloak: set bstein mailu_email 2026-01-03 06:15:16 -03:00
flux-bot
034acdaaf2 chore(bstein-dev-home): automated image update 2026-01-03 08:16:54 +00:00
flux-bot
25ce112c82 chore(bstein-dev-home): automated image update 2026-01-03 08:15:43 +00:00
flux-bot
a7222878c3 chore(bstein-dev-home): automated image update 2026-01-03 08:01:52 +00:00
flux-bot
dd3b940ee7 chore(bstein-dev-home): automated image update 2026-01-03 08:00:42 +00:00
flux-bot
6360012155 chore(bstein-dev-home): automated image update 2026-01-03 07:33:50 +00:00
flux-bot
fff9ffbba7 chore(bstein-dev-home): automated image update 2026-01-03 07:32:40 +00:00
flux-bot
cc677eb7f3 chore(bstein-dev-home): automated image update 2026-01-03 07:14:49 +00:00
flux-bot
335906aafc chore(bstein-dev-home): automated image update 2026-01-03 07:13:39 +00:00
0b211520cb keycloak: allow mailu_email + groups 2026-01-03 03:32:38 -03:00
flux-bot
5c618c6560 chore(bstein-dev-home): automated image update 2026-01-03 06:17:45 +00:00
flux-bot
b1706397b6 chore(bstein-dev-home): automated image update 2026-01-03 06:16:34 +00:00
flux-bot
23ebcbaf92 chore(bstein-dev-home): automated image update 2026-01-03 05:41:43 +00:00
flux-bot
76bb48eac1 chore(bstein-dev-home): automated image update 2026-01-03 05:40:32 +00:00
e6eff8165a mailu: sync via mailu_email attribute 2026-01-03 02:35:47 -03:00
10e322e853 keycloak(atlas): default TOTP required action 2026-01-03 01:09:14 -03:00
flux-bot
c080d39375 chore(bstein-dev-home): automated image update 2026-01-03 04:04:36 +00:00
flux-bot
64138ea045 chore(bstein-dev-home): automated image update 2026-01-03 04:03:25 +00:00
flux-bot
14a9a8403a chore(bstein-dev-home): automated image update 2026-01-03 03:47:34 +00:00
flux-bot
c62e142a87 chore(bstein-dev-home): automated image update 2026-01-03 03:46:24 +00:00
c9d9a28c03 portal: fix vaultwarden sync job env 2026-01-02 21:11:44 -03:00
flux-bot
479cb81b3e chore(bstein-dev-home): automated image update 2026-01-03 00:09:19 +00:00
flux-bot
bb49d584f5 chore(bstein-dev-home): automated image update 2026-01-03 00:09:08 +00:00
flux-bot
efb226fe07 chore(bstein-dev-home): automated image update 2026-01-03 00:05:12 +00:00
5437cebb9e sso: provision vaultwarden users 2026-01-02 21:04:12 -03:00
flux-bot
727d8cfd48 chore(bstein-dev-home): automated image update 2026-01-02 23:27:16 +00:00
flux-bot
4e1ec914f6 chore(bstein-dev-home): automated image update 2026-01-02 23:27:05 +00:00
0f26bd508e keycloak(atlas): disable browser IdP redirector 2026-01-02 20:09:05 -03:00
21d8fc3788 keycloak(atlas): retry realm settings job 2026-01-02 20:04:47 -03:00
54d324f555 keycloak(atlas): harden realm settings job 2026-01-02 20:02:11 -03:00
flux-bot
3f1780daed chore(bstein-dev-home): automated image update 2026-01-02 22:24:11 +00:00
flux-bot
82b2c95bf0 chore(bstein-dev-home): automated image update 2026-01-02 22:23:00 +00:00
503a9264c5 keycloak: cleanup LDAP federation 2026-01-02 18:45:45 -03:00
b509234aee bstein-dev-home: allow vaultwarden admin secret read 2026-01-02 18:05:17 -03:00
5e3cfee3d5 bstein-dev-home: read vaultwarden admin token 2026-01-02 18:03:06 -03:00
flux-bot
ee0aee71f8 chore(bstein-dev-home): automated image update 2026-01-02 20:48:04 +00:00
flux-bot
a92a51c6c5 chore(bstein-dev-home): automated image update 2026-01-02 20:46:53 +00:00
2254532642 keycloak: roll update with no surge 2026-01-02 17:15:37 -03:00
22b7e7aa66 keycloak: clear rollingUpdate for recreate 2026-01-02 17:09:24 -03:00
23a9e1ec30 keycloak: use recreate strategy with pvc 2026-01-02 17:02:59 -03:00
e2e76592a0 keycloak: enable debug logging 2026-01-02 16:57:42 -03:00
e5f41cfa2b vaultwarden: suspend flux kustomization 2026-01-02 16:26:48 -03:00
c36d318d81 vaultwarden: add flux kustomization 2026-01-02 16:17:53 -03:00
1346ccd31b keycloak: repair ldap federation parentId 2026-01-02 14:12:20 -03:00
8a2f3c733e sso: fix keycloak ldap provider parentId 2026-01-02 14:02:05 -03:00
d70b685f27 sso: remove openldap bootstrap job 2026-01-02 13:50:02 -03:00
2c86a6d95f sso: bump openldap bootstrap job 2026-01-02 13:40:11 -03:00
5ae9bf578e sso: make openldap bootstrap POSIX sh 2026-01-02 13:34:16 -03:00
8651ada4d9 sso: fix openldap bootstrap job 2026-01-02 13:25:30 -03:00
de14d68fc9 sso: codify openldap bootstrap and keycloak federation 2026-01-02 13:18:32 -03:00
flux-bot
ee90817040 chore(bstein-dev-home): automated image update 2026-01-02 16:13:45 +00:00
flux-bot
750f1a2cbf chore(bstein-dev-home): automated image update 2026-01-02 16:12:33 +00:00
flux-bot
a9b7f86046 chore(bstein-dev-home): automated image update 2026-01-02 15:18:41 +00:00
flux-bot
f9462aae10 chore(bstein-dev-home): automated image update 2026-01-02 15:17:30 +00:00
flux-bot
78afccc53a chore(bstein-dev-home): automated image update 2026-01-02 14:19:37 +00:00
flux-bot
9c627087eb chore(bstein-dev-home): automated image update 2026-01-02 14:18:25 +00:00
flux-bot
f05a8a2200 chore(bstein-dev-home): automated image update 2026-01-02 13:34:33 +00:00
flux-bot
b06ae2c89d chore(bstein-dev-home): automated image update 2026-01-02 13:33:22 +00:00
flux-bot
30373c19e7 chore(bstein-dev-home): automated image update 2026-01-02 12:47:30 +00:00
flux-bot
d1294a0dc9 chore(bstein-dev-home): automated image update 2026-01-02 12:46:18 +00:00
flux-bot
d44b759f0b chore(bstein-dev-home): automated image update 2026-01-02 07:35:08 +00:00
flux-bot
1ff8c8cdec chore(bstein-dev-home): automated image update 2026-01-02 07:33:56 +00:00
46d4ab6dc8 keycloak: apply realm smtp via api 2026-01-02 04:03:27 -03:00
9fa081ca36 keycloak: set realm smtp server 2026-01-02 03:58:37 -03:00
77beacec53 keycloak: switch realm job to kcadm 2026-01-02 03:55:28 -03:00
flux-bot
6a155a7a7a chore(bstein-dev-home): automated image update 2026-01-02 06:55:05 +00:00
flux-bot
0736c4255e chore(bstein-dev-home): automated image update 2026-01-02 06:53:54 +00:00
816abca2df keycloak: fix realm job service URL 2026-01-02 03:49:19 -03:00
2ef3b7d45c keycloak: pin realm job to rpi nodes 2026-01-02 03:45:44 -03:00
7e464d3ec8 keycloak: enable reset password 2026-01-02 03:39:08 -03:00
flux-bot
89228d2d5e chore(bstein-dev-home): automated image update 2026-01-02 06:17:02 +00:00
flux-bot
d46d411154 chore(bstein-dev-home): automated image update 2026-01-02 06:15:51 +00:00
5f7ea4544d mailu: store app password as list 2026-01-02 03:09:46 -03:00
flux-bot
26f11db285 chore(bstein-dev-home): automated image update 2026-01-02 06:00:01 +00:00
flux-bot
05216a972f chore(bstein-dev-home): automated image update 2026-01-02 05:58:49 +00:00
b7e34865fe mailu: roll listener on script changes
Generate mailu-sync-listener ConfigMap from scripts/ and enable name-suffix hashing to trigger Deployment rollout.
2026-01-02 02:57:18 -03:00
b95eab5876 mailu: add wait-mode sync endpoint
Also bump portal timeouts and relax access request rate limits.
2026-01-02 02:54:20 -03:00
flux-bot
9daf8b345a chore(bstein-dev-home): automated image update 2026-01-02 04:52:56 +00:00
flux-bot
20ad6a76ca chore(bstein-dev-home): automated image update 2026-01-02 04:51:45 +00:00
flux-bot
0d79c4bcdc chore(bstein-dev-home): automated image update 2026-01-02 04:39:56 +00:00
flux-bot
616c82807e chore(bstein-dev-home): automated image update 2026-01-02 04:38:44 +00:00
7a97aa257b services: scaffold postgres and vaultwarden manifests 2026-01-02 01:13:25 -03:00
flux-bot
6eb3ca1fce chore(bstein-dev-home): automated image update 2026-01-02 03:59:52 +00:00
flux-bot
3ab4c866ea chore(bstein-dev-home): automated image update 2026-01-02 03:58:41 +00:00
flux-bot
ee9fa7fd36 chore(bstein-dev-home): automated image update 2026-01-02 03:48:52 +00:00
flux-bot
04c5ee91a0 chore(bstein-dev-home): automated image update 2026-01-02 03:47:40 +00:00
1995ba7ec9 bstein-dev-home: add portal db + relax account gating 2026-01-02 00:42:25 -03:00
flux-bot
a30df479aa chore(bstein-dev-home): automated image update 2026-01-02 02:46:47 +00:00
flux-bot
de8721cbaa chore(bstein-dev-home): automated image update 2026-01-02 02:45:36 +00:00
edd2189f3c nextcloud: make mail sync idempotent 2026-01-01 23:24:34 -03:00
flux-bot
90b071566a chore(bstein-dev-home): automated image update 2026-01-02 02:23:46 +00:00
flux-bot
a56235f391 chore(bstein-dev-home): automated image update 2026-01-02 02:22:34 +00:00
flux-bot
58d14f1cb6 chore(bstein-dev-home): automated image update 2026-01-02 01:20:41 +00:00
flux-bot
d431b04114 chore(bstein-dev-home): automated image update 2026-01-02 01:19:29 +00:00
flux-bot
157c036371 chore(bstein-dev-home): automated image update 2026-01-02 00:58:40 +00:00
flux-bot
77761c1e42 chore(bstein-dev-home): automated image update 2026-01-02 00:57:28 +00:00
592539e2d3 bstein-dev-home: enable Keycloak portal 2026-01-01 21:45:53 -03:00
flux-bot
335ead9df5 chore(bstein-dev-home): automated image update 2026-01-02 00:44:39 +00:00
flux-bot
41d81ee41a chore(bstein-dev-home): automated image update 2026-01-02 00:43:28 +00:00
ce6537a155 comms(synapse): enable MSC4108 QR login 2026-01-01 18:44:47 -03:00
d43e40d515 comms: leave stuck rooms via MAS admin 2026-01-01 18:26:50 -03:00
144467dfe2 comms(mas): enable internal admin API 2026-01-01 18:22:32 -03:00
32f1532508 monitoring: dual-provision overview orgs 2026-01-01 18:20:40 -03:00
b9dbeb98b0 comms(mas): drop flux-managed admin client secret 2026-01-01 18:20:03 -03:00
e1f163253b comms(mas): create admin client runtime secret 2026-01-01 18:19:56 -03:00
324ee34648 comms(mas): stop managing admin client secret data 2026-01-01 18:15:16 -03:00
0a7410302d comms(mas): fix admin secret job permissions 2026-01-01 18:12:21 -03:00
ae335fcff2 comms(mas): debug admin secret ensure job 2026-01-01 18:09:08 -03:00
9d979a69fe comms(mas): make secret ensure job portable 2026-01-01 18:02:31 -03:00
353f2e9210 monitoring: recreate grafana rollouts 2026-01-01 18:00:07 -03:00
0f36576bac comms(mas): patch admin secret via stringData 2026-01-01 17:56:39 -03:00
100a11e0de monitoring: split overview org 2026-01-01 17:54:01 -03:00
c72e1e1f9b comms(mas): fix admin client secret job 2026-01-01 17:52:18 -03:00
ed23d831b9 comms(mas): bootstrap admin client secret 2026-01-01 17:48:39 -03:00
eb3a6824e6 nextcloud: flux-manage mail sync 2026-01-01 17:47:07 -03:00
32f78c4f82 nextcloud: fix mail sync idempotency 2026-01-01 17:36:23 -03:00
70059dda33 comms: rerun bstein room cleanup after synapse restart 2026-01-01 17:27:24 -03:00
a8149bd993 comms: restart synapse to refresh admin cache 2026-01-01 17:25:09 -03:00
4e701c6340 comms: debug bstein room cleanup 2026-01-01 17:22:55 -03:00
b6c955e7da comms: delete old test rooms for bstein 2026-01-01 17:20:28 -03:00
4a584f538d comms: force leave old rooms (v3) 2026-01-01 17:16:57 -03:00
da972215d3 comms: force leave old rooms (v2) 2026-01-01 17:14:27 -03:00
8aecb88af3 comms: force leave old rooms 2026-01-01 17:01:55 -03:00
e1e95f9bef monitoring: drop anonymous folder role 2026-01-01 16:53:53 -03:00
f6dba2b8c1 comms: reset othrys without synapse admin 2026-01-01 16:36:55 -03:00
dca01199ce comms: reset othrys room 2026-01-01 16:29:11 -03:00
5da36a38c3 comms: fix atlas mention detection 2026-01-01 15:32:30 -03:00
0c1989c678 ai-llm: serialize rollout for RWO pvc 2026-01-01 14:48:54 -03:00
5093f77c0a monitoring: per-panel namespace share filters 2026-01-01 14:44:33 -03:00
7c31d25c24 comms(atlasbot): rollout on config changes 2026-01-01 14:30:49 -03:00
2d8540907a comms(atlasbot): respond to @atlas mentions and keep context 2026-01-01 14:28:11 -03:00
f18f1df1ce monitoring: ensure gpu idle share renders 2026-01-01 14:21:43 -03:00
6a76fc0fa3 gpu: enable time-slicing and refresh dashboards 2026-01-01 14:16:08 -03:00
7020d53fd8 communication: drop old namespace manifest 2026-01-01 13:53:35 -03:00
dcc5714a8b comms(synapse): fix signing key RBAC + rerun job 2026-01-01 13:47:33 -03:00
baed4737d9 comms(synapse): fix signingkey secret patch job 2026-01-01 13:37:21 -03:00
e82e66091c comms(synapse): fix signingkey job image 2026-01-01 13:31:37 -03:00
e47e6d6e45 comms(synapse): ensure signing key secret populated 2026-01-01 13:25:59 -03:00
6ddfd394cb communication: deploy into comms namespace 2026-01-01 13:12:45 -03:00
flux-bot
c6089fbf85 chore(bstein-dev-home): automated image update 2026-01-01 16:10:02 +00:00
flux-bot
d4a830da88 chore(bstein-dev-home): automated image update 2026-01-01 16:08:50 +00:00
79f99899ee communication: prune stack for comms cutover 2026-01-01 13:07:11 -03:00
a48486912b comms: create namespace via Flux 2026-01-01 13:03:43 -03:00
e503c40417 communication: stop staging comms namespace (kustomize conflict) 2026-01-01 13:00:56 -03:00
32e98a7836 communication: create comms namespace 2026-01-01 12:58:55 -03:00
554061711c communication: use MAS for internal password logins 2026-01-01 12:57:00 -03:00
0f1f34c52a communication(atlasbot): reduce spam and use atlasbot user 2026-01-01 12:50:26 -03:00
1f554e583a keycloak: read POSTGRES_* db secret keys 2026-01-01 12:32:57 -03:00
7955d9133c jellyfin: fix LDAP auth provider id 2026-01-01 12:22:43 -03:00
flux-bot
48d4e9c363 chore(bstein-dev-home): automated image update 2026-01-01 15:10:58 +00:00
flux-bot
47ac4a8580 chore(bstein-dev-home): automated image update 2026-01-01 15:09:46 +00:00
671b28b8f4 sso(openldap): remove bootstrap ldif 2026-01-01 12:02:21 -03:00
a4bcaf8912 sso(openldap): fix bootstrap ldif mount 2026-01-01 11:48:37 -03:00
flux-bot
9c6889440c chore(bstein-dev-home): automated image update 2026-01-01 14:39:55 +00:00
flux-bot
8c799faa61 chore(bstein-dev-home): automated image update 2026-01-01 14:38:43 +00:00
1e64075478 sso(openldap): restore in-cluster LDAP 2026-01-01 11:37:52 -03:00
beb975182a communication: render LiveKit TURN creds 2026-01-01 11:31:39 -03:00
5c59640bf5 communication: set LB externalTrafficPolicy Local 2026-01-01 04:19:12 -03:00
10f7f3a8c6 communication: advertise TURN over tcp 2026-01-01 03:54:19 -03:00
3948602c57 metallb: restore speaker log level info 2025-12-31 22:35:16 -03:00
b0bd7c97a5 metallb: set speaker lb-class 2025-12-31 22:15:08 -03:00
3a473ff482 metallb: enable speaker debug logs 2025-12-31 22:00:09 -03:00
8e702f14db metallb: run speaker on all nodes 2025-12-31 21:45:12 -03:00
b4ac308af8 metallb: schedule speaker on rpi4+rpi5 2025-12-31 21:00:18 -03:00
04f46ed491 communication: use Cluster LB traffic policy 2025-12-31 20:55:46 -03:00
c32d734a69 communication: set LB traffic policy local 2025-12-31 19:59:26 -03:00
50c23b592a communication: serve matrix well-known on matrix.live 2025-12-31 19:19:44 -03:00
af05370ad7 communication: fix well-known trailing slash and reload config 2025-12-31 19:17:31 -03:00
f1ca9d919d communication: fix well-known nginx regex escaping 2025-12-31 19:15:01 -03:00
9c60011261 communication: serve matrix well-known with trailing slash 2025-12-31 19:13:08 -03:00
db01ab02ef communication: fix LiveKit udp_port range and expose 7883 2025-12-31 18:48:18 -03:00
b7b1ffde6c communication: fix LiveKit udp_port mux syntax 2025-12-31 18:44:54 -03:00
a260d55826 communication: remove one-shot syn2mas jobs 2025-12-31 18:32:26 -03:00
6c1ff72af6 communication: scale MAS/Synapse back up 2025-12-31 18:29:25 -03:00
c4931c381c communication: prep syn2mas migrate (bcrypt, disable guests) 2025-12-31 18:27:04 -03:00
bbd3815f25 communication: rerun syn2mas migrate job 2025-12-31 18:22:22 -03:00
101fcc18a3 communication: syn2mas migrate mount MAS secrets 2025-12-31 18:16:53 -03:00
af03ac6dbc communication: add MAS syn2mas migrate job 2025-12-31 18:14:44 -03:00
06a1cde738 communication: scale down MAS and Synapse for syn2mas 2025-12-31 18:12:45 -03:00
35770a8b90 communication: syn2mas check include synapse secret 2025-12-31 18:08:30 -03:00
805a7215bc communication: fix syn2mas check db URI arg 2025-12-31 18:06:32 -03:00
9658e48a2d communication: add MAS syn2mas check job 2025-12-31 18:00:57 -03:00
73f577a49a communication: make suspended cronjobs fail-fast 2025-12-31 17:33:20 -03:00
26d82b3f85 communication: suspend flaky bootstrap cronjobs 2025-12-31 17:28:44 -03:00
bfd1c5dd49 communication: switch atlasbot to MAS login 2025-12-31 17:26:37 -03:00
be2c2ba33e communication: route Matrix SSO redirects to MAS 2025-12-31 17:21:40 -03:00
a5112d5f88 communication: fix MAS image tag 2025-12-31 17:10:45 -03:00
8b37ba3213 communication: bump MAS to v1.8.0 2025-12-31 17:04:11 -03:00
214a228bf5 communication: drop msc3861 config for MAS 2025-12-31 16:54:58 -03:00
f869d0ffb9 communication: configure Synapse msc3861 client creds 2025-12-31 16:44:44 -03:00
2fdcfbfbaf communication: add Synapse msc3861 admin token 2025-12-31 16:38:09 -03:00
650d210876 communication: move LiveKit media to 7882/7881 2025-12-31 16:27:09 -03:00
01dcb76966 communication: fix Matrix well-known auth JSON 2025-12-31 16:18:24 -03:00
385df610be communication: disable Synapse OIDC under MAS 2025-12-31 16:11:33 -03:00
07ae28e1b1 communication: fix Synapse delegated auth 2025-12-31 16:05:32 -03:00
20df5cfb6e communication: restart MAS on config change 2025-12-31 15:59:46 -03:00
683f495bd8 communication: make MAS listen on IPv4 2025-12-31 15:57:33 -03:00
cb82a44e2e communication: enable MAS delegated auth 2025-12-31 15:53:35 -03:00
940e0cc613 communication: wire MAS secrets via init render 2025-12-31 15:49:21 -03:00
45f62bc331 communication: fix MAS config permissions 2025-12-31 15:44:17 -03:00
d9c003ce5a communication: fix MAS container entrypoint 2025-12-31 15:41:15 -03:00
716059d9ac communication: add matrix-authentication-service 2025-12-31 15:37:54 -03:00
6203faae3f communication: make pin job mutable 2025-12-31 15:23:17 -03:00
d8d741bbd9 communication: remove plaintext secrets 2025-12-31 15:15:54 -03:00
aca05266fc comms: avoid Synapse PVC rollout deadlock 2025-12-31 13:49:49 -03:00
ee6bcec3c5 chat.ai: gate root with API key 2025-12-31 13:43:24 -03:00
a815322f6e comms: move LiveKit media to UDP 443 2025-12-31 13:25:45 -03:00
5ed650d19c communication: prune guest-helper and synapse-federation 2025-12-31 12:16:59 -03:00
6759817518 communication: stage guest-helper for prune 2025-12-31 12:15:18 -03:00
71c58ee081 communication: disable livekit room auto-create 2025-12-31 12:11:54 -03:00
a6bd6b8cc8 communication: add Othrys stack via Flux 2025-12-31 12:00:12 -03:00
c0a53e59b5 jitsi-launcher: add oauth2-proxy error middleware for redirects 2025-12-25 16:57:40 -03:00
c9ebcfc869 jitsi-launcher: allow any authenticated user (no group gate) 2025-12-25 16:54:33 -03:00
0e3d36a5ae jitsi-launcher: add health endpoint and readiness 2025-12-25 16:40:37 -03:00
a8fdcc5931 jitsi-launcher: pull image from docker hub 2025-12-25 16:35:44 -03:00
a55203a909 jitsi: add vault-backed jwt launcher 2025-12-25 16:33:56 -03:00
77ecf3229e vault: use dedicated service account for k8s auth 2025-12-25 03:43:17 -03:00
bb93f730d5 jitsi: fix secrets-store csi driver name 2025-12-25 03:36:55 -03:00
2acc7a06b2 vault-csi: deploy vault provider daemonset 2025-12-25 03:20:13 -03:00
5666eceec7 jitsi: use vault jwt via csi 2025-12-25 03:15:06 -03:00
fbe2490ef7 platform: add vault csi driver 2025-12-25 03:14:50 -03:00
9bbdbb5fab ci-demo: fix image tag value 2025-12-24 21:49:59 -03:00
25758b1cd9 jitsi: enforce auth flags on web/jicofo/jvb 2025-12-24 21:27:57 -03:00
4d47e2c693 vault: revert ui default auth block (not supported) 2025-12-24 20:16:33 -03:00
bd21e775ab jitsi: fix prosody auth init shell 2025-12-24 20:12:48 -03:00
cf2e4c8bb2 jitsi: require auth to start rooms; vault ui default oidc 2025-12-24 20:11:29 -03:00
bbe4fb2cff crypto: handle nested p2pool archive layout 2025-12-24 19:16:47 -03:00
1bbb88d9a3 crypto: fetch p2pool from github with debug 2025-12-24 19:14:44 -03:00
b71c145e6e crypto: download p2pool v4.9 arm64 at runtime 2025-12-24 19:09:40 -03:00
7876e4389c crypto: fetch p2pool binary at runtime 2025-12-24 19:06:40 -03:00
0db786c343 grafana,jitsi: enable pkce and tcp fallback 2025-12-24 18:15:25 -03:00
23f5f03047 jitsi: keep tcp config on pvc only 2025-12-24 17:53:59 -03:00
ad79ad0a3c jitsi: include sip communicator tcp props 2025-12-24 17:49:47 -03:00
39a8e551eb grafana: allow public overview via oidc 2025-12-24 17:43:07 -03:00
cd7ba1e8a8 jellyfin: enforce ldap auth provider on start 2025-12-24 17:25:07 -03:00
cecde3e197 jellyfin: drop OIDC plugin and strip injected script 2025-12-24 15:28:47 -03:00
e9308b6bd1 jitsi: add tcp harvester config for 4443 2025-12-24 15:28:47 -03:00
flux-bot
ebebd19a13 chore(bstein-dev-home): automated image update 2025-12-22 19:58:37 +00:00
flux-bot
0cd6d47940 chore(bstein-dev-home): automated image update 2025-12-22 19:57:34 +00:00
25c32da81e jitsi: add sip-communicator tcp harvester props 2025-12-22 13:51:05 -03:00
bde4002362 jitsi: force tcp harvester via system props 2025-12-22 13:49:28 -03:00
453776967a jitsi: fix init container placement 2025-12-22 13:47:18 -03:00
5baf62c915 jitsi: copy tcp custom config via init 2025-12-22 13:45:50 -03:00
638b37cb37 jitsi: add tcp harvester config for 4443 (configmap) 2025-12-22 13:44:07 -03:00
3330eb75c7 jitsi: add tcp harvester config for 4443 2025-12-22 13:43:55 -03:00
356f0de253 jitsi: advertise lan and public ips 2025-12-22 12:27:26 -03:00
752e75dca4 jitsi: use recreate for hostPort rollout 2025-12-22 11:49:31 -03:00
d436ed73bc jitsi: advertise wss colibri 2025-12-22 11:37:49 -03:00
1526906d7e jitsi: enable pods and fix colibri ws 2025-12-22 11:24:44 -03:00
flux-bot
8d6d2fc8fc chore(bstein-dev-home): automated image update 2025-12-21 04:40:52 +00:00
flux-bot
b05df744f2 chore(bstein-dev-home): automated image update 2025-12-21 04:39:48 +00:00
flux-bot
11463f63d1 chore(bstein-dev-home): automated image update 2025-12-21 04:32:51 +00:00
flux-bot
5a06496fbe chore(bstein-dev-home): automated image update 2025-12-21 04:31:48 +00:00
e7abd30b1d fix(ai): increase chat timeout to 60s 2025-12-21 01:31:20 -03:00
flux-bot
5f64778eeb chore(bstein-dev-home): automated image update 2025-12-21 04:22:50 +00:00
flux-bot
a9bf9178e6 chore(bstein-dev-home): automated image update 2025-12-21 04:21:47 +00:00
f37ce6fb85 fix(ai): ensure backend token mount and annotate ollama pods 2025-12-21 01:14:15 -03:00
flux-bot
85580ea128 chore(bstein-dev-home): automated image update 2025-12-21 03:50:48 +00:00
flux-bot
b597613dc3 chore(bstein-dev-home): automated image update 2025-12-21 03:49:45 +00:00
c6bae35bc6 chore(ai-llm): annotate pod with model and gpu 2025-12-21 00:47:57 -03:00
de693bafbe feat(bstein-dev-home): add SA/RBAC for ai pod discovery 2025-12-21 00:46:25 -03:00
flux-bot
24532fbdd5 chore(bstein-dev-home): automated image update 2025-12-21 03:36:47 +00:00
flux-bot
60f2c65ad3 chore(bstein-dev-home): automated image update 2025-12-21 03:35:44 +00:00
flux-bot
d6f44330c9 chore(bstein-dev-home): automated image update 2025-12-21 03:25:46 +00:00
flux-bot
4cf12144e9 chore(bstein-dev-home): automated image update 2025-12-21 03:24:43 +00:00
flux-bot
05c84daf2a chore(bstein-dev-home): automated image update 2025-12-21 03:20:46 +00:00
flux-bot
b519ef08bc chore(bstein-dev-home): automated image update 2025-12-21 03:19:43 +00:00
ef372bf8f2 chore(bstein-dev-home): scale to 1 replica and pass ai meta env 2025-12-21 00:17:08 -03:00
flux-bot
8a4e1993ec chore(bstein-dev-home): automated image update 2025-12-21 03:05:57 +00:00
0d1e3e8666 fix(bstein-dev-home): patch images via policies directly 2025-12-21 00:05:39 -03:00
flux-bot
9c90cfcc9c chore(bstein-dev-home): automated image update 2025-12-21 03:03:45 +00:00
flux-bot
9043d735f1 chore(bstein-dev-home): automated image update 2025-12-21 03:02:41 +00:00
flux-bot
204e92c1e8 chore(bstein-dev-home): automated image update 2025-12-21 02:59:51 +00:00
b6acab8ee6 bstein-dev-home: re-enable image automation 2025-12-20 23:59:31 -03:00
a4a5904201 bstein-dev-home: pin images and stop automation churn 2025-12-20 23:32:17 -03:00
flux-bot
b1ac53e1a1 chore(bstein-dev-home): automated image update 2025-12-21 02:29:58 +00:00
ba7563b0e5 bstein-dev-home: fix image tags, pause automation 2025-12-20 23:29:40 -03:00
flux-bot
05fa473582 chore(bstein-dev-home): automated image update 2025-12-21 02:27:44 +00:00
ba3b3a3d9f flux: simplify bstein-dev-home image update message 2025-12-20 23:27:24 -03:00
de317a3396 flux: fix bstein-dev-home automation template 2025-12-20 23:26:42 -03:00
3d19b54b12 flux: place bstein-dev-home image automation in app namespace 2025-12-20 23:25:56 -03:00
f23641be50 flux: let bstein-dev-home automation read policies in app ns 2025-12-20 23:24:29 -03:00
eaab2b7988 flux: run bstein-dev-home image automation on sso-hardening 2025-12-20 23:22:08 -03:00
4491a3681a jenkins: use main service for tunnel 2025-12-20 18:42:16 -03:00
1e72f2e371 jenkins: add RBAC serviceaccount and use for agents 2025-12-20 18:08:30 -03:00
a99293944a bstein-dev-home: default chat model to qwen2.5-coder 2025-12-20 15:22:05 -03:00
610ef7a552 bstein-dev-home: fix ingress indent for chat.ai host 2025-12-20 15:20:31 -03:00
9162f5789f ai-llm: GPU qwen2.5-coder on titan-24; add chat.ai host 2025-12-20 15:19:03 -03:00
39a914effd ai-llm: use phi3 mini model 2025-12-20 14:24:52 -03:00
16ab7a963d ai: allow ollama to share titan-24 gpu 2025-12-20 14:16:22 -03:00
c8adca5a5b ai: add ollama service and wire chat backend 2025-12-20 14:10:34 -03:00
f68668f987 jellyfin: fix oidc redirect to api/oidc/callback 2025-12-20 13:51:46 -03:00
5b0fbd344b jellyfin: pull oidc plugin from streaming harbor and fix oidc redirect 2025-12-20 13:32:36 -03:00
dba8364c74 vault: probes use http VAULT_ADDR for http listener 2025-12-20 00:09:44 -03:00
e354f8bc3f vault: keep probes HTTPS, drop ingress backend tweaks 2025-12-20 00:03:11 -03:00
fa977a69f4 vault: run http inside cluster (tls terminated at ingress) 2025-12-19 23:54:28 -03:00
d3ca57eabf vault: backend over https with serversTransport 2025-12-19 23:52:19 -03:00
c2dfba67c2 vault: remove serversTransport, speak http to service 2025-12-19 23:51:32 -03:00
f243be21e6 vault: drop unused redirect middleware 2025-12-19 23:50:44 -03:00
75b62e5ae2 vault: add traefik redirect middleware 2025-12-19 23:49:34 -03:00
af3d453e86 vault: let traefik speak http to service 2025-12-19 23:48:40 -03:00
65f8b7c893 vault: correct serversTransport reference 2025-12-19 23:16:20 -03:00
eb0db2ce81 jellyfin: clean old ldap plugin before oidc init 2025-12-19 21:32:40 -03:00
1b65987dfe jellyfin: upgrade to 10.11 and seed oidc plugin 2025-12-19 21:30:04 -03:00
ed868a5faa jellyfin: fix oidc installer script 2025-12-19 21:19:21 -03:00
b9144ebb5e jellyfin: bootstrap oidc plugin 2025-12-19 21:13:31 -03:00
303e7e770f vault: traefik serversTransport must include namespace 2025-12-19 21:08:10 -03:00
0071f13063 vault: pin to worker arm64 nodes 2025-12-19 21:02:49 -03:00
3db523335d vault: fix traefik serversTransport name 2025-12-19 20:58:29 -03:00
524868b05d vault: fix manifest and disable mlock 2025-12-19 20:32:10 -03:00
fad7204dfb mailu: switch relay to postmark 2025-12-19 19:58:06 -03:00
7533cec0ee vault: drop helm, add raw statefulset 2025-12-19 19:30:09 -03:00
6405cd823d Point bstein-dev-home to latest tags 2025-12-19 19:04:23 -03:00
ba47e00c88 Point bstein-dev-home to latest images (0.1.1-0) 2025-12-19 19:03:28 -03:00
f306baad35 Point bstein-dev-home to 0.1.1-0 images 2025-12-19 18:42:52 -03:00
f9c4967eed jenkins: switch healthcheck to deployment/service 2025-12-19 18:39:32 -03:00
f092f00bff jenkins: bind pvc to retained volume 2025-12-19 18:37:23 -03:00
9e7ded298e Flux image automation: track main branch for bstein-dev-home 2025-12-19 18:32:14 -03:00
b97b22fc01 jenkins: drop helm, run via raw manifests 2025-12-19 18:31:48 -03:00
fa44a00d0b Flux image automation: track main branch for bstein-dev-home 2025-12-19 18:31:19 -03:00
0c5bce93ca jenkins: fix oidc indent and harbor creds 2025-12-19 18:03:52 -03:00
c3ffde1b1f jenkins: restore harbor robot creds 2025-12-19 17:59:55 -03:00
1357d783de jenkins: fix oidc with wellknown config 2025-12-19 17:36:56 -03:00
f4fa44c842 jenkins: fix oidc jcasc schema 2025-12-19 16:44:24 -03:00
af411e795c flux: track feature/sso-hardening 2025-12-19 16:34:29 -03:00
70e1205f5f jenkins: pin oidc via jcasc 2025-12-19 16:24:13 -03:00
8afae161a0 Flux: track main branch 2025-12-19 15:48:23 -03:00
f5964439b0 Merge branch 'feature/bstein-dev-home' 2025-12-19 15:44:41 -03:00
a2b34c5712 Increase Atlas availability stat to 4 decimals 2025-12-19 15:18:14 -03:00
89f95157d8 Reduce Atlas availability query density 2025-12-19 14:56:29 -03:00
8be89cbd53 Expand Atlas availability window to 1y 2025-12-19 13:46:34 -03:00
6aadbadb9a Work around occasional OIDC DNS failures with hostAlias 2025-12-18 12:14:07 -03:00
5b113d798b Pin dev-home deploys to current semver tag 2025-12-18 12:03:01 -03:00
eac965b283 Let ImagePolicies rely on semver parsing without regex filters 2025-12-18 11:59:29 -03:00
d4b6406726 Loosen image tag regex for dev-home policies 2025-12-18 11:56:02 -03:00
d8863d84de Allow prerelease semver tags for dev-home images 2025-12-18 11:53:46 -03:00
b6bea73ab7 Align bstein-dev-home image automation with current branch 2025-12-18 11:47:40 -03:00
f3c96b7f8d Add pipeline utility steps plugin 2025-12-18 11:01:08 -03:00
25ec2b0354 jenkins: prefer rpi5 and bump controller resources 2025-12-18 10:58:30 -03:00
f1fb0450b4 jenkins: prefer rpi5 and bump controller resources 2025-12-18 10:56:43 -03:00
b7e4545734 Apply Jenkins theme via init script 2025-12-18 10:41:37 -03:00
5531ec045d Jenkins: add startupProbe delay to avoid premature restarts 2025-12-18 03:12:22 -03:00
ac41f61240 Jenkins theme: merge into base config to avoid JCasC conflict 2025-12-18 03:01:58 -03:00
6d8dbfc214 Fix Jenkins JCasC theme/job blocks 2025-12-18 02:55:25 -03:00
8e977f162c Jenkins: add dark theme via simple-theme-plugin 2025-12-18 02:50:31 -03:00
85cc80525c point flux to feature/bstein-dev-home branch 2025-12-18 02:19:59 -03:00
ba12854639 flux: lower controller log verbosity 2025-12-18 02:15:32 -03:00
aa1c7d62c1 flux: reset image automation log level 2025-12-18 02:15:32 -03:00
flux-bot
3de36441f4 chore(ci-demo): apply image updates 2025-12-18 02:15:32 -03:00
e5238a7f91 chore: simplify image automation commit messages 2025-12-18 02:15:32 -03:00
d8077798db chore: update image automation templates 2025-12-18 02:15:32 -03:00
5a52c8606b ci-demo: move image policy to flux-system 2025-12-18 02:15:32 -03:00
b1f8981b6c monero ingress + move pegasus to arm64 2025-12-18 02:02:21 -03:00
be23851878 Merge pull request 'feature/bstein-dev-home' (#7) from feature/bstein-dev-home into main
Reviewed-on: #7
2025-12-18 04:23:01 +00:00
6f6fb363b3 Add bstein-dev-home deployment and Jenkins job 2025-12-18 01:14:09 -03:00
449574d59f Merge remote-tracking branch 'origin/feature/ci-gitops' into feature/bstein-dev-home 2025-12-18 01:07:01 -03:00
5f300c47a5 flux: bump image automation api to v1 2025-12-18 00:46:25 -03:00
c04a38fac5 flux: enable debug logging for controllers 2025-12-18 00:44:11 -03:00
5d4a0814c1 flux: enable debug logging for image automation 2025-12-18 00:40:55 -03:00
61d9f05fef flux: update pegasus image automation api 2025-12-18 00:39:39 -03:00
609347991e flux: upgrade controllers to v2.7.5 2025-12-18 00:38:32 -03:00
9816354d0f ci-demo: bump to v0.0.0-2 2025-12-17 23:12:03 -03:00
39275db74e ci-demo: set tag v0.0.0-1 2025-12-17 19:49:53 -03:00
9635100675 ci-demo: fix imagepolicy tag regex 2025-12-17 19:45:15 -03:00
bbb84c1182 jenkins: add ci-demo job 2025-12-17 19:27:23 -03:00
daa354e2cd ci-demo: add flux image automation 2025-12-17 19:18:29 -03:00
0a42289516 harbor: pin components to v2.14.1-arm64 2025-12-17 17:54:50 -03:00
b7246f5835 harbor: suspend automation, pin redis 2025-12-17 17:29:03 -03:00
flux-bot
b7709b3f40 chore(harbor): update images to registry.bstein.dev/infra/harbor-redis:v2.14.1-arm64.14registry.bstein.dev/infra/harbor-core:v2.14.1-arm64.14registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64.14registry.bstein.dev/infra/harbor-portal:v2.14.1-arm64.14registry.bstein.dev/infra/harbor-registry:v2.14.1-arm64.14registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64.14registry.bstein.dev/infra/harbor-nginx:v2.14.1-arm64.14registry.bstein.dev/infra/harbor-prepare:v2.14.1-arm64.14 2025-12-17 19:38:57 +00:00
bb8de41cdb harbor: run image automation in harbor ns 2025-12-17 16:38:37 -03:00
1d788a5dc4 harbor: fix imagepolicy tag setters 2025-12-17 16:32:42 -03:00
37a50622a2 harbor: fix image automation push schema 2025-12-17 16:25:16 -03:00
cde135c59e harbor: enable image automation push 2025-12-17 16:17:07 -03:00
543f2a9ccd harbor: fix image policy tag regex 2025-12-17 13:16:57 -03:00
efa6d92b69 harbor: automate nginx and prepare 2025-12-17 13:14:31 -03:00
2f66afd970 flux(atlas): use scoped health checks 2025-12-17 04:47:12 -03:00
f55d3fd956 flux(atlas): limit kustomization health checks 2025-12-17 04:11:26 -03:00
1a8c6857e7 harbor: re-pin workloads to titan-05 2025-12-17 03:30:31 -03:00
f28d5680f2 harbor: add image automation 2025-12-17 03:21:35 -03:00
8d04f6c6c7 jenkins: pin controller to rpi4 2025-12-17 02:53:23 -03:00
d93d24d5ef jenkins: disable chart local auth realm 2025-12-17 02:30:41 -03:00
7dcfd5f6cf jenkins: stop JCasC resetting OIDC 2025-12-17 02:23:54 -03:00
d3aa456bee jenkins: poll harbor-arm-build scm 2025-12-17 01:58:10 -03:00
a52b811e5b jenkins: source pipeline creds from secrets 2025-12-17 01:47:33 -03:00
cd1b9b57b0 harbor: add helm remediation and timeouts 2025-12-17 01:39:49 -03:00
5e6f9c6c83 chore: stop tracking NOTES.md 2025-12-17 01:29:48 -03:00
f512e0fa29 jenkins: harden oidc and timeouts 2025-12-17 01:11:07 -03:00
4e479147ec jenkins: run jcasc cleanup initcontainer as jenkins user 2025-12-17 00:43:55 -03:00
1f98a5be12 jenkins: clean stale JCasC files on startup 2025-12-17 00:37:37 -03:00
30048a9ae5 jenkins: drop invalid JCasC OIDC realm (use init script) 2025-12-17 00:28:52 -03:00
60a8192f61 jenkins: enforce OIDC via JCasC (no node move) 2025-12-17 00:23:15 -03:00
ce7631f896 jenkins: enforce OIDC via JCasC and pin to arm64 2025-12-16 23:38:08 -03:00
f3335028b1 jenkins: disable scm trigger for harbor arm build 2025-12-16 23:12:27 -03:00
0385a653af fix: use FullControlOnceLoggedIn auth strategy 2025-12-16 20:33:03 -03:00
6759871b43 fix: add casc support plugin 2025-12-16 20:27:41 -03:00
3e4a49e7fb fix: add job-dsl plugin for JCasC jobs 2025-12-16 20:21:33 -03:00
b951058dc6 fix: enforce Jenkins OIDC via init groovy only 2025-12-16 20:16:18 -03:00
cfa7bd8198 fix: jenkins casc OIDC using explicit endpoints 2025-12-16 20:13:52 -03:00
162fe3339f fix: pin Jenkins OIDC realm via JCasC 2025-12-16 20:04:21 -03:00
fc858fc8df ci: seed harbor-arm-build pipeline in Jenkins 2025-12-16 19:26:46 -03:00
8b9fc8ff1c chore: remove zot stack 2025-12-16 14:10:04 -03:00
3066db793d harbor: bootstrap arm64 images on titan-05 2025-12-16 11:16:34 -03:00
759a77c745 harbor: run arm64 images on rpi workers 2025-12-16 03:22:01 -03:00
c661658a12 Add AC Infinity ingestion plan 2025-12-16 01:45:04 -03:00
144a860a88 harbor: use project paths for crypto/pegasus images 2025-12-16 00:15:22 -03:00
bd64a36165 registry: point workloads to harbor 2025-12-16 00:08:11 -03:00
22b611f8ea harbor: set redis affinity to amd64 titan-22 first 2025-12-15 23:14:26 -03:00
a8bde2edc7 harbor: pin to amd64, prefer titan-22 2025-12-15 23:02:58 -03:00
d51a19cab9 harbor: prefer rpi nodes 2025-12-15 23:00:11 -03:00
3e3cab6845 harbor: increase helm timeout 2025-12-15 22:32:29 -03:00
9cda32c0bf harbor: use astreae storageclass for pvc 2025-12-15 22:22:48 -03:00
0f49849761 Regenerate dashboards after availability thresholds tweak 2025-12-15 22:14:26 -03:00
252743e416 harbor: use existing secrets and correct admin key 2025-12-15 22:08:52 -03:00
dba7cf00a4 harbor: deploy chart via flux 2025-12-15 22:05:40 -03:00
aa0df1f62b harbor: add helm repo and deploy via helmrelease 2025-12-15 22:05:32 -03:00
aa2bb09873 zot: allow upstream basic auth from oauth2-proxy 2025-12-15 14:22:48 -03:00
54406661f2 zot: forward authorization header to ui 2025-12-15 14:14:49 -03:00
caef505677 zot ui: send basic creds from oauth2-proxy, remove traefik header 2025-12-15 14:08:18 -03:00
54eb9e1ac5 zot: restore UI basic header middleware 2025-12-15 14:01:18 -03:00
1899bb7677 zot: move basic auth to oauth2-proxy upstream 2025-12-15 13:53:53 -03:00
0416493f49 zot: fix htpasswd volume to avoid type conflict 2025-12-15 13:00:51 -03:00
b87f06f6ff zot: add oauth proxy and user sync scripts 2025-12-15 12:57:02 -03:00
828f66d18c gitea: enable OIDC auto-registration 2025-12-14 23:08:38 -03:00
7a1f3bfc3f gitea: add proxy/session headers for OIDC 2025-12-14 22:25:46 -03:00
294542e718 gitea: reference secret via env; remove secret file 2025-12-14 22:16:49 -03:00
c3a8c7ddae gitea: remove committed secret and env refs 2025-12-14 22:10:13 -03:00
29da4be557 gitea: pin secret/internal token and include secret manifest 2025-12-14 22:06:25 -03:00
fc5b0cccf8 gitea: drop required claim constraint on keycloak auth 2025-12-14 21:58:36 -03:00
c8b89c3120 gitea: enforce keycloak auth source via init container 2025-12-14 21:54:18 -03:00
9b994111cb gitea: remove bootstrap job (immutable error) 2025-12-14 21:49:07 -03:00
a174e451d9 gitea: fix bootstrap job immutability 2025-12-14 21:47:50 -03:00
d8dab08cd8 gitea: set trace logging for oidc 2025-12-14 21:44:43 -03:00
0d93929e3d gitea: relax required signin, set admin group+skip 2fa 2025-12-14 21:42:08 -03:00
2ffc906487 gitea: enable debug logging for oauth 2025-12-14 21:38:32 -03:00
37761fa118 jenkins: fix OIDC retriever null 2025-12-14 21:23:15 -03:00
a46226bb0a ci: enable oidc for jenkins/gitops/gitea 2025-12-14 20:58:57 -03:00
04602a2914 jenkins: auto-configure OIDC via init script 2025-12-14 19:22:47 -03:00
fc0fa59981 jenkins: drop JCasC OIDC script to unblock startup 2025-12-14 18:10:49 -03:00
0286f4f317 jenkins: restore plugin list without pinned versions 2025-12-14 17:59:48 -03:00
90bf1f7d8e jenkins: start without plugin installs to unblock bootstrap 2025-12-14 16:02:05 -03:00
6def1aa479 jenkins: use latest plugin versions to avoid 404 2025-12-14 16:00:45 -03:00
4eff9ebcc1 jenkins: add helm release with ingress + astreae storage 2025-12-14 15:57:42 -03:00
ccfc473521 cleanup: stop tracking extra md files; switch gitops cert to letsencrypt 2025-12-14 15:52:12 -03:00
b575c64de1 chore: drop stray NOTES.md 2025-12-14 15:43:06 -03:00
39d732d74d git: ignore fixed 2025-12-14 15:39:27 -03:00
b28e393524 gitops-ui: open ingress for acme solver 2025-12-14 15:14:11 -03:00
694bb4d12e gitops-ui: allow acme solver from kube-system traefik 2025-12-14 15:12:38 -03:00
6993f51ef7 gitops-ui: allow acme solver ingress from traefik 2025-12-14 15:08:44 -03:00
85cea34fe8 gitops-ui: cert + switch flux to feature/ci-gitops 2025-12-14 15:04:13 -03:00
055ce7d18c Merge pull request 'feature/mailu' (#5) from feature/mailu into main
Reviewed-on: #5
2025-12-14 17:48:02 +00:00
1a161b4d3c monitoring: longer data history 2025-12-14 14:47:20 -03:00
f7bf990d62 flux: bump gitops-ui kustomization 2025-12-14 14:41:52 -03:00
63bf153c8b flux: add weave gitops ui 2025-12-14 14:38:08 -03:00
8fceebd7a7 nextcloud: integration with mailu & gitops-ui: initial install 2025-12-14 14:21:40 -03:00
0d0216c8f5 Add tests and dedupe nextcloud mail sync 2025-12-14 14:15:19 -03:00
c8b49560b6 Keep nextcloud scripts single-sourced under scripts/ 2025-12-14 14:05:01 -03:00
327a7bed57 Extract nextcloud scripts to files 2025-12-14 13:59:16 -03:00
aae09c5074 Normalize doc layout and README guidance 2025-12-14 13:47:59 -03:00
56bb4e91b9 Group namespace plurality rows to one per namespace 2025-12-13 22:17:47 -03:00
18f3a2cefe Fix namespace plurality mask and bump v26 2025-12-13 20:53:11 -03:00
1ec3ca29a4 Use OR-joined node ranks for plurality tie-break 2025-12-13 19:04:22 -03:00
4812958e82 Deduplicate namespace plurality rows with ranked tie-break 2025-12-13 18:39:31 -03:00
9ad5f7f405 Restore namespace plurality panel data 2025-12-13 18:25:03 -03:00
57ea397027 Use table format for namespace plurality panel 2025-12-13 18:23:19 -03:00
be0ac48b33 Simplify namespace plurality table rendering 2025-12-13 18:07:56 -03:00
2156b6f6aa Hide table footer on namespace plurality table 2025-12-13 18:03:51 -03:00
4fcc7c84f2 Make namespace plurality table non-filterable 2025-12-13 17:55:52 -03:00
a4b3273bab Remove filter bar from namespace plurality table 2025-12-13 17:38:57 -03:00
c536a13d55 Disable column filters on namespace plurality table 2025-12-13 17:35:52 -03:00
13eb02c19b Hide filters on namespace plurality table 2025-12-13 17:32:19 -03:00
134a4ad001 Fix namespace plurality table query 2025-12-13 17:29:55 -03:00
3e0a84b074 atlas pods: plurality table v11 (deterministic top node) 2025-12-13 17:19:03 -03:00
7f67793ee5 atlas pods: plurality table v10 2025-12-13 16:36:25 -03:00
e87d54f19d atlas pods: per-namespace top node via topk 2025-12-13 15:51:45 -03:00
6ac01e5879 atlas pods: simplify plurality table (no filter) 2025-12-13 15:29:08 -03:00
d0ed188179 monitoring: drop README per convention 2025-12-13 15:25:21 -03:00
b703e66b98 monitoring: restore README 2025-12-13 15:11:50 -03:00
68d4f43903 atlas pods: stabilize plurality query to avoid 422 2025-12-13 15:11:21 -03:00
cf9dacd4ea atlas pods: show per-namespace top node without vars 2025-12-13 15:02:52 -03:00
6eee7b8853 atlas pods: drop non-leading nodes in plurality table 2025-12-13 13:39:06 -03:00
03a4ca4d84 atlas pods: simplify plurality table query 2025-12-13 12:06:18 -03:00
c7adb0c8cb atlas pods: fix plurality table query 2025-12-13 12:00:31 -03:00
9d1163f580 atlas pods: use prom share() for plurality table 2025-12-13 11:53:27 -03:00
001f0f95a6 atlas pods: fix plurality query with bool max match 2025-12-13 11:51:18 -03:00
2177a8009e atlas pods: robust per-namespace top-node share 2025-12-13 11:48:44 -03:00
6a3d1311b9 atlas pods: select per-namespace top node via max match 2025-12-13 04:15:03 -03:00
d916e5a7f1 atlas pods: sort plurality table by node then share 2025-12-13 04:10:10 -03:00
5d6d34c274 atlas pods: simplify namespace plurality query 2025-12-13 04:06:46 -03:00
53423c7a46 atlas pods: fix namespace plurality query 2025-12-13 04:00:57 -03:00
d274738e9e restore readmes removed in last commit 2025-12-13 03:57:44 -03:00
f0265d6b94 atlas pods: add namespace plurality by node table 2025-12-13 03:57:20 -03:00
8a755e0c42 mailu: forcing version 1.4 clamav over 1.2 2025-12-13 00:11:40 -03:00
e22293db3e forcing 12-r3 over 12-r6 for redis 2025-12-12 22:09:04 -03:00
6f8a70fd58 atlas overview: include titan-db in control plane panels 2025-12-12 21:55:53 -03:00
580d1731f9 monitoring: drop duplicate titan-db scrape job 2025-12-12 21:48:03 -03:00
4def298b83 monitoring: scrape titan-db node_exporter 2025-12-12 21:38:10 -03:00
1166069640 atlas dashboards: align percent thresholds and disk bars 2025-12-12 21:13:31 -03:00
e56bed284e atlas overview: refine alert thresholds and availability colors 2025-12-12 20:50:41 -03:00
24376594ff atlas dashboards: use threshold colors for stats 2025-12-12 20:44:20 -03:00
5277c98385 atlas dashboards: fix pod share display and zero/red stat thresholds 2025-12-12 20:40:32 -03:00
056b7b7770 atlas dashboards: show pod counts (not %) and make zero-friendly stats 2025-12-12 20:30:00 -03:00
b770575b42 atlas dashboards: show pod counts with top12 bars 2025-12-12 20:20:13 -03:00
9e76277c22 atlas dashboards: drop empty nodes and enforce top12 pod bars 2025-12-12 19:09:51 -03:00
93b3c6d2ec atlas dashboards: cap pod count bars at top12 2025-12-12 18:56:13 -03:00
596bf46863 atlas dashboards: sort pod counts and add pod row to overview 2025-12-12 18:51:43 -03:00
8b703f8655 atlas pods: add pod count bar and tidy pie 2025-12-12 18:45:29 -03:00
ec59d25ad8 atlas dashboards: fix overview links and add pods-by-node pie 2025-12-12 18:32:45 -03:00
bf6179f907 atlas internal dashboards: add SLO/burn and api health panels 2025-12-12 18:00:43 -03:00
0a0966db78 atlas overview: fix availability scaling 2025-12-12 16:36:47 -03:00
87fbba0d3e atlas overview: show availability percent with 3 decimals 2025-12-12 16:15:37 -03:00
b200dba5b9 atlas overview: show availability percent and keep uptime centered 2025-12-12 16:11:28 -03:00
697ce3c18f atlas overview: center uptime and reorder top row 2025-12-12 15:56:33 -03:00
8e39c6a28b atlas overview: add uptime and crashloop panels 2025-12-12 15:23:51 -03:00
38ab8e3364 standardize cert issuers to letsencrypt 2025-12-12 15:18:40 -03:00
29d22ba539 mailu: fix unbound sidecar mounts 2025-12-12 01:19:27 -03:00
118032d2c6 mailu: use mvance unbound sidecar and current redis image 2025-12-12 01:12:48 -03:00
4cfe92feb2 mailu: remove force upgrade to avoid pvc replace 2025-12-12 01:09:25 -03:00
ca27cc95b6 mailu: add validating dns sidecar and disable vip hostports 2025-12-12 01:06:38 -03:00
6c77b8e7f8 restore docs after gitignore change 2025-12-12 00:50:02 -03:00
78195c4685 mailu: fix admin dns and tame vip 2025-12-12 00:49:45 -03:00
5ef0b4edf6 mailu: capture helm release and cert 2025-12-11 23:54:43 -03:00
9f226c1584 Merge pull request 'feature/sso' (#4) from feature/sso into main
Reviewed-on: #4
2025-12-11 20:43:34 +00:00
319b515882 zot: restore main branch config 2025-12-11 17:26:15 -03:00
cb2b2ec1cd zot: revert to unauthenticated registry 2025-12-11 17:22:16 -03:00
20cd185c0b vault: drop traefik basicauth 2025-12-11 17:09:05 -03:00
2f368f6975 zot,vault: remove oauth2-proxy sso 2025-12-11 17:04:19 -03:00
6c62d42f7a longhorn/vault: gate via oauth2-proxy 2025-12-07 19:44:02 -03:00
a7e9f1f7d8 auth: remove error middleware to allow redirect 2025-12-07 13:19:45 -03:00
ceb692f7ee oauth2-proxy: drop groups scope to avoid invalid_scope 2025-12-07 13:09:29 -03:00
24fbaad040 auth: forward-auth via external auth host (svc traffic flaky) 2025-12-07 13:03:29 -03:00
04aa32a762 oauth2-proxy: schedule on worker rpis 2025-12-07 12:49:38 -03:00
25ee698021 oauth2-proxy: ensure error middleware on auth ingress 2025-12-07 12:03:14 -03:00
4a089876ba auth: use internal oauth2-proxy svc for forward-auth 2025-12-07 11:25:29 -03:00
20bb776625 auth: add 401 redirect middleware to oauth2-proxy 2025-12-07 11:14:25 -03:00
5e59f20bc3 auth: point forward-auth to external auth host 2025-12-07 11:09:09 -03:00
dbede55ad4 oauth2-proxy: temporarily drop group restriction 2025-12-07 10:42:13 -03:00
27e5c9391c auth: add namespace-local forward-auth middlewares 2025-12-07 10:25:44 -03:00
8d5e6c267c auth: wire oauth2-proxy and enable grafana oidc 2025-12-07 02:01:21 -03:00
a55502fe27 add oauth2-proxy for SSO forward-auth 2025-12-06 14:42:24 -03:00
598bdfc727 keycloak: restrict to worker rpis with titan-24 fallback 2025-12-06 01:44:23 -03:00
88c7a1c2aa keycloak: require rpi nodes with titan-24 fallback 2025-12-06 01:40:24 -03:00
f4da27271e keycloak: prefer rpi nodes, avoid titan-24 2025-12-06 01:36:33 -03:00
141c05b08f keycloak: honor xforwarded headers and hostname url 2025-12-06 01:23:07 -03:00
f0a8f6d35e keycloak: enable health/metrics management port 2025-12-06 00:51:47 -03:00
1b01052eda keycloak: set fsGroup for data volume 2025-12-06 00:49:17 -03:00
1d346edd28 keycloak: remove optimized flag for first start 2025-12-06 00:43:24 -03:00
b14a9dcb98 chore: drop AGENTS.md from repo 2025-12-06 00:43:17 -03:00
47caf08885 notes: capture GPU share change and flux branch 2025-12-03 12:28:45 -03:00
0db149605d monitoring: show GPU share over dashboard range 2025-12-02 20:28:35 -03:00
f64e60c5a2 flux: add keycloak kustomization 2025-12-02 18:10:20 -03:00
61c5db5c99 flux: track feature/sso 2025-12-02 18:00:49 -03:00
2db550afdd keycloak: add raw manifests backed by shared postgres 2025-12-02 17:58:19 -03:00
65d389193f Merge pull request 'feature/atlas-monitoring' (#3) from feature/atlas-monitoring into main
Reviewed-on: #3
2025-12-02 20:52:35 +00:00
e80505a773 notes: add postgres centralization guidance 2025-12-02 17:36:37 -03:00
762aa7bb0f notes: add sso plan sketch 2025-12-02 17:14:45 -03:00
839fb94836 notes: update monitoring and next steps 2025-12-02 17:01:32 -03:00
6eba26b359 monitoring: show top12 root disks 2025-12-02 15:21:02 -03:00
ace383bedd monitoring: expand worker/control/root rows 2025-12-02 15:15:21 -03:00
b93636ecb9 monitoring: shrink hottest node row height 2025-12-02 15:12:16 -03:00
5df94a7937 monitoring: fix gpu share query and root bar labels 2025-12-02 14:56:36 -03:00
a3dc9391ee monitoring: polish dashboards and folders 2025-12-02 14:41:39 -03:00
eed67b3db0 monitoring: regen dashboards with gpu details 2025-12-02 13:16:00 -03:00
f1d0970aa0 monitoring: mirror dcgm-exporter as multi-arch 2025-12-02 12:36:24 -03:00
e26ef44d1a monitoring: run dcgm-exporter with nvidia runtime 2025-12-02 12:25:30 -03:00
a18c3e6f67 monitoring: always pull dcgm-exporter tag 2025-12-02 12:19:16 -03:00
ee923df567 monitoring: add registry pull secret for dcgm-exporter 2025-12-02 12:07:11 -03:00
d87a1dbc47 monitoring: allow dcgm rollout with unavailable node 2025-12-02 11:59:55 -03:00
5b89b0533e monitoring: use mirrored dcgm-exporter tag 2025-12-02 11:54:53 -03:00
d99bb06eeb monitoring: reenable dcgm exporter 2025-11-20 13:11:13 -03:00
75f6a59316 traefik: use responding timeouts only 2025-11-18 20:01:16 -03:00
630f1f2a81 traefik: extend upload timeouts 2025-11-18 19:43:19 -03:00
e4f93e85d2 monitoring: control-plane stat and namespace share tweaks 2025-11-18 17:09:13 -03:00
f06be37f44 monitoring: refine network metrics and control-plane allowance 2025-11-18 16:18:52 -03:00
c7b7bc7a6d monitoring: adjust overview spacing and net panels 2025-11-18 15:55:24 -03:00
7b2a69cfe3 monitoring: disable dcgm exporter 2025-11-18 15:10:58 -03:00
909cb4ff26 flux: disable wait for monitoring 2025-11-18 15:04:18 -03:00
5a2575d54e flux: scope monitoring health checks 2025-11-18 14:33:24 -03:00
46410c9a9d monitoring: fix dcgm image 2025-11-18 14:19:23 -03:00
ff056551c7 monitoring: refresh overview dashboards 2025-11-18 14:08:33 -03:00
8e6c0a3cfe monitoring: rework gpu share + gauges 2025-11-18 12:11:47 -03:00
497164a1ad monitoring: clean namespace gpu share and layout 2025-11-18 11:42:24 -03:00
fab5552039 monitoring: resolve pie errors and network data 2025-11-18 11:30:33 -03:00
7009a4f9ff monitoring: fix namespace gpu share and network stats 2025-11-18 11:12:03 -03:00
d7e4bcd533 monitoring: add gpu node fallback 2025-11-18 10:47:24 -03:00
ec76563a86 monitoring: source gpu pie from limits and node nets 2025-11-18 01:01:10 -03:00
5144bbe1f2 monitoring: fix gpu pie data and network panels 2025-11-18 00:31:51 -03:00
ac62387e07 monitoring: stabilize namespace pies and labels 2025-11-18 00:19:45 -03:00
2ba642d49f monitoring: add gpu pie and tidy net panels 2025-11-18 00:11:39 -03:00
beb3243839 Revert GPU pie chart additions 2025-11-17 23:42:55 -03:00
aef3176c1c monitoring: fix hottest stats and gpu share 2025-11-17 23:40:22 -03:00
f4dd1de43f monitoring: reorder namespace pies and add gpu data 2025-11-17 23:18:53 -03:00
0708522b28 monitoring: add namespace gpu share 2025-11-17 23:12:16 -03:00
c53c518301 monitoring: express namespace share as cluster percent 2025-11-17 22:58:57 -03:00
442a89d327 monitoring: fix pie colors & thresholds 2025-11-17 22:39:50 -03:00
255e014e0a monitoring: color namespace pies 2025-11-17 22:36:50 -03:00
cc62f497e9 monitoring: fix namespace share percentages 2025-11-17 22:19:01 -03:00
37e51b361b monitoring: normalize namespace share 2025-11-17 22:06:06 -03:00
be6052c47c monitoring: unify namespace share panels 2025-11-17 21:57:40 -03:00
b59677615c monitoring: worker/control-plane splits 2025-11-17 21:48:12 -03:00
76d3dc6ae2 monitoring: restore top1 hottest stats 2025-11-17 21:20:19 -03:00
53427cc8fa monitoring: fix net/io legend labels 2025-11-17 20:19:20 -03:00
b8998a3c6a monitoring: attach nodes to net/io stats 2025-11-17 20:14:11 -03:00
a67a6a1f3a monitoring: tidy hottest node labels 2025-11-17 20:04:50 -03:00
b28e7501b7 monitoring: show hottest node labels 2025-11-17 20:00:40 -03:00
4aece7e5cb monitoring: fix hottest node labels 2025-11-17 19:56:57 -03:00
bcaa0a3327 monitoring: show hottest node names 2025-11-17 19:53:39 -03:00
41e8a6a582 monitoring: reorder overview stats 2025-11-17 19:49:50 -03:00
a1e731e929 monitoring: fix hottest stats and titan-db scrape 2025-11-17 19:38:40 -03:00
fe8deea9c7 monitoring: tighten overview stats 2025-11-17 19:24:03 -03:00
349d9c56ac monitoring: polish dashboards 2025-11-17 18:55:11 -03:00
8f5781d3cf monitoring: rebuild atlas dashboards 2025-11-17 16:27:38 -03:00
a41f25e66d monitoring: restructure grafana dashboards 2025-11-17 14:22:46 -03:00
b004bf99dc monitoring: enrich dashboards 2025-11-16 12:58:08 -03:00
0b1437b77c monitoring: refresh grafana dashboards 2025-11-15 21:03:11 -03:00
eb3991b628 dashboards: improve public view and fix color 2025-11-15 11:59:48 -03:00
46b6b1f3b8 grafana: set datasource uid 2025-11-15 11:35:27 -03:00
683dc84289 grafana: use atlas metrics hostname 2025-11-15 11:18:40 -03:00
d0b6fbe763 victoria-metrics: revert storageclass change 2025-11-15 11:16:37 -03:00
3cfe639387 monitoring: fix domain 2025-11-14 19:13:40 -03:00
418329e173 monitoring: fix ingress and env formats 2025-11-14 08:51:09 -03:00
394fcf2ee4 grafana: use string host format 2025-11-14 08:37:46 -03:00
465103a57e grafana: fix dashboard provider list 2025-11-14 08:33:53 -03:00
c2cb901102 monitoring: fix grafana values 2025-11-14 08:29:59 -03:00
06337f2b9d monitoring: add grafana and alertmanager 2025-11-14 00:02:59 -03:00
a875b0a42e flux-system: track main branch 2025-11-12 01:06:26 -03:00
a08a2189e1 monitoring: disable wait on node-exporter 2025-11-09 14:03:14 -03:00
45f0100784 core: disable wait to unblock reconciliation 2025-11-09 13:46:56 -03:00
d5da49e566 core: remove gpu health gate 2025-11-09 13:37:59 -03:00
e0e27445c7 gpu: drop runtimeClass from minipc plugin 2025-11-09 13:28:40 -03:00
9f61854bc2 monitoring: disable kube-state annotations 2025-11-09 13:20:50 -03:00
ded87979c5 monitoring: clean helm values 2025-11-09 13:16:21 -03:00
538fca4195 monitoring: disable chart prometheusScrape 2025-11-09 13:11:40 -03:00
5ffcfc7d01 monitoring: annotate kube-state svc manually 2025-11-09 13:07:39 -03:00
f958d65528 monitoring: drop duplicate annotations 2025-11-09 13:03:40 -03:00
4197072593 monitoring: reference prometheus repo 2025-11-09 12:59:03 -03:00
d6f0f375b7 core: point flux to infrastructure path 2025-11-09 12:49:54 -03:00
051691e71f platform: fix relative paths 2025-11-09 12:39:32 -03:00
4a709391e6 platform: include cert-manager clusterissuer 2025-11-09 12:38:20 -03:00
1880df2525 chore: fix vmagent relabel indentation 2025-11-09 12:33:11 -03:00
02ed3e3145 fix: flux automation and monitoring config 2025-11-09 12:31:38 -03:00
b59025d495 refactor: restructure atlas flux layout 2025-11-09 11:48:45 -03:00
306b4b8458 pegasus on 2025-10-09 23:26:20 -05:00
2e6f811d12 Merge pull request 'minor tweaks' (#2) from fea/titan24-gpu into main
Reviewed-on: #2
2025-10-10 02:23:01 +00:00
ea08411128 minor tweaks 2025-10-09 21:21:54 -05:00
a09333ba38 Merge pull request 'gpu(titan-24): add RuntimeClass + NVIDIA device-plugin DS; enable containerd nvidia runtime' (#1) from fea/titan24-gpu into main
Reviewed-on: #1
2025-10-09 23:29:26 +00:00
bff6b83d11 gpu(titan-24): add RuntimeClass + NVIDIA device-plugin DS; enable containerd nvidia runtime 2025-10-09 18:28:20 -05:00
a94bd95248 pegasus chill 2025-10-08 04:26:26 -05:00
2c0622583e storageclass update 2025-10-08 03:13:12 -05:00
86490b74c4 asteria corrections 2025-10-08 00:50:42 -05:00
2ef8a7bbc2 jellyfin restart 2025-10-07 23:28:40 -05:00
ae85dcfeaa monitoring add, jellyfin/pegasus update, and traefik tweaks 2025-10-07 23:26:27 -05:00
41292eff0b jellyfin pvc size increase 2025-10-04 09:00:41 -05:00
a69bd45455 fixed jellyfin pv issue 2025-10-04 08:50:56 -05:00
a3a5b1a9bd jellyfin and pegasus in same group 2025-09-18 10:12:08 -05:00
938f6b336c jellyfin and pegasus in same group 2025-09-18 09:55:00 -05:00
3c97a02fa7 jellyfin and pegasus in same group 2025-09-18 09:38:46 -05:00
980892a5b4 jellyfin and pegasus in same group 2025-09-18 08:52:58 -05:00
adf7d7eb31 pegasus 1.2.32 2025-09-18 02:33:37 -05:00
2fe8f7ea6a gavilon to gavilan 2025-09-17 19:12:03 -05:00
c00b760976 added gavilon to account for pegasus 2025-09-17 18:29:33 -05:00
d78fc77825 pegasus 1.2.31 2025-09-17 18:08:49 -05:00
a6ab2b44af pegasus 1.2.31 2025-09-17 09:38:49 -05:00
3a207c7d94 pegasus 1.2.30 2025-09-17 09:09:24 -05:00
d45cf950ec pegasus 1.2.29 2025-09-17 09:00:52 -05:00
193c820fc6 pegasus 1.2.28 2025-09-17 08:52:11 -05:00
c3524cec3d pegasus 1.2.27 2025-09-17 08:21:51 -05:00
f214e394d0 pegasus 1.2.26 2025-09-17 07:57:36 -05:00
07cffbeec0 pegasus 1.2.25 2025-09-17 07:46:48 -05:00
576221c47d pegasus 1.2.24 2025-09-17 07:24:10 -05:00
f63d39e5aa pegasus 1.2.22 2025-09-17 01:33:11 -05:00
48bce52660 pegasus 1.2.22 2025-09-17 01:02:33 -05:00
5b1a209d9a pegasus 1.2.21 2025-09-17 00:08:18 -05:00
5437b985e8 pegasus 1.2.20 2025-09-16 23:10:58 -05:00
f49e341445 pegasus 1.2.17 2025-09-16 22:45:15 -05:00
8c64a4b067 pegasus 1.2.17 2025-09-16 20:08:50 -05:00
7b5001c581 pegasus 1.2.17 2025-09-16 18:02:55 -05:00
fc0c5c1250 pegasus 1.2.16 2025-09-16 17:18:42 -05:00
39fc2aacde pegasus 1.2.15 2025-09-16 16:56:49 -05:00
33f0d67b34 pegasus 1.2.14 2025-09-16 09:53:26 -05:00
48a2a53023 pegasus 1.2.13 2025-09-16 09:12:41 -05:00
269b6cd7ad pegasus 1.2.12 2025-09-16 08:54:32 -05:00
b06b5d7612 pegasus 1.2.11 2025-09-16 08:29:47 -05:00
0f1994c384 pegasus 1.2.10 2025-09-16 07:19:54 -05:00
3df06948a9 pegasus 1.2.9 2025-09-16 05:33:36 -05:00
30ac7e5ac1 pegasus 1.2.8 2025-09-16 04:09:10 -05:00
0b8e4f012a pegasus 1.2.7 - json fix 2025-09-16 03:35:12 -05:00
2eecba7f55 pegasus 1.2.6 - json fix 2025-09-16 03:05:50 -05:00
bd5f1b3a67 mapping to list 2025-09-16 02:36:43 -05:00
9ff70673e3 pegasus updates 1.2.5 2025-09-16 01:55:36 -05:00
755c54f26b pegasus updates 1.2.4 2025-09-16 01:01:23 -05:00
f4588b4304 pegasus updates 2025-09-16 00:06:26 -05:00
e36f7059ea pegasus updates 2025-09-15 22:52:58 -05:00
6deefc514e pegasus updates 2025-09-15 22:40:00 -05:00
33ff3d20aa pegasus updates 2025-09-15 19:55:20 -05:00
65de7602c9 pegasus: pin image digest + command + probes + tls 2025-09-15 13:00:39 -05:00
9b77a89b0d pegasus flux'd 2025-09-15 12:32:52 -05:00
6a86590484 pegasus flux'd 2025-09-15 12:28:56 -05:00
8cc80f695f pegasus fix 2025-09-15 12:09:24 -05:00
50c25b1b92 pegasus on 2025-09-15 02:45:22 -05:00
a85fac9002 zot fix 2025-09-15 02:15:27 -05:00
5bfeffe31f zot fix 2025-09-15 01:03:32 -05:00
8459ea7058 zot middleware add 2025-09-09 11:27:42 -05:00
6efe79819f zot middleware add 2025-09-09 01:43:13 -05:00
33d07dcf5c zot simplification 2025-09-09 01:16:33 -05:00
7257762c45 zot simplification 2025-09-09 00:22:24 -05:00
bff64dba65 zot configmap update 2025-09-08 23:08:32 -05:00
f72dc43f76 zot version pin 2025-09-08 22:52:41 -05:00
47a73af27e zot troubleshooting 2025-09-08 22:25:41 -05:00
1ee60d9534 zot middleware fix 2025-09-08 21:58:50 -05:00
63d82af268 jitsi corrections 2025-09-07 14:31:53 -05:00
47cbc9b9f6 pegasus corrections 2025-09-07 13:34:06 -05:00
001e9c36fe jitsi setup 2025-09-07 13:20:49 -05:00
456 changed files with 57254 additions and 3901 deletions

0
-c
View File

8
.gitignore vendored Normal file
View File

@ -0,0 +1,8 @@
# Ignore Markdown files by default...
*.md
# ...but keep files named README.md
!README.md
# ...and Markdown files under the knowledge trees
!knowledge/**/*.md
!services/comms/knowledge/**/*.md
# Python cache directories and .pyc/.pyo/.pyd files
__pycache__/
*.py[cod]
# pytest cache and (by its conventional name) a local virtualenv
.pytest_cache
.venv

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# titan-iac
Flux-managed Kubernetes cluster for bstein.dev services.

View File

@ -0,0 +1,13 @@
# clusters/atlas/applications/kustomization.yaml
# Kustomize aggregate that pulls in the service directories listed below.
# NOTE(review): relative to the path in the header above, ../../services
# resolves to clusters/services, not a repo-root services/ directory — confirm.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../../services/crypto
  - ../../services/gitea
  - ../../services/jellyfin
  - ../../services/comms
  - ../../services/monitoring
  - ../../services/logging
  - ../../services/pegasus
  - ../../services/vault
  - ../../services/bstein-dev-home

View File

@ -1,22 +1,23 @@
# infrastructure/flux-system/kustomization-core.yaml
# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: core
name: ai-llm
namespace: flux-system
spec:
interval: 10m
path: ./infrastructure/core
path: ./services/ai-llm
targetNamespace: ai
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true
# Only wait for the NVIDIA device-plugin DaemonSet on titan-22
healthChecks:
- apiVersion: apps/v1
kind: DaemonSet
name: nvidia-device-plugin-minipc
namespace: kube-system
kind: Deployment
name: ollama
namespace: ai
dependsOn:
- name: core

View File

@ -0,0 +1,26 @@
# clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml
# Flux ImageUpdateAutomation for bstein-dev-home: every minute it checks out
# main, applies setter-based image updates under services/bstein-dev-home,
# and pushes the resulting commit back to main as flux-bot.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: bstein-dev-home
  namespace: flux-system
spec:
  interval: 1m0s
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  git:
    checkout:
      ref:
        branch: main
    commit:
      author:
        email: ops@bstein.dev
        name: flux-bot
      # The range emits each updated image with no separator between entries.
      # NOTE(review): newer image-automation-controller releases favor
      # `.Changed` over `.Updated` in templates — confirm against the
      # controller version deployed in this cluster.
      messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}"
    push:
      branch: main
  update:
    strategy: Setters
    path: services/bstein-dev-home

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/applications/bstein-dev-home/kustomization.yaml
# Flux Kustomization that reconciles ./services/bstein-dev-home into the
# bstein-dev-home namespace every 10 minutes.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: bstein-dev-home
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/bstein-dev-home
  # Objects removed from Git are deleted from the cluster.
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: bstein-dev-home
  # Do not block reconciliation on resource readiness.
  wait: false

View File

@ -1,18 +1,17 @@
# infrastructure/flux-system/kustomization-zot.yaml
# clusters/atlas/flux-system/applications/communication/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: zot
name: comms
namespace: flux-system
spec:
interval: 10m
path: ./services/zot
targetNamespace: zot
prune: false
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true
path: ./services/comms
targetNamespace: comms
timeout: 2m
dependsOn:
- name: core
- name: traefik

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-crypto.yaml
# clusters/atlas/flux-system/applications/crypto/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-gitea.yaml
# clusters/atlas/flux-system/applications/gitea/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:

View File

@ -0,0 +1,27 @@
# clusters/atlas/flux-system/applications/harbor/image-automation.yaml
# Flux ImageUpdateAutomation for Harbor. Currently suspended; when resumed it
# would apply setter-based image updates under ./services/harbor and push them
# to feature/ci-gitops as flux-bot.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: harbor
  namespace: harbor
spec:
  # Automation is switched off until this flag is removed or set to false.
  suspend: true
  interval: 5m0s
  # The GitRepository lives in flux-system while this object lives in harbor,
  # so this is a cross-namespace reference.
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  git:
    checkout:
      ref:
        branch: feature/ci-gitops
    commit:
      author:
        email: ops@bstein.dev
        name: flux-bot
      messageTemplate: "chore(harbor): apply image updates"
    push:
      branch: feature/ci-gitops
  update:
    strategy: Setters
    path: ./services/harbor

View File

@ -0,0 +1,23 @@
# clusters/atlas/flux-system/applications/harbor/kustomization.yaml
# Flux Kustomization that reconciles ./services/harbor into the harbor
# namespace after the core Kustomization, gated on the harbor HelmRelease.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: harbor
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/harbor
  targetNamespace: harbor
  # Pruning is disabled: objects removed from Git stay in the cluster.
  prune: false
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Only the HelmRelease is health-checked; wait is off for everything else.
  healthChecks:
    - apiVersion: helm.toolkit.fluxcd.io/v2
      kind: HelmRelease
      name: harbor
      namespace: harbor
  wait: false
  dependsOn:
    - name: core

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-jellyfin.yaml
# clusters/atlas/flux-system/applications/jellyfin/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
@ -15,5 +15,6 @@ spec:
namespace: flux-system
dependsOn:
- name: core
- name: openldap
wait: true
timeout: 5m

View File

@ -0,0 +1,27 @@
# clusters/atlas/flux-system/applications/jenkins/kustomization.yaml
# Flux Kustomization that reconciles ./services/jenkins into the jenkins
# namespace once the helm and traefik Kustomizations are ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: jenkins
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/jenkins
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: jenkins
  dependsOn:
    - name: helm
    - name: traefik
  # Health is judged on these two objects only, since wait is disabled below.
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: jenkins
      namespace: jenkins
    - apiVersion: v1
      kind: Service
      name: jenkins
      namespace: jenkins
  wait: false

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/applications/keycloak/kustomization.yaml
# Flux Kustomization that reconciles ./services/keycloak into the sso
# namespace every 10 minutes.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: keycloak
  namespace: flux-system
spec:
  interval: 10m
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./services/keycloak
  targetNamespace: sso
  # Upper bound for apply and health-check operations on each reconcile.
  timeout: 2m

View File

@ -0,0 +1,29 @@
# clusters/atlas/flux-system/applications/kustomization.yaml
# Kustomize aggregate of the per-application Flux manifests in this directory.
# NOTE(review): the comms entry points at comms/, while the comms Flux
# Kustomization's own header names applications/communication/ — confirm the
# directory name matches.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - gitea/kustomization.yaml
  - vault/kustomization.yaml
  - vaultwarden/kustomization.yaml
  - comms/kustomization.yaml
  - crypto/kustomization.yaml
  - monerod/kustomization.yaml
  - pegasus/kustomization.yaml
  - pegasus/image-automation.yaml
  - bstein-dev-home/kustomization.yaml
  - bstein-dev-home/image-automation.yaml
  - harbor/kustomization.yaml
  - harbor/image-automation.yaml
  - jellyfin/kustomization.yaml
  - xmr-miner/kustomization.yaml
  - sui-metrics/kustomization.yaml
  - openldap/kustomization.yaml
  - keycloak/kustomization.yaml
  - oauth2-proxy/kustomization.yaml
  - mailu/kustomization.yaml
  - jenkins/kustomization.yaml
  - ai-llm/kustomization.yaml
  - nextcloud/kustomization.yaml
  - nextcloud-mail-sync/kustomization.yaml
  - outline/kustomization.yaml
  - planka/kustomization.yaml

View File

@ -0,0 +1,18 @@
# clusters/atlas/flux-system/applications/mailu/kustomization.yaml
# Flux Kustomization that reconciles ./services/mailu into the
# mailu-mailserver namespace once the helm Kustomization is ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: mailu
  namespace: flux-system
spec:
  interval: 10m
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  path: ./services/mailu
  targetNamespace: mailu-mailserver
  prune: true
  # Wait for all applied resources to become ready before reporting success.
  wait: true
  dependsOn:
    - name: helm

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-monerod.yaml
# clusters/atlas/flux-system/applications/monerod/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,17 @@
# clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml
# Flux Kustomization that reconciles ./services/nextcloud-mail-sync into the
# nextcloud namespace once the keycloak Kustomization is ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: nextcloud-mail-sync
  namespace: flux-system
spec:
  interval: 10m
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./services/nextcloud-mail-sync
  targetNamespace: nextcloud
  timeout: 2m
  dependsOn:
    - name: keycloak

View File

@ -0,0 +1,16 @@
# clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml
# Flux Kustomization that reconciles ./services/nextcloud into the nextcloud
# namespace and waits for the applied resources to become ready.
# Uses the GA v1 API, matching the sibling Kustomizations (was v1beta2);
# none of the fields below differ between the two versions.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: nextcloud
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/nextcloud
  targetNamespace: nextcloud
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  wait: true

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml
# Flux Kustomization that applies ./services/oauth2-proxy into the sso namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: oauth2-proxy
  namespace: flux-system
spec:
  interval: 10m
  timeout: 2m
  path: ./services/oauth2-proxy
  prune: true
  targetNamespace: sso
  # No namespace on the source reference, so it resolves in this object's namespace.
  sourceRef:
    kind: GitRepository
    name: flux-system

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/applications/openldap/kustomization.yaml
# Flux Kustomization that applies ./services/openldap into the sso namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: openldap
  namespace: flux-system
spec:
  interval: 10m
  timeout: 5m
  path: ./services/openldap
  prune: true
  # Block each reconciliation (up to the timeout above) until resources are ready.
  wait: true
  targetNamespace: sso
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciled only after the `core` Kustomization.
  dependsOn:
    - name: core

View File

@ -0,0 +1,28 @@
# clusters/atlas/flux-system/applications/outline/kustomization.yaml
# Flux Kustomization that applies ./services/outline into the outline namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: outline
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/outline
  prune: true
  targetNamespace: outline
  sourceRef:
    kind: GitRepository
    name: flux-system
  # Reconciled only after identity, mail and ingress Kustomizations.
  dependsOn:
    - name: keycloak
    - name: mailu
    - name: traefik
  # Do not wait on every applied object; only the explicit checks below are listed.
  wait: false
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: outline
      namespace: outline
    - apiVersion: v1
      kind: Service
      name: outline
      namespace: outline

View File

@ -0,0 +1,26 @@
# clusters/atlas/flux-system/applications/pegasus/image-automation.yaml
# Flux ImageUpdateAutomation that rewrites image setters under services/pegasus
# and commits the result as flux-bot.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: pegasus
  namespace: flux-system
spec:
  interval: 1m0s
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Only files below this path are scanned for setter markers.
  update:
    path: services/pegasus
    strategy: Setters
  git:
    # The same branch is checked out and pushed to.
    checkout:
      ref:
        branch: feature/ci-gitops
    push:
      branch: feature/ci-gitops
    commit:
      messageTemplate: "chore(pegasus): apply image updates"
      author:
        name: flux-bot
        email: ops@bstein.dev

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/applications/pegasus/kustomization.yaml
# Flux Kustomization that applies ./services/pegasus into the jellyfin namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: pegasus
  namespace: flux-system
spec:
  interval: 10m
  timeout: 5m
  path: ./services/pegasus
  prune: true
  # Block each reconciliation (up to the timeout above) until resources are ready.
  wait: true
  targetNamespace: jellyfin
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciled only after the `core` Kustomization.
  dependsOn:
    - name: core

View File

@ -0,0 +1,28 @@
# clusters/atlas/flux-system/applications/planka/kustomization.yaml
# Flux Kustomization that applies ./services/planka into the planka namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: planka
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/planka
  prune: true
  targetNamespace: planka
  sourceRef:
    kind: GitRepository
    name: flux-system
  # Reconciled only after identity, mail and ingress Kustomizations.
  dependsOn:
    - name: keycloak
    - name: mailu
    - name: traefik
  # Do not wait on every applied object; only the explicit checks below are listed.
  wait: false
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: planka
      namespace: planka
    - apiVersion: v1
      kind: Service
      name: planka
      namespace: planka

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/applications/sui-metrics/kustomization.yaml
# Flux Kustomization that applies the atlas overlay of sui-metrics into the
# sui-metrics namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: sui-metrics
  namespace: flux-system
spec:
  interval: 10m
  timeout: 5m
  path: ./services/sui-metrics/overlays/atlas
  prune: true
  # Block each reconciliation (up to the timeout above) until resources are ready.
  wait: true
  targetNamespace: sui-metrics
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciled only after the `monitoring` Kustomization.
  dependsOn:
    - name: monitoring

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-vault.yaml
# clusters/atlas/flux-system/applications/vault/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,20 @@
# clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml
# Flux Kustomization that applies ./services/vaultwarden into the vaultwarden namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: vaultwarden
  namespace: flux-system
spec:
  interval: 10m
  # Reconciliation is explicitly left enabled.
  suspend: false
  path: ./services/vaultwarden
  prune: true
  wait: true
  targetNamespace: vaultwarden
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciled only after the `helm` and `traefik` Kustomizations.
  dependsOn:
    - name: helm
    - name: traefik

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-core.yaml
# clusters/atlas/flux-system/applications/xmr-miner/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -8,7 +8,7 @@ metadata:
spec:
interval: 1m0s
ref:
branch: main
branch: feature/sso-hardening
secretRef:
name: flux-system-gitea
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
@ -20,7 +20,7 @@ metadata:
namespace: flux-system
spec:
interval: 10m0s
path: ./
path: ./clusters/atlas/flux-system
prune: true
sourceRef:
kind: GitRepository

View File

@ -0,0 +1,8 @@
# clusters/atlas/flux-system/kustomization.yaml
# Kustomize entry point for the atlas cluster: the Flux components and sync
# manifests, followed by the platform and applications directories.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Flux toolkit manifests.
  - gotk-components.yaml
  - gotk-sync.yaml
  # Directories holding the per-component Flux Kustomizations.
  - platform
  - applications

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/platform/core/kustomization.yaml
# Flux Kustomization that applies ./infrastructure/core.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: core
  namespace: flux-system
spec:
  interval: 10m
  path: ./infrastructure/core
  prune: true
  # Apply and move on; readiness of the applied objects is not awaited.
  wait: false
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system

View File

@ -0,0 +1,20 @@
# clusters/atlas/flux-system/platform/gitops-ui/kustomization.yaml
# Flux Kustomization that applies ./services/gitops-ui into the flux-system namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: gitops-ui
  namespace: flux-system
spec:
  interval: 10m
  timeout: 10m
  path: ./services/gitops-ui
  prune: true
  # Block each reconciliation (up to the timeout above) until resources are ready.
  wait: true
  targetNamespace: flux-system
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciled only after the `helm` and `traefik` Kustomizations.
  dependsOn:
    - name: helm
    - name: traefik

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-helm.yaml
# clusters/atlas/flux-system/platform/helm/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/platform/kustomization.yaml
# Index of the platform-level Flux Kustomization objects for the atlas cluster.
# Every entry is a path relative to this directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - core/kustomization.yaml
  - helm/kustomization.yaml
  - metallb/kustomization.yaml
  - traefik/kustomization.yaml
  - gitops-ui/kustomization.yaml
  - monitoring/kustomization.yaml
  - logging/kustomization.yaml
  - maintenance/kustomization.yaml
  - longhorn-ui/kustomization.yaml
  - postgres/kustomization.yaml
  # Was `../platform/vault-csi/kustomization.yaml`, which left this directory
  # and re-entered it; the direct relative path names the same file.
  - vault-csi/kustomization.yaml

View File

@ -0,0 +1,14 @@
# clusters/atlas/flux-system/platform/logging/kustomization.yaml
# Flux Kustomization that applies ./services/logging.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: logging
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/logging
  prune: true
  # Apply and move on; readiness of the applied objects is not awaited.
  wait: false
  sourceRef:
    kind: GitRepository
    name: flux-system

View File

@ -1,3 +1,4 @@
# clusters/atlas/flux-system/platform/longhorn-ui/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,14 @@
# clusters/atlas/flux-system/platform/maintenance/kustomization.yaml
# Flux Kustomization that applies ./services/maintenance.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: maintenance
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/maintenance
  prune: true
  # Apply and move on; readiness of the applied objects is not awaited.
  wait: false
  sourceRef:
    kind: GitRepository
    name: flux-system

View File

@ -0,0 +1,16 @@
# clusters/atlas/flux-system/platform/metallb/kustomization.yaml
# Flux Kustomization that applies ./infrastructure/metallb into the metallb-system namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: metallb
  namespace: flux-system
spec:
  interval: 30m
  path: ./infrastructure/metallb
  prune: true
  # Block each reconciliation until the applied resources report ready.
  wait: true
  targetNamespace: metallb-system
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system

View File

@ -0,0 +1,14 @@
# clusters/atlas/flux-system/platform/monitoring/kustomization.yaml
# Flux Kustomization that applies ./services/monitoring.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: monitoring
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/monitoring
  prune: true
  # Apply and move on; readiness of the applied objects is not awaited.
  wait: false
  sourceRef:
    kind: GitRepository
    name: flux-system

View File

@ -0,0 +1,24 @@
# clusters/atlas/flux-system/platform/postgres/kustomization.yaml
# Flux Kustomization that applies ./infrastructure/postgres into the postgres namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: postgres
  namespace: flux-system
spec:
  interval: 10m
  path: ./infrastructure/postgres
  prune: true
  # NOTE(review): `force` lets Flux recreate objects whose immutable fields
  # changed; confirm this is intended for the database StatefulSet.
  force: true
  targetNamespace: postgres
  sourceRef:
    kind: GitRepository
    name: flux-system
  # Reconciled only after the `vault` and `vault-csi` Kustomizations.
  dependsOn:
    - name: vault
    - name: vault-csi
  # NOTE(review): Flux documents that `healthChecks` is ignored when `wait` is
  # true; confirm whether the explicit StatefulSet check is still wanted.
  wait: true
  healthChecks:
    - apiVersion: apps/v1
      kind: StatefulSet
      name: postgres
      namespace: postgres

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/platform/traefik/kustomization.yaml
# Flux Kustomization that applies ./infrastructure/traefik into the traefik namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: traefik
  namespace: flux-system
spec:
  interval: 10m
  path: ./infrastructure/traefik
  prune: true
  # Block each reconciliation until the applied resources report ready.
  wait: true
  targetNamespace: traefik
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciled only after the `core` and `metallb` Kustomizations.
  dependsOn:
    - name: core
    - name: metallb

View File

@ -0,0 +1,16 @@
# clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml
# Flux Kustomization that applies ./infrastructure/vault-csi into kube-system.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: vault-csi
  namespace: flux-system
spec:
  interval: 30m
  path: ./infrastructure/vault-csi
  prune: true
  # Block each reconciliation until the applied resources report ready.
  wait: true
  targetNamespace: kube-system
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system

View File

@ -0,0 +1,8 @@
# clusters/atlas/platform/kustomization.yaml
# Platform composition for atlas. Paths climb three levels from
# clusters/atlas/platform to reach the repository's infrastructure/ tree.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Shared base module and the atlas-ha profile.
  - ../../../infrastructure/modules/base
  - ../../../infrastructure/modules/profiles/atlas-ha
  # cert-manager issuer manifest and the MetalLB directory.
  - ../../../infrastructure/sources/cert-manager/letsencrypt.yaml
  - ../../../infrastructure/metallb

View File

@ -0,0 +1,4 @@
# clusters/oceanus/applications/kustomization.yaml
# Intentionally empty: no application resources are listed for oceanus.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@ -0,0 +1,9 @@
# clusters/oceanus/flux-system/kustomization.yaml
# Kustomize entry point for oceanus; pulls in the sibling platform and
# applications directories.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Populate when oceanus cluster is bootstrapped with Flux.
  # - gotk-components.yaml
  # - gotk-sync.yaml
  - ../platform
  - ../applications

View File

@ -0,0 +1,6 @@
# clusters/oceanus/platform/kustomization.yaml
# Platform composition for oceanus: the shared base module plus the
# oceanus-validator profile.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # This file sits three levels below the repository root
  # (clusters/oceanus/platform), so three `..` segments are needed to reach
  # infrastructure/. The previous `../../` resolved to clusters/infrastructure/.
  - ../../../infrastructure/modules/base
  - ../../../infrastructure/modules/profiles/oceanus-validator

View File

@ -0,0 +1,16 @@
# Repackages Data Prepper 2.8.0 on top of an Eclipse Temurin 17 JRE image.
# The first stage is pulled for the build platform and is used only as a
# donor of the /usr/share/data-prepper tree.
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source

# No --platform flag: a final stage already builds for the target platform,
# so spelling out $TARGETPLATFORM was redundant.
FROM eclipse-temurin:17-jre

ENV DATA_PREPPER_PATH=/usr/share/data-prepper

# Unprivileged runtime user (uid 10001) and a log directory it owns.
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
    && mkdir -p /var/log/data-prepper \
    && chown -R 10001:10001 /var/log/data-prepper

# Assign ownership during the copy; a separate recursive chown would store the
# whole copied tree a second time in another layer.
COPY --from=source --chown=10001:10001 /usr/share/data-prepper /usr/share/data-prepper

USER 10001
WORKDIR /usr/share/data-prepper
CMD ["bin/data-prepper"]

View File

@ -0,0 +1,2 @@
# hosts/group_vars/all.yaml
---
# Variables shared by every host in the inventory.
validator_version: latest

View File

@ -0,0 +1,2 @@
# hosts/host_vars/titan-24.yaml
---
# Variables that apply to titan-24 only.
validator_compose_path: /opt/sui-validator

28
hosts/inventory/lab.yaml Normal file
View File

@ -0,0 +1,28 @@
# hosts/inventory/lab.yaml
# Replace ansible_host and ansible_user values with real connectivity details.
# NOTE(review): nesting was reconstructed from a flattened view; `oceanus` is
# placed under baremetal.hosts because it carries host variables directly.
# Confirm against the real file.
all:
  children:
    # Cluster nodes.
    atlas:
      hosts:
        titan-24:
          ansible_host: REPLACE_ME
          ansible_user: ubuntu
          roleset: tethys_hybrid
        titan-22:
          ansible_host: REPLACE_ME
          ansible_user: debian
          roleset: minipc_gpu
    # Standalone machines.
    baremetal:
      hosts:
        titan-db:
          ansible_host: REPLACE_ME
          ansible_user: postgres
          roleset: database
        titan-jh:
          ansible_host: REPLACE_ME
          ansible_user: jump
          roleset: jumphost
        oceanus:
          ansible_host: REPLACE_ME
          ansible_user: validator
          roleset: validator

29
hosts/playbooks/site.yaml Normal file
View File

@ -0,0 +1,29 @@
# hosts/playbooks/site.yaml
# One play per host; each applies the `common` role and then its host-specific role.
---
- name: Configure titan-db
  hosts: titan-db
  gather_facts: true
  roles:
    - common
    - titan_db

- name: Configure titan-jh
  hosts: titan-jh
  gather_facts: true
  roles:
    - common
    - titan_jh

- name: Configure oceanus validator host
  hosts: oceanus
  gather_facts: true
  roles:
    - common
    - oceanus_base

- name: Prepare hybrid tethys node
  hosts: titan-24
  gather_facts: true
  roles:
    - common
    - tethys_canary

View File

@ -0,0 +1,9 @@
# hosts/roles/common/tasks/main.yaml
# Baseline tasks applied to every host.
---
- name: Ensure base packages present
  ansible.builtin.package:
    state: present
    name:
      - curl
      - vim
  tags:
    - common
    - packages

View File

@ -0,0 +1,6 @@
# hosts/roles/oceanus_base/tasks/main.yaml
# Stub role: it only prints a reminder message.
---
- name: Placeholder for oceanus base configuration
  ansible.builtin.debug:
    msg: "Install validator prerequisites and monitoring exporters here."
  tags:
    - oceanus

View File

@ -0,0 +1,6 @@
# hosts/roles/tethys_canary/tasks/main.yaml
# Stub role: it only prints a reminder message.
---
- name: Placeholder for SUI validator container runtime setup
  ansible.builtin.debug:
    msg: "Configure container runtime and validator compose stack here."
  tags:
    - tethys
    - validator

View File

@ -0,0 +1,6 @@
# hosts/roles/titan_db/tasks/main.yaml
# Stub role: it only prints a reminder message.
---
- name: Placeholder for titan-db provisioning
  ansible.builtin.debug:
    msg: "Install database packages, configure backups, and manage users here."
  tags:
    - titan_db

View File

@ -0,0 +1,19 @@
# hosts/roles/titan_jh/tasks/main.yaml
# Installs and starts the Prometheus node exporter; hardening is still a stub.
---
- name: Install node exporter
  ansible.builtin.package:
    name: prometheus-node-exporter
    state: present
  tags:
    - jumphost
    - monitoring

- name: Enable node exporter
  ansible.builtin.service:
    name: prometheus-node-exporter
    state: started
    enabled: true
  tags:
    - jumphost
    - monitoring

- name: Placeholder for jumphost hardening
  ansible.builtin.debug:
    msg: "Harden SSH, manage bastion tooling, and configure audit logging here."
  tags:
    - jumphost

View File

@ -1,5 +0,0 @@
# infrastructure/core/gpu/daemonsets/profiles/jetson-only/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-jetson

View File

@ -1,6 +0,0 @@
# infrastructure/core/gpu/daemonsets/profiles/minipc-and-jetson/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-minipc
- ../../device-plugin-jetson

View File

@ -2,7 +2,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- base
# - gpu/profiles/jetson-only
# - gpu/profiles/minipc-and-jetson
- gpu/profiles/minipc-only
- ../modules/base
- ../modules/profiles/atlas-ha
- ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -2,15 +2,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gotk-components.yaml
- gotk-sync.yaml
- kustomization-zot.yaml
- kustomization-core.yaml
- kustomization-helm.yaml
- kustomization-gitea.yaml
- kustomization-vault.yaml
- kustomization-crypto.yaml
- kustomization-monerod.yaml
- kustomization-jellyfin.yaml
- kustomization-xmr-miner.yaml
- kustomization-longhorn-ui.yaml
- ../clusters/atlas/flux-system

View File

@ -7,7 +7,7 @@ metadata:
annotations:
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-basicauth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd
traefik.ingress.kubernetes.io/router.middlewares: ""
spec:
ingressClassName: traefik
tls:
@ -21,6 +21,6 @@ spec:
pathType: Prefix
backend:
service:
name: longhorn-frontend
name: oauth2-proxy-longhorn
port:
number: 80

View File

@ -4,3 +4,4 @@ kind: Kustomization
resources:
- middleware.yaml
- ingress.yaml
- oauth2-proxy-longhorn.yaml

View File

@ -20,3 +20,20 @@ spec:
headers:
customRequestHeaders:
X-Forwarded-Proto: "https"
---
# Traefik forward-auth middleware for the Longhorn UI: requests are checked
# against the endpoint below before being forwarded.
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
  name: longhorn-forward-auth
  namespace: longhorn-system
spec:
  forwardAuth:
    address: https://auth.bstein.dev/oauth2/auth
    trustForwardHeader: true
    # Headers copied from the auth response onto the forwarded request.
    authResponseHeaders:
      - Authorization
      - X-Auth-Request-Email
      - X-Auth-Request-User
      - X-Auth-Request-Groups

View File

@ -0,0 +1,102 @@
# infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml
# oauth2-proxy instance placed in front of the Longhorn UI: a Service on
# port 80 and the two-replica Deployment behind it.
apiVersion: v1
kind: Service
metadata:
  name: oauth2-proxy-longhorn
  namespace: longhorn-system
  labels:
    app: oauth2-proxy-longhorn
spec:
  selector:
    app: oauth2-proxy-longhorn
  ports:
    # Service port 80 maps to the proxy's listen port 4180.
    - name: http
      port: 80
      targetPort: 4180
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: oauth2-proxy-longhorn
  namespace: longhorn-system
  labels:
    app: oauth2-proxy-longhorn
spec:
  replicas: 2
  selector:
    matchLabels:
      app: oauth2-proxy-longhorn
  template:
    metadata:
      labels:
        app: oauth2-proxy-longhorn
    spec:
      # Worker nodes only, with a soft preference for rpi5/rpi4 hardware.
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - "rpi5"
                      - "rpi4"
      containers:
        - name: oauth2-proxy
          image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0
          imagePullPolicy: IfNotPresent
          ports:
            - name: http
              containerPort: 4180
          # Client id, client secret and cookie secret all come from one Secret.
          env:
            - name: OAUTH2_PROXY_CLIENT_ID
              valueFrom:
                secretKeyRef:
                  name: oauth2-proxy-longhorn-oidc
                  key: client_id
            - name: OAUTH2_PROXY_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
                  name: oauth2-proxy-longhorn-oidc
                  key: client_secret
            - name: OAUTH2_PROXY_COOKIE_SECRET
              valueFrom:
                secretKeyRef:
                  name: oauth2-proxy-longhorn-oidc
                  key: cookie_secret
          # OIDC against the atlas realm; any e-mail domain is accepted and
          # access is limited by the `admin` group flag.
          # NOTE(review): unverified e-mail addresses are explicitly allowed.
          args:
            - --provider=oidc
            - --redirect-url=https://longhorn.bstein.dev/oauth2/callback
            - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
            - --scope=openid profile email groups
            - --email-domain=*
            - --allowed-group=admin
            - --set-xauthrequest=true
            - --pass-access-token=true
            - --set-authorization-header=true
            - --cookie-secure=true
            - --cookie-samesite=lax
            - --cookie-refresh=20m
            - --cookie-expire=168h
            - --insecure-oidc-allow-unverified-email=true
            - --upstream=http://longhorn-frontend.longhorn-system.svc.cluster.local
            - --http-address=0.0.0.0:4180
            - --skip-provider-button=true
            - --skip-jwt-bearer-tokens=true
            - --oidc-groups-claim=groups
            - --cookie-domain=longhorn.bstein.dev
          # Both probes hit /ping on the proxy port.
          readinessProbe:
            initialDelaySeconds: 5
            periodSeconds: 10
            httpGet:
              path: /ping
              port: 4180
          livenessProbe:
            initialDelaySeconds: 20
            periodSeconds: 20
            httpGet:
              path: /ping
              port: 4180

View File

@ -0,0 +1,20 @@
# infrastructure/metallb/ippool.yaml
# MetalLB address pool and the L2 advertisement that announces it.
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: communication-pool
  namespace: metallb-system
spec:
  autoAssign: true
  addresses:
    - 192.168.22.4-192.168.22.6
    # Single-address range.
    - 192.168.22.9-192.168.22.9
---
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
  name: communication-adv
  namespace: metallb-system
spec:
  # Announces only the pool defined above.
  ipAddressPools:
    - communication-pool

View File

@ -0,0 +1,10 @@
# infrastructure/metallb/kustomization.yaml
# Rendered MetalLB manifests plus the namespace and address pool, with two
# local patches applied on top.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - metallb-rendered.yaml
  - ippool.yaml
# `patchesStrategicMerge` is deprecated in Kustomize v5; `patches` with a
# `path` applies the same strategic-merge patch files, each targeted by the
# kind/name/namespace written in the patch itself.
patches:
  - path: patches/node-placement.yaml
  - path: patches/speaker-loglevel.yaml

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,5 @@
# infrastructure/metallb/namespace.yaml
# Namespace that holds the MetalLB manifests in this directory.
apiVersion: v1
kind: Namespace
metadata:
  name: metallb-system

View File

@ -0,0 +1,27 @@
# infrastructure/metallb/patches/node-placement.yaml
# Patch for the metallb-controller Deployment: restricts scheduling to
# rpi4/rpi5 hardware and sets the controller's argument list.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: metallb-controller
  namespace: metallb-system
spec:
  template:
    spec:
      # Hard requirement: the node's `hardware` label must be rpi4 or rpi5.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi4
                      - rpi5
      containers:
        - name: controller
          args:
            - --port=7472
            - --log-level=info
            - --webhook-mode=enabled
            - --tls-min-version=VersionTLS12
            - --lb-class=metallb

View File

@ -0,0 +1,15 @@
# infrastructure/metallb/patches/speaker-loglevel.yaml
# Patch for the metallb-speaker DaemonSet: sets the speaker's argument list.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: metallb-speaker
  namespace: metallb-system
spec:
  template:
    spec:
      containers:
        - name: speaker
          # Same port, log level and lb-class values as the controller patch.
          args:
            - --port=7472
            - --log-level=info
            - --lb-class=metallb

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/kustomization.yaml
# infrastructure/modules/base/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/priorityclass/kustomization.yaml
# infrastructure/modules/base/priorityclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/priorityclass/scavenger.yaml
# infrastructure/modules/base/priorityclass/scavenger.yaml
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/kustomization.yaml
# infrastructure/modules/base/runtimeclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# services/jellyfin/runtimeclass.yaml
# infrastructure/modules/base/runtimeclass/runtimeclass.yaml
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/asteria.yaml
# infrastructure/modules/base/storageclass/asteria.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
@ -8,6 +8,9 @@ parameters:
fromBackup: ""
numberOfReplicas: "2"
staleReplicaTimeout: "30"
fsType: "ext4"
replicaAutoBalance: "least-effort"
dataLocality: "disabled"
provisioner: driver.longhorn.io
reclaimPolicy: Retain
allowVolumeExpansion: true

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/astreae.yaml
# infrastructure/modules/base/storageclass/astreae.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/kustomization.yaml
# infrastructure/modules/base/storageclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -0,0 +1,8 @@
# infrastructure/modules/profiles/atlas-ha/kustomization.yaml
# Profile that combines the shared device-plugin config with the jetson,
# minipc and tethys device-plugin components.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # ConfigMap mounted by the device-plugin DaemonSets.
  - ../components/device-plugin-config
  # One component per hardware flavour.
  - ../components/device-plugin-jetson
  - ../components/device-plugin-minipc
  - ../components/device-plugin-tethys

View File

@ -0,0 +1,15 @@
# infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml
# Device-plugin configuration file; the tethys DaemonSet in this tree mounts
# it at /config and passes it via --config-file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nvidia-device-plugin-config
  namespace: kube-system
data:
  # Time-slicing settings: nvidia.com/gpu is listed with 4 replicas.
  config.yaml: |
    version: v1
    sharing:
      timeSlicing:
        renameByDefault: true
        resources:
          - name: nvidia.com/gpu
            replicas: 4

View File

@ -0,0 +1,5 @@
# infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml
# Component that ships only the device-plugin ConfigMap.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - configmap.yaml

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-jetson/daemonset.yaml
# infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
@ -30,7 +30,8 @@ spec:
imagePullPolicy: IfNotPresent
args:
- "--fail-on-init-error=false"
- "--device-list-strategy=envvar,cdi"
- "--device-list-strategy=envvar"
- "--config-file=/config/config.yaml"
securityContext:
privileged: true
env:
@ -41,7 +42,12 @@ spec:
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
- name: config
mountPath: /config
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- name: config
configMap:
name: nvidia-device-plugin-config

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-jetson/kustomization.yaml
# infrastructure/modules/profiles/components/device-plugin-jetson/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-minipc/daemonset.yaml
# infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
@ -24,7 +24,6 @@ spec:
tolerations:
- operator: Exists
priorityClassName: system-node-critical
runtimeClassName: nvidia
containers:
- name: nvidia-device-plugin-ctr
image: nvcr.io/nvidia/k8s-device-plugin:v0.16.2
@ -33,6 +32,7 @@ spec:
- "--fail-on-init-error=false"
- "--device-list-strategy=envvar"
- "--mig-strategy=none"
- "--config-file=/config/config.yaml"
securityContext:
privileged: true
env:
@ -43,7 +43,12 @@ spec:
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
- name: config
mountPath: /config
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
- name: config
configMap:
name: nvidia-device-plugin-config

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-minipc/kustomization.yaml
# infrastructure/modules/profiles/components/device-plugin-minipc/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -0,0 +1,55 @@
# infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml
# NVIDIA device plugin DaemonSet whose node selector matches only the amd64
# node with hostname titan-24.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-tethys
  namespace: kube-system
  labels:
    app.kubernetes.io/name: nvidia-device-plugin
    app.kubernetes.io/instance: titan24
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: nvidia-device-plugin
      app.kubernetes.io/instance: titan24
  template:
    metadata:
      labels:
        app.kubernetes.io/name: nvidia-device-plugin
        app.kubernetes.io/instance: titan24
    spec:
      priorityClassName: system-node-critical
      runtimeClassName: nvidia
      nodeSelector:
        kubernetes.io/hostname: titan-24
        kubernetes.io/arch: amd64
      # A bare `Exists` toleration matches every taint.
      tolerations:
        - operator: Exists
      volumes:
        # Host directory shared with the container at the same path.
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        # ConfigMap referenced by --config-file below.
        - name: config
          configMap:
            name: nvidia-device-plugin-config
      containers:
        - name: nvidia-device-plugin-ctr
          image: nvcr.io/nvidia/k8s-device-plugin:v0.16.2
          imagePullPolicy: IfNotPresent
          securityContext:
            privileged: true
          args:
            - "--fail-on-init-error=false"
            - "--device-list-strategy=envvar"
            - "--mig-strategy=none"
            - "--config-file=/config/config.yaml"
          env:
            - name: NVIDIA_VISIBLE_DEVICES
              value: "all"
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,video,utility"
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: config
              mountPath: /config

View File

@ -0,0 +1,5 @@
# infrastructure/modules/profiles/components/device-plugin-tethys/kustomization.yaml
# Component that ships only the tethys device-plugin DaemonSet.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - daemonset.yaml

View File

@ -0,0 +1,4 @@
# infrastructure/modules/profiles/oceanus-validator/kustomization.yaml
# Intentionally empty profile: no resources are listed.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@ -0,0 +1,6 @@
# infrastructure/modules/profiles/tethys-hybrid/kustomization.yaml
# Profile that combines the shared device-plugin config with the tethys
# device-plugin component.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../components/device-plugin-config
  - ../components/device-plugin-tethys

View File

@ -0,0 +1,10 @@
# infrastructure/postgres/kustomization.yaml
# Kustomize bundle for the postgres manifests in this directory; every
# resource is placed in the postgres namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: postgres
resources:
  - namespace.yaml
  # Service account and Vault secret wiring used by the StatefulSet.
  - serviceaccount.yaml
  - secretproviderclass.yaml
  # Workload and its headless Service.
  - service.yaml
  - statefulset.yaml

View File

@ -0,0 +1,5 @@
# infrastructure/postgres/namespace.yaml
# Namespace that holds the postgres manifests in this directory.
apiVersion: v1
kind: Namespace
metadata:
  name: postgres

View File

@ -0,0 +1,15 @@
# infrastructure/postgres/secretproviderclass.yaml
# Secrets Store CSI provider class that reads the Postgres password from Vault.
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  name: postgres-vault
  namespace: postgres
spec:
  provider: vault
  parameters:
    roleName: "postgres"
    vaultAddress: "http://vault.vault.svc.cluster.local:8200"
    # A single object named postgres_password, taken from key
    # POSTGRES_PASSWORD at kv/data/postgres.
    objects: |
      - objectName: "postgres_password"
        secretPath: "kv/data/postgres"
        secretKey: "POSTGRES_PASSWORD"

View File

@ -0,0 +1,15 @@
# infrastructure/postgres/service.yaml
# Headless Service (clusterIP: None) selecting pods labelled app=postgres.
apiVersion: v1
kind: Service
metadata:
  name: postgres-service
  namespace: postgres
spec:
  clusterIP: None
  selector:
    app: postgres
  ports:
    - name: postgres
      protocol: TCP
      port: 5432
      targetPort: 5432

View File

@ -0,0 +1,6 @@
# infrastructure/postgres/serviceaccount.yaml
# Service account named by the postgres StatefulSet's serviceAccountName.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: postgres-vault
  namespace: postgres

View File

@ -0,0 +1,76 @@
# infrastructure/postgres/statefulset.yaml
# Single-replica Postgres 15 StatefulSet. Data lives on a 100Gi `astreae`
# volume and the superuser password is read from a CSI-mounted Vault file.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: postgres
  namespace: postgres
  labels:
    app: postgres
spec:
  replicas: 1
  serviceName: postgres-service
  selector:
    matchLabels:
      app: postgres
  updateStrategy:
    type: RollingUpdate
  # Volume claims are kept on both scale-down and StatefulSet deletion.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Retain
    whenScaled: Retain
  volumeClaimTemplates:
    - metadata:
        name: postgres-data
      spec:
        storageClassName: astreae
        accessModes:
          - "ReadWriteOnce"
        resources:
          requests:
            storage: 100Gi
  template:
    metadata:
      labels:
        app: postgres
    spec:
      serviceAccountName: postgres-vault
      # Worker nodes on rpi4/rpi5 hardware only; the worker label appears in
      # both the nodeSelector and the required affinity term.
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: In
                    values:
                      - "true"
                  - key: hardware
                    operator: In
                    values:
                      - "rpi4"
                      - "rpi5"
      volumes:
        # Secrets Store CSI volume backed by the postgres-vault provider class.
        - name: vault-secrets
          csi:
            driver: secrets-store.csi.k8s.io
            readOnly: true
            volumeAttributes:
              secretProviderClass: postgres-vault
      containers:
        - name: postgres
          image: postgres:15
          ports:
            - name: postgres
              protocol: TCP
              containerPort: 5432
          env:
            # Data directory is a subdirectory of the mounted volume.
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
            - name: POSTGRES_USER
              value: postgres
            # Password is read from the CSI-mounted file.
            - name: POSTGRES_PASSWORD_FILE
              value: /mnt/vault/postgres_password
            - name: POSTGRES_DB
              value: postgres
          volumeMounts:
            - name: postgres-data
              mountPath: /var/lib/postgresql/data
            - name: vault-secrets
              mountPath: /mnt/vault
              readOnly: true

View File

@ -0,0 +1,14 @@
# cert-manager ClusterIssuer for the Let's Encrypt production ACME endpoint,
# solving HTTP-01 challenges through the traefik ingress class.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: brad.stein@gmail.com
    # Secret that stores the ACME account private key.
    privateKeySecretRef:
      name: letsencrypt-prod-account-key
    solvers:
      - http01:
          ingress:
            class: traefik

View File

@ -4,7 +4,7 @@ metadata:
name: letsencrypt
spec:
acme:
email: you@bstein.dev
email: brad.stein@gmail.com
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-account-key

Some files were not shown because too many files have changed in this diff Show More