Compare commits

...

262 Commits

Author SHA1 Message Date
573cde6cad monitoring: longer data history 2025-12-14 14:47:20 -03:00
ee2f83ffc9 flux: bump gitops-ui kustomization 2025-12-14 14:41:52 -03:00
97b14715c3 flux: add weave gitops ui 2025-12-14 14:38:08 -03:00
8d6650129e nextcloud: integration with mailu & gitops-ui: initial install 2025-12-14 14:21:40 -03:00
1a76744985 Add tests and dedupe nextcloud mail sync 2025-12-14 14:15:19 -03:00
29436d04cc Keep nextcloud scripts single-sourced under scripts/ 2025-12-14 14:05:01 -03:00
1ec3896638 Extract nextcloud scripts to files 2025-12-14 13:59:16 -03:00
de8d4d9331 Normalize doc layout and README guidance 2025-12-14 13:47:59 -03:00
917178a392 Group namespace plurality rows to one per namespace 2025-12-13 22:17:47 -03:00
88ec7d5690 Fix namespace plurality mask and bump v26 2025-12-13 20:53:11 -03:00
81105b0b7e Use OR-joined node ranks for plurality tie-break 2025-12-13 19:04:22 -03:00
28b1056324 Deduplicate namespace plurality rows with ranked tie-break 2025-12-13 18:39:31 -03:00
9b45775575 Restore namespace plurality panel data 2025-12-13 18:25:03 -03:00
2baa537ec7 Use table format for namespace plurality panel 2025-12-13 18:23:19 -03:00
8af4a689eb Simplify namespace plurality table rendering 2025-12-13 18:07:56 -03:00
1adefc41e5 Hide table footer on namespace plurality table 2025-12-13 18:03:51 -03:00
d4c7455804 Make namespace plurality table non-filterable 2025-12-13 17:55:52 -03:00
c03999ad35 Remove filter bar from namespace plurality table 2025-12-13 17:38:57 -03:00
ac4d9d5e35 Disable column filters on namespace plurality table 2025-12-13 17:35:52 -03:00
9daa9404da Hide filters on namespace plurality table 2025-12-13 17:32:19 -03:00
22cd934b15 Fix namespace plurality table query 2025-12-13 17:29:55 -03:00
f2ca30dcb1 atlas pods: plurality table v11 (deterministic top node) 2025-12-13 17:19:03 -03:00
c289924cb2 atlas pods: plurality table v10 2025-12-13 16:36:25 -03:00
e95cdd6b27 atlas pods: per-namespace top node via topk 2025-12-13 15:51:45 -03:00
b0389b219b atlas pods: simplify plurality table (no filter) 2025-12-13 15:29:08 -03:00
4929a776cf monitoring: drop README per convention 2025-12-13 15:25:21 -03:00
8299684264 monitoring: restore README 2025-12-13 15:11:50 -03:00
d367d0164f atlas pods: stabilize plurality query to avoid 422 2025-12-13 15:11:21 -03:00
4f08872205 atlas pods: show per-namespace top node without vars 2025-12-13 15:02:52 -03:00
e64beee718 atlas pods: drop non-leading nodes in plurality table 2025-12-13 13:39:06 -03:00
c76bef69f2 atlas pods: simplify plurality table query 2025-12-13 12:06:18 -03:00
ca42b32b9e atlas pods: fix plurality table query 2025-12-13 12:00:31 -03:00
789ace779f atlas pods: use prom share() for plurality table 2025-12-13 11:53:27 -03:00
c82bbf32f6 atlas pods: fix plurality query with bool max match 2025-12-13 11:51:18 -03:00
f19539eb25 atlas pods: robust per-namespace top-node share 2025-12-13 11:48:44 -03:00
996f008593 atlas pods: select per-namespace top node via max match 2025-12-13 04:15:03 -03:00
b049997959 atlas pods: sort plurality table by node then share 2025-12-13 04:10:10 -03:00
f9ccd292d6 atlas pods: simplify namespace plurality query 2025-12-13 04:06:46 -03:00
0d938ad758 atlas pods: fix namespace plurality query 2025-12-13 04:00:57 -03:00
1acc865db4 restore readmes removed in last commit 2025-12-13 03:57:44 -03:00
e06a6826b7 atlas pods: add namespace plurality by node table 2025-12-13 03:57:20 -03:00
294cf324de mailu: forcing version 1.4 clamav over 1.2 2025-12-13 00:11:40 -03:00
47730f6260 forcing 12-r3 over 12-r6 for redis 2025-12-12 22:09:04 -03:00
c9c13372a8 atlas overview: include titan-db in control plane panels 2025-12-12 21:55:53 -03:00
5905c0f243 monitoring: drop duplicate titan-db scrape job 2025-12-12 21:48:03 -03:00
df9c0c1ae0 monitoring: scrape titan-db node_exporter 2025-12-12 21:38:10 -03:00
f884ce8146 atlas dashboards: align percent thresholds and disk bars 2025-12-12 21:13:31 -03:00
755a6926ab atlas overview: refine alert thresholds and availability colors 2025-12-12 20:50:41 -03:00
73deee09af atlas dashboards: use threshold colors for stats 2025-12-12 20:44:20 -03:00
2e18a4e1c5 atlas dashboards: fix pod share display and zero/red stat thresholds 2025-12-12 20:40:32 -03:00
da8ed7a3b0 atlas dashboards: show pod counts (not %) and make zero-friendly stats 2025-12-12 20:30:00 -03:00
ca1b2351c0 atlas dashboards: show pod counts with top12 bars 2025-12-12 20:20:13 -03:00
0a520e1d4b atlas dashboards: drop empty nodes and enforce top12 pod bars 2025-12-12 19:09:51 -03:00
1fefca3b3e atlas dashboards: cap pod count bars at top12 2025-12-12 18:56:13 -03:00
8ed23c673c atlas dashboards: sort pod counts and add pod row to overview 2025-12-12 18:51:43 -03:00
66f537185d atlas pods: add pod count bar and tidy pie 2025-12-12 18:45:29 -03:00
c093f98522 atlas dashboards: fix overview links and add pods-by-node pie 2025-12-12 18:32:45 -03:00
4a7822d6f0 atlas internal dashboards: add SLO/burn and api health panels 2025-12-12 18:00:43 -03:00
1a38bffdf3 atlas overview: fix availability scaling 2025-12-12 16:36:47 -03:00
92a7688a2f atlas overview: show availability percent with 3 decimals 2025-12-12 16:15:37 -03:00
72d4fd60d2 atlas overview: show availability percent and keep uptime centered 2025-12-12 16:11:28 -03:00
9320d809f4 atlas overview: center uptime and reorder top row 2025-12-12 15:56:33 -03:00
27f4e60f30 atlas overview: add uptime and crashloop panels 2025-12-12 15:23:51 -03:00
78a542b81a standardize cert issuers to letsencrypt 2025-12-12 15:18:40 -03:00
3fbcc435f0 mailu: fix unbound sidecar mounts 2025-12-12 01:19:27 -03:00
cf06e4b92a mailu: use mvance unbound sidecar and current redis image 2025-12-12 01:12:48 -03:00
842b1c2fb4 mailu: remove force upgrade to avoid pvc replace 2025-12-12 01:09:25 -03:00
a8c7525fc2 mailu: add validating dns sidecar and disable vip hostports 2025-12-12 01:06:38 -03:00
a7704beda6 restore docs after gitignore change 2025-12-12 00:50:02 -03:00
27deb933bc mailu: fix admin dns and tame vip 2025-12-12 00:49:45 -03:00
0771bc954d mailu: capture helm release and cert 2025-12-11 23:54:43 -03:00
4fcdc8819a Merge pull request 'feature/sso' (#4) from feature/sso into main
Reviewed-on: #4
2025-12-11 20:43:34 +00:00
55fa2cbce4 zot: restore main branch config 2025-12-11 17:26:15 -03:00
d5a526c5fa zot: revert to unauthenticated registry 2025-12-11 17:22:16 -03:00
efd258fc71 vault: drop traefik basicauth 2025-12-11 17:09:05 -03:00
3852ebc0f1 zot,vault: remove oauth2-proxy sso 2025-12-11 17:04:19 -03:00
88db462f8f longhorn/vault: gate via oauth2-proxy 2025-12-07 19:44:02 -03:00
e44def25f8 auth: remove error middleware to allow redirect 2025-12-07 13:19:45 -03:00
7ae8bf9705 oauth2-proxy: drop groups scope to avoid invalid_scope 2025-12-07 13:09:29 -03:00
088fed6720 auth: forward-auth via external auth host (svc traffic flaky) 2025-12-07 13:03:29 -03:00
84e4dc0616 oauth2-proxy: schedule on worker rpis 2025-12-07 12:49:38 -03:00
96a8d271a9 oauth2-proxy: ensure error middleware on auth ingress 2025-12-07 12:03:14 -03:00
84aa870cda auth: use internal oauth2-proxy svc for forward-auth 2025-12-07 11:25:29 -03:00
876ec19543 auth: add 401 redirect middleware to oauth2-proxy 2025-12-07 11:14:25 -03:00
ec1d33f1ca auth: point forward-auth to external auth host 2025-12-07 11:09:09 -03:00
1de9d94138 oauth2-proxy: temporarily drop group restriction 2025-12-07 10:42:13 -03:00
571bf759a2 auth: add namespace-local forward-auth middlewares 2025-12-07 10:25:44 -03:00
7525289a0c auth: wire oauth2-proxy and enable grafana oidc 2025-12-07 02:01:21 -03:00
c7b73555c4 add oauth2-proxy for SSO forward-auth 2025-12-06 14:42:24 -03:00
de727eee07 keycloak: restrict to worker rpis with titan-24 fallback 2025-12-06 01:44:23 -03:00
2122ce3e31 keycloak: require rpi nodes with titan-24 fallback 2025-12-06 01:40:24 -03:00
f2d496c6c0 keycloak: prefer rpi nodes, avoid titan-24 2025-12-06 01:36:33 -03:00
127d09755e keycloak: honor xforwarded headers and hostname url 2025-12-06 01:23:07 -03:00
9f5e61ebed keycloak: enable health/metrics management port 2025-12-06 00:51:47 -03:00
b1b39c4dcd keycloak: set fsGroup for data volume 2025-12-06 00:49:17 -03:00
65d8986279 keycloak: remove optimized flag for first start 2025-12-06 00:43:24 -03:00
b9202b6829 chore: drop AGENTS.md from repo 2025-12-06 00:43:17 -03:00
1e8de60198 notes: capture GPU share change and flux branch 2025-12-03 12:28:45 -03:00
2906e3e5d9 monitoring: show GPU share over dashboard range 2025-12-02 20:28:35 -03:00
7210c0784d flux: add keycloak kustomization 2025-12-02 18:10:20 -03:00
46b6d471eb flux: track feature/sso 2025-12-02 18:00:49 -03:00
7e46ffc075 keycloak: add raw manifests backed by shared postgres 2025-12-02 17:58:19 -03:00
d8f466e53e Merge pull request 'feature/atlas-monitoring' (#3) from feature/atlas-monitoring into main
Reviewed-on: #3
2025-12-02 20:52:35 +00:00
ffdb4ed010 notes: add postgres centralization guidance 2025-12-02 17:36:37 -03:00
5af23034de notes: add sso plan sketch 2025-12-02 17:14:45 -03:00
72a83a1af9 notes: update monitoring and next steps 2025-12-02 17:01:32 -03:00
42b3ac0139 monitoring: show top12 root disks 2025-12-02 15:21:02 -03:00
e53ca4dd91 monitoring: expand worker/control/root rows 2025-12-02 15:15:21 -03:00
134e39d9a4 monitoring: shrink hottest node row height 2025-12-02 15:12:16 -03:00
12fd5229dc monitoring: fix gpu share query and root bar labels 2025-12-02 14:56:36 -03:00
1963fadec1 monitoring: polish dashboards and folders 2025-12-02 14:41:39 -03:00
d23e2fe78c monitoring: regen dashboards with gpu details 2025-12-02 13:16:00 -03:00
e7d521f203 monitoring: mirror dcgm-exporter as multi-arch 2025-12-02 12:36:24 -03:00
54e4a1ed93 monitoring: run dcgm-exporter with nvidia runtime 2025-12-02 12:25:30 -03:00
9895695b36 monitoring: always pull dcgm-exporter tag 2025-12-02 12:19:16 -03:00
2fc73097ba monitoring: add registry pull secret for dcgm-exporter 2025-12-02 12:07:11 -03:00
7b1cc7061a monitoring: allow dcgm rollout with unavailable node 2025-12-02 11:59:55 -03:00
f44370c41f monitoring: use mirrored dcgm-exporter tag 2025-12-02 11:54:53 -03:00
3fbaa54f4f monitoring: reenable dcgm exporter 2025-11-20 13:11:13 -03:00
ea60425d42 traefik: use responding timeouts only 2025-11-18 20:01:16 -03:00
a8cb8c0287 traefik: extend upload timeouts 2025-11-18 19:43:19 -03:00
f7f124ad71 monitoring: control-plane stat and namespace share tweaks 2025-11-18 17:09:13 -03:00
d062c10675 monitoring: refine network metrics and control-plane allowance 2025-11-18 16:18:52 -03:00
97b7b479bc monitoring: adjust overview spacing and net panels 2025-11-18 15:55:24 -03:00
0b44f2d1d4 monitoring: disable dcgm exporter 2025-11-18 15:10:58 -03:00
bcda1b396d flux: disable wait for monitoring 2025-11-18 15:04:18 -03:00
a15ee26ae2 flux: scope monitoring health checks 2025-11-18 14:33:24 -03:00
1970b820e7 monitoring: fix dcgm image 2025-11-18 14:19:23 -03:00
e4f0eeca99 monitoring: refresh overview dashboards 2025-11-18 14:08:33 -03:00
00e9c90746 monitoring: rework gpu share + gauges 2025-11-18 12:11:47 -03:00
b1d84d646a monitoring: clean namespace gpu share and layout 2025-11-18 11:42:24 -03:00
7e4b2f8ba2 monitoring: resolve pie errors and network data 2025-11-18 11:30:33 -03:00
a028fde4f7 monitoring: fix namespace gpu share and network stats 2025-11-18 11:12:03 -03:00
703e1d4e3c monitoring: add gpu node fallback 2025-11-18 10:47:24 -03:00
16f8b5f30b monitoring: source gpu pie from limits and node nets 2025-11-18 01:01:10 -03:00
ebfeb78e87 monitoring: fix gpu pie data and network panels 2025-11-18 00:31:51 -03:00
d5e1003de8 monitoring: stabilize namespace pies and labels 2025-11-18 00:19:45 -03:00
a411694bda monitoring: add gpu pie and tidy net panels 2025-11-18 00:11:39 -03:00
1df06f18f6 Revert GPU pie chart additions 2025-11-17 23:42:55 -03:00
9bd7effdee monitoring: fix hottest stats and gpu share 2025-11-17 23:40:22 -03:00
991d6defc4 monitoring: reorder namespace pies and add gpu data 2025-11-17 23:18:53 -03:00
43b9265cdf monitoring: add namespace gpu share 2025-11-17 23:12:16 -03:00
9233ba60fc monitoring: express namespace share as cluster percent 2025-11-17 22:58:57 -03:00
ccca363fb4 monitoring: fix pie colors & thresholds 2025-11-17 22:39:50 -03:00
f22c19bc5d monitoring: color namespace pies 2025-11-17 22:36:50 -03:00
0e9b293e95 monitoring: fix namespace share percentages 2025-11-17 22:19:01 -03:00
5a2cafb5db monitoring: normalize namespace share 2025-11-17 22:06:06 -03:00
5ce1493b3b monitoring: unify namespace share panels 2025-11-17 21:57:40 -03:00
c85c6b1bc3 monitoring: worker/control-plane splits 2025-11-17 21:48:12 -03:00
64059a08f5 monitoring: restore top1 hottest stats 2025-11-17 21:20:19 -03:00
2073ffe944 monitoring: fix net/io legend labels 2025-11-17 20:19:20 -03:00
a99e1ba227 monitoring: attach nodes to net/io stats 2025-11-17 20:14:11 -03:00
8d42f501e5 monitoring: tidy hottest node labels 2025-11-17 20:04:50 -03:00
7358f9e618 monitoring: show hottest node labels 2025-11-17 20:00:40 -03:00
831d1fe707 monitoring: fix hottest node labels 2025-11-17 19:56:57 -03:00
8c263b36b9 monitoring: show hottest node names 2025-11-17 19:53:39 -03:00
bf31272339 monitoring: reorder overview stats 2025-11-17 19:49:50 -03:00
a34e58d319 monitoring: fix hottest stats and titan-db scrape 2025-11-17 19:38:40 -03:00
6a60e4284a monitoring: tighten overview stats 2025-11-17 19:24:03 -03:00
0f7d0b7bac monitoring: polish dashboards 2025-11-17 18:55:11 -03:00
665dfa2e52 monitoring: rebuild atlas dashboards 2025-11-17 16:27:38 -03:00
5858a80c72 monitoring: restructure grafana dashboards 2025-11-17 14:22:46 -03:00
d844e068ec monitoring: enrich dashboards 2025-11-16 12:58:08 -03:00
77c3e260a3 monitoring: refresh grafana dashboards 2025-11-15 21:03:11 -03:00
2e6b9a47c8 dashboards: improve public view and fix color 2025-11-15 11:59:48 -03:00
48f9c6d715 grafana: set datasource uid 2025-11-15 11:35:27 -03:00
da82ebd469 grafana: use atlas metrics hostname 2025-11-15 11:18:40 -03:00
37b93de3e7 victoria-metrics: revert storageclass change 2025-11-15 11:16:37 -03:00
89c0fbfd44 monitoring: fix domain 2025-11-14 19:13:40 -03:00
cb402d0bb9 monitoring: fix ingress and env formats 2025-11-14 08:51:09 -03:00
597556d1c0 grafana: use string host format 2025-11-14 08:37:46 -03:00
f886e2b873 grafana: fix dashboard provider list 2025-11-14 08:33:53 -03:00
94f0cd939d monitoring: fix grafana values 2025-11-14 08:29:59 -03:00
bc757265cf monitoring: add grafana and alertmanager 2025-11-14 00:02:59 -03:00
4d3a4cd2b4 flux-system: track main branch 2025-11-12 01:06:26 -03:00
ac7863802a monitoring: disable wait on node-exporter 2025-11-09 14:03:14 -03:00
afb926439f core: disable wait to unblock reconciliation 2025-11-09 13:46:56 -03:00
ebf5a8aef9 core: remove gpu health gate 2025-11-09 13:37:59 -03:00
dca749cc04 gpu: drop runtimeClass from minipc plugin 2025-11-09 13:28:40 -03:00
65b3e3fbb8 monitoring: disable kube-state annotations 2025-11-09 13:20:50 -03:00
45ad2a2b06 monitoring: clean helm values 2025-11-09 13:16:21 -03:00
396acb818a monitoring: disable chart prometheusScrape 2025-11-09 13:11:40 -03:00
aae55a14f8 monitoring: annotate kube-state svc manually 2025-11-09 13:07:39 -03:00
8ac040a7d8 monitoring: drop duplicate annotations 2025-11-09 13:03:40 -03:00
79a17412af monitoring: reference prometheus repo 2025-11-09 12:59:03 -03:00
1bdc0efdac core: point flux to infrastructure path 2025-11-09 12:49:54 -03:00
8b6ddcd44d platform: fix relative paths 2025-11-09 12:39:32 -03:00
ffbfee1ebd platform: include cert-manager clusterissuer 2025-11-09 12:38:20 -03:00
85aa07c0cc chore: fix vmagent relabel indentation 2025-11-09 12:33:11 -03:00
e2e2916139 fix: flux automation and monitoring config 2025-11-09 12:31:38 -03:00
077654fa2d refactor: restructure atlas flux layout 2025-11-09 11:48:45 -03:00
3c229baece pegasus on 2025-10-09 23:26:20 -05:00
48995cc6ed Merge pull request 'minor tweaks' (#2) from fea/titan24-gpu into main
Reviewed-on: #2
2025-10-10 02:23:01 +00:00
c94959a687 minor tweaks 2025-10-09 21:21:54 -05:00
d992be1061 Merge pull request 'gpu(titan-24): add RuntimeClass + NVIDIA device-plugin DS; enable containerd nvidia runtime' (#1) from fea/titan24-gpu into main
Reviewed-on: #1
2025-10-09 23:29:26 +00:00
79d71f471f gpu(titan-24): add RuntimeClass + NVIDIA device-plugin DS; enable containerd nvidia runtime 2025-10-09 18:28:20 -05:00
8f724e02be pegasus chill 2025-10-08 04:26:26 -05:00
d2ffd738ef storageclass update 2025-10-08 03:13:12 -05:00
16b2c15eda asteria corrections 2025-10-08 00:50:42 -05:00
761fdd29b2 jellyfin restart 2025-10-07 23:28:40 -05:00
4567b1685c monitoring add, jellyfin/pegasus update, and traefik tweaks 2025-10-07 23:26:27 -05:00
2182e98c05 jellyfin pvc size increase 2025-10-04 09:00:41 -05:00
503a95a8e8 fixed jellyfin pv issue 2025-10-04 08:50:56 -05:00
9dfe6bb700 jellyfin and pegasus in same group 2025-09-18 10:12:08 -05:00
358da0ea00 jellyfin and pegasus in same group 2025-09-18 09:55:00 -05:00
3b50199e1d jellyfin and pegasus in same group 2025-09-18 09:38:46 -05:00
5b97966395 jellyfin and pegasus in same group 2025-09-18 08:52:58 -05:00
9a34ee3d2e pegasus 1.2.32 2025-09-18 02:33:37 -05:00
53d3079bce gavilon to gavilan 2025-09-17 19:12:03 -05:00
259451e273 added gavilon to account for pegasus 2025-09-17 18:29:33 -05:00
518d7bb160 pegasus 1.2.31 2025-09-17 18:08:49 -05:00
632949c29c pegasus 1.2.31 2025-09-17 09:38:49 -05:00
6a77f7749f pegasus 1.2.30 2025-09-17 09:09:24 -05:00
16997fba10 pegasus 1.2.29 2025-09-17 09:00:52 -05:00
3637a99bfb pegasus 1.2.28 2025-09-17 08:52:11 -05:00
7e2baa343c pegasus 1.2.27 2025-09-17 08:21:51 -05:00
02bde10852 pegasus 1.2.26 2025-09-17 07:57:36 -05:00
e224215406 pegasus 1.2.25 2025-09-17 07:46:48 -05:00
03d43d097b pegasus 1.2.24 2025-09-17 07:24:10 -05:00
ca62df5508 pegasus 1.2.22 2025-09-17 01:33:11 -05:00
2f68bc664a pegasus 1.2.22 2025-09-17 01:02:33 -05:00
3878d39579 pegasus 1.2.21 2025-09-17 00:08:18 -05:00
19ae80e5e0 pegasus 1.2.20 2025-09-16 23:10:58 -05:00
46f02ee826 pegasus 1.2.17 2025-09-16 22:45:15 -05:00
e34744d144 pegasus 1.2.17 2025-09-16 20:08:50 -05:00
fdbd8ef048 pegasus 1.2.17 2025-09-16 18:02:55 -05:00
535c3de0bf pegasus 1.2.16 2025-09-16 17:18:42 -05:00
2be629a998 pegasus 1.2.15 2025-09-16 16:56:49 -05:00
0b5aed217d pegasus 1.2.14 2025-09-16 09:53:26 -05:00
eb6aeae2d2 pegasus 1.2.13 2025-09-16 09:12:41 -05:00
3276e4f196 pegasus 1.2.12 2025-09-16 08:54:32 -05:00
e31bf05cc1 pegasus 1.2.11 2025-09-16 08:29:47 -05:00
e0169b5bba pegasus 1.2.10 2025-09-16 07:19:54 -05:00
ba140fb638 pegasus 1.2.9 2025-09-16 05:33:36 -05:00
10b34c353b pegasus 1.2.8 2025-09-16 04:09:10 -05:00
26e15f7651 pegasus 1.2.7 - json fix 2025-09-16 03:35:12 -05:00
22683b0dc4 pegasus 1.2.6 - json fix 2025-09-16 03:05:50 -05:00
7468e62023 mapping to list 2025-09-16 02:36:43 -05:00
0d492eb622 pegasus updates 1.2.5 2025-09-16 01:55:36 -05:00
c8a91ebe4f pegasus updates 1.2.4 2025-09-16 01:01:23 -05:00
ee3b0f3f25 pegasus updates 2025-09-16 00:06:26 -05:00
ab02f4537e pegasus updates 2025-09-15 22:52:58 -05:00
f51c06efac pegasus updates 2025-09-15 22:40:00 -05:00
773637273d pegasus updates 2025-09-15 19:55:20 -05:00
8b1c083fe0 pegasus: pin image digest + command + probes + tls 2025-09-15 13:00:39 -05:00
128fad192c pegasus flux'd 2025-09-15 12:32:52 -05:00
eac7aaa91b pegasus flux'd 2025-09-15 12:28:56 -05:00
28903add8f pegasus fix 2025-09-15 12:09:24 -05:00
eea64c7eb1 pegasus on 2025-09-15 02:45:22 -05:00
c7a184eace zot fix 2025-09-15 02:15:27 -05:00
ba233fd909 zot fix 2025-09-15 01:03:32 -05:00
04cd5b0c62 zot middleware add 2025-09-09 11:27:42 -05:00
ec744e45bf zot middleware add 2025-09-09 01:43:13 -05:00
b16eda5894 zot simplification 2025-09-09 01:16:33 -05:00
1ba463001a zot simplification 2025-09-09 00:22:24 -05:00
2304c41ba8 zot configmap update 2025-09-08 23:08:32 -05:00
7ca10afce7 zot version pin 2025-09-08 22:52:41 -05:00
ead0c486a5 zot troubleshooting 2025-09-08 22:25:41 -05:00
1de7fcc287 zot middleware fix 2025-09-08 21:58:50 -05:00
7efc4a4dfb jitsi corrections 2025-09-07 14:31:53 -05:00
19bfa0878c pegasus corrections 2025-09-07 13:34:06 -05:00
fab2d944ff jitsi setup 2025-09-07 13:20:49 -05:00
182 changed files with 15814 additions and 109 deletions

0
-c
View File

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
# Ignore Markdown by default, then re-include README.md, AGENTS.md and NOTES.md.
# None of the negations below is anchored with a leading "/", so each one
# matches at any directory depth, not only at the repository root.
*.md
!README.md
!AGENTS.md
!**/NOTES.md

81
AGENTS.md Normal file
View File

@ -0,0 +1,81 @@
Repository Guidelines
> Local-only note: apply changes through Flux-tracked manifests, not by manual kubectl edits in-cluster—manual tweaks will be reverted by Flux.
## Project Structure & Module Organization
- `infrastructure/`: cluster-scoped building blocks (core, flux-system, traefik, longhorn). Add new platform features by mirroring this layout.
- `services/`: workload manifests per app (`services/gitea/`, etc.) with `kustomization.yaml` plus one file per kind; keep diffs small and focused.
- `dockerfiles/` hosts bespoke images, while `scripts/` stores operational Fish/Bash helpers—extend these directories instead of relying on ad-hoc commands.
## Build, Test, and Development Commands
- `kustomize build services/<app>` (or `kubectl kustomize ...`) renders manifests exactly as Flux will.
- `kubectl apply --server-side --dry-run=client -k services/<app>` checks schema compatibility without touching the cluster.
- `flux reconcile kustomization <name> --namespace flux-system --with-source` pulls the latest Git state after merges or hotfixes.
- `fish scripts/flux_hammer.fish --help` explains the recovery tool; read it before running against production workloads.
## Coding Style & Naming Conventions
- YAML uses two-space indents; retain the leading path comment (e.g. `# services/gitea/deployment.yaml`) to speed code review.
- Keep resource names lowercase kebab-case, align labels/selectors, and mirror namespaces with directory names.
- List resources in `kustomization.yaml` from namespace/config, through storage, then workloads and networking for predictable diffs.
- Scripts start with `#!/usr/bin/env fish` or bash, stay executable, and follow snake_case names such as `flux_hammer.fish`.
## Testing Guidelines
- Run `kustomize build` and the dry-run apply for every service you touch; capture failures before opening a PR.
- `flux diff kustomization <name> --path services/<app>` previews reconciliations—link notable output when behavior shifts.
- Docker edits: `docker build -f dockerfiles/Dockerfile.monerod .` (swap the file you changed) to verify image builds.
## Commit & Pull Request Guidelines
- Keep commit subjects short, present-tense, and optionally scoped (`gpu(titan-24): add RuntimeClass`); squash fixups before review.
- Describe linked issues, affected services, and required operator steps (e.g. `flux reconcile kustomization services-gitea`) in the PR body.
- Focus each PR on one kustomization or service and update `infrastructure/flux-system` when Flux must track new folders.
- Record the validation you ran (dry-runs, diffs, builds) and add screenshots only when ingress or UI behavior changes.
## Security & Configuration Tips
- Never commit credentials; use Vault workflows (`services/vault/`) or SOPS-encrypted manifests wired through `infrastructure/flux-system`.
- Node selectors and tolerations gate workloads to hardware like `hardware: rpi4`; confirm labels before scaling or renaming nodes.
- Pin external images by digest or rely on Flux image automation to follow approved tags and avoid drift.
## Dashboard roadmap / context (2025-12-02)
- Atlas dashboards are generated via `scripts/dashboards_render_atlas.py --build`, which writes JSON under `services/monitoring/dashboards/` and ConfigMaps under `services/monitoring/`. Keep the Grafana manifests in sync by regenerating after edits.
- Atlas Overview panels are paired with internal dashboards (pods, nodes, storage, network, GPU). A new `atlas-gpu` internal dashboard holds the detailed GPU metrics that feed the overview share pie.
- Old Grafana folders (`Atlas Storage`, `Atlas SRE`, `Atlas Public`, `Atlas Nodes`) should be removed in Grafana UI when convenient; only `Atlas Overview` and `Atlas Internal` should remain provisioned.
- Future work: add a separate generator (e.g., `dashboards_render_oceanus.py`) for SUI/oceanus validation dashboards, mirroring the atlas pattern of internal dashboards feeding a public overview.
## Monitoring state (2025-12-03)
- dcgm-exporter DaemonSet pulls `registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04` with nvidia runtime/imagePullSecret; titan-24 exports metrics, titan-22 remains NotReady.
- Atlas Overview is the Grafana home (1h range, 1m refresh), Overview folder UID `overview`, internal folder `atlas-internal` (oceanus-internal stub).
- Panels standardized via generator; hottest row compressed, worker/control rows taller, root disk row taller and top12 bar gauge with labels. GPU share pie uses 1h avg_over_time to persist idle activity.
- Internal dashboards are provisioned without Viewer role; if anonymous still sees them, restart Grafana and tighten auth if needed.
- GPU share panel updated (feature/sso) to use `max_over_time(…[$__range])`, so longer ranges (e.g., 12h) keep recent activity visible. Flux tracking `feature/sso`.
## Upcoming priorities (SSO/storage/mail)
- Establish SSO (Keycloak or similar) and federate Grafana, Gitea, Zot, Nextcloud, Pegasus/Jellyfin; keep Vaultwarden separate until safe.
- Add Nextcloud (limit to rpi5 workers) with office suite; integrate with SSO; plan storage class and ingress.
- Plan mail: mostly self-hosted, relay through trusted provider for outbound; integrate with services (Nextcloud, Vaultwarden, etc.) for notifications and account flows.
## SSO plan sketch (2025-12-03)
- IdP: use Keycloak (preferred) in a new `sso` namespace, Bitnami or codecentric chart with Postgres backing store (single PVC), ingress `sso.bstein.dev`, admin user bound to brad@bstein.dev; stick with local DB initially (no external IdP).
- Auth flow goals: Grafana (OIDC), Gitea (OAuth2/Keycloak), Zot (via Traefik forward-auth/oauth2-proxy), Jellyfin/Pegasus via Jellyfin OAuth/OpenID plugin (map existing usernames; run migration to pre-create users in Keycloak with same usernames/emails and temporary passwords), Pegasus keeps using Jellyfin tokens.
- Steps to implement:
1) Add service folder `services/keycloak/` (namespace, PVC, HelmRelease, ingress, secret for admin creds). Verify with kustomize + Flux reconcile.
2) Seed realm `atlas` with users (import CSV/realm). Create client for Grafana (public/implicit), Gitea (confidential), and a “jellyfin” client for the OAuth plugin; set email for brad@bstein.dev as admin.
3) Reconfigure Grafana to OIDC (disable anonymous to internal folders, leave Overview public via folder permissions). Reconfigure Gitea to OIDC (app.ini).
4) Add Traefik forward-auth (oauth2-proxy) in front of Zot and any other services needing headers-based auth.
5) Deploy Jellyfin OpenID plugin; map Keycloak users to existing Jellyfin usernames; communicate password reset path.
- Migration caution: do not delete existing local creds until SSO validated; keep Pegasus working via Jellyfin tokens during transition.
## Postgres centralization (2025-12-03)
- Prefer a shared in-cluster Postgres deployment with per-service databases to reduce resource sprawl on Pi nodes. Use it for services that can easily point at an external DB.
- Candidates to migrate to shared Postgres: Keycloak (realm DB), Gitea (git DB), Nextcloud (app DB), possibly Grafana (if persistence needed beyond current provisioner), Jitsi prosody/JVB state (if external DB supported). Keep tightly-coupled or lightweight embedded DBs as-is when migration is painful or not supported.
## SSO integration snapshot (2025-12-08)
- Current blockers: Zot still prompts for basic auth/double-login; Vault still wants the token UI after Keycloak (previously 502/404 when vault-0 sealed). Forward-auth middleware on Zot Ingress likely still causing the 401/Found hop; Vault OIDC mount not completing UI flow unless unsealed and preferred login is set.
- Flux-only changes required: remove zot forward-auth middleware from Ingress (let oauth2-proxy handle redirect), ensure Vault OIDC mount is preferred UI login and bound to admin group; keep all edits in repo so Flux enforces them.
- Secrets present (per user): `zot-oidc-client` (client_secret only), `oauth2-proxy-zot-oidc`, `oauth2-proxy-vault-oidc`, `vault-oidc-admin-token`. Zot needs its regcred in the zot namespace if image pulls fail.
- Cluster validation blocked here: `kubectl get nodes` fails (403/permission) and DNS to `*.bstein.dev` fails in this session, so no live curl verification could be run. Re-test on a host with cluster/DNS access after Flux applies fixes.
## Docs hygiene
- Do not add per-service `README.md` files; use `NOTES.md` if documentation is needed inside service folders. Keep only the top-level repo README.
- Keep comments succinct and in a human voice—no AI-sounding notes. Use `NOTES.md` for scratch notes instead of sprinkling reminders into code or extra READMEs.

3
NOTES.md Normal file
View File

@ -0,0 +1,3 @@
# Rotation reminders (temporary secrets set by automation)
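- Weave GitOps UI (`cd.bstein.dev`) admin: user `admin`, initial password redacted (never commit credentials to Git; keep them in Vault or a SOPS-encrypted manifest) — rotate immediately after first login, and treat the previously committed value as compromised.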

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# titan-iac
Flux-managed Kubernetes cluster for bstein.dev services.

View File

@ -0,0 +1,12 @@
# clusters/atlas/applications/kustomization.yaml
# Aggregates the per-app service directories into one kustomize build.
# The service manifests live under services/ at the repository root, which is
# three levels above this directory (clusters/atlas/applications/), so every
# entry climbs with ../../../ -- two levels would resolve to clusters/services/.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../../../services/crypto
  - ../../../services/gitea
  - ../../../services/jellyfin
  - ../../../services/jitsi
  - ../../../services/monitoring
  - ../../../services/pegasus
  - ../../../services/vault
  - ../../../services/zot

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-crypto.yaml
# clusters/atlas/flux-system/applications/crypto/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-gitea.yaml
# clusters/atlas/flux-system/applications/gitea/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
kind: Kustomization
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-jellyfin.yaml
# clusters/atlas/flux-system/applications/jellyfin/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/applications/jitsi/kustomization.yaml
# Flux Kustomization: applies ./services/jitsi from the flux-system
# GitRepository into the jitsi namespace, after the "core" Kustomization.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: jitsi
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/jitsi
  targetNamespace: jitsi
  # Remove cluster objects whose manifests were deleted from Git.
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  dependsOn:
    - name: core
  # Block until applied resources report ready, giving up after `timeout`.
  wait: true
  timeout: 5m

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/applications/keycloak/kustomization.yaml
# Flux Kustomization: applies ./services/keycloak from the flux-system
# GitRepository into the sso namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: keycloak
  namespace: flux-system
spec:
  interval: 10m
  # Remove cluster objects whose manifests were deleted from Git.
  prune: true
  # No namespace given, so the source is looked up in this object's namespace.
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./services/keycloak
  targetNamespace: sso
  timeout: 2m

View File

@ -0,0 +1,18 @@
# clusters/atlas/flux-system/applications/kustomization.yaml
# Index of the Flux objects (one Kustomization per app, plus the pegasus
# image automation) that this directory contributes to the build.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - zot/kustomization.yaml
  - gitea/kustomization.yaml
  - vault/kustomization.yaml
  - jitsi/kustomization.yaml
  - crypto/kustomization.yaml
  - monerod/kustomization.yaml
  - pegasus/kustomization.yaml
  - pegasus/image-automation.yaml
  - jellyfin/kustomization.yaml
  - xmr-miner/kustomization.yaml
  - sui-metrics/kustomization.yaml
  - keycloak/kustomization.yaml
  - oauth2-proxy/kustomization.yaml
  - mailu/kustomization.yaml

View File

@ -0,0 +1,18 @@
# clusters/atlas/flux-system/applications/mailu/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: mailu
namespace: flux-system
spec:
interval: 10m
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
path: ./services/mailu
targetNamespace: mailu-mailserver
prune: true
wait: true
dependsOn:
- name: helm

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-monerod.yaml
# clusters/atlas/flux-system/applications/monerod/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/applications/oauth2-proxy/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: oauth2-proxy
namespace: flux-system
spec:
interval: 10m
prune: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./services/oauth2-proxy
targetNamespace: sso
timeout: 2m

View File

@ -0,0 +1,20 @@
# clusters/atlas/flux-system/applications/pegasus/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1beta1
kind: ImageUpdateAutomation
metadata:
name: pegasus
namespace: flux-system
spec:
interval: 1m0s
sourceRef:
kind: GitRepository
name: flux-system
git:
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(pegasus): update image to {{range .Updated.Images}}{{.}}{{end}}"
update:
strategy: Setters
path: ./services/pegasus

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/applications/pegasus/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: pegasus
namespace: flux-system
spec:
interval: 10m
path: ./services/pegasus
targetNamespace: jellyfin
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
dependsOn:
- name: core
wait: true
timeout: 5m

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/applications/sui-metrics/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: sui-metrics
namespace: flux-system
spec:
interval: 10m
path: ./services/sui-metrics/overlays/atlas
prune: true
dependsOn:
- name: monitoring
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true
timeout: 5m
targetNamespace: sui-metrics

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-vault.yaml
# clusters/atlas/flux-system/applications/vault/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-core.yaml
# clusters/atlas/flux-system/applications/xmr-miner/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-zot.yaml
# clusters/atlas/flux-system/applications/zot/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -1,6 +1,6 @@
---
# This manifest was generated by flux. DO NOT EDIT.
# Flux Version: v2.5.1
# Flux Version: v2.5.1
# Components: source-controller,kustomize-controller,helm-controller,notification-controller
apiVersion: v1
kind: Namespace

View File

@ -8,7 +8,7 @@ metadata:
spec:
interval: 1m0s
ref:
branch: main
branch: feature/mailu
secretRef:
name: flux-system-gitea
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
@ -20,7 +20,7 @@ metadata:
namespace: flux-system
spec:
interval: 10m0s
path: ./
path: ./clusters/atlas/flux-system
prune: true
sourceRef:
kind: GitRepository

View File

@ -0,0 +1,8 @@
# clusters/atlas/flux-system/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gotk-components.yaml
- gotk-sync.yaml
- platform
- applications

View File

@ -0,0 +1,15 @@
# clusters/atlas/flux-system/platform/core/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: core
namespace: flux-system
spec:
interval: 10m
path: ./infrastructure/core
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: false

View File

@ -0,0 +1,20 @@
# clusters/atlas/flux-system/platform/gitops-ui/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: gitops-ui
namespace: flux-system
spec:
interval: 10m
timeout: 10m
path: ./services/gitops-ui
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: flux-system
dependsOn:
- name: helm
- name: traefik
wait: true

View File

@ -1,4 +1,4 @@
# infrastructure/flux-system/kustomization-helm.yaml
# clusters/atlas/flux-system/platform/helm/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,10 @@
# clusters/atlas/flux-system/platform/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- core/kustomization.yaml
- helm/kustomization.yaml
- traefik/kustomization.yaml
- gitops-ui/kustomization.yaml
- monitoring/kustomization.yaml
- longhorn-ui/kustomization.yaml

View File

@ -1,3 +1,4 @@
# clusters/atlas/flux-system/platform/longhorn-ui/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:

View File

@ -0,0 +1,14 @@
# clusters/atlas/flux-system/platform/monitoring/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: monitoring
namespace: flux-system
spec:
interval: 10m
path: ./services/monitoring
prune: true
sourceRef:
kind: GitRepository
name: flux-system
wait: false

View File

@ -0,0 +1,18 @@
# clusters/atlas/flux-system/platform/traefik/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: traefik
namespace: flux-system
spec:
interval: 10m
path: ./infrastructure/traefik
targetNamespace: traefik
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
dependsOn:
- name: core
wait: true

View File

@ -0,0 +1,7 @@
# clusters/atlas/platform/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../../infrastructure/modules/base
- ../../../infrastructure/modules/profiles/atlas-ha
- ../../../infrastructure/sources/cert-manager/letsencrypt.yaml

View File

@ -0,0 +1,4 @@
# clusters/oceanus/applications/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@ -0,0 +1,9 @@
# clusters/oceanus/flux-system/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
# Populate when oceanus cluster is bootstrapped with Flux.
# - gotk-components.yaml
# - gotk-sync.yaml
- ../platform
- ../applications

View File

@ -0,0 +1,6 @@
# clusters/oceanus/platform/kustomization.yaml
# Platform layer for the oceanus cluster: the shared base modules plus the
# oceanus-validator profile.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  # Paths are relative to this directory (clusters/oceanus/platform), so the
  # repository root is three levels up, as in clusters/atlas/platform.
  - ../../../infrastructure/modules/base
  - ../../../infrastructure/modules/profiles/oceanus-validator

15
docs/topology.md Normal file
View File

@ -0,0 +1,15 @@
# Titan Homelab Topology
| Hostname | Role / Function | Managed By | Notes |
|------------|--------------------------------|---------------------|-------|
| titan-db | HA control plane database | Ansible | PostgreSQL / etcd backing services |
| titan-0a | Kubernetes control-plane | Flux (atlas cluster)| HA leader, tainted for control only |
| titan-0b | Kubernetes control-plane | Flux (atlas cluster)| Standby control node |
| titan-0c | Kubernetes control-plane | Flux (atlas cluster)| Standby control node |
| titan-04-19| Raspberry Pi workers | Flux (atlas cluster)| Workload nodes, labelled per hardware |
| titan-20&21| NVIDIA Jetson workers | Flux (atlas cluster)| Workload nodes, labelled per hardware |
| titan-22 | GPU mini-PC (Jellyfin) | Flux + Ansible | NVIDIA runtime managed via `modules/profiles/atlas-ha` |
| titan-23 | Dedicated SUI validator Oceanus| Manual + Ansible | Baremetal validator workloads, exposes metrics to atlas |
| titan-24 | Tethys hybrid node | Flux + Ansible | Runs SUI metrics via K8s, validator via Ansible |
| titan-jh | Jumphost & bastion & lesavka | Ansible | Entry point / future KVM services / custom kvm - lesavka |
| styx | Air-gapped workstation | Manual / Scripts | Remains isolated, scripts tracked in `hosts/styx` |

View File

@ -0,0 +1,2 @@
# hosts/group_vars/all.yaml
validator_version: latest

View File

@ -0,0 +1,2 @@
# hosts/host_vars/titan-24.yaml
validator_compose_path: /opt/sui-validator

28
hosts/inventory/lab.yaml Normal file
View File

@ -0,0 +1,28 @@
# hosts/inventory/lab.yaml
# Replace ansible_host and ansible_user values with real connectivity details.
all:
children:
atlas:
hosts:
titan-24:
ansible_host: REPLACE_ME
ansible_user: ubuntu
roleset: tethys_hybrid
titan-22:
ansible_host: REPLACE_ME
ansible_user: debian
roleset: minipc_gpu
baremetal:
hosts:
titan-db:
ansible_host: REPLACE_ME
ansible_user: postgres
roleset: database
titan-jh:
ansible_host: REPLACE_ME
ansible_user: jump
roleset: jumphost
oceanus:
ansible_host: REPLACE_ME
ansible_user: validator
roleset: validator

29
hosts/playbooks/site.yaml Normal file
View File

@ -0,0 +1,29 @@
# hosts/playbooks/site.yaml
---
- name: Configure titan-db
hosts: titan-db
gather_facts: true
roles:
- common
- titan_db
- name: Configure titan-jh
hosts: titan-jh
gather_facts: true
roles:
- common
- titan_jh
- name: Configure oceanus validator host
hosts: oceanus
gather_facts: true
roles:
- common
- oceanus_base
- name: Prepare hybrid tethys node
hosts: titan-24
gather_facts: true
roles:
- common
- tethys_canary

View File

@ -0,0 +1,9 @@
# hosts/roles/common/tasks/main.yaml
---
- name: Ensure base packages present
ansible.builtin.package:
name:
- curl
- vim
state: present
tags: ['common', 'packages']

View File

@ -0,0 +1,6 @@
# hosts/roles/oceanus_base/tasks/main.yaml
---
- name: Placeholder for oceanus base configuration
ansible.builtin.debug:
msg: "Install validator prerequisites and monitoring exporters here."
tags: ['oceanus']

View File

@ -0,0 +1,6 @@
# hosts/roles/tethys_canary/tasks/main.yaml
---
- name: Placeholder for SUI validator container runtime setup
ansible.builtin.debug:
msg: "Configure container runtime and validator compose stack here."
tags: ['tethys', 'validator']

View File

@ -0,0 +1,6 @@
# hosts/roles/titan_db/tasks/main.yaml
---
- name: Placeholder for titan-db provisioning
ansible.builtin.debug:
msg: "Install database packages, configure backups, and manage users here."
tags: ['titan_db']

View File

@ -0,0 +1,6 @@
# hosts/roles/titan_jh/tasks/main.yaml
---
- name: Placeholder for jumphost hardening
ansible.builtin.debug:
msg: "Harden SSH, manage bastion tooling, and configure audit logging here."
tags: ['jumphost']

2
hosts/styx/NOTES.md Normal file
View File

@ -0,0 +1,2 @@
# hosts/styx/NOTES.md
Styx is air-gapped; provisioning scripts live under `scripts/`.

View File

@ -1,5 +0,0 @@
# infrastructure/core/gpu/daemonsets/profiles/jetson-only/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-jetson

View File

@ -1,6 +0,0 @@
# infrastructure/core/gpu/daemonsets/profiles/minipc-and-jetson/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-minipc
- ../../device-plugin-jetson

View File

@ -1,5 +0,0 @@
# infrastructure/core/gpu/daemonsets/profiles/minipc-only/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../device-plugin-minipc

View File

@ -2,7 +2,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- base
# - gpu/profiles/jetson-only
# - gpu/profiles/minipc-and-jetson
- gpu/profiles/minipc-only
- ../modules/base
- ../modules/profiles/atlas-ha
- ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -1,22 +0,0 @@
# infrastructure/flux-system/kustomization-core.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: core
namespace: flux-system
spec:
interval: 10m
path: ./infrastructure/core
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true
# Only wait for the NVIDIA device-plugin DaemonSet on titan-22
healthChecks:
- apiVersion: apps/v1
kind: DaemonSet
name: nvidia-device-plugin-minipc
namespace: kube-system

View File

@ -2,15 +2,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gotk-components.yaml
- gotk-sync.yaml
- kustomization-zot.yaml
- kustomization-core.yaml
- kustomization-helm.yaml
- kustomization-gitea.yaml
- kustomization-vault.yaml
- kustomization-crypto.yaml
- kustomization-monerod.yaml
- kustomization-jellyfin.yaml
- kustomization-xmr-miner.yaml
- kustomization-longhorn-ui.yaml
# This file lives in infrastructure/flux-system, so the repo-root clusters/
# directory is two levels up, not one.
- ../../clusters/atlas/flux-system

View File

@ -7,7 +7,7 @@ metadata:
annotations:
traefik.ingress.kubernetes.io/router.entrypoints: websecure
traefik.ingress.kubernetes.io/router.tls: "true"
traefik.ingress.kubernetes.io/router.middlewares: longhorn-system-longhorn-basicauth@kubernetescrd,longhorn-system-longhorn-headers@kubernetescrd
traefik.ingress.kubernetes.io/router.middlewares: ""
spec:
ingressClassName: traefik
tls:
@ -21,6 +21,6 @@ spec:
pathType: Prefix
backend:
service:
name: longhorn-frontend
name: oauth2-proxy-longhorn
port:
number: 80

View File

@ -4,3 +4,4 @@ kind: Kustomization
resources:
- middleware.yaml
- ingress.yaml
- oauth2-proxy-longhorn.yaml

View File

@ -20,3 +20,20 @@ spec:
headers:
customRequestHeaders:
X-Forwarded-Proto: "https"
---
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
name: longhorn-forward-auth
namespace: longhorn-system
spec:
forwardAuth:
address: https://auth.bstein.dev/oauth2/auth
trustForwardHeader: true
authResponseHeaders:
- Authorization
- X-Auth-Request-Email
- X-Auth-Request-User
- X-Auth-Request-Groups

View File

@ -0,0 +1,102 @@
# infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml
apiVersion: v1
kind: Service
metadata:
name: oauth2-proxy-longhorn
namespace: longhorn-system
labels:
app: oauth2-proxy-longhorn
spec:
ports:
- name: http
port: 80
targetPort: 4180
selector:
app: oauth2-proxy-longhorn
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: oauth2-proxy-longhorn
namespace: longhorn-system
labels:
app: oauth2-proxy-longhorn
spec:
replicas: 2
selector:
matchLabels:
app: oauth2-proxy-longhorn
template:
metadata:
labels:
app: oauth2-proxy-longhorn
spec:
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5","rpi4"]
containers:
- name: oauth2-proxy
image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0
imagePullPolicy: IfNotPresent
args:
- --provider=oidc
- --redirect-url=https://longhorn.bstein.dev/oauth2/callback
- --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
- --scope=openid profile email groups
- --email-domain=*
- --allowed-group=admin
- --set-xauthrequest=true
- --pass-access-token=true
- --set-authorization-header=true
- --cookie-secure=true
- --cookie-samesite=lax
- --cookie-refresh=20m
- --cookie-expire=168h
- --insecure-oidc-allow-unverified-email=true
- --upstream=http://longhorn-frontend.longhorn-system.svc.cluster.local
- --http-address=0.0.0.0:4180
- --skip-provider-button=true
- --skip-jwt-bearer-tokens=true
- --oidc-groups-claim=groups
- --cookie-domain=longhorn.bstein.dev
env:
- name: OAUTH2_PROXY_CLIENT_ID
valueFrom:
secretKeyRef:
name: oauth2-proxy-longhorn-oidc
key: client_id
- name: OAUTH2_PROXY_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: oauth2-proxy-longhorn-oidc
key: client_secret
- name: OAUTH2_PROXY_COOKIE_SECRET
valueFrom:
secretKeyRef:
name: oauth2-proxy-longhorn-oidc
key: cookie_secret
ports:
- containerPort: 4180
name: http
readinessProbe:
httpGet:
path: /ping
port: 4180
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
httpGet:
path: /ping
port: 4180
initialDelaySeconds: 20
periodSeconds: 20

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/kustomization.yaml
# infrastructure/modules/base/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/priorityclass/kustomization.yaml
# infrastructure/modules/base/priorityclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/priorityclass/scavenger.yaml
# infrastructure/modules/base/priorityclass/scavenger.yaml
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/kustomization.yaml
# infrastructure/modules/base/runtimeclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# services/jellyfin/runtimeclass.yaml
# infrastructure/modules/base/runtimeclass/runtimeclass.yaml
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/asteria.yaml
# infrastructure/modules/base/storageclass/asteria.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
@ -8,6 +8,9 @@ parameters:
fromBackup: ""
numberOfReplicas: "2"
staleReplicaTimeout: "30"
fsType: "ext4"
replicaAutoBalance: "least-effort"
dataLocality: "disabled"
provisioner: driver.longhorn.io
reclaimPolicy: Retain
allowVolumeExpansion: true

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/astreae.yaml
# infrastructure/modules/base/storageclass/astreae.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/core/base/storageclass/kustomization.yaml
# infrastructure/modules/base/storageclass/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -0,0 +1,7 @@
# infrastructure/modules/profiles/atlas-ha/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../components/device-plugin-jetson
- ../components/device-plugin-minipc
- ../components/device-plugin-tethys

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-jetson/daemonset.yaml
# infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-jetson/kustomization.yaml
# infrastructure/modules/profiles/components/device-plugin-jetson/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-minipc/daemonset.yaml
# infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
@ -24,7 +24,6 @@ spec:
tolerations:
- operator: Exists
priorityClassName: system-node-critical
runtimeClassName: nvidia
containers:
- name: nvidia-device-plugin-ctr
image: nvcr.io/nvidia/k8s-device-plugin:v0.16.2

View File

@ -1,4 +1,4 @@
# infrastructure/core/gpu/daemonsets/device-plugin-minipc/kustomization.yaml
# infrastructure/modules/profiles/components/device-plugin-minipc/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:

View File

@ -0,0 +1,49 @@
# infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: nvidia-device-plugin-tethys
namespace: kube-system
labels:
app.kubernetes.io/name: nvidia-device-plugin
app.kubernetes.io/instance: titan24
spec:
selector:
matchLabels:
app.kubernetes.io/name: nvidia-device-plugin
app.kubernetes.io/instance: titan24
template:
metadata:
labels:
app.kubernetes.io/name: nvidia-device-plugin
app.kubernetes.io/instance: titan24
spec:
nodeSelector:
kubernetes.io/hostname: titan-24
kubernetes.io/arch: amd64
tolerations:
- operator: Exists
priorityClassName: system-node-critical
runtimeClassName: nvidia
containers:
- name: nvidia-device-plugin-ctr
image: nvcr.io/nvidia/k8s-device-plugin:v0.16.2
imagePullPolicy: IfNotPresent
args:
- "--fail-on-init-error=false"
- "--device-list-strategy=envvar"
- "--mig-strategy=none"
securityContext:
privileged: true
env:
- name: NVIDIA_VISIBLE_DEVICES
value: "all"
- name: NVIDIA_DRIVER_CAPABILITIES
value: "compute,video,utility"
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins

View File

@ -0,0 +1,5 @@
# infrastructure/modules/profiles/components/device-plugin-tethys/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- daemonset.yaml

View File

@ -0,0 +1,4 @@
# infrastructure/modules/profiles/oceanus-validator/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources: []

View File

@ -0,0 +1,5 @@
# infrastructure/modules/profiles/tethys-hybrid/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../components/device-plugin-tethys

View File

@ -0,0 +1,14 @@
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-prod
spec:
acme:
email: brad.stein@gmail.com
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-prod-account-key
solvers:
- http01:
ingress:
class: traefik

View File

@ -4,7 +4,7 @@ metadata:
name: letsencrypt
spec:
acme:
email: you@bstein.dev
email: brad.stein@gmail.com
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
name: letsencrypt-account-key

View File

@ -1,3 +1,4 @@
# infrastructure/sources/helm/grafana.yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:

View File

@ -1,3 +1,4 @@
# infrastructure/sources/helm/hashicorp.yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:

View File

@ -1,3 +1,4 @@
# infrastructure/sources/helm/jetstack.yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:

View File

@ -0,0 +1,10 @@
# infrastructure/sources/helm/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- grafana.yaml
- hashicorp.yaml
- jetstack.yaml
- mailu.yaml
- prometheus.yaml
- victoria-metrics.yaml

View File

@ -0,0 +1,9 @@
# infrastructure/sources/helm/mailu.yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
name: mailu
namespace: flux-system
spec:
interval: 1h
url: https://mailu.github.io/helm-charts

View File

@ -1,3 +1,4 @@
# infrastructure/sources/helm/prometheus.yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:

View File

@ -0,0 +1,9 @@
# infrastructure/sources/helm/victoria-metrics.yaml
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
name: victoria-metrics
namespace: flux-system
spec:
interval: 1h
url: https://victoriametrics.github.io/helm-charts/

View File

@ -35,6 +35,18 @@ items:
- --entrypoints.web.address=:80
- --entrypoints.websecure.address=:443
- --api.dashboard=true
- --metrics.prometheus=true
- --metrics.prometheus.addEntryPointsLabels=true
- --metrics.prometheus.addRoutersLabels=true
- --metrics.prometheus.addServicesLabels=true
- --entrypoints.web.transport.respondingTimeouts.readTimeout=0s
- --entrypoints.web.transport.respondingTimeouts.writeTimeout=0s
- --entrypoints.web.transport.respondingTimeouts.idleTimeout=0s
- --entrypoints.websecure.transport.respondingTimeouts.readTimeout=0s
- --entrypoints.websecure.transport.respondingTimeouts.writeTimeout=0s
- --entrypoints.websecure.transport.respondingTimeouts.idleTimeout=0s
- --entrypoints.metrics.address=:9100
- --metrics.prometheus.entryPoint=metrics
image: traefik:v3.3.3
imagePullPolicy: IfNotPresent
name: traefik
@ -48,6 +60,9 @@ items:
- containerPort: 8080
name: admin
protocol: TCP
- containerPort: 9100
name: metrics
protocol: TCP
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
dnsPolicy: ClusterFirst

View File

@ -9,3 +9,4 @@ resources:
- serviceaccount.yaml
- clusterrole.yaml
- clusterrolebinding.yaml
- service.yaml

View File

@ -0,0 +1,20 @@
# infrastructure/traefik/service.yaml
apiVersion: v1
kind: Service
metadata:
name: traefik-metrics
namespace: traefik
labels:
app: traefik
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9100"
prometheus.io/path: "/metrics"
spec:
type: ClusterIP
selector:
app: traefik
ports:
- name: metrics
port: 9100
targetPort: metrics

File diff suppressed because it is too large Load Diff

2
scripts/longhorn_volume_usage.fish Normal file → Executable file
View File

@ -1,3 +1,5 @@
#!/usr/bin/env fish
function pvc-usage --description "Show Longhorn PVC usage (human-readable) mapped to namespace/name"
begin
kubectl -n longhorn-system get volumes.longhorn.io -o json \

204
scripts/mailu_sync.py Normal file
View File

@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""
Sync Keycloak users to Mailu mailboxes.
- Generates/stores a mailu_app_password attribute in Keycloak (admin-only)
- Upserts the mailbox in Mailu Postgres using that password
"""
import os
import sys
import json
import time
import secrets
import string
import datetime
import requests
import psycopg2
from psycopg2.extras import RealDictCursor
from passlib.hash import bcrypt_sha256
# Keycloak connection settings. All four are required: a missing variable
# raises KeyError as soon as the module is loaded.
KC_BASE = os.environ["KEYCLOAK_BASE_URL"].rstrip("/")  # no trailing "/" so the f-string URL joins stay clean
KC_REALM = os.environ["KEYCLOAK_REALM"]
KC_CLIENT_ID = os.environ["KEYCLOAK_CLIENT_ID"]
KC_CLIENT_SECRET = os.environ["KEYCLOAK_CLIENT_SECRET"]

# Mail domain that ensure_mailu_user() accepts; addresses in any other domain
# are skipped there.
MAILU_DOMAIN = os.environ["MAILU_DOMAIN"]
# Value written to quota_bytes for new mailboxes; defaults to 20000000000.
MAILU_DEFAULT_QUOTA = int(os.environ.get("MAILU_DEFAULT_QUOTA", "20000000000"))

# Keyword arguments handed to psycopg2.connect(); only the port has a default.
DB_CONFIG = {
    "host": os.environ["MAILU_DB_HOST"],
    "port": int(os.environ.get("MAILU_DB_PORT", "5432")),
    "dbname": os.environ["MAILU_DB_NAME"],
    "user": os.environ["MAILU_DB_USER"],
    "password": os.environ["MAILU_DB_PASSWORD"],
}

# One HTTP session shared by every Keycloak request in this script.
SESSION = requests.Session()
def log(msg):
    """Print ``msg`` on its own line to stdout and flush immediately."""
    print(msg, file=sys.stdout, flush=True)
def get_kc_token():
    """Obtain a Keycloak access token using the client-credentials grant.

    Returns:
        The ``access_token`` string from the token endpoint's JSON response.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error status.
    """
    token_url = f"{KC_BASE}/realms/{KC_REALM}/protocol/openid-connect/token"
    form = {
        "grant_type": "client_credentials",
        "client_id": KC_CLIENT_ID,
        "client_secret": KC_CLIENT_SECRET,
    }
    reply = SESSION.post(token_url, data=form, timeout=15)
    reply.raise_for_status()
    return reply.json()["access_token"]
def kc_get_users(token):
    """Return all enabled users of the realm, fetched page by page.

    Args:
        token: Bearer token for the Keycloak admin API.

    Returns:
        A list with the user representations from every page, in the order
        the API returned them.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    page_size = 200
    endpoint = f"{KC_BASE}/admin/realms/{KC_REALM}/users"
    auth = {"Authorization": f"Bearer {token}"}

    collected = []
    offset = 0
    while True:
        reply = SESSION.get(
            endpoint,
            params={"first": offset, "max": page_size, "enabled": "true"},
            headers=auth,
            timeout=20,
        )
        reply.raise_for_status()
        page = reply.json()
        collected.extend(page)
        # A short page means there is nothing left to fetch.
        if len(page) < page_size:
            return collected
        offset += page_size
def kc_update_attributes(token, user, attributes):
    """Store ``attributes`` on a Keycloak user and confirm they were saved.

    The PUT body repeats the user's current firstName, lastName, email,
    enabled, username and emailVerified values next to the new attributes.
    Afterwards the user is read back and checked for ``mailu_app_password``.

    Args:
        token: Bearer token for the Keycloak admin API.
        user: User representation; must contain ``id`` and ``username``.
        attributes: Complete attribute mapping to store on the user.

    Raises:
        requests.HTTPError: if the update or the read-back request fails.
        Exception: if the read-back user has no ``mailu_app_password``.
    """
    bearer = {"Authorization": f"Bearer {token}"}
    body = {
        "firstName": user.get("firstName"),
        "lastName": user.get("lastName"),
        "email": user.get("email"),
        "enabled": user.get("enabled", True),
        "username": user["username"],
        "emailVerified": user.get("emailVerified", False),
        "attributes": attributes,
    }
    user_url = f"{KC_BASE}/admin/realms/{KC_REALM}/users/{user['id']}"

    put_reply = SESSION.put(
        user_url,
        headers={**bearer, "Content-Type": "application/json"},
        json=body,
        timeout=20,
    )
    put_reply.raise_for_status()

    # Read the user back in full form to make sure the attribute really landed.
    check = SESSION.get(
        user_url,
        headers=bearer,
        params={"briefRepresentation": "false"},
        timeout=15,
    )
    check.raise_for_status()
    stored = check.json().get("attributes") or {}
    if stored.get("mailu_app_password"):
        return
    raise Exception(f"attribute not persisted for {user.get('email') or user['username']}")
def random_password():
    """Return a 24-character password of ASCII letters and digits.

    Characters are picked with ``secrets.choice``.
    """
    pool = string.ascii_letters + string.digits
    picked = []
    for _ in range(24):
        picked.append(secrets.choice(pool))
    return "".join(picked)
def ensure_mailu_user(cursor, email, password, display_name):
    """Create or refresh the Mailu mailbox for ``email``.

    Inserts a row into the Mailu ``user`` table, or, when the email already
    exists, updates its password, re-enables it and bumps ``updated_at``.
    The password is stored as a ``bcrypt_sha256`` hash.

    Args:
        cursor: Open database cursor on the Mailu database.
        email: Mailbox address. Nothing is written when it has no ``@`` or
            when its domain differs from ``MAILU_DOMAIN`` (case-insensitive).
        password: Plain-text app password to hash and store.
        display_name: Name shown for the mailbox; the local part is used
            when this is empty.

    Returns:
        None.
    """
    # partition() instead of split()-unpacking: a malformed address without
    # "@" is skipped like a foreign-domain address, instead of raising
    # ValueError and aborting the whole sync run.
    localpart, at_sign, domain = email.partition("@")
    if not at_sign:
        return
    if domain.lower() != MAILU_DOMAIN.lower():
        return

    hashed = bcrypt_sha256.hash(password)
    # Naive UTC timestamp, same value datetime.utcnow() produced, without
    # relying on the deprecated call.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    cursor.execute(
        """
        INSERT INTO "user" (
            email, localpart, domain_name, password,
            quota_bytes, quota_bytes_used,
            global_admin, enabled, enable_imap, enable_pop, allow_spoofing,
            forward_enabled, forward_destination, forward_keep,
            reply_enabled, reply_subject, reply_body, reply_startdate, reply_enddate,
            displayed_name, spam_enabled, spam_mark_as_read, spam_threshold,
            change_pw_next_login, created_at, updated_at, comment
        )
        VALUES (
            %(email)s, %(localpart)s, %(domain)s, %(password)s,
            %(quota)s, 0,
            false, true, true, true, false,
            false, '', true,
            false, NULL, NULL, DATE '1900-01-01', DATE '2999-12-31',
            %(display)s, true, true, 80,
            false, CURRENT_DATE, %(now)s, ''
        )
        ON CONFLICT (email) DO UPDATE
        SET password = EXCLUDED.password,
            enabled = true,
            updated_at = EXCLUDED.updated_at
        """,
        {
            "email": email,
            "localpart": localpart,
            "domain": domain,
            "password": hashed,
            "quota": MAILU_DEFAULT_QUOTA,
            "display": display_name or localpart,
            "now": now,
        },
    )
def main():
    """Sync every enabled Keycloak user into Mailu.

    For each user the stored ``mailu_app_password`` attribute is reused; if
    it is missing, a new password is generated and written back to Keycloak
    first. The mailbox is then upserted with that password. Users without an
    email address are given ``<username>@MAILU_DOMAIN``.

    The database cursor and connection are closed even when a step raises,
    so a failed run does not leave a connection open.
    """
    token = get_kc_token()
    users = kc_get_users(token)
    if not users:
        log("No users found; exiting.")
        return

    conn = psycopg2.connect(**DB_CONFIG)
    try:
        # Autocommit: every upsert is committed on its own, so mailboxes
        # synced before a later failure are kept.
        conn.autocommit = True
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        try:
            for user in users:
                attrs = user.get("attributes", {}) or {}
                # The attribute may be a list (first element used) or a
                # plain string; anything else counts as "not set".
                app_pw_value = attrs.get("mailu_app_password")
                if isinstance(app_pw_value, list):
                    app_pw = app_pw_value[0] if app_pw_value else None
                elif isinstance(app_pw_value, str):
                    app_pw = app_pw_value
                else:
                    app_pw = None

                email = user.get("email")
                if not email:
                    email = f"{user['username']}@{MAILU_DOMAIN}"

                if not app_pw:
                    app_pw = random_password()
                    attrs["mailu_app_password"] = app_pw
                    kc_update_attributes(token, user, attrs)
                    log(f"Set mailu_app_password for {email}")

                display_name = " ".join(
                    part for part in [user.get("firstName"), user.get("lastName")] if part
                ).strip()
                ensure_mailu_user(cursor, email, app_pw, display_name)
                log(f"Synced mailbox for {email}")
        finally:
            cursor.close()
    finally:
        conn.close()
# Script entry point: run the sync; any exception is reported as a single
# "ERROR: ..." line on stdout and the process exits with status 1.
if __name__ == "__main__":
    try:
        main()
    except Exception as exc:
        log(f"ERROR: {exc}")
        sys.exit(1)

49
scripts/nextcloud-mail-sync.sh Executable file
View File

@ -0,0 +1,49 @@
#!/bin/bash
# Create a Nextcloud Mail account for every Keycloak user that has both an
# email address and a mailu_app_password attribute.
#
# Required environment: KC_BASE, KC_REALM, KC_ADMIN_USER, KC_ADMIN_PASS.
# Calls `php occ` from the current directory as the www-data user.
set -euo pipefail

KC_BASE="${KC_BASE:?}"
KC_REALM="${KC_REALM:?}"
KC_ADMIN_USER="${KC_ADMIN_USER:?}"
KC_ADMIN_PASS="${KC_ADMIN_PASS:?}"

# Install jq (and curl) on demand when jq is not available.
if ! command -v jq >/dev/null 2>&1; then
  apt-get update && apt-get install -y jq curl >/dev/null
fi

# account_exists EMAIL
# Succeeds when EMAIL already appears in the mail app's account list
# (matched with a space directly before or after it).
account_exists() {
  local accounts
  # Capture the listing once instead of piping it into `grep -q`: with
  # pipefail, grep exiting early can make the producer die on SIGPIPE and
  # turn a real match into a failure. A failed listing counts as
  # "not present", as before.
  accounts=$(runuser -u www-data -- php occ mail:account:list 2>/dev/null </dev/null) || return 1
  grep -Fq -e " ${1}" -e "${1} " <<<"${accounts}"
}

# --data-urlencode keeps credentials containing &, +, % or = intact.
token=$(
  curl -s -d "grant_type=password" \
    -d "client_id=admin-cli" \
    --data-urlencode "username=${KC_ADMIN_USER}" \
    --data-urlencode "password=${KC_ADMIN_PASS}" \
    "${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token'
)

if [[ -z "${token}" || "${token}" == "null" ]]; then
  echo "Failed to obtain admin token"
  exit 1
fi

users=$(curl -s -H "Authorization: Bearer ${token}" \
  "${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000")

jq -c '.[]' <<<"${users}" | while IFS= read -r user; do
  username=$(jq -r '.username' <<<"${user}")
  email=$(jq -r '.email // empty' <<<"${user}")
  app_pw=$(jq -r '.attributes.mailu_app_password[0] // empty' <<<"${user}")
  [[ -z "${email}" || -z "${app_pw}" ]] && continue

  if account_exists "${email}"; then
    echo "Skipping ${email}, already exists"
    continue
  fi

  echo "Syncing ${email}"
  # stdin is redirected so occ cannot consume the remaining user records
  # that the loop reads from the pipe. A failed create is still non-fatal,
  # but is now reported instead of being silently ignored.
  runuser -u www-data -- php occ mail:account:create \
    "${username}" "${username}" "${email}" \
    mail.bstein.dev 993 ssl "${email}" "${app_pw}" \
    mail.bstein.dev 587 tls "${email}" "${app_pw}" login </dev/null \
    || echo "WARN: failed to create mail account for ${email}" >&2
done

View File

@ -0,0 +1,65 @@
#!/bin/bash
set -euo pipefail
# Nextcloud base URL (overridable) and the mandatory admin credentials.
: "${NC_URL:=https://cloud.bstein.dev}"
: "${ADMIN_USER:?}"
: "${ADMIN_PASS:?}"

# curl and jq are needed for the OCS API calls further down.
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y -qq curl jq >/dev/null

# Run an occ command as the www-data user.
run_occ() { runuser -u www-data -- php occ "$@"; }

# Print a timestamped message on stdout.
log() {
  echo "[$(date -Is)] $*"
}
# --- Theming and quota (via occ) ---
log "Applying Atlas theming"
run_occ theming:config name "Atlas Cloud"
run_occ theming:config slogan "Unified access to Atlas services"
run_occ theming:config url "https://cloud.bstein.dev"
run_occ theming:config color "#0f172a"
run_occ theming:config disable-user-theming yes
log "Setting default quota to 200 GB"
run_occ config:app:set files default_quota --value "200 GB"

# --- External links (via the OCS API of the "external" app) ---
API_BASE="${NC_URL}/ocs/v2.php/apps/external/api/v1"
AUTH=(-u "${ADMIN_USER}:${ADMIN_PASS}" -H "OCS-APIRequest: true")

# Start from a clean slate: delete every link currently registered.
log "Removing existing external links"
existing=$(curl -sf "${AUTH[@]}" "${API_BASE}?format=json" | jq -r '.ocs.data[].id // empty')
for id in ${existing}; do
  # Best effort, but make a failed deletion visible in the log.
  curl -sf "${AUTH[@]}" -X DELETE "${API_BASE}/sites/${id}?format=json" >/dev/null \
    || log "WARN: could not delete external link ${id}"
done

# "Display name|URL" pairs to register.
SITES=(
  "Vaultwarden|https://vault.bstein.dev"
  "Jellyfin|https://stream.bstein.dev"
  "Gitea|https://scm.bstein.dev"
  "Jenkins|https://ci.bstein.dev"
  "Zot|https://registry.bstein.dev"
  "Vault|https://secret.bstein.dev"
  "Jitsi|https://meet.bstein.dev"
  "Grafana|https://metrics.bstein.dev"
  "Chat LLM|https://chat.ai.bstein.dev"
  "Vision|https://draw.ai.bstein.dev"
  "STT/TTS|https://talk.ai.bstein.dev"
)
log "Seeding external links"
for entry in "${SITES[@]}"; do
  IFS="|" read -r name url <<<"${entry}"
  # --data-urlencode: names such as "Chat LLM" or "STT/TTS" and the URLs
  # contain characters that must be percent-encoded in a form body.
  curl -sf "${AUTH[@]}" -X POST "${API_BASE}/sites?format=json" \
    --data-urlencode "name=${name}" \
    --data-urlencode "url=${url}" \
    --data-urlencode "lang=" \
    --data-urlencode "type=link" \
    --data-urlencode "device=" \
    --data-urlencode "icon=" \
    --data-urlencode "groups[]=" \
    --data-urlencode "redirect=1" >/dev/null
done
log "Maintenance run completed"

View File

@ -0,0 +1,218 @@
#!/usr/bin/env bash
set -euo pipefail
# 0) Dedicated kiosk account (created only when it does not exist yet)
if ! id -u styx &>/dev/null; then
  sudo useradd --create-home --shell /bin/bash styx
  echo "Created user 'styx'"
fi

# 1) Application directory, owned by the kiosk user
sudo mkdir -p /opt/styx-kiosk/keys
sudo chown -R styx:styx /opt/styx-kiosk

# 2) Write the kiosk application (the heredoc that follows) into place
sudo tee /opt/styx-kiosk/kiosk.py >/dev/null <<'PY'
#!/usr/bin/env python3
import base64, json, os, subprocess, threading, tempfile
from datetime import datetime
import tkinter as tk
from tkinter import ttk, messagebox
APP_TITLE = "STYX Airgap Signer"
CAMERA_DEV = os.environ.get("ZBAR_DEV", "/dev/video0")
KEY_PATH = os.environ.get("STYX_KEY", "/vault/keys/signer_ed25519.pem") # in the LUKS vault
ALGO = os.environ.get("STYX_ALGO", "ed25519") # or 'secp256r1'
QR_TMP = "/tmp/styx_signed.png"
def zbar_scan_oneshot():
    """Scan one code with zbarcam on CAMERA_DEV and return its text.

    zbarcam is run with --raw (data only), --nodisplay (no preview window)
    and --oneshot (exit after the first code), with a 30 second timeout.

    Returns the stripped output, or None when the output is empty or the
    command fails or times out.
    """
    argv = ["zbarcam", "--raw", "--nodisplay", "--oneshot", CAMERA_DEV]
    try:
        decoded = subprocess.check_output(argv, text=True, timeout=30).strip()
    except Exception:
        return None
    return decoded or None
def openssl_pub_der_b64(key_path):
    """Return the public key of `key_path` as base64 text of its DER encoding.

    The DER bytes come from `openssl pkey -pubout -outform DER`; a failing
    openssl call raises subprocess.CalledProcessError.
    """
    argv = ["openssl", "pkey", "-in", key_path, "-pubout", "-outform", "DER"]
    public_der = subprocess.check_output(argv)
    return base64.b64encode(public_der).decode()
def sign_bytes(msg: bytes, key_path: str, algo: str) -> bytes:
    """Sign `msg` with the private key at `key_path` using the openssl CLI.

    Args:
        msg: Raw bytes to sign.
        key_path: Path of the private key file handed to openssl.
        algo: "ed25519", or one of "secp256r1" / "prime256v1" / "p256"
            (case-insensitive).

    Returns:
        The signature bytes exactly as printed by openssl.

    Raises:
        RuntimeError: `algo` is not one of the supported names.
        subprocess.CalledProcessError: openssl exited with an error.
    """
    name = algo.lower()
    # Pick the command first so an unsupported algorithm never causes the
    # message to be written to disk.
    if name == "ed25519":
        # Ed25519 signs the raw message; OpenSSL handles hashing internally.
        argv = ["openssl", "pkeyutl", "-sign", "-inkey", key_path, "-rawin", "-in"]
    elif name in ("secp256r1", "prime256v1", "p256"):
        # ECDSA over P-256 with SHA-256; OpenSSL returns DER-encoded (r, s).
        argv = ["openssl", "dgst", "-sha256", "-sign", key_path]
    else:
        raise RuntimeError(f"Unsupported algo: {algo}")

    # openssl reads the message from a file; delete=False lets us close the
    # file before openssl opens it.
    tmp = tempfile.NamedTemporaryFile(delete=False)
    msg_path = tmp.name
    try:
        with tmp:
            tmp.write(msg)
        return subprocess.check_output(argv + [msg_path])
    finally:
        # Always remove the temp file, including when the write itself fails.
        try:
            os.unlink(msg_path)
        except OSError:
            pass
def make_signed_envelope(scanned_text: str, key_path: str, algo: str) -> dict:
    """Sign the scanned request and wrap the result in a JSON-ready dict.

    The bytes to sign are chosen as follows:
      * JSON object with "tx_bytes": the base64-decoded value;
      * JSON object with "message": that string, UTF-8 encoded;
      * any other JSON object: its canonical JSON (sorted keys, compact);
      * anything else (not JSON, or JSON that is not an object): the scanned
        text itself.

    Args:
        scanned_text: Text decoded from the scanned QR code.
        key_path: Private key path passed to the signing helpers.
        algo: Signature algorithm name passed to sign_bytes.

    Returns:
        Dict with the lower-cased algorithm, base64 signature, base64 DER
        public key, base64 SHA-256 of the signed bytes, the original scan,
        the request id (None unless the JSON object carried one), the host
        name and a UTC timestamp.

    Raises:
        ValueError: "tx_bytes" is not valid base64 or "message" is not a
            string. These used to be swallowed, so the signer silently signed
            the raw scan instead of the bytes the request asked for.
    """
    import hashlib
    from datetime import timezone

    try:
        obj = json.loads(scanned_text)
    except ValueError:
        obj = None  # not JSON at all

    if isinstance(obj, dict):
        request_id = obj.get("request_id")
        if "tx_bytes" in obj:
            try:
                msg = base64.b64decode(obj["tx_bytes"])
            except (TypeError, ValueError) as exc:
                raise ValueError("'tx_bytes' is not valid base64") from exc
        elif "message" in obj:
            if not isinstance(obj["message"], str):
                raise ValueError("'message' must be a string")
            msg = obj["message"].encode()
        else:
            # JSON without a known field: sign its canonical serialisation.
            msg = json.dumps(obj, sort_keys=True, separators=(",", ":")).encode()
    else:
        # Non-JSON, or JSON scalars/arrays: sign the scanned text as-is.
        msg = scanned_text.encode()
        request_id = None

    sig = sign_bytes(msg, key_path, algo)
    return {
        "algo": algo.lower(),
        "signature_b64": base64.b64encode(sig).decode(),
        "pubkey_spki_der_b64": openssl_pub_der_b64(key_path),
        "payload_sha256_b64": base64.b64encode(hashlib.sha256(msg).digest()).decode(),
        "quote_raw": scanned_text,
        "request_id": request_id,
        "device": os.uname().nodename,
        # Same "YYYY-MM-DDTHH:MM:SSZ" text as before, without the deprecated
        # datetime.utcnow().
        "ts_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    }
def qrencode_to_file(text: str, path: str):
    """Render `text` as a PNG QR code at `path` with the qrencode CLI.

    The text is fed on stdin; a non-zero exit raises
    subprocess.CalledProcessError because of check=True.
    """
    argv = ["qrencode", "-l", "M", "-s", "16", "-t", "PNG", "-o", path]
    subprocess.run(argv, input=text.encode(), check=True)
class App(tk.Tk):
    """Fullscreen Tk window: scan a QR code, sign it, show the result as a QR."""

    def __init__(self):
        """Build the widgets and warn in the status line if KEY_PATH is missing."""
        super().__init__()
        self.title(APP_TITLE)
        self.attributes("-fullscreen", True)
        self.configure(background="black")
        self.bind("<Escape>", lambda e: self.quit())  # for maintenance only
        s = ttk.Style(self)
        s.configure("Big.TButton", font=("DejaVu Sans", 48), padding=24)
        s.configure("Big.TLabel", font=("DejaVu Sans", 32), foreground="white", background="black")
        self.container = tk.Frame(self, bg="black")
        self.container.pack(expand=True, fill="both")
        self.status = ttk.Label(self.container, text="Ready", style="Big.TLabel")
        self.status.pack(pady=20)
        self.scan_btn = ttk.Button(self.container, text="SCAN", style="Big.TButton", command=self.start_scan)
        self.scan_btn.pack(pady=20)
        self.image_label = tk.Label(self.container, bg="black")
        self.image_label.pack(pady=10)
        # Created hidden; packed by _show_qr once a signed QR is on screen.
        self.new_btn = ttk.Button(self.container, text="NEW SCAN", style="Big.TButton", command=self.reset)
        self.new_btn.pack_forget()
        self.note = ttk.Label(self.container, text="", style="Big.TLabel")
        self.note.pack(pady=10)
        if not os.path.exists(KEY_PATH):
            self.status.config(text=f"Key not found at {KEY_PATH}\nInsert/unlock vault to proceed.")

    def reset(self):
        """Clear the displayed QR and return to the initial "Ready" state."""
        self.image_label.configure(image="")
        self.image_label.image = None
        self.new_btn.pack_forget()
        self.note.config(text="")
        self.status.config(text="Ready")
        self.scan_btn.config(state="normal")

    def start_scan(self):
        """Start scan-and-sign on a background thread, if the key file exists."""
        if not os.path.exists(KEY_PATH):
            messagebox.showerror("Key missing", f"Signing key not found at:\n{KEY_PATH}\nUnlock your vault.")
            return
        self.status.config(text="Scanning…")
        self.scan_btn.config(state="disabled")
        threading.Thread(target=self._do_scan_and_sign, daemon=True).start()

    def _do_scan_and_sign(self):
        """Worker thread: scan, sign, render the QR, then hand off via after()."""
        scanned = zbar_scan_oneshot()
        if not scanned:
            self.after(0, self._scan_failed)
            return
        try:
            envelope = make_signed_envelope(scanned, KEY_PATH, ALGO)
            payload = json.dumps(envelope, separators=(",", ":"))
            qrencode_to_file(payload, QR_TMP)
            self.after(0, self._show_qr, envelope)
        except Exception as e:
            # Pass the text as an argument: `e` is unbound once the except
            # block ends, so a lambda closing over it would raise NameError
            # by the time Tk runs the callback.
            self.after(0, self._error, str(e))

    def _scan_failed(self):
        """Report that no code was read and re-enable the SCAN button."""
        self.status.config(text="No QR detected. Try again.")
        self.scan_btn.config(state="normal")

    def _error(self, message):
        """Show a signing/encoding failure and re-enable the SCAN button.

        This handler was referenced by _do_scan_and_sign but never defined,
        so any failure left the UI stuck on "Scanning…" with SCAN disabled.
        """
        self.status.config(text=f"Signing failed: {message}")
        self.scan_btn.config(state="normal")

    def _show_qr(self, envelope):
        """Display the PNG written by qrencode plus a short summary line."""
        try:
            img = tk.PhotoImage(file=QR_TMP)
            self.image_label.configure(image=img)
            # Keep a reference so the image is not garbage-collected.
            self.image_label.image = img
        except Exception as e:
            self.status.config(text=f"QR render failed: {e}")
            self.scan_btn.config(state="normal")
            return
        self.status.config(text="Signed. Show this QR to your online box.")
        self.note.config(text=f"Algo: {envelope['algo']} Host: {envelope['device']}")
        self.new_btn.pack(pady=20)


if __name__ == "__main__":
    App().mainloop()
PY
sudo chmod +x /opt/styx-kiosk/kiosk.py
sudo chown -R styx:styx /opt/styx-kiosk

# 3) Minimal X session: openbox + kiosk; no mouse pointer
sudo -u styx tee /home/styx/.xinitrc >/dev/null <<'XRC'
xset -dpms
xset s off
xset s noblank
# If 'unclutter' is installed, uncomment the next line to hide cursor:
# unclutter -idle 0 -root &
openbox-session &
/opt/styx-kiosk/kiosk.py
XRC
sudo chown styx:styx /home/styx/.xinitrc
sudo chmod 0755 /home/styx/.xinitrc

# 4) Autologin the 'styx' user on tty1, auto-start X
sudo mkdir -p /etc/systemd/system/getty@tty1.service.d
sudo tee /etc/systemd/system/getty@tty1.service.d/override.conf >/dev/null <<'OVR'
[Service]
ExecStart=
ExecStart=-/sbin/agetty --autologin styx --noclear %I $TERM
Type=idle
OVR

# Append the startx snippet only once. `tee -a` on every run used to stack
# duplicate copies in .bash_profile each time the installer was re-run.
if ! sudo grep -qs 'exec startx -- -nocursor' /home/styx/.bash_profile; then
  sudo -u styx tee -a /home/styx/.bash_profile >/dev/null <<'BRC'
# Start X on the first tty automatically, headless
if [ -z "$DISPLAY" ] && [ "$(tty)" = "/dev/tty1" ]; then
exec startx -- -nocursor
fi
BRC
fi

sudo systemctl daemon-reload
sudo systemctl enable getty@tty1.service
echo "Done. Reboot to try the kiosk."

195
scripts/styx_prep.sh Executable file
View File

@ -0,0 +1,195 @@
#!/usr/bin/env bash
set -euo pipefail
# === CONFIG ===
# These values are pasted into the chroot script by prep_chroot through plain
# text substitution of the placeholder words STYX_USER, STYX_PASS,
# STYX_HOSTNAME and SSH_PUBKEY.
# NOTE(review): the initial password is hard-coded here and echoed by main at
# the end of the run; nothing in this script forces a change at first login.
STYX_USER="styx"
STYX_PASS="TempPass#123" # change at first login
STYX_HOSTNAME="styx"
SSH_PUBKEY="" # e.g., 'ssh-ed25519 AAAA... your@host' (optional)
# === helpers ===
require_root() {
  # Already root: nothing to do. Otherwise replace this process with the same
  # script run through sudo (-E keeps the environment), forwarding the
  # arguments this function was given.
  (( EUID == 0 )) || exec sudo -E "$0" "$@"
}
ensure_binfmt_arm64() {
  # Nothing to do when the qemu-aarch64 binfmt handler is already registered.
  [[ -e /proc/sys/fs/binfmt_misc/qemu-aarch64 ]] && return 0

  # Registration goes through a privileged Docker container, so Docker is
  # required.
  if ! command -v docker >/dev/null; then
    echo "Docker required to register binfmt (sudo pacman -S docker)"
    exit 1
  fi
  sudo systemctl enable --now docker >/dev/null 2>&1 || true
  sudo docker run --rm --privileged tonistiigi/binfmt --install arm64
}
find_parts() {
  # Locate the partitions by filesystem label and store their device paths in
  # the globals BOOT ('system-boot') and ROOT ('writable'); the first match
  # wins for each label.
  BOOT=$(lsblk -o LABEL,PATH -nr | awk '$1=="system-boot"{print $2}' | head -n1)
  ROOT=$(lsblk -o LABEL,PATH -nr | awk '$1=="writable"{print $2}' | head -n1)
  [[ -n "${BOOT:-}" && -n "${ROOT:-}" ]] && return 0

  # One of them is missing: show what is attached and give up.
  echo "Could not find 'system-boot'/'writable' on any device."
  lsblk -o NAME,SIZE,FSTYPE,LABEL,PATH -nr
  exit 1
}
mount_parts() {
  # Mount the Pi root and boot partitions and prepare the root for chroot use.
  local resolv=/mnt/pi-root/etc/resolv.conf
  local bind_dir

  mkdir -p /mnt/pi-boot /mnt/pi-root
  mount "$ROOT" /mnt/pi-root
  mount "$BOOT" /mnt/pi-boot

  # Bind only what we need (avoid /run to prevent postinst fights)
  for bind_dir in dev dev/pts proc sys; do
    mount --bind "/${bind_dir}" "/mnt/pi-root/${bind_dir}"
  done

  # Ubuntu images use a resolv.conf symlink; swap it (or a missing file) for a
  # real copy of the host's resolver configuration.
  if [[ -L "$resolv" || ! -e "$resolv" ]]; then
    rm -f "$resolv"
    cat /etc/resolv.conf > "$resolv"
  fi
}
prep_chroot() {
# Provision the mounted Pi root filesystem by running a script inside it with
# chroot. The script text is built in CHCMD from a quoted heredoc (so nothing
# is expanded while it is read), the placeholder words STYX_USER, STYX_PASS,
# STYX_HOSTNAME and SSH_PUBKEY are then replaced with the config values, and
# the result is run with `bash -lc` inside /mnt/pi-root.
# Block service starts inside chroot (no systemd there)
cat >/mnt/pi-root/usr/sbin/policy-rc.d <<'EOF'
#!/bin/sh
exit 101
EOF
chmod +x /mnt/pi-root/usr/sbin/policy-rc.d
# All the work happens inside the ARM64 rootfs
CHCMD=$(cat <<'EOS'
set -euo pipefail
export DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC
# Ensure sbin is in PATH so user/group tools work
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
apt-get update
apt-get -y full-upgrade
# Remove snaps and keep them gone (Ubuntu for Pi ships with snaps)
apt-get -y purge snapd || true
rm -rf /snap /var/snap /var/lib/snapd /home/*/snap || true
mkdir -p /etc/apt/preferences.d
printf 'Package: snapd\nPin: release *\nPin-Priority: -10\n' > /etc/apt/preferences.d/nosnap.pref
# Ensure user/group tools exist
apt-get install -y passwd adduser || true
getent group i2c >/dev/null || /usr/sbin/groupadd i2c
# Base packages
BASE_PKGS="openssh-server git i2c-tools python3-smbus python3-pil zbar-tools qrencode lm-sensors"
apt-get install -y $BASE_PKGS
# ------- OLED (Luma) -------
# Prefer distro package; fall back to pip if not present in this release
if ! dpkg -s python3-luma.oled >/dev/null 2>&1; then
apt-get update
if ! apt-get install -y python3-luma.oled; then
apt-get install -y python3-pip
pip3 install --no-input --break-system-packages luma.oled
fi
fi
# ------- Camera apps -------
# Ubuntu renamed libcamera-apps -> rpicam-apps for Raspberry Pi.
# Try in order; tolerate absence (the box might be display-only).
apt-get update
if ! apt-get install -y rpicam-apps; then
apt-get install -y libcamera-apps || apt-get install -y libcamera-tools || true
fi
# Enable SSH on boot (no systemctl in chroot)
mkdir -p /etc/systemd/system/multi-user.target.wants
ln -sf /lib/systemd/system/ssh.service /etc/systemd/system/multi-user.target.wants/ssh.service
# Create user and set password
if ! id -u STYX_USER >/dev/null 2>&1; then
/usr/sbin/useradd -m -s /bin/bash -G sudo,video,i2c STYX_USER
fi
echo 'STYX_USER:STYX_PASS' | /usr/sbin/chpasswd
# Optional: preload SSH key
if [ -n 'SSH_PUBKEY' ] && echo 'SSH_PUBKEY' | grep -q 'ssh-'; then
install -d -m700 /home/STYX_USER/.ssh
echo 'SSH_PUBKEY' >> /home/STYX_USER/.ssh/authorized_keys
chmod 600 /home/STYX_USER/.ssh/authorized_keys
chown -R STYX_USER:STYX_USER /home/STYX_USER/.ssh
fi
# Freenove code
git clone https://github.com/Freenove/Freenove_Computer_Case_Kit_for_Raspberry_Pi.git /opt/freenove || true
# Hostname
echo 'STYX_HOSTNAME' > /etc/hostname
if grep -q '^127\.0\.1\.1' /etc/hosts; then
sed -i 's/^127\.0\.1\.1.*/127.0.1.1\tSTYX_HOSTNAME/' /etc/hosts
else
echo -e '127.0.1.1\tSTYX_HOSTNAME' >> /etc/hosts
fi
apt-get clean
EOS
)
# Inject config values safely
# NOTE(review): this is plain text replacement of every occurrence of each
# placeholder word. The values land inside single-quoted strings in the script
# above, so a value containing a single quote would break the generated
# script - confirm the config values never contain one.
CHCMD="${CHCMD//STYX_USER/${STYX_USER}}"
CHCMD="${CHCMD//STYX_PASS/${STYX_PASS}}"
CHCMD="${CHCMD//STYX_HOSTNAME/${STYX_HOSTNAME}}"
CHCMD="${CHCMD//SSH_PUBKEY/${SSH_PUBKEY}}"
chroot /mnt/pi-root /bin/bash -lc "$CHCMD"
}
install_service_host() {
  # Write the systemd unit for the Freenove example app into the mounted root
  # and enable it by symlinking it into multi-user.target.wants.
  local unit_dir=/mnt/pi-root/etc/systemd/system

  mkdir -p "${unit_dir}/multi-user.target.wants"
  cat >"${unit_dir}/freenove-case.service" <<'SERVICE'
[Unit]
Description=Freenove Case OLED/Fans/LEDs
After=multi-user.target
[Service]
Type=simple
ExecStart=/usr/bin/python3 /opt/freenove/Code/application.py
Restart=on-failure
[Install]
WantedBy=multi-user.target
SERVICE
  # The link target is the path as seen from the booted system, not the host.
  ln -sf /etc/systemd/system/freenove-case.service \
    "${unit_dir}/multi-user.target.wants/freenove-case.service" || true
}
boot_tweaks() {
  # Adjust the files on the mounted BOOT partition: enable I2C in config.txt
  # and append the DSI/HDMI video arguments to the kernel command line.
  local cfg=/mnt/pi-boot/config.txt
  local cmdline=/mnt/pi-boot/cmdline.txt

  if ! grep -q 'dtparam=i2c_arm=on' "$cfg"; then
    echo 'dtparam=i2c_arm=on' >> "$cfg"
  fi

  # Append to line 1 only when the DSI mode is not already present; a failing
  # sed is tolerated.
  grep -q 'DSI-1:800x480@60D' "$cmdline" 2>/dev/null \
    || sed -i '1 s#$# video=DSI-1:800x480@60D video=HDMI-A-1:off video=HDMI-A-2:off#' "$cmdline" \
    || true
}
cleanup() {
  # Undo mount_parts/prep_chroot. Every step is best-effort so this can run
  # more than once and after a partial setup.
  local mount_point

  rm -f /mnt/pi-root/usr/sbin/policy-rc.d || true
  # Innermost mounts first, the root filesystem last.
  for mount_point in \
    /mnt/pi-root/dev/pts /mnt/pi-root/dev /mnt/pi-root/proc /mnt/pi-root/sys \
    /mnt/pi-boot /mnt/pi-root; do
    umount -lf "$mount_point" 2>/dev/null || true
  done
  sync || true
}
main() {
  # Entry point: become root, locate and mount the Pi partitions, provision
  # them, then unmount and print follow-up hints.

  # Forward the script's arguments so the sudo re-exec sees them too.
  require_root "$@"
  ensure_binfmt_arm64
  find_parts

  # ERR traps are not inherited by shell functions unless errtrace is on, and
  # all the work below happens inside functions.
  set -o errtrace
  trap 'echo "ERROR at line $LINENO" >&2; cleanup' ERR
  # On Ctrl-C, clean up and stop. Without the exit the script would carry on
  # with the next step against filesystems that were just unmounted.
  trap 'echo "Interrupted" >&2; cleanup; exit 130' INT

  mount_parts
  prep_chroot
  install_service_host
  boot_tweaks
  cleanup
  trap - ERR INT

  echo "✅ Done. Move the NVMe to the Pi and boot."
  echo " Login: user '${STYX_USER}' pass '${STYX_PASS}' (change with 'passwd')."
  echo " Quick checks on the Pi:"
  echo " sudo i2cdetect -y 1"
  echo " rpicam-still -n -o test.jpg # (if rpicam-apps installed)"
  echo " libcamera-still -n -o test.jpg # (if legacy libcamera-apps installed)"
  echo " systemctl status freenove-case"
}
main "$@"

575
scripts/styx_prep_nvme_luks.sh Executable file
View File

@ -0,0 +1,575 @@
#!/usr/bin/env bash
set -euo pipefail
# --- CONFIG (edit if needed) ---
# NVME, FLAVOR, IMG_DIR, IMG_FILE, IMG_BOOT_MNT and IMG_ROOT_MNT can be
# overridden from the environment; the remaining values are fixed here.
# Leave NVME empty → script will auto-detect the SSK dock.
NVME="${NVME:-}"
FLAVOR="${FLAVOR:-desktop}"
# Persistent cache so the image survives reboots.
IMG_DIR="${IMG_DIR:-/var/cache/styx-rpi}"
IMG_FILE="${IMG_FILE:-ubuntu-24.04.3-preinstalled-${FLAVOR}-arm64+raspi.img}"
# Mount points for the downloaded image (boot and root partitions).
IMG_BOOT_MNT="${IMG_BOOT_MNT:-/mnt/img-boot}"
IMG_ROOT_MNT="${IMG_ROOT_MNT:-/mnt/img-root}"
# Mount points for the target disk (decrypted root and FAT boot partition).
TGT_ROOT="/mnt/target-root"
TGT_BOOT="/mnt/target-boot"
# Account, hostname and SSH key written into the cloud-init seed.
# NOTE(review): the initial password is committed in clear text; the seed sets
# `expire: true` for it and `ssh_pwauth: false`.
STYX_USER="styx"
STYX_HOSTNAME="titan-ag"
STYX_PASS="TempPass#123" # will be forced to change on first login via cloud-init
SSH_PUBKEY="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOb8oMX6u0z3sH/p/WBGlvPXXdbGETCKzWYwR/dd6fZb titan-bastion"
# Video / input prefs
# Appended to the kernel command line by fix_firmware_files.
DSI_FLAGS="video=DSI-1:800x480@60D video=HDMI-A-1:off video=HDMI-A-2:off"
# --- Helpers ---
# Print "ERROR: <message>" on stderr and exit with status 1.
fatal() {
  echo "ERROR: $*" >&2
  exit 1
}

# Abort through fatal unless the command named in $1 can be found.
need() {
  if ! command -v "$1" >/dev/null; then
    fatal "Missing tool: $1"
  fi
}

# Re-run the script through sudo (keeping the environment) when not root.
require_root() {
  if [[ $EUID -ne 0 ]]; then
    exec sudo -E "$0" "$@"
  fi
}
part() {
  # Print the device path of partition number $1 on $NVME. Disk names that end
  # in a digit get a "p" between the disk name and the partition number;
  # other names take the number directly.
  local index="$1"
  local separator=""
  case "$NVME" in
    *[0-9]) separator="p" ;;
  esac
  echo "${NVME}${separator}${index}"
}
auto_detect_target_disk() {
  # Decide which disk to operate on and store it in the global NVME.
  #   1. A caller-supplied NVME is only validated.
  #   2. Otherwise prefer a /dev/disk/by-id/usb-SSK* symlink.
  #   3. Otherwise take the first USB SCSI device whose vendor/model text
  #      mentions SSK, Ingram or Storage (case-insensitive).
  # Aborts through fatal when nothing suitable is found.
  if [[ -n "${NVME:-}" ]]; then
    [[ -b "$NVME" ]] || fatal "NVME='$NVME' is not a block device"
    return
  fi

  # Prefer stable by-id symlinks
  local byid
  byid=$(ls -1 /dev/disk/by-id/usb-SSK* 2>/dev/null | head -n1 || true)
  if [[ -n "$byid" ]]; then
    NVME=$(readlink -f "$byid")
  else
    # awk has no /regex/i flag: `$3 ~ /SSK/i` concatenates the result of
    # matching $0 with the empty variable i and then matches $3 against "0"
    # or "1". Upper-case the line and use an upper-case pattern instead.
    # -n drops the header; $2 is the transport column.
    NVME=$(lsblk -S -p -n -o NAME,TRAN,VENDOR,MODEL | \
      awk '$2 == "usb" && toupper($0) ~ /SSK|INGRAM|STORAGE/ { print $1; exit }')
  fi
  [[ -n "${NVME:-}" && -b "$NVME" ]] || fatal "Could not auto-detect SSK USB NVMe dock. Export NVME=/dev/sdX and re-run."
  echo "Auto-detected target disk: $NVME"
}
preflight_cleanup() {
  # Best-effort removal of leftovers from an earlier run: image mounts and
  # loop devices, target mounts, and the cryptroot mapping. Nothing in here is
  # allowed to fail the script.
  local image="$IMG_DIR/$IMG_FILE"
  local target mount_point

  # 1) Image mount points, then only the loop devices backed by this image
  #    (losetup -j exits non-zero if there is no association).
  umount -lf "$IMG_BOOT_MNT" "$IMG_ROOT_MNT" 2>/dev/null || true
  { losetup -j "$image" | cut -d: -f1 | xargs -r losetup -d; } 2>/dev/null || true

  # 2) Our own target mount points
  umount -lf "$TGT_ROOT/boot/firmware" "$TGT_BOOT" "$TGT_ROOT" 2>/dev/null || true

  # 3) The target partitions, wherever else they may be mounted
  #    (findmnt returns 1 when nothing matches).
  for target in "$(part 1)" "$(part 2)"; do
    while read -r mount_point; do
      if [ -n "$mount_point" ]; then
        umount -lf "$mount_point" 2>/dev/null || true
      fi
    done < <(findmnt -rno TARGET -S "$target" 2>/dev/null || true)
  done

  # 4) The dm-crypt mapping, if it exists
  cryptsetup luksClose cryptroot 2>/dev/null || true
  dmsetup remove -f cryptroot 2>/dev/null || true

  # 5) Let udev settle
  if command -v udevadm >/dev/null; then
    udevadm settle || true
  fi
}
guard_target_device() {
  # Refuse to continue when $NVME is a disk that backs the running system's
  # root filesystem.
  #
  # A single `lsblk -no pkname` hop is not enough when / sits on a
  # device-mapper layer (LUKS, LVM): the parent is then a partition, never
  # equal to the disk. `lsblk -s` lists every device below the root source, so
  # each entry of type "disk" is compared instead. NVME is canonicalised so
  # /dev/disk/by-id/... spellings are caught as well.
  local target root_src disk
  target=$(readlink -f -- "$NVME")
  root_src=$(findmnt -no SOURCE /)
  # findmnt prints btrfs subvolumes as "/dev/xyz[/subvol]"; keep the device.
  root_src="${root_src%%\[*}"
  while read -r disk; do
    if [[ -n "$disk" && "$disk" == "$target" ]]; then
      fatal "Refusing to operate on system disk ($NVME). Pick the external NVMe."
    fi
  done < <(lsblk -s -l -n -p -o NAME,TYPE "$root_src" 2>/dev/null | awk '$2 == "disk" { print $1 }')
}
need_host_fido2() {
  # fido2-token must be available on the host; otherwise print install hints
  # and exit with status 1.
  command -v fido2-token >/dev/null 2>&1 && return 0

  echo "Host is missing fido2-token. On Arch: sudo pacman -S libfido2"
  echo "On Debian/Ubuntu host: sudo apt-get install fido2-tools"
  exit 1
}
ensure_image() {
  # Make sure the decompressed image exists at $IMG_DIR/$IMG_FILE, downloading
  # and decompressing the .xz release into the cache when necessary. The .xz
  # is kept for future runs.
  mkdir -p "$IMG_DIR"
  chmod 755 "$IMG_DIR"
  local BASE="https://cdimage.ubuntu.com/releases/noble/release"
  local XZ="ubuntu-24.04.3-preinstalled-${FLAVOR}-arm64+raspi.img.xz"
  local img="$IMG_DIR/$IMG_FILE"

  if [[ -f "$img" ]]; then
    echo "Using cached image: $img"
    return 0
  fi

  # Arch: pacman -S curl xz | Ubuntu: apt-get install curl xz-utils
  need curl
  # Either unxz or xz will do. Requiring unxz up front made the xz fallback
  # unreachable.
  local -a decompress
  if command -v unxz >/dev/null 2>&1; then
    decompress=(unxz -c)
  elif command -v xz >/dev/null 2>&1; then
    decompress=(xz -dc)
  else
    fatal "Missing tool: unxz (or xz)"
  fi

  # Download and decompress into ".part" files and rename on success, so an
  # interrupted run never leaves a truncated file that a later run would
  # mistake for a complete cache entry.
  if [[ ! -f "$IMG_DIR/$XZ" ]]; then
    echo "Fetching image…"
    curl -fL -o "$IMG_DIR/$XZ.part" "$BASE/$XZ"
    mv -f "$IMG_DIR/$XZ.part" "$IMG_DIR/$XZ"
  fi
  echo "Decompressing to $img"
  "${decompress[@]}" "$IMG_DIR/$XZ" > "$img.part"
  mv -f "$img.part" "$img"
  sync
}
ensure_binfmt_aarch64(){
  # Prepare the host for running ARM64 binaries in a chroot:
  #   * register the qemu-aarch64 binfmt handler (through Docker) if absent;
  #   * make sure a qemu-aarch64-static binary exists in /usr/local/bin,
  #     copying it out of the multiarch/qemu-user-static image when missing.
  local qemu=/usr/local/bin/qemu-aarch64-static

  if [[ ! -e /proc/sys/fs/binfmt_misc/qemu-aarch64 ]]; then
    need docker
    systemctl enable --now docker >/dev/null 2>&1 || true
    docker run --rm --privileged tonistiigi/binfmt --install arm64 >/dev/null
  fi

  if [[ ! -x "$qemu" ]]; then
    # This branch can run even when the handler was already registered, so it
    # needs its own docker check.
    need docker
    docker rm -f qemu-static >/dev/null 2>&1 || true
    docker create --name qemu-static docker.io/multiarch/qemu-user-static:latest >/dev/null
    docker cp qemu-static:/usr/bin/qemu-aarch64-static "$qemu"
    docker rm qemu-static >/dev/null
    # Only the mode needs fixing. `install SRC SRC` fails with "are the same
    # file", which aborted the script under `set -e`.
    chmod 755 "$qemu"
  fi
}
open_image() {
  # Attach the cached image to a loop device (stored in the global LOOP),
  # mount its boot and root partitions read-only on $IMG_BOOT_MNT and
  # $IMG_ROOT_MNT, and check that it looks like a bootable Pi image.
  # Installs an EXIT trap that unmounts both and detaches the loop device.
  local img="$IMG_DIR/$IMG_FILE"
  local stale
  [[ -r "$img" ]] || fatal "Image not found: $img"
  mkdir -p "$IMG_BOOT_MNT" "$IMG_ROOT_MNT"

  # Pre-clean: detach any previous loop(s) for this image (tolerate absence)
  umount -lf "$IMG_BOOT_MNT" 2>/dev/null || true
  umount -lf "$IMG_ROOT_MNT" 2>/dev/null || true
  # If no loop is attached, losetup -j returns non-zero → swallow it
  mapfile -t OLD < <({ losetup -j "$img" | cut -d: -f1; } 2>/dev/null || true)
  for stale in "${OLD[@]:-}"; do
    # "${OLD[@]:-}" yields one empty word when the array is empty; skip it.
    [[ -n "$stale" ]] || continue
    losetup -d "$stale" 2>/dev/null || true
  done
  command -v udevadm >/dev/null && udevadm settle || true

  # Attach with partition scan; wait for the partition nodes to appear
  LOOP=$(losetup --find --show --partscan "$img") || fatal "losetup failed"
  command -v udevadm >/dev/null && udevadm settle || true
  for _ in {1..25}; do
    [[ -b "${LOOP}p1" && -b "${LOOP}p2" ]] && break
    sleep 0.1
    command -v udevadm >/dev/null && udevadm settle || true
  done
  # Both partitions get mounted below, so both must exist (only p1 used to be
  # checked).
  [[ -b "${LOOP}p1" && -b "${LOOP}p2" ]] || fatal "loop partitions not present for $LOOP"

  # Cleanup on exit: unmount first, then detach loop (tolerate absence)
  trap 'umount -lf "'"$IMG_BOOT_MNT"'" "'"$IMG_ROOT_MNT"'" 2>/dev/null; losetup -d "'"$LOOP"'" 2>/dev/null' EXIT

  # Mount image partitions read-only
  mount -o ro "${LOOP}p1" "$IMG_BOOT_MNT"
  mount -o ro "${LOOP}p2" "$IMG_ROOT_MNT"

  # Sanity checks without using failing pipelines
  # start*.elf must exist
  if ! compgen -G "$IMG_BOOT_MNT/start*.elf" > /dev/null; then
    fatal "start*.elf not found in image"
  fi
  # vmlinuz-* must exist
  if ! compgen -G "$IMG_ROOT_MNT/boot/vmlinuz-*" > /dev/null; then
    fatal "vmlinuz-* not found in image root"
  fi
}
confirm_and_wipe(){
  # Show the target disk, demand the literal word WIPE, then destroy the
  # existing signatures and partition table and create the new layout.
  lsblk -o NAME,SIZE,MODEL,TRAN,LABEL "$NVME"
  read -rp "Type EXACTLY 'WIPE' to destroy ALL DATA on $NVME: " ACK
  if [[ "$ACK" != "WIPE" ]]; then
    fatal "Aborted"
  fi

  wipefs -a "$NVME"
  sgdisk -Zo "$NVME"

  # GPT: 1: 1MiB..513MiB vfat ESP; 2: rest LUKS
  parted -s "$NVME" mklabel gpt \
    mkpart system-boot fat32 1MiB 513MiB set 1 esp on \
    mkpart cryptroot 513MiB 100%
  partprobe "$NVME"
  sleep 1

  mkfs.vfat -F32 -n system-boot "$(part 1)"
}
setup_luks(){
  # Format partition 2 as LUKS2 (cryptsetup asks for the passphrase), open it
  # as /dev/mapper/cryptroot and create an ext4 filesystem labelled "rootfs".
  local luks_part
  luks_part="$(part 2)"
  echo "Create LUKS2 on ${luks_part} (you will be prompted for a passphrase; keep it as fallback)"
  need cryptsetup
  cryptsetup luksFormat --type luks2 "$luks_part"
  cryptsetup open "$luks_part" cryptroot
  mkfs.ext4 -L rootfs /dev/mapper/cryptroot
}
mount_targets(){
  # Mount the decrypted root on $TGT_ROOT and the FAT partition on $TGT_BOOT,
  # then bind the latter onto boot/firmware inside the root.
  local firmware_dir="$TGT_ROOT/boot/firmware"

  mkdir -p "$TGT_ROOT" "$TGT_BOOT"
  mount /dev/mapper/cryptroot "$TGT_ROOT"
  mkdir -p "$firmware_dir"
  mount "$(part 1)" "$TGT_BOOT"
  mount --bind "$TGT_BOOT" "$firmware_dir"
}
rsync_root_and_boot(){
  # Copy the image's root filesystem into the target root (skipping
  # pseudo-filesystems, scratch directories and the firmware directory), then
  # copy the image's boot partition into boot/firmware.
  need rsync
  local -a skip=(
    --exclude='/boot/firmware' --exclude='/boot/firmware/**'
    --exclude='/dev/*' --exclude='/proc/*' --exclude='/sys/*'
    --exclude='/run/*' --exclude='/tmp/*' --exclude='/mnt/*'
    --exclude='/media/*' --exclude='/lost+found'
  )
  rsync -aAXH --numeric-ids --delete "${skip[@]}" "$IMG_ROOT_MNT"/ "$TGT_ROOT"/
  rsync -aH --delete "$IMG_BOOT_MNT"/ "$TGT_ROOT/boot/firmware"/
}
write_crypttab_fstab(){
  # Write /etc/crypttab (keyed on the UUID of partition 2, FIDO2 unlock
  # enabled) and /etc/fstab into the target root. Sets the global LUUID.
  LUUID=$(blkid -s UUID -o value "$(part 2)")
  printf 'cryptroot UUID=%s none luks,discard,fido2-device=auto\n' "$LUUID" > "$TGT_ROOT/etc/crypttab"
  {
    printf '%s\n' '/dev/mapper/cryptroot / ext4 defaults,discard,errors=remount-ro 0 1'
    printf '%s\n' 'LABEL=system-boot /boot/firmware vfat defaults,umask=0077 0 1'
  } > "$TGT_ROOT/etc/fstab"
}
fix_firmware_files(){
  # Adjust config.txt on the target boot partition and rewrite cmdline.txt so
  # the Pi boots the uncompressed kernel with an initramfs that opens the LUKS
  # root.
  local cfg="$TGT_ROOT/boot/firmware/config.txt"
  local cmdline="$TGT_ROOT/boot/firmware/cmdline.txt"
  local setting luks_uuid
  [[ -f "$cfg" ]] || fatal "missing $cfg"

  # Always boot the uncompressed Pi 5 kernel
  if grep -q '^kernel=' "$cfg"; then
    sed -i 's#^kernel=.*#kernel=kernel_2712.img#' "$cfg"
  else
    sed -i '1i kernel=kernel_2712.img' "$cfg"
  fi

  # Ensure initramfs and cmdline indirection are set
  grep -q '^initramfs ' "$cfg" || echo 'initramfs initrd.img followkernel' >> "$cfg"
  grep -q '^cmdline=cmdline.txt' "$cfg" || sed -i '1i cmdline=cmdline.txt' "$cfg"

  # Display & buses (Pi 5): append each setting unless a line already starts
  # with it.
  for setting in \
    'dtoverlay=vc4-kms-v3d-pi5' \
    'dtparam=i2c_arm=on' \
    'dtparam=pciex1=on' \
    'dtparam=pciex1_gen=2' \
    'enable_uart=1'; do
    grep -q "^${setting}" "$cfg" || echo "$setting" >> "$cfg"
  done

  # Minimal dracut hints using the bare UUID; cmdline.txt is one single line.
  luks_uuid=$(blkid -s UUID -o value "$(part 2)")
  printf '%s %s %s\n' \
    "rd.luks.uuid=$luks_uuid rd.luks.name=$luks_uuid=cryptroot" \
    "root=/dev/mapper/cryptroot rootfstype=ext4 rootwait fixrtc" \
    "console=serial0,115200 console=tty1 ds=nocloud;s=file:///boot/firmware/ ${DSI_FLAGS} rd.debug" \
    > "$cmdline"
}
seed_cloud_init(){
# Write the cloud-init NoCloud seed files (user-data and meta-data) onto the
# target boot partition. The heredoc below is unquoted, so $STYX_HOSTNAME,
# $STYX_USER, $SSH_PUBKEY and $STYX_PASS are expanded when the file is
# written.
# NOTE(review): the initial password is therefore stored in clear text in
# user-data on the unencrypted boot partition - confirm this is acceptable.
# NoCloud seed to create user, lock down SSH, set hostname, and enable avahi.
cat > "$TGT_ROOT/boot/firmware/user-data" <<EOF
#cloud-config
hostname: $STYX_HOSTNAME
manage_etc_hosts: true
users:
- name: $STYX_USER
gecos: "$STYX_USER"
shell: /bin/bash
groups: [sudo,video,i2c]
sudo: ALL=(ALL) NOPASSWD:ALL
lock_passwd: false
ssh_authorized_keys:
- $SSH_PUBKEY
chpasswd:
list: |
$STYX_USER:$STYX_PASS
expire: true
ssh_pwauth: false
package_update: true
packages: [openssh-server, avahi-daemon]
runcmd:
- systemctl enable --now ssh
- systemctl enable --now avahi-daemon || true
EOF
# Minimal meta-data for NoCloud
# The instance id embeds the current epoch seconds, so every run of this
# script produces a new id.
date +%s | awk '{print "instance-id: iid-titan-ag-"$1"\nlocal-hostname: '"$STYX_HOSTNAME"'"}' \
> "$TGT_ROOT/boot/firmware/meta-data"
}
prep_chroot_mounts(){
  # Make the target root usable as a chroot: bind the kernel filesystems, give
  # it a working resolv.conf, block service starts and copy in the static
  # qemu binary.
  local kernel_fs
  for kernel_fs in dev proc sys; do
    mount --bind "/${kernel_fs}" "$TGT_ROOT/${kernel_fs}"
  done
  mount -t devpts devpts "$TGT_ROOT/dev/pts"

  # Replace the usual resolv.conf symlink with a real file for apt to work
  rm -f "$TGT_ROOT/etc/resolv.conf"
  cp /etc/resolv.conf "$TGT_ROOT/etc/resolv.conf"

  # Block service starts (no systemd in chroot): the policy script exits 101.
  printf '%s\n' '#!/bin/sh' 'exit 101' > "$TGT_ROOT/usr/sbin/policy-rc.d"
  chmod +x "$TGT_ROOT/usr/sbin/policy-rc.d"

  # Ensure qemu static is present inside chroot
  install -D -m755 /usr/local/bin/qemu-aarch64-static "$TGT_ROOT/usr/bin/qemu-aarch64-static"
}
in_chroot(){
# Run the provisioning script below inside the target root, through the static
# qemu binary that prep_chroot_mounts copied in. The script is one
# single-quoted string:
#   - '"'"'EOS'"'"' is how a quoted heredoc delimiter ('EOS') is written from
#     inside that single-quoted string;
#   - '"$STYX_HOSTNAME"' steps out of the quotes so the host shell expands the
#     hostname before the script is handed to the chroot.
# Steps, in order: apt sources, snap removal, base packages, persistent
# journal, sshd settings, hostname, service symlinks, i2c group, dracut
# configuration, initramfs build, kernel copy/decompression, Pi 5 DTB copy,
# and a dracut module that copies rdsosreport.txt to the FAT partition.
chroot "$TGT_ROOT" /usr/bin/qemu-aarch64-static /bin/bash -lc '
set -euo pipefail
export DEBIAN_FRONTEND=noninteractive TZ=Etc/UTC
# --- APT sources (ports) ---
cat > /etc/apt/sources.list <<'"'"'EOS'"'"'
deb http://ports.ubuntu.com/ubuntu-ports noble main restricted universe multiverse
deb http://ports.ubuntu.com/ubuntu-ports noble-updates main restricted universe multiverse
deb http://ports.ubuntu.com/ubuntu-ports noble-security main restricted universe multiverse
EOS
apt-get update
# --- Remove snaps and pin them off ---
apt-get -y purge snapd || true
rm -rf /snap /var/snap /var/lib/snapd /home/*/snap || true
mkdir -p /etc/apt/preferences.d
cat > /etc/apt/preferences.d/nosnap.pref <<'"'"'EOS'"'"'
Package: snapd
Pin: release *
Pin-Priority: -10
EOS
# --- Base tools (no flash-kernel; we use dracut) ---
apt-get install -y --no-install-recommends \
openssh-client openssh-server openssh-sftp-server avahi-daemon \
cryptsetup dracut fido2-tools libfido2-1 i2c-tools \
python3-smbus python3-pil zbar-tools qrencode lm-sensors \
file zstd lz4 || true
# Camera apps: try rpicam-apps; otherwise basic libcamera tools
apt-get install -y rpicam-apps || apt-get install -y libcamera-tools || true
# --- Persistent journal so we can read logs after failed boot ---
mkdir -p /etc/systemd/journald.conf.d
cat > /etc/systemd/journald.conf.d/99-persistent.conf <<'"'"'EOS'"'"'
[Journal]
Storage=persistent
EOS
# --- SSH hardening (ensure file exists even if package was half-installed) ---
if [ ! -f /etc/ssh/sshd_config ]; then
mkdir -p /etc/ssh
cat > /etc/ssh/sshd_config <<'"'"'EOS'"'"'
PermitRootLogin no
PasswordAuthentication no
KbdInteractiveAuthentication no
PubkeyAuthentication yes
# Accept defaults for the rest
EOS
fi
sed -i -e "s/^#\?PasswordAuthentication .*/PasswordAuthentication no/" \
-e "s/^#\?KbdInteractiveAuthentication .*/KbdInteractiveAuthentication no/" \
-e "s/^#\?PermitRootLogin .*/PermitRootLogin no/" \
-e "s/^#\?PubkeyAuthentication .*/PubkeyAuthentication yes/" /etc/ssh/sshd_config || true
# --- Hostname & hosts ---
echo "'"$STYX_HOSTNAME"'" > /etc/hostname
if grep -q "^127\\.0\\.1\\.1" /etc/hosts; then
sed -i "s/^127\\.0\\.1\\.1.*/127.0.1.1\t'"$STYX_HOSTNAME"'/" /etc/hosts
else
echo -e "127.0.1.1\t'"$STYX_HOSTNAME"'" >> /etc/hosts
fi
# --- Enable services on first boot ---
mkdir -p /etc/systemd/system/multi-user.target.wants
ln -sf /lib/systemd/system/ssh.service /etc/systemd/system/multi-user.target.wants/ssh.service
ln -sf /lib/systemd/system/avahi-daemon.service /etc/systemd/system/multi-user.target.wants/avahi-daemon.service || true
# --- Ensure i2c group ---
getent group i2c >/dev/null || groupadd i2c
# --- Dracut configuration (generic, not host-only) ---
mkdir -p /etc/dracut.conf.d
cat > /etc/dracut.conf.d/00-hostonly.conf <<'"'"'EOS'"'"'
hostonly=no
EOS
cat > /etc/dracut.conf.d/10-systemd-crypt.conf <<'"'"'EOS'"'"'
add_dracutmodules+=" systemd crypt "
EOS
cat > /etc/dracut.conf.d/20-drivers.conf <<'"'"'EOS'"'"'
add_drivers+=" nvme xhci_pci xhci_hcd usbhid hid_generic hid "
EOS
cat > /etc/dracut.conf.d/30-fido2.conf <<'"'"'EOS'"'"'
install_items+="/usr/bin/systemd-cryptsetup /usr/bin/fido2-token /usr/lib/*/libfido2.so* /usr/lib/*/libcbor.so*"
EOS
# --- Build initramfs and place it where firmware expects it ---
KVER=$(ls -1 /lib/modules | sort -V | tail -n1)
dracut --force /boot/initramfs-$KVER.img $KVER
ln -sf initramfs-$KVER.img /boot/initrd.img
ln -sf initramfs-$KVER.img /boot/initrd.img-$KVER
cp -a /boot/initramfs-$KVER.img /boot/firmware/initrd.img
# --- Create uncompressed kernel for Pi 5 firmware ---
if [ -f "/usr/lib/linux-image-$KVER/Image" ]; then
cp -a "/usr/lib/linux-image-$KVER/Image" /boot/firmware/kernel_2712.img
else
FMT=$(file -b "/boot/vmlinuz-$KVER" || true)
case "$FMT" in
*Zstandard*|*zstd*) zstd -dc "/boot/vmlinuz-$KVER" > /boot/firmware/kernel_2712.img ;;
*LZ4*) lz4 -dc "/boot/vmlinuz-$KVER" > /boot/firmware/kernel_2712.img ;;
*gzip*) zcat "/boot/vmlinuz-$KVER" > /boot/firmware/kernel_2712.img ;;
*) cp -a "/boot/vmlinuz-$KVER" /boot/firmware/kernel_2712.img ;;
esac
fi
# --- Ensure Pi 5 DTB is present on the boot partition ---
DTB=$(find /lib/firmware -type f -name "bcm2712-rpi-5-b.dtb" | sort | tail -n1 || true)
[ -n "$DTB" ] && cp -a "$DTB" /boot/firmware/
# --- Dracut hook to copy rdsosreport.txt to the FAT partition on failure ---
mkdir -p /usr/lib/dracut/modules.d/99copylog
cat > /usr/lib/dracut/modules.d/99copylog/module-setup.sh <<'"'"'EOS'"'"'
#!/bin/bash
check() { return 0; }
depends() { echo base; return 0; }
install() {
# Guard $moddir for nounset; derive if absent
local mdir="${moddir:-$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)}"
inst_hook emergency 99 "$mdir/copylog.sh"
}
EOS
chmod +x /usr/lib/dracut/modules.d/99copylog/module-setup.sh
cat > /usr/lib/dracut/modules.d/99copylog/copylog.sh <<'"'"'EOS'"'"'
#!/bin/sh
set -e
for dev in /dev/nvme0n1p1 /dev/sda1 /dev/sdb1 /dev/mmcblk0p1; do
[ -b "$dev" ] || continue
mkdir -p /mnt/bootfat
if mount -t vfat "$dev" /mnt/bootfat 2>/dev/null; then
if [ -s /run/initramfs/rdsosreport.txt ]; then
cp -f /run/initramfs/rdsosreport.txt /mnt/bootfat/rdsosreport.txt 2>/dev/null || true
sync || true
fi
umount /mnt/bootfat || true
break
fi
done
EOS
chmod +x /usr/lib/dracut/modules.d/99copylog/copylog.sh
# Rebuild to ensure the copylog module is included
dracut --force /boot/initramfs-$KVER.img $KVER
ln -sf initramfs-$KVER.img /boot/initrd.img
cp -a /boot/initramfs-$KVER.img /boot/firmware/initrd.img
true
'
}
verify_boot_assets(){
  # Print a human-readable summary of what ended up on the FAT boot partition.
  # Purely informational: every check tolerates failure.
  local fw="$TGT_ROOT/boot/firmware"

  echo "---- verify boot assets on FAT ----"
  file "$fw/kernel_2712.img" || true
  ls -lh "$fw/initrd.img" || true

  echo "-- config.txt (key lines) --"
  grep -E '^(kernel|initramfs|cmdline)=|^dtoverlay=|^dtparam=' "$fw/config.txt" || true

  echo "-- cmdline.txt --"
  cat "$fw/cmdline.txt" || true

  echo "-- firmware blobs (sample) --"
  ls -1 "$fw"/start*.elf "$fw"/fixup*.dat | head -n 8 || true

  echo "-- Pi5 DTB --"
  ls -l "$fw/"*rpi-5-b.dtb || true
}
enroll_fido_tokens(){
  # Enroll every FIDO2 token currently plugged into the host as an unlock
  # method for the LUKS partition (partition 2).
  # Returns 0 when no token is present or all enrollments succeeded, 1 when
  # at least one enrollment failed.
  local luks_dev
  luks_dev="$(part 2)"
  echo "Enrolling FIDO2 Solo keys into $luks_dev ..."
  need systemd-cryptenroll
  need fido2-token

  # Collect all hidraw paths from both output styles (some distros print
  # 'Device: /dev/hidrawX')
  mapfile -t DEVS < <(
    fido2-token -L \
      | sed -n 's,^\(/dev/hidraw[0-9]\+\):.*,\1,p; s,^Device:[[:space:]]\+/dev/hidraw\([0-9]\+\).*,/dev/hidraw\1,p' \
      | sort -u
  )
  if (( ${#DEVS[@]} == 0 )); then
    echo "No FIDO2 tokens detected; skipping enrollment (you can enroll later)."
    echo "Example later: systemd-cryptenroll $luks_dev --fido2-device=/dev/hidrawX --fido2-with-client-pin=no"
    return 0
  fi

  # Recommend keeping exactly ONE key plugged during first enrollment to
  # avoid ambiguity.
  if (( ${#DEVS[@]} > 1 )); then
    echo "Note: multiple FIDO2 tokens present: ${DEVS[*]}"
    echo "If enrollment fails, try with only one key inserted."
  fi

  local rc=0
  for D in "${DEVS[@]}"; do
    echo "-> Enrolling $D (you should be asked to touch the key)"
    # systemd-cryptenroll(1) documents no --label option, so passing one made
    # every enrollment fail on option parsing; it is dropped here.
    if ! SYSTEMD_LOG_LEVEL=debug systemd-cryptenroll "$luks_dev" \
        --fido2-device="$D" \
        --fido2-with-client-pin=no \
        --fido2-with-user-presence=yes \
        --fido2-with-user-verification=no; then
      echo "WARN: enrollment failed for $D"
      rc=1
    fi
  done

  echo "Tokens enrolled (if any):"
  # With no operation given, systemd-cryptenroll lists the enrolled slots;
  # there is no documented --list option.
  systemd-cryptenroll "$luks_dev" || true
  return $rc
}
cleanup(){
  # Tear down everything the run mounted or opened. Every step is best-effort.
  local mount_point

  rm -f "$TGT_ROOT/usr/sbin/policy-rc.d" || true

  # Chroot bind mounts first, then the target filesystems themselves.
  for mount_point in \
    "$TGT_ROOT/dev/pts" "$TGT_ROOT/dev" "$TGT_ROOT/proc" "$TGT_ROOT/sys" \
    "$TGT_ROOT/boot/firmware" "$TGT_BOOT" "$TGT_ROOT"; do
    umount -lf "$mount_point" 2>/dev/null || true
  done

  cryptsetup close cryptroot 2>/dev/null || true

  # Finally the read-only image mounts.
  for mount_point in "$IMG_BOOT_MNT" "$IMG_ROOT_MNT"; do
    umount -lf "$mount_point" 2>/dev/null || true
  done
}
main(){
  # Entry point: check prerequisites, pick and guard the target disk, write an
  # encrypted Ubuntu install onto it, enroll FIDO2 keys, clean up.

  # Forward the script's arguments so the sudo re-exec sees them too.
  require_root "$@"
  need losetup; need parted; need rsync
  # Check the FIDO2 tooling before anything destructive happens. It used to
  # run only after the disk had been wiped and populated, where a missing
  # tool exited the script with everything still mounted.
  need_host_fido2

  auto_detect_target_disk
  echo "Target disk: $NVME"
  # The system-disk guard must come before preflight_cleanup, which lazily
  # unmounts whatever is mounted from the target's partitions.
  guard_target_device

  ensure_binfmt_aarch64
  ensure_image
  preflight_cleanup
  open_image
  confirm_and_wipe
  setup_luks
  mount_targets
  rsync_root_and_boot
  write_crypttab_fstab
  fix_firmware_files
  seed_cloud_init
  prep_chroot_mounts
  in_chroot
  verify_boot_assets

  # A failed enrollment must not skip cleanup: remember the status, unmount,
  # then report it.
  local enroll_rc=0
  enroll_fido_tokens || enroll_rc=$?
  cleanup
  if (( enroll_rc != 0 )); then
    echo "WARN: FIDO2 enrollment did not fully succeed; the passphrase still unlocks the disk." >&2
    return "$enroll_rc"
  fi

  echo "✅ NVMe prepared."
  echo " Install in the Pi 5 and boot with no SD."
  echo " Expect LUKS to unlock automatically with a Solo key inserted;"
  echo " passphrase fallback remains. Hostname: ${STYX_HOSTNAME} User: ${STYX_USER}"
  echo " On first boot, reach it via: ssh -i ~/.ssh/id_ed25519_titan styx@titan-ag.local"
}
main "$@"

View File

@ -0,0 +1,58 @@
import importlib.util
import pathlib
def load_module():
    """Load ``dashboards_render_atlas.py`` by path and return it as a module.

    The script is looked up one directory above the folder containing this
    test file and executed into a fresh module object on every call.
    """
    script = pathlib.Path(__file__).resolve().parents[1] / "dashboards_render_atlas.py"
    module_spec = importlib.util.spec_from_file_location("dashboards_render_atlas", script)
    loaded = importlib.util.module_from_spec(module_spec)
    loader = module_spec.loader
    assert loader is not None
    loader.exec_module(loaded)
    return loaded
def test_table_panel_options_and_filterable():
    """``table_panel`` carries unit, filterable and format into the panel dict."""
    renderer = load_module()
    grid = {"h": 1, "w": 1, "x": 0, "y": 0}
    extras = {
        "unit": "percent",
        "transformations": [{"id": "labelsToFields", "options": {}}],
        "instant": True,
        "options": {"showColumnFilters": False},
        "filterable": False,
        "footer": {"show": False, "fields": "", "calcs": []},
        "format": "table",
    }

    panel = renderer.table_panel(1, "test", "metric", grid, **extras)

    defaults = panel["fieldConfig"]["defaults"]
    assert defaults["unit"] == "percent"
    assert defaults["custom"]["filterable"] is False
    assert panel["options"]["showHeader"] is True
    assert panel["targets"][0]["format"] == "table"
def test_node_filter_and_expr_helpers():
    """The node expression helpers mention the expected PromQL function/metrics."""
    renderer = load_module()
    pattern = "titan-.*"

    assert "label_replace" in renderer.node_filter(pattern)

    # Each helper is paired with the metric name its expression must contain.
    expectations = (
        (renderer.node_cpu_expr, "node_cpu_seconds_total"),
        (renderer.node_mem_expr, "node_memory_MemAvailable_bytes"),
    )
    for build_expr, metric in expectations:
        assert metric in build_expr(pattern)
def test_render_configmap_writes(tmp_path):
    """``write_json`` + ``render_configmap`` produce the JSON file and a ConfigMap."""
    renderer = load_module()
    # Redirect the module's output locations into the pytest temp directory.
    renderer.DASHBOARD_DIR = tmp_path / "dash"
    renderer.ROOT = tmp_path
    uid = "atlas-test"
    configmap_path = tmp_path / "cm.yaml"

    renderer.write_json(uid, {"title": "Atlas Test"})
    renderer.render_configmap(uid, {"configmap": configmap_path})

    assert (renderer.DASHBOARD_DIR / f"{uid}.json").exists()
    rendered = configmap_path.read_text()
    assert "kind: ConfigMap" in rendered
    assert f"{uid}.json" in rendered

View File

@ -0,0 +1,181 @@
import importlib.util
import pathlib
import pytest
def load_sync_module(monkeypatch):
    """Load ``mailu_sync.py`` by path with the environment it needs at import.

    The environment variables are set through ``monkeypatch`` before the
    module body is executed; a fresh module object is returned on every call.
    """
    # Minimal env required by module import
    required_env = dict(
        KEYCLOAK_BASE_URL="http://keycloak",
        KEYCLOAK_REALM="atlas",
        KEYCLOAK_CLIENT_ID="mailu-sync",
        KEYCLOAK_CLIENT_SECRET="secret",
        MAILU_DOMAIN="example.com",
        MAILU_DB_HOST="localhost",
        MAILU_DB_PORT="5432",
        MAILU_DB_NAME="mailu",
        MAILU_DB_USER="mailu",
        MAILU_DB_PASSWORD="pw",
    )
    for name, value in required_env.items():
        monkeypatch.setenv(name, value)

    script = pathlib.Path(__file__).resolve().parents[1] / "mailu_sync.py"
    module_spec = importlib.util.spec_from_file_location("mailu_sync_testmod", script)
    loaded = importlib.util.module_from_spec(module_spec)
    loader = module_spec.loader
    assert loader is not None
    loader.exec_module(loaded)
    return loaded
def test_random_password_length_and_charset(monkeypatch):
    """``random_password`` returns 24 characters, all alphanumeric."""
    sync = load_sync_module(monkeypatch)
    password = sync.random_password()
    assert len(password) == 24
    unexpected = [ch for ch in password if not ch.isalnum()]
    assert not unexpected
class _FakeResponse:
    """Minimal stand-in for an HTTP response used by the fake sessions.

    Provides only what these tests touch: ``status_code``,
    ``raise_for_status()`` and ``json()``.
    """

    def __init__(self, json_data=None, status=200):
        # Fall back to an empty dict only when no payload was supplied, so an
        # explicit falsy payload such as ``[]`` is returned unchanged.
        self._json_data = {} if json_data is None else json_data
        self.status_code = status

    def raise_for_status(self):
        """Raise ``AssertionError`` when the status code is 400 or higher."""
        if self.status_code >= 400:
            raise AssertionError(f"status {self.status_code}")

    def json(self):
        """Return the payload given at construction time."""
        return self._json_data
class _FakeSession:
    """Session double that hands back canned responses and records calls.

    ``put`` and ``get`` return the responses given to the constructor and flip
    ``put_called`` / ``get_called``; ``post`` always returns a dummy token.
    """

    def __init__(self, put_resp, get_resp):
        self.put_resp, self.get_resp = put_resp, get_resp
        self.put_called = self.get_called = False

    def post(self, *_args, **_kwargs):
        return _FakeResponse({"access_token": "dummy"})

    def put(self, *_args, **_kwargs):
        self.put_called = True
        return self.put_resp

    def get(self, *_args, **_kwargs):
        self.get_called = True
        return self.get_resp
def test_kc_update_attributes_succeeds(monkeypatch):
    """The update issues both a PUT and a GET when the attribute is read back."""
    sync = load_sync_module(monkeypatch)
    put_reply = _FakeResponse({})
    get_reply = _FakeResponse({"attributes": {"mailu_app_password": ["abc"]}})
    sync.SESSION = _FakeSession(put_reply, get_reply)
    user = {"id": "u1", "username": "u1"}

    sync.kc_update_attributes("token", user, {"mailu_app_password": "abc"})

    assert sync.SESSION.put_called
    assert sync.SESSION.get_called
def test_kc_update_attributes_raises_without_attribute(monkeypatch):
    """The update raises when the read-back user carries no attributes."""
    sync = load_sync_module(monkeypatch)
    put_reply = _FakeResponse({})
    get_reply = _FakeResponse({"attributes": {}}, status=200)
    sync.SESSION = _FakeSession(put_reply, get_reply)
    user = {"id": "u1", "username": "u1"}
    wanted = {"mailu_app_password": "abc"}

    with pytest.raises(Exception):
        sync.kc_update_attributes("token", user, wanted)
def test_kc_get_users_paginates(monkeypatch):
    """``kc_get_users`` keeps fetching until a page comes back empty."""
    sync = load_sync_module(monkeypatch)

    class _PagedSession:
        """Serves one page of two users, then empty pages; counts GET calls."""

        def __init__(self):
            self.calls = 0

        def post(self, *_, **__):
            return _FakeResponse({"access_token": "tok"})

        def get(self, *_, **__):
            self.calls += 1
            is_first_page = self.calls == 1
            # Any call after the first yields an empty page to stop pagination.
            page = [{"id": "u1"}, {"id": "u2"}] if is_first_page else []
            return _FakeResponse(page)

    sync.SESSION = _PagedSession()

    users = sync.kc_get_users("tok")

    assert [u["id"] for u in users] == ["u1", "u2"]
    assert sync.SESSION.calls == 2
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
    """No SQL is executed for an address outside the configured MAILU_DOMAIN."""
    sync = load_sync_module(monkeypatch)
    statements = []

    class _RecordingCursor:
        def execute(self, sql, params):
            statements.append((sql, params))

    cursor = _RecordingCursor()
    sync.ensure_mailu_user(cursor, "user@other.com", "pw", "User")

    assert not statements
def test_ensure_mailu_user_upserts(monkeypatch):
    """An in-domain address is written with email, localpart and a non-raw password."""
    sync = load_sync_module(monkeypatch)
    bound_params = {}

    class _CapturingCursor:
        def execute(self, sql, params):
            bound_params.update(params)

    cursor = _CapturingCursor()
    sync.ensure_mailu_user(cursor, "user@example.com", "pw", "User Example")

    assert bound_params["email"] == "user@example.com"
    assert bound_params["localpart"] == "user"
    # password should be hashed, not the raw string
    assert bound_params["password"] != "pw"
def test_main_generates_password_and_upserts(monkeypatch):
    """``main`` backfills one missing password and executes two statements.

    Keycloak and the database are replaced with in-memory doubles; three users
    are fed in: one without a password attribute, one with it, and one whose
    email is outside the configured domain.
    """
    sync = load_sync_module(monkeypatch)
    users = [
        {"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}},
        {"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}},
        {"id": "u3", "username": "user3", "email": "user3@other.com", "attributes": {}},
    ]
    updated = []
    conns = []

    class _Cursor:
        def __init__(self):
            self.executions = []

        def execute(self, sql, params):
            self.executions.append(params)

        def close(self):
            return None

    class _Conn:
        def __init__(self):
            self.autocommit = False
            self._cursor = _Cursor()

        def cursor(self, cursor_factory=None):
            return self._cursor

        def close(self):
            return None

    def _record_update(token, user, attrs):
        updated.append((user["id"], attrs["mailu_app_password"]))

    def _connect(**kwargs):
        conn = _Conn()
        conns.append(conn)
        return conn

    monkeypatch.setattr(sync, "get_kc_token", lambda: "tok")
    monkeypatch.setattr(sync, "kc_get_users", lambda token: users)
    monkeypatch.setattr(sync, "kc_update_attributes", _record_update)
    monkeypatch.setattr(sync.psycopg2, "connect", _connect)

    sync.main()

    # only one missing attr was backfilled
    assert len(updated) == 1
    # Should attempt two inserts (third user skipped due to domain mismatch)
    assert conns
    assert len(conns[0]._cursor.executions) == 2

View File

@ -5,7 +5,7 @@ metadata:
name: gitea-ingress
namespace: gitea
annotations:
cert-manager.io/cluster-issuer: "letsencrypt-prod"
cert-manager.io/cluster-issuer: letsencrypt
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
tls:

View File

@ -0,0 +1,49 @@
# services/gitops-ui/helmrelease.yaml
# Flux HelmRelease that installs the Weave GitOps server chart from the
# weave-gitops-upstream GitRepository and exposes it at cd.bstein.dev.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: weave-gitops
  namespace: flux-system
spec:
  interval: 30m
  chart:
    spec:
      chart: ./charts/gitops-server
      sourceRef:
        kind: GitRepository
        name: weave-gitops-upstream
        namespace: flux-system
      # track upstream tag; see source object for version pin
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
      remediateLastFailure: true
    cleanupOnFail: true
  values:
    adminUser:
      create: true
      createClusterRole: true
      createSecret: true
      username: admin
      # bcrypt hash of a temporary bootstrap password (rotate after login).
      # Never record the plaintext password in this file.
      passwordHash: "$2y$12$wDEOzR1Gc2dbvNSJ3ZXNdOBVFEjC6YASIxnZmHIbO.W1m0fie/QVi"
    ingress:
      enabled: true
      className: traefik
      annotations:
        # NOTE(review): the gitea ingress uses the `letsencrypt` issuer;
        # confirm a `letsencrypt-prod` ClusterIssuer exists in this cluster.
        cert-manager.io/cluster-issuer: letsencrypt-prod
        traefik.ingress.kubernetes.io/router.entrypoints: websecure
      hosts:
        - host: cd.bstein.dev
          paths:
            - path: /
              pathType: Prefix
      tls:
        - secretName: gitops-ui-tls
          hosts:
            - cd.bstein.dev
    metrics:
      enabled: true

View File

@ -0,0 +1,7 @@
# services/gitops-ui/kustomization.yaml
# Kustomize entry point for the gitops-ui service: bundles the two manifests
# in this directory and sets their namespace to flux-system.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: flux-system
resources:
  # Listed source first, then the HelmRelease that references it.
  - source.yaml
  - helmrelease.yaml

View File

@ -0,0 +1,11 @@
# services/gitops-ui/source.yaml
# Flux GitRepository pointing at the upstream Weave GitOps repository; the
# weave-gitops HelmRelease references it by name as its chart source.
apiVersion: source.toolkit.fluxcd.io/v1
kind: GitRepository
metadata:
  name: weave-gitops-upstream
  namespace: flux-system
spec:
  # How often Flux re-checks the repository.
  interval: 1h
  url: https://github.com/weaveworks/weave-gitops.git
  ref:
    # Version pin: change this tag to move to another upstream release.
    tag: v0.38.0

View File

@ -23,6 +23,11 @@ spec:
spec:
nodeSelector:
jellyfin: "true"
securityContext:
runAsUser: 1000
fsGroup: 65532
fsGroupChangePolicy: OnRootMismatch
runAsGroup: 65532
runtimeClassName: nvidia
containers:
- name: jellyfin
@ -36,6 +41,12 @@ spec:
value: "compute,video,utility"
- name: JELLYFIN_PublishedServerUrl
value: "https://stream.bstein.dev"
- name: PUID
value: "1000"
- name: PGID
value: "65532"
- name: UMASK
value: "002"
resources:
limits:
nvidia.com/gpu: 1
@ -64,4 +75,4 @@ spec:
claimName: jellyfin-cache-astreae
- name: media
persistentVolumeClaim:
claimName: jellyfin-media-asteria
claimName: jellyfin-media-asteria-new

Some files were not shown because too many files have changed in this diff Show More