Compare commits

..

414 Commits

Author SHA1 Message Date
6e4cafa3df maintenance: harden metis recovery and fix harbor rollout 2026-03-31 14:51:49 -03:00
41021c472b maintenance/jenkins: align Metis ingress, sentinel push, and CI job 2026-03-31 14:21:53 -03:00
17afb0bb55 maintenance: add Metis service and sentinel manifests 2026-03-31 14:07:17 -03:00
1e0e73a28f monitoring: combine Ariadne and Metis tests 2026-03-31 13:54:04 -03:00
af01a620c3 monitoring: roll grafana to apply latest alert rules 2026-03-30 18:41:21 -03:00
0edc513e2e monitoring: raise rootfs warning threshold to 85 percent 2026-03-30 18:40:59 -03:00
3659c9c07b maintenance: unblock sweeper rollouts on degraded nodes 2026-03-30 18:39:05 -03:00
11d58dccb7 maintenance: run image sweeper periodically for sd safety 2026-03-30 18:36:25 -03:00
5bcff5f405 monitoring: tame email noise and harden postmark alerts 2026-03-30 18:32:22 -03:00
f5dcea860e atlasbot: wire context and timeout fallbacks 2026-03-30 16:55:19 -03:00
a1e90f4600 atlasbot: wire quick smart genius modes 2026-03-30 16:51:23 -03:00
f04f032721 longhorn: avoid webhook deadlock and forced image pulls 2026-03-30 10:16:42 -03:00
083999c84c comms: harden matrix auth ingress routes for MAS 2026-03-30 08:21:19 -03:00
dc62a84e2e flux: keep feature branch tracking until main push is available 2026-03-30 07:57:13 -03:00
31ffaedf2a flux: target main branch for sync and image automation 2026-03-30 07:48:47 -03:00
b2d1dc4e3f flux: return sync and image automation branches to master 2026-03-30 07:48:09 -03:00
flux-bot
271a941d89 chore(atlasbot): automated image update 2026-03-30 10:47:00 +00:00
flux-bot
fa30a2cade chore(atlasbot): automated image update 2026-03-30 07:10:35 +00:00
f71d0bc3f3 atlasbot: switch quick mode to 7b fast model 2026-03-30 04:07:08 -03:00
flux-bot
19a3207eac chore(atlasbot): automated image update 2026-03-30 07:04:35 +00:00
2d5107f7e2 bstein-dev-home: deploy backend image 0.1.1-123 2026-03-30 03:54:39 -03:00
a091ea75a3 atlasbot: deploy matrix timeout fix image 0.1.2-103 2026-03-30 03:51:30 -03:00
95dabf5df8 atlasbot: disable ollama retries for strict quick budgets 2026-03-30 03:50:59 -03:00
flux-bot
311cec8adf chore(bstein-dev-home): automated image update 2026-03-30 06:46:11 +00:00
flux-bot
b18e355412 chore(atlasbot): automated image update 2026-03-30 06:45:32 +00:00
flux-bot
80057210fc chore(bstein-dev-home): automated image update 2026-03-30 06:38:10 +00:00
flux-bot
7a1e99a95e chore(bstein-dev-home): automated image update 2026-03-30 06:34:10 +00:00
flux-bot
ace86ad736 chore(bstein-dev-home): automated image update 2026-03-30 06:29:09 +00:00
flux-bot
2a4deb6dd1 chore(atlasbot): automated image update 2026-03-30 06:25:30 +00:00
flux-bot
eee5456921 chore(atlasbot): automated image update 2026-03-30 05:55:27 +00:00
f86d3a4c00 atlasbot: cap quick runtime and expose genius model to portal 2026-03-30 02:53:06 -03:00
a6b77c68f0 maintenance: grant ariadne auth-delegator 2026-02-08 09:55:20 -03:00
9599b4c975 ariadne: use vault-admin role for vault config 2026-02-07 22:34:10 -03:00
df96c06fa2 ariadne: run image sweeper daily 2026-02-07 11:11:41 -03:00
e575e6cb1e gitea: prefer rpi5 nodes 2026-02-07 11:07:02 -03:00
flux-bot
bca66c5d71 chore(maintenance): automated image update 2026-02-07 13:56:49 +00:00
b2affe091d maintenance: align vault role env 2026-02-07 10:51:20 -03:00
flux-bot
6c7f2112c2 chore(atlasbot): automated image update 2026-02-07 13:50:38 +00:00
a4874163ec infra: bias gitea/monerod placement, bump synapse ensure job 2026-02-07 10:48:48 -03:00
079f8efbb9 comms: run synapse admin ensure (admin flag) 2026-02-07 10:30:34 -03:00
95228b75ab comms: ensure synapse admin flag; ariadne vault role 2026-02-07 10:28:55 -03:00
9e75bf0b42 ariadne: accelerate schedules for alert clearing 2026-02-07 03:23:42 -03:00
b2841985ef comms: re-suspend synapse admin job 2026-02-07 03:19:42 -03:00
9553995ba5 comms: run synapse admin ensure 2026-02-07 03:16:44 -03:00
e840777668 vault: allow maintenance auth sync 2026-02-07 03:13:53 -03:00
718a1ca312 crypto: run xmrig only on rpi5 2026-02-06 23:34:31 -03:00
55f0347b70 comms: suspend synapse admin ensure 2026-02-06 20:21:01 -03:00
f77e13b2cb comms: run synapse admin ensure with python image 2026-02-06 20:13:02 -03:00
fd2b10d00d comms: run synapse admin ensure 2026-02-06 20:01:38 -03:00
4209299a40 jenkins: add dind cache pvc 2026-02-06 20:00:01 -03:00
1804ff06c6 gitea: avoid longhorn nodes 2026-02-06 19:33:55 -03:00
4b5913827d maintenance: pivot soteria to longhorn 2026-02-06 18:38:29 -03:00
80548a2e82 longhorn: add b2 backup target 2026-02-06 18:28:37 -03:00
flux-bot
29756b1e62 chore(maintenance): automated image update 2026-02-06 21:27:42 +00:00
4bc91c40f6 maintenance: restore soteria job node selector 2026-02-06 04:19:36 -03:00
1260d18cdf maintenance: pin soteria jobs to titan-24 for backup 2026-02-06 04:15:58 -03:00
47efd0be06 maintenance: pin soteria jobs to arm64 workers 2026-02-06 04:10:55 -03:00
flux-bot
fa410c8f1e chore(maintenance): automated image update 2026-02-06 07:10:04 +00:00
0ed75718c2 maintenance: remove restic init job 2026-02-06 03:50:30 -03:00
50ff59a33b maintenance: add restic init job 2026-02-06 03:48:45 -03:00
flux-bot
9d9bcd1988 chore(maintenance): automated image update 2026-02-05 18:56:27 +00:00
flux-bot
c96749bab6 chore(maintenance): automated image update 2026-02-05 18:45:20 +00:00
5e239accbd maintenance: schedule soteria on rpi workers 2026-02-05 15:30:09 -03:00
flux-bot
c50298c8fe chore(bstein-dev-home): automated image update 2026-02-05 18:24:54 +00:00
flux-bot
3fcab34b7d chore(maintenance): automated image update 2026-02-05 18:24:44 +00:00
e223ef8e76 harbor: route registry traffic via core 2026-02-05 15:23:42 -03:00
7f72683242 harbor: wire registryctl notification auth 2026-02-05 15:17:54 -03:00
eeb8475848 harbor: fix registry notification URL 2026-02-05 15:00:43 -03:00
839b79696c harbor: restore registry notifications env 2026-02-05 14:50:53 -03:00
920f146efb harbor: enable registry notifications 2026-02-05 14:44:09 -03:00
flux-bot
c2c5474bc8 chore(atlasbot): automated image update 2026-02-05 17:38:26 +00:00
flux-bot
eab7ed5cff chore(maintenance): automated image update 2026-02-05 17:04:24 +00:00
flux-bot
22eb1a1159 chore(maintenance): automated image update 2026-02-05 16:32:49 +00:00
d7c1ecd098 maintenance: move soteria image to bstein 2026-02-05 13:12:03 -03:00
flux-bot
96288c9fdd chore(atlasbot): automated image update 2026-02-05 15:58:19 +00:00
flux-bot
a71bf7d9d5 chore(atlasbot): automated image update 2026-02-05 01:26:05 +00:00
533baa6d0c atlasbot: set genius model env 2026-02-04 19:39:43 -03:00
flux-bot
cee353e305 chore(atlasbot): automated image update 2026-02-04 22:15:47 +00:00
flux-bot
436d24ea70 chore(atlasbot): automated image update 2026-02-04 21:45:45 +00:00
flux-bot
6fb80e37e8 chore(atlasbot): automated image update 2026-02-04 21:39:45 +00:00
flux-bot
132e73100f chore(atlasbot): automated image update 2026-02-04 19:08:32 +00:00
flux-bot
fe8cc40903 chore(atlasbot): automated image update 2026-02-04 18:09:26 +00:00
flux-bot
947a43e630 chore(atlasbot): automated image update 2026-02-04 18:03:26 +00:00
flux-bot
31679b59f5 chore(atlasbot): automated image update 2026-02-04 17:56:26 +00:00
flux-bot
77b81e1e9a chore(atlasbot): automated image update 2026-02-04 17:49:23 +00:00
flux-bot
6523e45b3f chore(atlasbot): automated image update 2026-02-04 17:30:22 +00:00
flux-bot
49414c6cca chore(atlasbot): automated image update 2026-02-04 17:23:23 +00:00
flux-bot
6efa280e9d chore(atlasbot): automated image update 2026-02-04 17:20:23 +00:00
flux-bot
ff81cfdb82 chore(atlasbot): automated image update 2026-02-04 17:14:21 +00:00
flux-bot
c4b0250321 chore(atlasbot): automated image update 2026-02-04 17:07:21 +00:00
flux-bot
c1a8aa43d6 chore(atlasbot): automated image update 2026-02-04 17:00:21 +00:00
flux-bot
0275adb5b7 chore(atlasbot): automated image update 2026-02-04 16:53:20 +00:00
flux-bot
663143660b chore(atlasbot): automated image update 2026-02-04 16:45:19 +00:00
flux-bot
cb25cf7571 chore(atlasbot): automated image update 2026-02-04 16:39:18 +00:00
flux-bot
33127dde26 chore(atlasbot): automated image update 2026-02-04 14:03:05 +00:00
flux-bot
dc214cee79 chore(atlasbot): automated image update 2026-02-04 03:27:09 +00:00
flux-bot
4395986b0c chore(atlasbot): automated image update 2026-02-04 03:01:07 +00:00
flux-bot
fba7fe9029 chore(atlasbot): automated image update 2026-02-04 02:54:06 +00:00
flux-bot
8ecc8dd548 chore(atlasbot): automated image update 2026-02-04 02:42:05 +00:00
flux-bot
672a559e52 chore(atlasbot): automated image update 2026-02-04 02:30:04 +00:00
flux-bot
0dedf4083e chore(atlasbot): automated image update 2026-02-04 01:54:01 +00:00
flux-bot
bf8b99e365 chore(maintenance): automated image update 2026-02-04 01:51:59 +00:00
flux-bot
a33ad1c073 chore(atlasbot): automated image update 2026-02-04 01:27:59 +00:00
flux-bot
be90638fac chore(atlasbot): automated image update 2026-02-04 01:09:57 +00:00
flux-bot
3bc6d29f54 chore(atlasbot): automated image update 2026-02-04 00:55:56 +00:00
flux-bot
4e88c55e57 chore(atlasbot): automated image update 2026-02-04 00:42:56 +00:00
flux-bot
b8c94d5870 chore(atlasbot): automated image update 2026-02-04 00:37:55 +00:00
flux-bot
7f83d2f936 chore(atlasbot): automated image update 2026-02-04 00:34:55 +00:00
flux-bot
d42aa42d8a chore(atlasbot): automated image update 2026-02-04 00:19:53 +00:00
flux-bot
86f512fa1a chore(atlasbot): automated image update 2026-02-03 22:41:45 +00:00
flux-bot
16e2b19ea9 chore(atlasbot): automated image update 2026-02-03 22:06:41 +00:00
flux-bot
a1cb07c6d6 chore(atlasbot): automated image update 2026-02-03 20:18:32 +00:00
flux-bot
558d24ad6b chore(atlasbot): automated image update 2026-02-03 19:56:31 +00:00
flux-bot
160218a4ae chore(atlasbot): automated image update 2026-02-03 19:29:28 +00:00
flux-bot
2e361e620e chore(atlasbot): automated image update 2026-02-03 18:04:21 +00:00
flux-bot
fcd0ea9872 chore(atlasbot): automated image update 2026-02-03 17:53:20 +00:00
flux-bot
75826b0e5e chore(atlasbot): automated image update 2026-02-03 17:42:19 +00:00
flux-bot
71ddd03899 chore(atlasbot): automated image update 2026-02-03 17:34:18 +00:00
flux-bot
2d3a0b0184 chore(atlasbot): automated image update 2026-02-03 17:16:17 +00:00
flux-bot
c7fb848a62 chore(atlasbot): automated image update 2026-02-03 15:15:07 +00:00
flux-bot
c643c965b8 chore(atlasbot): automated image update 2026-02-03 15:05:06 +00:00
flux-bot
618be5ce01 chore(atlasbot): automated image update 2026-02-03 14:57:06 +00:00
flux-bot
ac049e6bb9 chore(atlasbot): automated image update 2026-02-03 14:51:05 +00:00
flux-bot
50108afc57 chore(atlasbot): automated image update 2026-02-03 14:40:04 +00:00
flux-bot
1f74a29445 chore(atlasbot): automated image update 2026-02-03 14:15:01 +00:00
flux-bot
08bc5f4b82 chore(atlasbot): automated image update 2026-02-03 14:07:01 +00:00
flux-bot
c208314506 chore(atlasbot): automated image update 2026-02-03 13:43:59 +00:00
flux-bot
763e5ff9e9 chore(atlasbot): automated image update 2026-02-03 13:22:57 +00:00
flux-bot
5ecb42cfef chore(atlasbot): automated image update 2026-02-03 13:08:56 +00:00
flux-bot
102d8e56ff chore(atlasbot): automated image update 2026-02-03 13:04:56 +00:00
flux-bot
ac96c5482f chore(atlasbot): automated image update 2026-02-03 12:56:55 +00:00
flux-bot
71aa60c696 chore(atlasbot): automated image update 2026-02-03 12:32:53 +00:00
flux-bot
d7582da21b chore(atlasbot): automated image update 2026-02-03 07:33:28 +00:00
flux-bot
4bf3773eb3 chore(atlasbot): automated image update 2026-02-03 06:31:22 +00:00
flux-bot
895ea49dc5 chore(atlasbot): automated image update 2026-02-03 06:07:21 +00:00
flux-bot
f355f6dd6a chore(atlasbot): automated image update 2026-02-03 04:57:14 +00:00
9f87e61f4a atlasbot: raise llm call caps 2026-02-03 01:55:21 -03:00
flux-bot
9a2890c45c chore(atlasbot): automated image update 2026-02-03 03:29:07 +00:00
flux-bot
ad74a45e76 chore(atlasbot): automated image update 2026-02-03 03:26:07 +00:00
fda4860d67 jenkins(atlasbot): set main branch 2026-02-02 23:12:13 -03:00
9f8a0f94d2 jenkins(atlasbot): use main branch 2026-02-02 23:10:42 -03:00
51d12791ca jenkins(atlasbot): track main branch 2026-02-02 22:25:56 -03:00
9fb36f23cd ci(atlasbot): add Jenkins job and image automation 2026-02-02 20:25:47 -03:00
flux-bot
1a2fe05808 chore(atlasbot): automated image update 2026-02-02 21:04:06 +00:00
flux-bot
0c5ec895ee chore(atlasbot): automated image update 2026-02-02 20:22:02 +00:00
7c87e177e9 vault: add default k8s audience 2026-02-02 17:15:35 -03:00
flux-bot
5e6d2a938f chore(atlasbot): automated image update 2026-02-02 20:08:02 +00:00
flux-bot
09070c2cc6 chore(atlasbot): automated image update 2026-02-02 19:53:00 +00:00
flux-bot
5dd30d8802 chore(atlasbot): automated image update 2026-02-02 18:13:52 +00:00
flux-bot
f302cb2448 chore(atlasbot): automated image update 2026-02-02 18:04:51 +00:00
c0a231fd91 atlasbot: bump image to 0.1.0-133 2026-02-02 14:58:38 -03:00
flux-bot
87f8a6d2c0 chore(atlasbot): automated image update 2026-02-02 17:56:53 +00:00
flux-bot
78a0867215 chore(atlasbot): automated image update 2026-02-02 17:56:48 +00:00
b0da9080c7 atlasbot: bump image to 0.1.0-132 2026-02-02 14:56:24 -03:00
8e3feeeaac atlasbot: bump image to 0.1.0-131 2026-02-02 14:54:36 -03:00
6f2ecdb364 atlasbot: bump image to 0.1.0-130 2026-02-02 14:48:34 -03:00
a5e168e55f atlasbot: bump image to 0.1.0-129 2026-02-02 14:41:22 -03:00
flux-bot
87dc1209b1 chore(atlasbot): automated image update 2026-02-02 17:32:49 +00:00
f86845053e atlasbot: disable queue for testing 2026-02-02 14:24:09 -03:00
flux-bot
c04c5ab048 chore(atlasbot): automated image update 2026-02-02 17:13:47 +00:00
flux-bot
ec3bdb7225 chore(atlasbot): automated image update 2026-02-02 16:55:46 +00:00
flux-bot
4b68809bb9 chore(atlasbot): automated image update 2026-02-02 16:45:45 +00:00
flux-bot
661bc6ac7d chore(atlasbot): automated image update 2026-02-02 16:38:44 +00:00
a9ee943344 atlasbot: bump image to 0.1.0-123 2026-02-02 13:30:34 -03:00
826df7d960 atlasbot: bump image to 0.1.0-122 2026-02-02 13:21:28 -03:00
flux-bot
8dfe124212 chore(atlasbot): automated image update 2026-02-02 16:10:42 +00:00
flux-bot
a3bef857f9 chore(atlasbot): automated image update 2026-02-02 15:57:41 +00:00
flux-bot
ed766d7a02 chore(atlasbot): automated image update 2026-02-02 15:47:40 +00:00
4295913056 atlasbot: bump image to 0.1.0-118 2026-02-02 12:39:24 -03:00
flux-bot
e3dfa2c0ea chore(atlasbot): automated image update 2026-02-02 15:20:38 +00:00
flux-bot
6bf8181677 chore(atlasbot): automated image update 2026-02-02 15:17:37 +00:00
d67f3d6fca jenkins: reload jcasc for soteria 2026-02-02 12:11:07 -03:00
flux-bot
41a0363fbc chore(atlasbot): automated image update 2026-02-02 15:09:37 +00:00
a609e230f2 atlasbot: bump image to 0.1.0-114 2026-02-02 12:05:58 -03:00
flux-bot
37342bfe4a chore(atlasbot): automated image update 2026-02-02 15:01:36 +00:00
a509354067 atlasbot: bump image to 0.1.0-112 2026-02-02 11:52:59 -03:00
flux-bot
fb14516674 chore(atlasbot): automated image update 2026-02-02 14:49:35 +00:00
60c80cc86f atlasbot: bump image to 0.1.0-110 2026-02-02 11:42:03 -03:00
flux-bot
7b8ea36554 chore(atlasbot): automated image update 2026-02-02 14:36:35 +00:00
49224375a0 atlasbot: bump image to 0.1.0-108 2026-02-02 11:23:53 -03:00
7d7ddd52dc atlasbot: bump image to 0.1.0-107 2026-02-02 11:14:54 -03:00
cd7043c7f1 jenkins: add soteria pipeline job 2026-02-02 11:01:22 -03:00
fb82a038e9 atlasbot: bump image to 0.1.0-106 2026-02-02 11:00:18 -03:00
93bcea5893 add ai harbor regcred sync 2026-02-02 10:08:46 -03:00
0ba8578416 bump atlasbot image 2026-02-02 10:05:06 -03:00
86475b8bdf track atlasbot knowledge index 2026-02-02 09:48:40 -03:00
f19eaf3b6b move atlasbot to ai namespace 2026-02-02 09:46:50 -03:00
flux-bot
e537180f1f chore(comms): automated image update 2026-02-02 06:03:16 +00:00
flux-bot
8298ed5c16 chore(comms): automated image update 2026-02-02 05:59:16 +00:00
flux-bot
152a28bd09 chore(comms): automated image update 2026-02-02 05:59:04 +00:00
7e02cccbe8 comms: bump atlasbot to 0.1.0-103 2026-02-02 02:58:44 -03:00
flux-bot
e60b1594c0 chore(comms): automated image update 2026-02-02 05:49:15 +00:00
flux-bot
87b2b37918 chore(comms): automated image update 2026-02-02 05:46:15 +00:00
flux-bot
a1249b3e00 chore(comms): automated image update 2026-02-02 05:45:54 +00:00
5000d1f76b comms: bump atlasbot to 0.1.0-101 2026-02-02 02:45:33 -03:00
flux-bot
584625b893 chore(comms): automated image update 2026-02-02 05:39:14 +00:00
95f4ecc4e0 comms: bump atlasbot to 0.1.0-99 2026-02-02 02:16:31 -03:00
240e04f9a2 comms: bump atlasbot to 0.1.0-98 2026-02-02 02:09:57 -03:00
449b8fed64 comms: bump atlasbot to 0.1.0-97 2026-02-02 02:03:50 -03:00
flux-bot
f6d655bb0c chore(comms): automated image update 2026-02-02 05:02:11 +00:00
4fa1b6e84c comms: bump atlasbot to 0.1.0-96 2026-02-02 01:57:58 -03:00
168efd78f7 comms: bump atlasbot to 0.1.0-95 2026-02-02 01:54:41 -03:00
e0bd11fa57 comms: bump atlasbot to 0.1.0-94 2026-02-02 01:45:52 -03:00
3f43299c92 comms: bump atlasbot to 0.1.0-93 2026-02-02 01:38:59 -03:00
645790f404 comms: bump atlasbot to 0.1.0-92 2026-02-01 18:46:01 -03:00
f11f6a4e62 comms: bump atlasbot to 0.1.0-91 2026-02-01 18:42:00 -03:00
flux-bot
c559253a31 chore(comms): automated image update 2026-02-01 21:37:32 +00:00
flux-bot
a3619ce215 chore(comms): automated image update 2026-02-01 21:33:32 +00:00
flux-bot
398fb7b797 chore(comms): automated image update 2026-02-01 21:25:31 +00:00
b30e6af95d comms: bump atlasbot to 0.1.0-87 2026-02-01 18:05:00 -03:00
flux-bot
4fd79b4708 chore(comms): automated image update 2026-02-01 20:55:29 +00:00
f23da3aea5 comms: bump atlasbot to 0.1.0-85 2026-02-01 17:48:24 -03:00
flux-bot
d951ae5061 chore(comms): automated image update 2026-02-01 20:43:28 +00:00
dfe9916e91 comms: bump atlasbot to 0.1.0-83 2026-02-01 14:45:58 -03:00
flux-bot
036c758547 chore(comms): automated image update 2026-02-01 17:39:12 +00:00
382a6e49ee comms: bump atlasbot to 0.1.0-81 2026-02-01 14:34:43 -03:00
93e7449509 comms: bump atlasbot to 0.1.0-80 2026-02-01 14:28:34 -03:00
58d1c168ff comms: bump atlasbot to 0.1.0-79 2026-02-01 14:07:57 -03:00
flux-bot
889400cdbf chore(comms): automated image update 2026-02-01 15:41:02 +00:00
flux-bot
e06066a327 chore(comms): automated image update 2026-02-01 15:36:01 +00:00
138f8c4407 comms: bump atlasbot image 2026-02-01 12:25:31 -03:00
33569aff99 vault: fix k8s auth env indent 2026-02-01 12:20:04 -03:00
3e2f56da7d vault: set kubernetes issuer 2026-02-01 12:18:57 -03:00
flux-bot
0914ba3509 chore(comms): automated image update 2026-02-01 15:01:58 +00:00
flux-bot
865a979424 chore(comms): automated image update 2026-02-01 14:55:58 +00:00
flux-bot
5dfc3ed259 chore(comms): automated image update 2026-02-01 14:55:52 +00:00
b479364017 comms: bump atlasbot image 2026-02-01 11:55:26 -03:00
flux-bot
00d8f852a3 chore(comms): automated image update 2026-02-01 14:47:57 +00:00
flux-bot
2d7f744284 chore(comms): automated image update 2026-02-01 14:18:55 +00:00
5f1b1a6cd0 vault: set k8s auth audiences 2026-02-01 11:17:02 -03:00
flux-bot
e966961dbe chore(comms): automated image update 2026-02-01 13:58:53 +00:00
7ffb0aba5d atlasbot: bump to 0.1.0-70 2026-02-01 10:37:29 -03:00
flux-bot
e80a439725 chore(comms): automated image update 2026-02-01 08:40:26 +00:00
flux-bot
8a22825796 chore(comms): automated image update 2026-02-01 08:40:09 +00:00
1fabd4ce2f atlasbot: bump to 0.1.0-69 2026-02-01 05:39:54 -03:00
759ac5ef90 comms: bump atlasbot image 2026-02-01 05:31:07 -03:00
flux-bot
bc971cce92 chore(comms): automated image update 2026-02-01 08:23:24 +00:00
flux-bot
069f6b4983 chore(comms): automated image update 2026-02-01 08:18:24 +00:00
64cfd5180d comms: bump atlasbot image 2026-02-01 05:12:59 -03:00
flux-bot
8a087fb16d chore(comms): automated image update 2026-02-01 08:10:23 +00:00
flux-bot
652c3a28a3 chore(comms): automated image update 2026-02-01 07:55:22 +00:00
flux-bot
141c54ccf3 chore(comms): automated image update 2026-02-01 07:49:21 +00:00
flux-bot
0f8529c7c5 chore(comms): automated image update 2026-02-01 07:46:21 +00:00
flux-bot
dafba36768 chore(comms): automated image update 2026-02-01 07:38:20 +00:00
4d5e9552e3 comms: bump atlasbot to 0.1.0-59 2026-02-01 04:32:01 -03:00
ddf1d41fd3 comms: bump atlasbot to 0.1.0-58 2026-02-01 04:25:12 -03:00
flux-bot
49e630f7fd chore(comms): automated image update 2026-02-01 07:17:18 +00:00
flux-bot
b7a81d28d1 chore(comms): automated image update 2026-02-01 06:39:16 +00:00
109c00bc3c comms: bump atlasbot to 0.1.0-55 2026-02-01 02:08:54 -03:00
flux-bot
c9ad055b4c chore(comms): automated image update 2026-02-01 05:07:08 +00:00
10498c659b comms: bump atlasbot to 0.1.0-54 2026-02-01 01:51:26 -03:00
flux-bot
978bd8e595 chore(comms): automated image update 2026-02-01 04:51:06 +00:00
259552ac28 comms: bump atlasbot to 0.1.0-53 2026-02-01 01:39:09 -03:00
flux-bot
7f2ded5244 chore(comms): automated image update 2026-02-01 04:39:05 +00:00
e4c370b983 comms: bump atlasbot to 0.1.0-52 2026-02-01 01:29:30 -03:00
flux-bot
7dfc98b6d6 chore(comms): automated image update 2026-02-01 04:29:04 +00:00
cb60c64bce comms: bump atlasbot to 0.1.0-51 2026-02-01 01:15:18 -03:00
flux-bot
091f095893 chore(comms): automated image update 2026-02-01 04:15:03 +00:00
5b389d12df comms(atlasbot): bump image to 0.1.0-50 2026-01-31 22:30:04 -03:00
flux-bot
ae88bc8484 chore(comms): automated image update 2026-02-01 01:28:49 +00:00
529576e082 comms: bump atlasbot image 2026-01-31 21:40:11 -03:00
flux-bot
a7ffaa3213 chore(maintenance): automated image update 2026-02-01 00:39:49 +00:00
flux-bot
e478f1c74d chore(comms): automated image update 2026-02-01 00:39:45 +00:00
2480b6cecc comms: disable atlasbot queue for tests 2026-01-31 18:21:39 -03:00
bbe27f963d comms: bump atlasbot to 0.1.0-48 2026-01-31 18:14:55 -03:00
flux-bot
c5da854cef chore(comms): automated image update 2026-01-31 21:14:27 +00:00
0319707fff atlasbot: make node counts explicit 2026-01-31 16:44:50 -03:00
4f8d8f1f25 atlasbot: prioritize high-priority subquestions 2026-01-31 16:38:54 -03:00
5448ff3f55 atlasbot: expand chunk summaries 2026-01-31 16:35:02 -03:00
b6c2d1416e atlasbot: enable debug pipeline logging 2026-01-31 16:30:05 -03:00
flux-bot
152e1d88f4 chore(comms): automated image update 2026-01-31 19:29:18 +00:00
86e9dc289f atlasbot: bump to 0.1.0-43 2026-01-31 14:24:13 -03:00
flux-bot
c4b7198c46 chore(comms): automated image update 2026-01-31 17:21:08 +00:00
f8a12be2ec atlasbot: bump image to 0.1.0-42 2026-01-31 14:15:41 -03:00
flux-bot
c9ec5126cd chore(comms): automated image update 2026-01-31 17:15:07 +00:00
flux-bot
c66db7c18f chore(maintenance): automated image update 2026-01-31 16:42:06 +00:00
flux-bot
de47ab76a5 chore(maintenance): automated image update 2026-01-31 16:39:06 +00:00
c788512d59 atlasbot: bump image to 0.1.0-41 2026-01-31 13:26:44 -03:00
flux-bot
ae25ccb6f2 chore(comms): automated image update 2026-01-31 16:25:03 +00:00
flux-bot
e27f4cfc68 chore(comms): automated image update 2026-01-31 11:08:36 +00:00
50e06b4a13 atlasbot: bump image to 0.1.0-40 2026-01-31 08:08:21 -03:00
934d6e7a3b comms: fix atlasbot image indentation 2026-01-31 07:17:58 -03:00
flux-bot
25654a731e chore(comms): automated image update 2026-01-31 10:12:32 +00:00
4aecadb3de atlasbot: bump image to 0.1.0-39 2026-01-31 07:11:56 -03:00
3b79a82c71 atlasbot: bump image to 0.1.0-38 2026-01-31 06:18:58 -03:00
flux-bot
04b263dc2d chore(comms): automated image update 2026-01-31 09:18:28 +00:00
93841d9de7 maintenance: add soteria service 2026-01-31 03:35:39 -03:00
bb294c6d21 atlasbot: bump image to 0.1.0-37 2026-01-31 03:20:44 -03:00
flux-bot
64962f8863 chore(comms): automated image update 2026-01-31 06:20:12 +00:00
bcb4c05b14 ariadne: add alertmanager url 2026-01-30 21:57:05 -03:00
flux-bot
d00a09fb58 chore(maintenance): automated image update 2026-01-31 00:54:47 +00:00
flux-bot
a22ff047f7 chore(maintenance): automated image update 2026-01-31 00:40:46 +00:00
flux-bot
fef5d7d26a chore(maintenance): automated image update 2026-01-30 23:54:41 +00:00
fa60fa124c comms: suspend mas-local-users-ensure 2026-01-30 17:46:46 -03:00
30c1192978 comms: bump mas-local-users-ensure job 2026-01-30 17:44:42 -03:00
644be2c575 comms: bump comms-secrets-ensure job 2026-01-30 17:42:28 -03:00
29d1bf9f4e comms: run mas-local-users-ensure job (retry) 2026-01-30 17:37:42 -03:00
9bdab331b6 comms: suspend mas-local-users-ensure job 2026-01-30 17:33:55 -03:00
8f49ac2d63 comms: run mas-local-users-ensure job 2026-01-30 17:29:29 -03:00
flux-bot
43b9cd27ed chore(maintenance): automated image update 2026-01-30 20:18:24 +00:00
580ac4950b comms: add atlas-genius bot 2026-01-30 17:17:59 -03:00
flux-bot
d677e83423 chore(comms): automated image update 2026-01-30 20:07:20 +00:00
flux-bot
bff55a6dc7 chore(bstein-dev-home): automated image update 2026-01-30 20:05:30 +00:00
flux-bot
0465658ba7 chore(bstein-dev-home): automated image update 2026-01-30 20:02:30 +00:00
flux-bot
3e484ba726 chore(comms): automated image update 2026-01-30 19:53:19 +00:00
flux-bot
088bb3b435 chore(comms): automated image update 2026-01-30 19:42:22 +00:00
flux-bot
e81bad9d47 chore(maintenance): automated image update 2026-01-30 13:21:48 +00:00
3f11a065a3 atlasbot: support quick/smart Matrix accounts 2026-01-30 10:21:07 -03:00
flux-bot
ec6375f31d chore(maintenance): automated image update 2026-01-30 05:19:07 +00:00
flux-bot
5a8360ed97 chore(maintenance): automated image update 2026-01-30 03:15:56 +00:00
flux-bot
9e75f82d43 chore(comms): automated image update 2026-01-29 23:54:42 +00:00
flux-bot
7ac26eb0dd chore(maintenance): automated image update 2026-01-29 19:56:19 +00:00
00d2f6a61f comms: bump atlasbot to 0.1.0-32 2026-01-29 16:51:43 -03:00
flux-bot
687ca2c22d chore(comms): automated image update 2026-01-29 19:50:22 +00:00
52281ca2ec comms: bump atlasbot to 0.1.0-31 2026-01-29 16:09:15 -03:00
flux-bot
8850e9fdf1 chore(comms): automated image update 2026-01-29 19:08:18 +00:00
a253993451 comms: bump atlasbot to 0.1.0-30 2026-01-29 14:56:59 -03:00
flux-bot
aeff2bbe73 chore(comms): automated image update 2026-01-29 17:55:12 +00:00
39616b2435 comms: bump atlasbot 0.1.0-29 2026-01-29 14:18:51 -03:00
flux-bot
b3d8674499 chore(maintenance): automated image update 2026-01-29 16:43:04 +00:00
3ca0fb352d sso: suspend execute-actions email test job 2026-01-29 13:41:41 -03:00
f7ea7d57e9 sso: send execute-actions email to robotuser 2026-01-29 13:40:45 -03:00
flux-bot
a418844f61 chore(maintenance): automated image update 2026-01-29 16:35:03 +00:00
96d914d02c comms: bump atlasbot to 0.1.0-28 2026-01-29 13:33:39 -03:00
e6c031829a sso: suspend keycloak oneoff jobs 2026-01-29 13:30:10 -03:00
ebfb19c34e sso: rerun execute-actions email test 2026-01-29 13:28:32 -03:00
4fedec3999 sso: set keycloak smtp to postmark 2026-01-29 13:27:28 -03:00
55f78f2eb7 sso: rerun execute-actions email test 2026-01-29 13:23:59 -03:00
ab5ef933d8 sso: run keycloak execute-actions email test 2026-01-29 13:21:40 -03:00
3e23109229 sso: suspend realm settings job 2026-01-29 13:20:11 -03:00
d18c06ad31 sso: rerun keycloak realm settings 2026-01-29 13:10:31 -03:00
292a6b7e04 monitoring: stabilize alert queries 2026-01-29 13:07:55 -03:00
flux-bot
d7fd5682f3 chore(maintenance): automated image update 2026-01-29 16:07:01 +00:00
bedab04b22 atlasbot: bump to 0.1.0-27 2026-01-29 13:06:37 -03:00
6d7a32ce11 atlasbot: align to installed qwen model 2026-01-29 10:25:57 -03:00
87ded58aca atlasbot: align models and bump image 2026-01-29 10:17:38 -03:00
flux-bot
5f30ab73bf chore(comms): automated image update 2026-01-29 13:16:50 +00:00
flux-bot
3f2d2e5fdb chore(maintenance): automated image update 2026-01-29 13:16:46 +00:00
flux-bot
f55e9a6043 chore(comms): automated image update 2026-01-29 12:23:45 +00:00
flux-bot
7de15db57a chore(comms): automated image update 2026-01-29 11:47:42 +00:00
flux-bot
265f809f8f chore(maintenance): automated image update 2026-01-29 11:43:38 +00:00
flux-bot
e4d19fc5b4 chore(comms): automated image update 2026-01-29 11:42:41 +00:00
flux-bot
d10eace338 chore(maintenance): automated image update 2026-01-29 10:45:37 +00:00
78afc97db2 atlasbot: bump image and allow longhorn read 2026-01-29 07:45:24 -03:00
flux-bot
3c0d4d0f4f chore(comms): automated image update 2026-01-29 10:44:37 +00:00
flux-bot
d73d6d7c01 chore(comms): automated image update 2026-01-29 09:21:30 +00:00
flux-bot
af02ee7abf chore(comms): automated image update 2026-01-29 09:16:59 +00:00
630a596cb6 atlasbot: bump image tag 2026-01-29 06:16:43 -03:00
flux-bot
d2729138b6 chore(maintenance): automated image update 2026-01-29 09:12:26 +00:00
a6fbcc8669 maintenance(ariadne): allow apps/events, bump image tag 2026-01-29 06:09:36 -03:00
flux-bot
d91d632496 chore(maintenance): automated image update 2026-01-29 09:01:41 +00:00
flux-bot
3a9949a24d chore(comms): automated image update 2026-01-29 08:01:25 +00:00
b045506516 vault: allow kubernetes auth login 2026-01-29 02:22:51 -03:00
flux-bot
3f24de03d1 chore(maintenance): automated image update 2026-01-29 04:58:20 +00:00
flux-bot
a3ffcb2ea1 chore(comms): automated image update 2026-01-29 04:58:10 +00:00
flux-bot
314a922109 chore(comms): automated image update 2026-01-29 04:56:21 +00:00
flux-bot
2ed4762fab chore(maintenance): automated image update 2026-01-29 04:56:05 +00:00
1c6d572559 images: bump ariadne and atlasbot 2026-01-29 01:55:07 -03:00
flux-bot
58cc15a7e0 chore(comms): automated image update 2026-01-29 01:35:52 +00:00
flux-bot
3da28531fd chore(maintenance): automated image update 2026-01-29 01:35:03 +00:00
flux-bot
58f818cebc chore(maintenance): automated image update 2026-01-28 23:47:54 +00:00
flux-bot
cff7ec922e chore(comms): automated image update 2026-01-28 23:46:43 +00:00
flux-bot
a49f0580da chore(maintenance): automated image update 2026-01-28 23:43:54 +00:00
flux-bot
10d4f015b2 chore(maintenance): automated image update 2026-01-28 23:36:54 +00:00
flux-bot
669849b883 chore(maintenance): automated image update 2026-01-28 23:31:53 +00:00
flux-bot
9ce9470677 chore(comms): automated image update 2026-01-28 22:59:41 +00:00
c3555d59f7 monitoring: fix GPU share attribution 2026-01-28 19:08:53 -03:00
28af553498 monitoring: de-dupe ariadne schedule alert 2026-01-28 18:45:15 -03:00
d42385de3e comms: suspend synapse admin ensure job 2026-01-28 18:39:28 -03:00
6104035474 maintenance: restart ariadne after synapse token update 2026-01-28 18:37:49 -03:00
dabf043ce6 comms: force admin token to use othrys-seeder 2026-01-28 18:35:28 -03:00
9b8ef436c8 comms: fix vault_put indentation 2026-01-28 18:31:48 -03:00
8cf24a6c96 comms: source admin token from seeder access tokens 2026-01-28 18:29:49 -03:00
2797464b45 comms: mint synapse admin token with syt_ prefix 2026-01-28 18:20:37 -03:00
320cf901ba comms: rerun synapse admin ensure with device 2026-01-28 18:17:24 -03:00
5bb0fc126e comms: ensure synapse device for admin token 2026-01-28 18:10:55 -03:00
1b8271ed61 maintenance: restart ariadne after synapse token 2026-01-28 17:59:25 -03:00
fab030e9c0 comms: rotate invalid synapse admin token 2026-01-28 17:57:39 -03:00
be6b65cedb comms: rerun synapse admin ensure job 2026-01-28 17:54:53 -03:00
cbed39bd64 comms: run synapse admin ensure job 2026-01-28 17:50:01 -03:00
445622e936 comms: use bundled synapse admin ensure image 2026-01-28 17:47:58 -03:00
17e28d2891 maintenance: restart ariadne to reload secrets 2026-01-28 17:31:25 -03:00
8325827c41 comms: suspend synapse admin ensure job 2026-01-28 17:29:07 -03:00
7c7ed38ead comms: fix synapse admin ensure vault login 2026-01-28 17:27:39 -03:00
5d2fb32ff8 comms: rebuild synapse admin ensure job 2026-01-28 17:25:34 -03:00
flux-bot
b62a5ba3fb chore(maintenance): automated image update 2026-01-28 20:21:37 +00:00
359445ab43 comms: run synapse admin ensure job 2026-01-28 17:19:55 -03:00
4d1382cfc9 maintenance: track ariadne latest image 2026-01-28 14:04:58 -03:00
b66c7de5fd monitoring: avoid ariadne alert title conflict 2026-01-28 14:02:12 -03:00
3d4e5bdde1 monitoring: disable legacy cron alert 2026-01-28 13:58:28 -03:00
f37baf2447 monitoring: restart grafana to reload alerts 2026-01-28 13:53:33 -03:00
ad3d8d75c9 monitoring: reuse maint-cron uid for ariadne alert 2026-01-28 13:52:12 -03:00
4ecfdcef7c monitoring: restart grafana for ariadne alerts 2026-01-28 13:49:41 -03:00
flux-bot
63ae3e3f6f chore(comms): automated image update 2026-01-28 16:49:09 +00:00
eab2ce50b1 monitoring: alert on ariadne schedules 2026-01-28 13:47:54 -03:00
flux-bot
523db13be0 chore(maintenance): automated image update 2026-01-28 16:47:19 +00:00
6a3f8cffe1 comms: fix MAS job indentation 2026-01-28 13:25:51 -03:00
80a0f424cd comms: tolerate MAS login rate limits 2026-01-28 13:23:25 -03:00
8e9d85ccd7 comms: stop seeding atlas bots in synapse job 2026-01-28 13:18:44 -03:00
85abd589d4 comms: inject quick/smart bot creds for MAS job 2026-01-28 13:12:02 -03:00
flux-bot
bfbd707293 chore(bstein-dev-home): automated image update 2026-01-28 16:07:02 +00:00
flux-bot
526a895775 chore(bstein-dev-home): automated image update 2026-01-28 16:06:02 +00:00
38e1eba112 comms: add atlas quick/smart bots 2026-01-28 13:01:09 -03:00
flux-bot
f9e6cabe6d chore(comms): automated image update 2026-01-28 15:59:05 +00:00
36bb695c15 monitoring: fix grafana pod annotation indent 2026-01-28 12:37:42 -03:00
flux-bot
b449b65244 chore(comms): automated image update 2026-01-28 15:35:02 +00:00
1a9651914e monitoring: restart grafana after alert fix 2026-01-28 12:32:56 -03:00
flux-bot
9e5be20983 chore(comms): automated image update 2026-01-28 15:32:23 +00:00
d55bc98bbe monitoring: fix postmark alert metrics 2026-01-28 12:31:33 -03:00
flux-bot
46d677f5e7 chore(comms): automated image update 2026-01-28 15:22:49 +00:00
ef63b0f9f3 feat: add nats platform kustomization 2026-01-28 12:15:39 -03:00
111ae84255 chore: move flux sync to feature/atlasbot 2026-01-28 12:12:23 -03:00
d78a3c2550 comms: allow atlasbot to pull harbor images 2026-01-28 11:54:11 -03:00
fb89158622 atlasbot: move to service image and add nats queue infra 2026-01-28 11:52:37 -03:00
312 changed files with 11500 additions and 39739 deletions

1
.gitignore vendored
View File

@ -2,6 +2,7 @@
!README.md
!knowledge/**/*.md
!services/comms/knowledge/**/*.md
!services/atlasbot/knowledge/**/*.md
__pycache__/
*.py[cod]
.pytest_cache

374
Jenkinsfile vendored
View File

@ -11,47 +11,9 @@ spec:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-06
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
image: python:3.12-slim
command:
- cat
tty: true
@ -61,21 +23,6 @@ spec:
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -85,295 +32,12 @@ spec:
}
stage('Install deps') {
steps {
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Prepare local quality evidence') {
stage('Glue tests') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
}
}
stage('Run quality gate') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile jenkins --build-dir build
quality_gate_rc=$?
set -e
printf '%s\n' "${quality_gate_rc}" > build/quality-gate.rc
'''
}
}
stage('Publish test metrics') {
steps {
sh '''
set -eu
export JUNIT_GLOB='build/junit-*.xml'
export QUALITY_GATE_EXIT_CODE_PATH='build/quality-gate.rc'
export QUALITY_GATE_SUMMARY_PATH='build/quality-gate-summary.json'
python3 ci/scripts/publish_test_metrics.py
'''
}
}
stage('Enforce quality gate') {
steps {
sh '''
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
'''
sh 'pytest -q ci/tests/glue'
}
}
stage('Resolve Flux branch') {
@ -381,7 +45,7 @@ PY
script {
env.FLUX_BRANCH = sh(
returnStdout: true,
script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
).trim()
if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml')
@ -400,20 +64,6 @@ PY
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
@ -424,18 +74,4 @@ PY
}
}
}
post {
always {
script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
}
}
}

View File

@ -1,29 +1,3 @@
# titan-iac
Flux-managed Kubernetes desired-state config for `bstein.dev`.
Canonical source URL:
- `ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
## Scope
This repo contains cluster configuration consumed by Flux:
- platform/infrastructure manifests
- service manifests and kustomizations
- operational scripts for render/reconcile workflows
This repo is **not** the Ananke application source repo.
Ananke lives in `bstein/ananke` and orchestrates host-side shutdown/startup behavior around this desired state.
## Validation workflow
```bash
kustomize build services/<app>
kubectl apply --server-side --dry-run=client -k services/<app>
flux reconcile kustomization <name> --namespace flux-system --with-source
```
## Apply model
Use Git + Flux as the source of truth.
Avoid manual in-cluster edits for durable changes.
Flux-managed Kubernetes cluster for bstein.dev services.

View File

@ -10,47 +10,9 @@ spec:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-06
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
image: python:3.12-slim
command:
- cat
tty: true
@ -60,21 +22,6 @@ spec:
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -84,295 +31,12 @@ spec:
}
stage('Install deps') {
steps {
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Prepare local quality evidence') {
stage('Glue tests') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
}
}
stage('Run quality gate') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile jenkins --build-dir build
quality_gate_rc=$?
set -e
printf '%s\n' "${quality_gate_rc}" > build/quality-gate.rc
'''
}
}
stage('Publish test metrics') {
steps {
sh '''
set -eu
export JUNIT_GLOB='build/junit-*.xml'
export QUALITY_GATE_EXIT_CODE_PATH='build/quality-gate.rc'
export QUALITY_GATE_SUMMARY_PATH='build/quality-gate-summary.json'
python3 ci/scripts/publish_test_metrics.py
'''
}
}
stage('Enforce quality gate') {
steps {
sh '''
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
'''
sh 'pytest -q ci/tests/glue'
}
}
stage('Resolve Flux branch') {
@ -399,20 +63,6 @@ PY
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
@ -423,18 +73,4 @@ PY
}
}
}
post {
always {
script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
}
}
}

View File

@ -1,7 +1,4 @@
pytest==8.3.4
pytest-cov==6.0.0
coverage==7.6.10
kubernetes==30.1.0
PyYAML==6.0.2
requests==2.32.3
ruff==0.8.4

View File

@ -1,358 +0,0 @@
#!/usr/bin/env python3
"""Publish titan-iac quality-gate results to Pushgateway."""
from __future__ import annotations
import json
import os
from glob import glob
from pathlib import Path
import sys
import urllib.error
import urllib.request
import xml.etree.ElementTree as ET
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from ci.scripts import publish_test_metrics_quality as _quality_helpers
CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
_build_check_statuses = _quality_helpers._build_check_statuses
_combine_statuses = _quality_helpers._combine_statuses
_infer_sonarqube_status = _quality_helpers._infer_sonarqube_status
_infer_source_lines_over_500 = _quality_helpers._infer_source_lines_over_500
_infer_supply_chain_status = _quality_helpers._infer_supply_chain_status
_infer_workspace_coverage_percent = _quality_helpers._infer_workspace_coverage_percent
_load_optional_json = _quality_helpers._load_optional_json
_normalize_result_status = _quality_helpers._normalize_result_status
def _escape_label(value: str) -> str:
"""Escape a Prometheus label value without changing its content."""
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
def _label_str(labels: dict[str, str]) -> str:
"""Render a stable Prometheus label set from a mapping."""
parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
return "{" + ",".join(parts) + "}" if parts else ""
def _read_text(url: str) -> str:
"""Fetch a plain-text response body from the given URL."""
with urllib.request.urlopen(url, timeout=10) as response:
return response.read().decode("utf-8")
def _post_text(url: str, payload: str) -> None:
"""PUT a plain-text payload and fail on any 4xx/5xx response."""
request = urllib.request.Request(
url,
data=payload.encode("utf-8"),
method="PUT",
headers={"Content-Type": "text/plain"},
)
with urllib.request.urlopen(request, timeout=10) as response:
if response.status >= 400:
raise RuntimeError(f"push failed with status={response.status}")
def _parse_junit(path: str) -> dict[str, int]:
"""Parse a JUnit XML file into aggregate test counters."""
if not os.path.exists(path):
return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
tree = ET.parse(path)
root = tree.getroot()
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
suites = [elem for elem in root if elem.tag == "testsuite"]
else:
suites = []
for suite in suites:
for key in totals:
raw_value = suite.attrib.get(key, "0")
try:
totals[key] += int(float(raw_value))
except ValueError:
totals[key] += 0
return totals
def _collect_junit_totals(pattern: str) -> dict[str, int]:
"""Sum JUnit counters across every XML file matching the pattern."""
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
for path in sorted(glob(pattern)):
parsed = _parse_junit(path)
for key in totals:
totals[key] += parsed[key]
return totals
def _collect_junit_cases(pattern: str) -> list[tuple[str, str]]:
"""Collect individual JUnit test-case statuses for flaky-test trend panels."""
cases: list[tuple[str, str]] = []
for path in sorted(glob(pattern)):
if not os.path.exists(path):
continue
root = ET.parse(path).getroot()
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
suites = [elem for elem in root if elem.tag == "testsuite"]
else:
suites = []
for suite in suites:
for test_case in suite.findall("testcase"):
case_name = test_case.attrib.get("name", "").strip()
class_name = test_case.attrib.get("classname", "").strip()
if not case_name:
continue
full_name = f"{class_name}.{case_name}" if class_name else case_name
status = "passed"
if test_case.find("failure") is not None or test_case.find("error") is not None:
status = "failed"
elif test_case.find("skipped") is not None:
status = "skipped"
cases.append((full_name, status))
return cases
def _read_exit_code(path: str) -> int:
"""Read the quality-gate exit code, defaulting to failure if missing."""
try:
with open(path, "r", encoding="utf-8") as handle:
return int(handle.read().strip())
except (FileNotFoundError, ValueError):
return 1
def _load_summary(path: str) -> dict:
"""Load the JSON quality-gate summary, returning an empty mapping on error."""
try:
with open(path, "r", encoding="utf-8") as handle:
return json.load(handle)
except (FileNotFoundError, json.JSONDecodeError):
return {}
def _summary_float(summary: dict, key: str) -> float:
"""Extract a float-like value from the summary, defaulting to 0.0."""
value = summary.get(key)
if isinstance(value, (int, float)):
return float(value)
return 0.0
def _summary_int(summary: dict, key: str) -> int:
"""Extract an int-like value from the summary, defaulting to 0."""
value = summary.get(key)
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
return 0
def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
"""Return the current counter value for a labeled metric if present."""
text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics")
for line in text.splitlines():
if not line.startswith(metric + "{"):
continue
if any(f'{key}="{value}"' not in line for key, value in labels.items()):
continue
parts = line.split()
if len(parts) < 2:
continue
try:
return float(parts[1])
except ValueError:
return 0.0
return 0.0
def _build_payload(
suite: str,
status: str,
tests: dict[str, int],
test_cases: list[tuple[str, str]],
ok_count: int,
failed_count: int,
branch: str,
build_number: str,
jenkins_job: str,
summary: dict | None = None,
workspace_line_coverage_percent: float = 0.0,
source_files_total: int = 0,
source_lines_over_500: int = 0,
check_statuses: dict[str, str] | None = None,
) -> str:
"""Build the Pushgateway payload for the current suite run."""
passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0)
build_labels = _label_str(
{
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
)
test_case_base_labels = {
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
lines = [
"# TYPE platform_quality_gate_runs_total counter",
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count}',
"# TYPE titan_iac_quality_gate_tests_total gauge",
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="failed"}} {tests["failures"]}',
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="error"}} {tests["errors"]}',
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {tests["skipped"]}',
"# TYPE titan_iac_quality_gate_run_status gauge",
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
"# TYPE platform_quality_gate_build_info gauge",
f"platform_quality_gate_build_info{build_labels} 1",
"# TYPE titan_iac_quality_gate_build_info gauge",
f"titan_iac_quality_gate_build_info{build_labels} 1",
"# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {workspace_line_coverage_percent:.3f}',
"# TYPE platform_quality_gate_source_files_total gauge",
f'platform_quality_gate_source_files_total{{suite="{suite}"}} {source_files_total}',
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
]
if check_statuses:
lines.append("# TYPE titan_iac_quality_gate_checks_total gauge")
for check_name in CANONICAL_CHECKS:
check_status = check_statuses.get(check_name, "not_applicable")
lines.append(
f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(check_name)}",result="{_escape_label(check_status)}"}} 1'
)
lines.append("# TYPE platform_quality_gate_test_case_result gauge")
if test_cases:
for test_name, test_status in test_cases:
labels = {
**test_case_base_labels,
"test": test_name,
"status": test_status,
}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
else:
labels = {**test_case_base_labels, "test": "__no_test_cases__", "status": "skipped"}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
return "\n".join(lines) + "\n"
def main() -> int:
    """Publish the quality-gate metrics and print a compact run summary.

    Reads all configuration from environment variables, aggregates JUnit
    results and the quality summary, pushes a Prometheus text payload to the
    pushgateway, and prints a one-line JSON summary. Always returns 0; the
    gate's pass/fail is carried in the pushed metrics, not the exit code.
    """
    # --- configuration (env vars with Jenkins-friendly defaults) ---
    suite = os.getenv("SUITE_NAME", "titan_iac")
    pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091")
    job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci")
    # JUNIT_GLOB wins over the legacy JUNIT_PATH name.
    junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
    # Same pattern: new env name first, legacy GLUE_ name as fallback.
    exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
    summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
    branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
    # Jenkins often reports "origin/<branch>"; strip the remote prefix.
    if branch.startswith("origin/"):
        branch = branch[len("origin/") :]
    build_number = os.getenv("BUILD_NUMBER", "")
    jenkins_job = os.getenv("JOB_NAME", "titan-iac")
    # --- collect evidence from the build workspace ---
    tests = _collect_junit_totals(junit_glob)
    test_cases = _collect_junit_cases(junit_glob)
    exit_code = _read_exit_code(exit_code_path)
    status = "ok" if exit_code == 0 else "failed"
    summary = _load_summary(summary_path)
    workspace_line_coverage_percent = _summary_float(summary, "workspace_line_coverage_percent")
    # Fall back to parsing the coverage XML when the summary lacks the number.
    if workspace_line_coverage_percent <= 0:
        workspace_line_coverage_percent = _infer_workspace_coverage_percent(summary, "build/coverage-unit.xml")
    source_files_total = _summary_int(summary, "source_files_total")
    source_lines_over_500 = _summary_int(summary, "source_lines_over_500")
    if source_lines_over_500 <= 0:
        source_lines_over_500 = _infer_source_lines_over_500(summary)
    sonarqube_report = _load_optional_json(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", "build/sonarqube-quality-gate.json"))
    supply_chain_report = _load_optional_json(os.getenv("QUALITY_GATE_IRONBANK_REPORT", "build/ironbank-compliance.json"))
    supply_chain_required = os.getenv("QUALITY_GATE_IRONBANK_REQUIRED", "0").strip().lower() in {"1", "true", "yes", "on"}
    check_statuses = _build_check_statuses(
        summary=summary,
        tests=tests,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_lines_over_500=source_lines_over_500,
        sonarqube_report=sonarqube_report,
        supply_chain_report=supply_chain_report,
        supply_chain_required=supply_chain_required,
    )
    # --- run counters: read current totals from the pushgateway, then bump ---
    # NOTE(review): this read-modify-write is not atomic; concurrent builds of
    # the same job/suite could lose an increment — confirm builds are serialized.
    ok_count = int(
        _fetch_existing_counter(
            pushgateway_url,
            "platform_quality_gate_runs_total",
            {"job": job_name, "suite": suite, "status": "ok"},
        )
    )
    failed_count = int(
        _fetch_existing_counter(
            pushgateway_url,
            "platform_quality_gate_runs_total",
            {"job": job_name, "suite": suite, "status": "failed"},
        )
    )
    if status == "ok":
        ok_count += 1
    else:
        failed_count += 1
    payload = _build_payload(
        suite=suite,
        status=status,
        tests=tests,
        test_cases=test_cases,
        ok_count=ok_count,
        failed_count=failed_count,
        branch=branch,
        build_number=build_number,
        jenkins_job=jenkins_job,
        summary=summary,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_files_total=source_files_total,
        source_lines_over_500=source_lines_over_500,
        check_statuses=check_statuses,
    )
    # Grouping key is job + suite so each suite keeps its own metric group.
    push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}"
    _post_text(push_url, payload)
    # Reuse of the name `summary` here shadows the loaded quality summary;
    # from this point on it is the compact stdout report only.
    summary = {
        "suite": suite,
        "status": status,
        "tests_total": tests["tests"],
        "tests_failed": tests["failures"],
        "tests_error": tests["errors"],
        "tests_skipped": tests["skipped"],
        "ok_count": ok_count,
        "failed_count": failed_count,
        "checks_recorded": len(check_statuses),
        "workspace_line_coverage_percent": workspace_line_coverage_percent,
        "source_files_total": source_files_total,
        "source_lines_over_500": source_lines_over_500,
    }
    print(json.dumps(summary, sort_keys=True))
    return 0
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())

View File

@ -1,200 +0,0 @@
#!/usr/bin/env python3
"""Quality/status helpers for publish_test_metrics."""
from __future__ import annotations
import json
from pathlib import Path
import xml.etree.ElementTree as ET
# Status spellings that normalize to "ok".
SUCCESS_STATUSES = {"ok", "pass", "passed", "success", "compliant"}
# Status spellings that normalize to "not_applicable".
NOT_APPLICABLE_STATUSES = {"not_applicable", "n/a", "na", "none", "skipped"}
# Status spellings that normalize to "failed" (warnings count as failures).
FAILED_STATUSES = {"failed", "fail", "error", "errors", "warn", "warning", "red"}
# The fixed set of checks every suite reports a status for, in display order.
CANONICAL_CHECKS = [
    "tests",
    "coverage",
    "loc",
    "docs_naming",
    "gate_glue",
    "sonarqube",
    "supply_chain",
]
def _infer_workspace_coverage_percent(summary: dict, default_xml: str) -> float:
"""Infer workspace line coverage from quality summary coverage XML metadata."""
results = summary.get("results", []) if isinstance(summary, dict) else []
coverage_xml = default_xml
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() != "coverage":
continue
candidate = str(result.get("coverage_xml") or "").strip()
if candidate:
coverage_xml = candidate
break
xml_path = Path(coverage_xml)
if not xml_path.exists():
return 0.0
try:
root = ET.parse(xml_path).getroot()
line_rate = root.attrib.get("line-rate")
if line_rate is None:
return 0.0
return float(line_rate) * 100.0
except (ET.ParseError, OSError, ValueError):
return 0.0
def _infer_source_lines_over_500(summary: dict) -> int:
"""Infer over-limit source file count from hygiene issue payloads."""
results = summary.get("results", []) if isinstance(summary, dict) else []
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() not in {"hygiene", "loc", "smell"}:
continue
issues = result.get("issues")
if not isinstance(issues, list):
continue
return sum(1 for item in issues if isinstance(item, str) and item.startswith("file exceeds"))
return 0
def _normalize_result_status(value: str | None, default: str = "failed") -> str:
    """Bucket free-form status text into ok / not_applicable / failed.

    Matching is case-insensitive and whitespace-tolerant; empty or
    unrecognized values fall back to ``default``.
    """
    if not value:
        return default
    folded = value.strip().lower()
    for bucket, members in (
        ("ok", SUCCESS_STATUSES),
        ("not_applicable", NOT_APPLICABLE_STATUSES),
        ("failed", FAILED_STATUSES),
    ):
        if folded in members:
            return bucket
    return default
def _load_optional_json(path: str | None) -> dict:
"""Load an optional JSON report file, returning an empty object when absent."""
if not path:
return {}
candidate = Path(path)
if not candidate.exists():
return {}
try:
return json.loads(candidate.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return {}
def _combine_statuses(statuses: list[str]) -> str:
"""Roll up many check statuses into one canonical result."""
if not statuses:
return "not_applicable"
if any(status == "failed" for status in statuses):
return "failed"
if all(status == "not_applicable" for status in statuses):
return "not_applicable"
if all(status in {"ok", "not_applicable"} for status in statuses):
return "ok"
return "failed"
def _infer_sonarqube_status(report: dict) -> str:
    """Map a SonarQube quality-gate report payload onto a canonical status.

    Probes projectStatus.status, then qualityGate.status, then a top-level
    status field; an empty report means the check did not run.
    """
    if not report:
        return "not_applicable"
    raw = None
    for probe in (
        lambda: report.get("projectStatus", {}).get("status"),
        lambda: report.get("qualityGate", {}).get("status"),
        lambda: report.get("status"),
    ):
        raw = probe()
        if raw:
            break
    return _normalize_result_status(str(raw) if raw is not None else None, default="failed")
def _infer_supply_chain_status(report: dict, required: bool) -> str:
    """Derive the supply-chain check status from an IronBank-style report.

    A boolean "compliant" field wins outright; otherwise the textual status
    is normalized. When the check is required, a missing or not-applicable
    result is treated as a failure rather than a skip.
    """
    absent = "failed" if required else "not_applicable"
    if not report:
        return absent
    compliant = report.get("compliant")
    if compliant is True:
        return "ok"
    if compliant is False:
        return "failed"
    raw_status = report.get("status")
    if raw_status is None:
        return absent
    resolved = _normalize_result_status(str(raw_status), default="failed")
    return "failed" if required and resolved == "not_applicable" else resolved
def _build_check_statuses(
    summary: dict | None,
    tests: dict[str, int],
    workspace_line_coverage_percent: float,
    source_lines_over_500: int,
    sonarqube_report: dict,
    supply_chain_report: dict,
    supply_chain_required: bool,
) -> dict[str, str]:
    """Generate the canonical quality-check status map for dashboarding.

    Precedence for every check: an explicit status in the summary's results
    wins; otherwise the status is inferred from related result entries or
    from the raw evidence (JUnit totals, coverage percentage, LOC count,
    SonarQube / supply-chain reports). Always returns one status per
    CANONICAL_CHECKS entry.
    """
    raw_results = summary.get("results", []) if isinstance(summary, dict) else []
    # Index every named result by its lowercased name -> normalized status.
    status_by_name: dict[str, str] = {}
    for result in raw_results:
        if not isinstance(result, dict):
            continue
        check_name = str(result.get("name") or "").strip().lower()
        if not check_name:
            continue
        status_by_name[check_name] = _normalize_result_status(result.get("status"), default="failed")
    # tests: explicit entry, else roll up test-flavored entries, else JUnit.
    tests_status = status_by_name.get("tests")
    if not tests_status:
        candidate_keys = ["unit", "integration", "e2e", "pytest", "test", "tests"]
        candidates = [status_by_name[key] for key in candidate_keys if key in status_by_name]
        if candidates:
            tests_status = _combine_statuses(candidates)
        elif tests["tests"] > 0:
            tests_status = "ok" if (tests["failures"] + tests["errors"]) == 0 else "failed"
        else:
            tests_status = "not_applicable"
    # coverage: explicit entry, else the 95% workspace line-coverage bar.
    coverage_status = status_by_name.get("coverage")
    if not coverage_status:
        if workspace_line_coverage_percent > 0:
            coverage_status = "ok" if workspace_line_coverage_percent >= 95.0 else "failed"
        else:
            coverage_status = "not_applicable"
    # loc: any file over the 500-line limit fails the check.
    loc_status = status_by_name.get("loc")
    if not loc_status:
        loc_status = "ok" if source_lines_over_500 == 0 else "failed"
    # docs_naming: explicit entry, else roll up docs/hygiene-flavored entries.
    docs_naming_status = status_by_name.get("docs_naming")
    if not docs_naming_status:
        candidates = [status_by_name[key] for key in ["docs", "hygiene", "smell", "lint", "naming"] if key in status_by_name]
        docs_naming_status = _combine_statuses(candidates) if candidates else "not_applicable"
    # gate_glue: explicit entry, else roll up glue/gate-flavored entries.
    gate_glue_status = status_by_name.get("gate_glue")
    if not gate_glue_status:
        candidates = [status_by_name[key] for key in ["gate_glue", "glue", "gate"] if key in status_by_name]
        gate_glue_status = _combine_statuses(candidates) if candidates else "not_applicable"
    # External reports: explicit summary entry wins, else infer from payload.
    sonarqube_status = status_by_name.get("sonarqube") or _infer_sonarqube_status(sonarqube_report)
    supply_chain_status = status_by_name.get("supply_chain") or _infer_supply_chain_status(
        supply_chain_report,
        required=supply_chain_required,
    )
    return {
        "tests": tests_status,
        "coverage": coverage_status,
        "loc": loc_status,
        "docs_naming": docs_naming_status,
        "gate_glue": gate_glue_status,
        "sonarqube": sonarqube_status,
        "supply_chain": supply_chain_status,
    }

View File

@ -1,173 +0,0 @@
"""Build a titan-iac supply-chain compliance report from Trivy evidence."""
from __future__ import annotations
import argparse
import datetime as dt
import json
from pathlib import Path
from typing import Any
# Misconfiguration severities that count toward gate failure.
FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
def _read_json(path: Path) -> dict[str, Any]:
"""Read a JSON object from disk for use as pipeline evidence."""
payload = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise ValueError(f"{path} must contain a JSON object")
return payload
def _parse_day(raw: str | None) -> dt.date | None:
"""Parse an ISO day while letting optional waiver dates stay optional."""
if not raw:
return None
return dt.date.fromisoformat(raw)
def _today(override: str | None = None) -> dt.date:
    """Return the effective policy day; tests can pin it via ``override``."""
    pinned = _parse_day(override)
    if pinned is not None:
        return pinned
    return dt.date.today()
def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
    """Return active ``(misconfiguration id, target)`` waivers and expired count.

    A missing waiver file yields no waivers. Each waiver entry may carry its
    own ``expires_at``; otherwise the file-level ``default_expires_at``
    applies. Entries expired relative to ``policy_day`` are not activated —
    their targets are tallied into the returned expired count instead.
    """
    if path is None or not path.exists():
        return set(), 0
    payload = _read_json(path)
    default_expires_at = payload.get("default_expires_at")
    active: set[tuple[str, str]] = set()
    expired = 0
    for entry in payload.get("misconfigurations", []):
        # Skip malformed entries rather than failing the whole report.
        if not isinstance(entry, dict):
            continue
        misconfiguration_id = str(entry.get("id") or "").strip()
        if not misconfiguration_id:
            continue
        # Per-entry expiry wins; fall back to the file-wide default.
        expires_at = _parse_day(str(entry.get("expires_at") or default_expires_at or ""))
        targets = entry.get("targets", [])
        if not isinstance(targets, list):
            continue
        if expires_at and expires_at < policy_day:
            # Count every target of an expired waiver so the report can
            # surface how much coverage was silently lost.
            expired += len(targets)
            continue
        # Waivers are target-specific so a new unsafe manifest fails until it is
        # either fixed or deliberately accepted with a fresh expiration.
        for target in targets:
            if isinstance(target, str) and target:
                active.add((misconfiguration_id, target))
    return active, expired
def _iter_failed_misconfigurations(payload: dict[str, Any]):
    """Yield ``(target, record)`` for failed HIGH/CRITICAL misconfigurations."""
    for result in payload.get("Results", []):
        if not isinstance(result, dict):
            continue
        target = str(result.get("Target") or "")
        for finding in result.get("Misconfigurations") or []:
            if not isinstance(finding, dict):
                continue
            severity = str(finding.get("Severity") or "").upper()
            if finding.get("Status") == "FAIL" and severity in FAIL_SEVERITIES:
                yield target, finding
def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
"""Count Trivy vulnerabilities at a specific severity."""
count = 0
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
for item in result.get("Vulnerabilities") or []:
if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
count += 1
return count
def _count_secrets(payload: dict[str, Any]) -> int:
"""Count detected secrets in the Trivy filesystem report."""
count = 0
for result in payload.get("Results", []):
if isinstance(result, dict):
count += len(result.get("Secrets") or [])
return count
def build_report(
    trivy_payload: dict[str, Any],
    waiver_path: Path | None = None,
    today_override: str | None = None,
) -> dict[str, Any]:
    """Build the compliance summary consumed by the quality gate.

    The report fails on any critical vulnerability, any detected secret, or
    any unwaived HIGH/CRITICAL misconfiguration. High-severity
    vulnerabilities are counted but do not gate (policy "observe").
    ``today_override`` pins the waiver-expiry evaluation day for tests.
    """
    policy_day = _today(today_override)
    active_waivers, expired_waivers = _load_waiver_pairs(waiver_path, policy_day)
    open_misconfigs: list[dict[str, str]] = []
    waived_misconfigs = 0
    for target, item in _iter_failed_misconfigurations(trivy_payload):
        misconfiguration_id = str(item.get("ID") or "")
        # An active (id, target) waiver downgrades the finding to a tally.
        if (misconfiguration_id, target) in active_waivers:
            waived_misconfigs += 1
            continue
        open_misconfigs.append(
            {
                "id": misconfiguration_id,
                "target": target,
                "severity": str(item.get("Severity") or ""),
                "title": str(item.get("Title") or ""),
            }
        )
    critical = _count_vulnerabilities(trivy_payload, "CRITICAL")
    high = _count_vulnerabilities(trivy_payload, "HIGH")
    secrets = _count_secrets(trivy_payload)
    # Gate decision: criticals, secrets, and open misconfigurations block.
    status = "ok" if critical == 0 and secrets == 0 and not open_misconfigs else "failed"
    return {
        "status": status,
        "compliant": status == "ok",
        "category": "artifact_security",
        "scan_type": "filesystem",
        "scanner": "trivy",
        "critical_vulnerabilities": critical,
        "high_vulnerabilities": high,
        "high_vulnerability_policy": "observe",
        "secrets": secrets,
        "high_or_critical_misconfigurations": len(open_misconfigs),
        "waived_misconfigurations": waived_misconfigs,
        "expired_waivers": expired_waivers,
        "waiver_file": str(waiver_path) if waiver_path else "",
        # Cap examples at 20 to keep the report payload small.
        "open_misconfiguration_examples": open_misconfigs[:20],
    }
def main(argv: list[str] | None = None) -> int:
    """CLI entrypoint used by Jenkins after the Trivy scan completes.

    Reads the Trivy JSON report (and optional waiver file), builds the
    compliance summary, and writes it as pretty-printed JSON to --output,
    creating parent directories as needed. Returns 0 on success.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--trivy-json", required=True)   # Trivy filesystem scan JSON
    parser.add_argument("--waivers")                     # optional waiver file
    parser.add_argument("--output", required=True)       # report destination path
    parser.add_argument("--today")                       # ISO day override for expiry checks
    args = parser.parse_args(argv)
    trivy_payload = _read_json(Path(args.trivy_json))
    waiver_path = Path(args.waivers) if args.waivers else None
    report = build_report(trivy_payload, waiver_path=waiver_path, today_override=args.today)
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    return 0
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())

View File

@ -1,7 +1,6 @@
max_success_age_hours: 48
allow_suspended:
- bstein-dev-home/vaultwarden-cred-sync
- comms/guest-name-randomizer
- comms/othrys-room-reset
- comms/pin-othrys-invite
- comms/seed-othrys-room
@ -10,7 +9,6 @@ allow_suspended:
- health/wger-user-sync
- mailu-mailserver/mailu-sync-nightly
- nextcloud/nextcloud-mail-sync
- vault/vault-oidc-config
ariadne_schedule_tasks:
- schedule.mailu_sync
- schedule.nextcloud_sync

View File

@ -1,108 +0,0 @@
"""Glue checks for Ariadne schedules exported to VictoriaMetrics."""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
import requests
import yaml
# The glue-check config lives next to this module (config.yaml).
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
    """Load the glue-check YAML config, defaulting to an empty mapping."""
    raw = CONFIG_PATH.read_text(encoding="utf-8")
    return yaml.safe_load(raw) or {}
def _query(promql: str) -> list[dict]:
    """Run an instant PromQL query against VictoriaMetrics, returning result rows."""
    base_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
    response = requests.get(
        f"{base_url}/api/v1/query",
        params={"query": promql},
        timeout=10,
    )
    response.raise_for_status()
    body = response.json()
    data = body.get("data", {})
    return data.get("result", [])
def _expected_tasks() -> list[dict]:
    """Return every configured Ariadne schedule task in normalized dict form."""
    cfg = _load_config()
    entries = cfg.get("ariadne_schedule_tasks", [])
    normalized = [_normalize_task(entry, cfg) for entry in entries]
    assert normalized, "No Ariadne schedule tasks configured"
    return normalized
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_series_exist():
    """Every configured task must export a next-run timestamp series."""
    tasks = _expected_tasks()
    pattern = _task_regex(tasks)
    rows = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{pattern}"}}')
    reported = {row.get("metric", {}).get("task") for row in rows}
    absent = [task["task"] for task in tasks if task["task"] not in reported]
    assert not absent, f"Missing next-run metrics for: {', '.join(absent)}"
def test_ariadne_schedule_recent_success():
    """Tracked tasks must have a last-success metric that is not stale.

    Staleness is judged per task against its own max_success_age_hours
    (falling back to the config-wide default during normalization).
    """
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing last-success metrics for: {', '.join(missing)}"
    now = datetime.now(timezone.utc)
    # Map task -> hours since its last recorded success (metric value is an
    # epoch-seconds timestamp at value[1]).
    age_by_task = {
        item.get("metric", {}).get("task"): (now - datetime.fromtimestamp(float(item["value"][1]), tz=timezone.utc)).total_seconds() / 3600
        for item in series
    }
    # Only tasks present in age_by_task are aged; missing ones were already
    # reported above. The walrus binds `task` once per config entry.
    too_old = [
        f"{task} ({age_by_task[task]:.1f}h > {item['max_success_age_hours']}h)"
        for item in tasks
        if (task := item["task"]) in age_by_task and age_by_task[task] > float(item["max_success_age_hours"])
    ]
    assert not too_old, "Ariadne schedules are stale: " + ", ".join(too_old)
def test_ariadne_schedule_last_status_present_and_boolean():
    """Last-status series must exist for every tracked task and be 0 or 1."""
    tasks = _tracked_tasks(_expected_tasks())
    pattern = _task_regex(tasks)
    rows = _query(f'ariadne_schedule_last_status{{task=~"{pattern}"}}')
    reported = {row.get("metric", {}).get("task") for row in rows}
    absent = [task["task"] for task in tasks if task["task"] not in reported]
    assert not absent, f"Missing last-status metrics for: {', '.join(absent)}"
    bad_values = []
    for row in rows:
        name = row.get("metric", {}).get("task")
        sample = float(row["value"][1])
        if sample not in (0.0, 1.0):
            bad_values.append(f"{name}={sample}")
    assert not bad_values, f"Unexpected Ariadne last-status values: {', '.join(bad_values)}"

View File

@ -1,5 +1,3 @@
"""Glue checks for the metrics the quality-gate publishes."""
from __future__ import annotations
import os
@ -25,63 +23,26 @@ def _query(promql: str) -> list[dict]:
return payload.get("data", {}).get("result", [])
def _expected_tasks() -> list[dict]:
    """Return every configured Ariadne schedule task in normalized dict form."""
    cfg = _load_config()
    tasks = [
        _normalize_task(item, cfg)
        for item in cfg.get("ariadne_schedule_tasks", [])
    ]
    # An empty config is a setup error, not a passing no-op.
    assert tasks, "No Ariadne schedule tasks configured"
    return tasks
def test_glue_metrics_present():
    """At least one cronjob must carry the atlas glue label in kube-state-metrics."""
    series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
    assert series, "No glue cronjob label series found"
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
    """Filter to tasks whose last-success timestamp should be verified."""
    tracked = [item for item in tasks if item.get("check_last_success")]
    assert tracked, "No Ariadne schedule tasks are marked for success tracking"
    return tracked
def _task_regex(tasks: list[dict]) -> str:
    """Join task names into a PromQL ``=~`` alternation.

    NOTE(review): names are joined verbatim — regex metacharacters such as
    the "." in "schedule.mailu_sync" match any character, so an unrelated
    series could satisfy a presence check; consider escaping.
    """
    return "|".join(item["task"] for item in tasks)
def test_glue_metrics_success_join():
    """Glue-labelled cronjobs must report a last-successful-run timestamp."""
    # Join the success-time metric with the glue label on namespace+cronjob so
    # only glue-tagged jobs are considered.
    query = (
        "kube_cronjob_status_last_successful_time "
        'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
    )
    series = _query(query)
    assert series, "No glue cronjob last success series found"
def test_ariadne_schedule_metrics_present():
    """Every configured Ariadne task must export a next-run metric."""
    # NOTE(review): the next five lines look like a leftover from a
    # selector-based variant of this check; `series`, `tasks`, and `missing`
    # are all recomputed below, so this prefix is dead code — confirm intent.
    tasks = _expected_tasks()
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    cfg = _load_config()
    expected = cfg.get("ariadne_schedule_tasks", [])
    # Nothing configured means nothing to verify.
    if not expected:
        return
    # Fetch all next-run series and check each expected task appears.
    series = _query("ariadne_schedule_next_run_timestamp_seconds")
    tasks = {item.get("metric", {}).get("task") for item in series}
    missing = [task for task in expected if task not in tasks]
    assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
def test_ariadne_schedule_success_and_status_metrics_present():
    """Tracked tasks must export both last-success and last-status series."""
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
    status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
    success_tasks = {item.get("metric", {}).get("task") for item in success}
    status_tasks = {item.get("metric", {}).get("task") for item in status}
    expected = {item["task"] for item in tasks}
    # Sorted output keeps the failure message deterministic.
    missing_success = sorted(expected - success_tasks)
    missing_status = sorted(expected - status_tasks)
    assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
    assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"

View File

@ -1,407 +0,0 @@
{
"version": 1,
"generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
"default_expires_at": "2026-05-22",
"ticket": "atlas-quality-wave-k8s-hardening",
"default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
"misconfigurations": [
{
"id": "DS-0002",
"targets": [
"dockerfiles/Dockerfile.ananke-node-helper"
]
},
{
"id": "KSV-0009",
"targets": [
"services/mailu/vip-controller.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml"
]
},
{
"id": "KSV-0010",
"targets": [
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0014",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/guest-register-deployment.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/actual-budget-deployment.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/deployment.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud-mail-sync/cronjob.yaml",
"services/nextcloud/collabora.yaml",
"services/nextcloud/cronjob.yaml",
"services/nextcloud/deployment.yaml",
"services/nextcloud/maintenance-cronjob.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/planka/deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vault/statefulset.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0017",
"targets": [
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0041",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"infrastructure/traefik/clusterrole.yaml",
"services/bstein-dev-home/rbac.yaml",
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/comms/mas-secrets-ensure-rbac.yaml",
"services/maintenance/soteria-rbac.yaml"
]
},
{
"id": "KSV-0047",
"targets": [
"services/monitoring/rbac.yaml"
]
},
{
"id": "KSV-0053",
"targets": [
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/ariadne-rbac.yaml"
]
},
{
"id": "KSV-0056",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/disable-k3s-traefik-rbac.yaml",
"services/maintenance/k3s-traefik-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0114",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0118",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/coredns-deployment.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud/collabora.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
"services/typhon/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0121",
"targets": [
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
]
}
]
}

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: ai-llm
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/ai-llm

View File

@ -0,0 +1,26 @@
# clusters/atlas/flux-system/applications/atlasbot/image-automation.yaml
# Flux ImageUpdateAutomation for atlasbot: rewrites image references under
# services/atlasbot and commits the result back to git as flux-bot.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: atlasbot
  namespace: ai
spec:
  # Reconcile (check for new image tags to write back) every minute.
  interval: 1m0s
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  git:
    checkout:
      ref:
        # NOTE(review): tracks the feature branch rather than main —
        # confirm this is reverted once the branch is merged.
        branch: feature/atlasbot
    commit:
      author:
        email: ops@bstein.dev
        name: flux-bot
      messageTemplate: "chore(atlasbot): automated image update"
    push:
      # Push updates back to the same feature branch that is checked out.
      branch: feature/atlasbot
  update:
    # Setters strategy: only fields carrying image-policy setter markers
    # under this path are rewritten.
    strategy: Setters
    path: services/atlasbot

View File

@ -0,0 +1,17 @@
# clusters/atlas/flux-system/applications/atlasbot/kustomization.yaml
# Flux Kustomization that deploys atlasbot manifests into the ai namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: atlasbot
  namespace: flux-system
spec:
  interval: 10m
  # Delete cluster objects that are removed from git.
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./services/atlasbot
  targetNamespace: ai
  timeout: 2m
  # Reconcile only after the ai-llm Kustomization has been applied.
  dependsOn:
    - name: ai-llm

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: bstein-dev-home-migrations
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/bstein-dev-home/oneoffs/migrations

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: main
branch: feature/atlasbot
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(bstein-dev-home): automated image update"
push:
branch: main
branch: feature/atlasbot
update:
strategy: Setters
path: services/bstein-dev-home

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: bstein-dev-home
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/bstein-dev-home

View File

@ -0,0 +1,26 @@
# clusters/atlas/flux-system/applications/comms/image-automation.yaml
# Flux ImageUpdateAutomation for the comms stack: rewrites image references
# under services/comms and commits the result back to git as flux-bot.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: comms
  namespace: comms
spec:
  # Reconcile (check for new image tags to write back) every minute.
  interval: 1m0s
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  git:
    checkout:
      ref:
        # NOTE(review): tracks the feature branch rather than main —
        # confirm this is reverted once the branch is merged.
        branch: feature/atlasbot
    commit:
      author:
        email: ops@bstein.dev
        name: flux-bot
      messageTemplate: "chore(comms): automated image update"
    push:
      # Push updates back to the same feature branch that is checked out.
      branch: feature/atlasbot
  update:
    # Setters strategy: only fields carrying image-policy setter markers
    # under this path are rewritten.
    strategy: Setters
    path: services/comms

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: comms
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true
@ -15,3 +13,5 @@ spec:
path: ./services/comms
targetNamespace: comms
timeout: 2m
dependsOn:
- name: traefik

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: crypto
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: finance
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/finance

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: gitea
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/gitea
@ -15,8 +13,4 @@ spec:
kind: GitRepository
name: flux-system
namespace: flux-system
dependsOn:
- name: longhorn
- name: vault
- name: postgres
wait: true

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: harbor
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/harbor
@ -18,6 +16,3 @@ spec:
wait: false
dependsOn:
- name: core
- name: longhorn
- name: vault
- name: postgres

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: health
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/health
@ -17,6 +15,7 @@ spec:
dependsOn:
- name: keycloak
- name: postgres
- name: traefik
- name: vault
healthChecks:
- apiVersion: apps/v1

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: jellyfin
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/jellyfin

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: jenkins
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/jenkins
@ -16,6 +14,7 @@ spec:
targetNamespace: jenkins
dependsOn:
- name: helm
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment
@ -26,4 +25,3 @@ spec:
name: jenkins
namespace: jenkins
wait: false
timeout: 20m

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: keycloak
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true
@ -14,8 +12,4 @@ spec:
name: flux-system
path: ./services/keycloak
targetNamespace: sso
dependsOn:
- name: longhorn
- name: vault
- name: postgres
timeout: 2m

View File

@ -6,6 +6,9 @@ resources:
- vault/kustomization.yaml
- vaultwarden/kustomization.yaml
- comms/kustomization.yaml
- comms/image-automation.yaml
- atlasbot/kustomization.yaml
- atlasbot/image-automation.yaml
- crypto/kustomization.yaml
- monerod/kustomization.yaml
- pegasus/kustomization.yaml
@ -21,12 +24,10 @@ resources:
- sui-metrics/kustomization.yaml
- openldap/kustomization.yaml
- keycloak/kustomization.yaml
- quality/kustomization.yaml
- oauth2-proxy/kustomization.yaml
- mailu/kustomization.yaml
- jenkins/kustomization.yaml
- ai-llm/kustomization.yaml
- typhon/kustomization.yaml
- nextcloud/kustomization.yaml
- nextcloud-mail-sync/kustomization.yaml
- outline/kustomization.yaml

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: mailu
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
sourceRef:

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: monerod
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto/monerod
@ -18,4 +16,4 @@ spec:
dependsOn:
- name: crypto
wait: true
timeout: 15m
timeout: 5m

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: nextcloud-mail-sync
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: nextcloud
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/nextcloud

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: oauth2-proxy
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: openldap
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: outline
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/outline
@ -17,6 +15,7 @@ spec:
dependsOn:
- name: keycloak
- name: mailu
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: pegasus
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/pegasus

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: planka
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/planka
@ -17,6 +15,7 @@ spec:
dependsOn:
- name: keycloak
- name: mailu
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment

View File

@ -1,36 +0,0 @@
# clusters/atlas/flux-system/applications/quality/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: quality
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/quality
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: quality
dependsOn:
- name: cert-manager
- name: keycloak
- name: vault
- name: postgres
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: sonarqube
namespace: quality
- apiVersion: apps/v1
kind: Deployment
name: sonarqube-exporter
namespace: quality
- apiVersion: apps/v1
kind: Deployment
name: oauth2-proxy-sonarqube
namespace: quality
wait: false
timeout: 20m

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: sui-metrics
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/sui-metrics/overlays/atlas

View File

@ -1,31 +0,0 @@
# clusters/atlas/flux-system/applications/typhon/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: typhon
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/typhon
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: climate
dependsOn:
- name: vault
- name: vault-csi
- name: monitoring
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: typhon
namespace: climate
- apiVersion: v1
kind: Service
name: typhon
namespace: climate
wait: false
timeout: 20m

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: vault
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
sourceRef:
@ -17,5 +15,4 @@ spec:
prune: true
wait: true
dependsOn:
- name: longhorn
- name: helm

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: vaultwarden
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
suspend: false
@ -19,3 +17,4 @@ spec:
wait: true
dependsOn:
- name: helm
- name: traefik

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: wallet-monero-temp
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto/wallet-monero-temp

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: xmr-miner
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto/xmr-miner
@ -19,4 +17,3 @@ spec:
- name: crypto
- name: monerod
wait: true
timeout: 30m

View File

@ -5966,9 +5966,6 @@ spec:
- args:
- --events-addr=http://notification-controller.$(RUNTIME_NAMESPACE).svc.cluster.local./
- --watch-all-namespaces=true
- --concurrent=1
- --requeue-dependency=5s
- --interval-jitter-percentage=30
- --log-level=info
- --log-encoding=json
- --enable-leader-election

View File

@ -7,9 +7,9 @@ metadata:
name: flux-system
namespace: flux-system
spec:
interval: 15m0s
interval: 1m0s
ref:
branch: main
branch: feature/atlasbot
secretRef:
name: flux-system-gitea
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
@ -20,7 +20,7 @@ metadata:
name: flux-system
namespace: flux-system
spec:
interval: 1h0m0s
interval: 10m0s
path: ./clusters/atlas/flux-system
prune: true
sourceRef:

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: cert-manager-cleanup
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/cert-manager/cleanup

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: cert-manager
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/cert-manager

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: core
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/core

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: gitops-ui
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
timeout: 10m
@ -18,4 +16,5 @@ spec:
targetNamespace: flux-system
dependsOn:
- name: helm
- name: traefik
wait: true

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: helm
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
sourceRef:

View File

@ -16,5 +16,6 @@ resources:
- longhorn/kustomization.yaml
- longhorn-ui/kustomization.yaml
- postgres/kustomization.yaml
- nats/kustomization.yaml
- ../platform/vault-csi/kustomization.yaml
- ../platform/vault-injector/kustomization.yaml

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: logging
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/logging

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: longhorn-adopt
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/longhorn/adopt

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: longhorn-ui
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/longhorn/ui-ingress

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: longhorn
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/longhorn/core

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: main
branch: feature/atlasbot
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(maintenance): automated image update"
push:
branch: main
branch: feature/atlasbot
update:
strategy: Setters
path: services/maintenance

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: maintenance
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/maintenance

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: metallb
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
sourceRef:

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: monitoring
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/monitoring

View File

@ -0,0 +1,21 @@
# clusters/atlas/flux-system/platform/nats/kustomization.yaml
# Flux Kustomization that reconciles the NATS manifests under
# ./infrastructure/nats into the dedicated "nats" namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: nats
  namespace: flux-system
spec:
  interval: 10m
  path: ./infrastructure/nats
  # Remove cluster objects whose manifests disappear from the source tree.
  prune: true
  # force lets Flux recreate resources that cannot be patched in place
  # (e.g. immutable StatefulSet fields).
  force: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: nats
  # Only report Ready once the NATS StatefulSet itself is healthy.
  healthChecks:
    - apiVersion: apps/v1
      kind: StatefulSet
      name: nats
      namespace: nats
  wait: true

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: postgres
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/postgres
@ -16,7 +14,6 @@ spec:
name: flux-system
targetNamespace: postgres
dependsOn:
- name: longhorn
- name: vault
- name: vault-csi
healthChecks:

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: traefik
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/traefik

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: vault-csi
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
sourceRef:

View File

@ -4,8 +4,6 @@ kind: Kustomization
metadata:
name: vault-injector
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/vault-injector

View File

@ -1,12 +0,0 @@
FROM debian:bookworm-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bash \
ca-certificates \
curl \
util-linux \
zstd \
&& rm -rf /var/lib/apt/lists/*
CMD ["/bin/sh"]

View File

@ -2,8 +2,4 @@ FROM python:3.11-slim
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN pip install --no-cache-dir requests psycopg2-binary \
&& groupadd --system guest-tools \
&& useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
USER guest-tools
RUN pip install --no-cache-dir requests psycopg2-binary

View File

@ -1,8 +1,16 @@
# Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
&& mkdir -p /var/log/data-prepper
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
USER 10001
WORKDIR /usr/share/data-prepper
CMD ["bin/data-prepper"]

View File

@ -1,13 +1,10 @@
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S livekit-token \
&& adduser -S -D -H -u 65532 -G livekit-token livekit-token
RUN apk add --no-cache ca-certificates
COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER livekit-token
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"]

View File

@ -29,12 +29,10 @@ FROM ${DEBIAN_IMAGE}
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends ca-certificates; \
update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
groupadd --system p2pool; \
useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
update-ca-certificates; rm -rf /var/lib/apt/lists/*
COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
RUN /usr/local/bin/p2pool --version || true
EXPOSE 3333
USER p2pool
ENTRYPOINT ["/usr/local/bin/p2pool"]

View File

@ -26,12 +26,9 @@ RUN set -eux; \
curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
rm -f /opt/monero/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
rm -f /opt/monero/monero.tar.bz2
ENV PATH="/usr/local/bin:/usr/bin:/bin"
RUN /usr/local/bin/monero-wallet-rpc --version || true
EXPOSE 18083
USER monero

View File

@ -23,14 +23,10 @@ RUN set -eux; \
mkdir -p /opt/monero; \
tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
rm -f /tmp/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
mkdir -p /data; \
chown monero:monero /data; \
chmod 0770 /data
ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
PATH="/opt/monero:${PATH}"
USER monero
CMD ["/opt/monero/monerod", "--version"]

View File

@ -1,13 +1,10 @@
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S oauth2-proxy \
&& adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
RUN apk add --no-cache ca-certificates
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER oauth2-proxy
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/oauth2-proxy"]

View File

@ -1,13 +1,10 @@
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S pegasus \
&& adduser -S -D -H -u 65532 -G pegasus pegasus
RUN apk add --no-cache ca-certificates
COPY --from=base /pegasus /pegasus
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER pegasus
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/pegasus"]

View File

@ -1,48 +0,0 @@
# dockerfiles/Dockerfile.quality-tools
FROM debian:bookworm-slim
ARG SONAR_SCANNER_VERSION=8.0.1.6346
ARG TRIVY_VERSION=0.70.0
ENV TRIVY_CACHE_DIR=/opt/trivy-cache
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bash \
ca-certificates \
curl \
git \
jq \
unzip \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd --system quality-tools \
&& useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools
RUN set -eux; \
scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
printf '%s %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
unzip -q "/tmp/${scanner_zip}" -d /opt; \
ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"
RUN set -eux; \
trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
curl -fsSL "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
rm -f "/tmp/${trivy_tgz}"; \
trivy --version; \
sonar-scanner -v
RUN set -eux; \
mkdir -p "${TRIVY_CACHE_DIR}"; \
trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
mkdir -p /workspace; \
chown quality-tools:quality-tools /workspace
WORKDIR /workspace
USER quality-tools

View File

@ -0,0 +1,3 @@
# Minimal Python tool image: Postgres driver plus bcrypt on the slim
# Python 3.11 base.
FROM python:3.11-slim
# NOTE(review): package versions are unpinned and the image runs as root —
# acceptable for a short-lived job image, but confirm that is intended.
RUN pip install --no-cache-dir psycopg2-binary bcrypt

View File

@ -27,42 +27,10 @@ spec:
timeout: 10m
values:
installCRDs: true
extraArgs:
- --acme-http01-solver-nameservers=1.1.1.1:53,8.8.8.8:53
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -76,36 +44,6 @@ spec:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -119,36 +57,6 @@ spec:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:

View File

@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: coredns
image: registry.k8s.io/coredns/coredns:v1.12.1
image: registry.bstein.dev/infra/coredns:1.12.1
imagePullPolicy: IfNotPresent
args:
- -conf

View File

@ -4,11 +4,9 @@ kind: Kustomization
resources:
- ../modules/base
- ../modules/profiles/atlas-ha
- node-prefer-noschedule-serviceaccount.yaml
- node-prefer-noschedule-rbac.yaml
- node-prefer-noschedule-cronjob.yaml
- coredns-custom.yaml
- coredns-deployment.yaml
- longhorn-node-taints.yaml
- ntp-sync-daemonset.yaml
- ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -0,0 +1,40 @@
# infrastructure/core/longhorn-node-taints.yaml
# Declares a PreferNoSchedule "longhorn" taint on the four titan storage
# nodes so the scheduler prefers other nodes for general workloads but
# can still place pods here when necessary.
apiVersion: v1
kind: Node
metadata:
  name: titan-13
spec:
  taints:
    - key: longhorn
      value: "true"
      effect: PreferNoSchedule
---
apiVersion: v1
kind: Node
metadata:
  name: titan-15
spec:
  taints:
    - key: longhorn
      value: "true"
      effect: PreferNoSchedule
---
apiVersion: v1
kind: Node
metadata:
  name: titan-17
spec:
  taints:
    - key: longhorn
      value: "true"
      effect: PreferNoSchedule
---
apiVersion: v1
kind: Node
metadata:
  name: titan-19
spec:
  taints:
    - key: longhorn
      value: "true"
      effect: PreferNoSchedule

View File

@ -1,35 +0,0 @@
# infrastructure/core/node-prefer-noschedule-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: node-prefer-noschedule
namespace: kube-system
spec:
schedule: "*/20 * * * *"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
spec:
serviceAccountName: node-prefer-noschedule
restartPolicy: OnFailure
containers:
- name: taint
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command:
- /usr/bin/env
- bash
- -ceu
- |
for node in titan-13 titan-15 titan-17 titan-19; do
if kubectl get node "${node}" >/dev/null 2>&1; then
kubectl label node "${node}" atlas.bstein.dev/spillover=true --overwrite=true
kubectl taint node "${node}" longhorn=true:PreferNoSchedule --overwrite=true
kubectl taint node "${node}" atlas.bstein.dev/spillover=true:PreferNoSchedule --overwrite=true
else
echo "skipping missing node ${node}"
fi
done

View File

@ -1,22 +0,0 @@
# infrastructure/core/node-prefer-noschedule-rbac.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: node-prefer-noschedule
rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "list", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: node-prefer-noschedule
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: node-prefer-noschedule
subjects:
- kind: ServiceAccount
name: node-prefer-noschedule
namespace: kube-system

View File

@ -1,6 +0,0 @@
# infrastructure/core/node-prefer-noschedule-serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: node-prefer-noschedule
namespace: kube-system

View File

@ -0,0 +1,10 @@
# infrastructure/longhorn/core/backup-target.yaml
# Longhorn backup target pointing at an S3-compatible bucket
# (Backblaze B2, us-west-004); credentials are supplied by the
# longhorn-backup-b2 secret referenced below.
apiVersion: longhorn.io/v1beta2
kind: BackupTarget
metadata:
  name: default
  namespace: longhorn-system
spec:
  backupTargetURL: "s3://atlas-soteria@us-west-004/"
  credentialSecret: longhorn-backup-b2
  # How often Longhorn polls the target to list existing backups.
  pollInterval: 5m0s

View File

@ -6,6 +6,39 @@ metadata:
namespace: longhorn-system
spec:
interval: 30m
postRenderers:
- kustomize:
patches:
- target:
kind: Service
name: longhorn-conversion-webhook
namespace: longhorn-system
patch: |
- op: add
path: /spec/publishNotReadyAddresses
value: true
- target:
kind: Service
name: longhorn-admission-webhook
namespace: longhorn-system
patch: |
- op: add
path: /spec/publishNotReadyAddresses
value: true
- target:
kind: DaemonSet
name: longhorn-manager
namespace: longhorn-system
patch: |
- op: replace
path: /spec/template/spec/containers/0/readinessProbe/httpGet/path
value: /v1/healthz
- op: replace
path: /spec/template/spec/containers/0/readinessProbe/httpGet/port
value: 9500
- op: replace
path: /spec/template/spec/containers/0/readinessProbe/httpGet/scheme
value: HTTP
chart:
spec:
chart: longhorn
@ -26,9 +59,6 @@ spec:
cleanupOnFail: true
timeout: 15m
values:
global:
nodeSelector:
longhorn-host: "true"
service:
ui:
type: NodePort
@ -37,7 +67,7 @@ spec:
createSecret: false
registrySecret: longhorn-registry
image:
pullPolicy: Always
pullPolicy: IfNotPresent
longhorn:
engine:
repository: registry.bstein.dev/infra/longhorn-engine
@ -80,13 +110,4 @@ spec:
repository: registry.bstein.dev/infra/longhorn-livenessprobe
tag: v2.16.0
defaultSettings:
systemManagedPodsImagePullPolicy: Always
longhornManager:
nodeSelector:
longhorn-host: "true"
longhornDriver:
nodeSelector:
longhorn-host: "true"
longhornUI:
nodeSelector:
longhorn-host: "true"
systemManagedPodsImagePullPolicy: IfNotPresent

View File

@ -6,17 +6,14 @@ resources:
- vault-serviceaccount.yaml
- secretproviderclass.yaml
- vault-sync-deployment.yaml
- backup-target.yaml
- helmrelease.yaml
- longhorn-settings-ensure-job.yaml
- longhorn-disk-tags-ensure-job.yaml
configMapGenerator:
- name: longhorn-settings-ensure-script
files:
- longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh
- name: longhorn-disk-tags-ensure-script
files:
- longhorn_disk_tags_ensure.py=scripts/longhorn_disk_tags_ensure.py
generatorOptions:
disableNameSuffixHash: true

View File

@ -1,36 +0,0 @@
# infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-disk-tags-ensure-1
namespace: longhorn-system
spec:
backoffLimit: 0
ttlSecondsAfterFinished: 3600
template:
spec:
serviceAccountName: longhorn-service-account
restartPolicy: Never
volumes:
- name: longhorn-disk-tags-ensure-script
configMap:
name: longhorn-disk-tags-ensure-script
defaultMode: 0555
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
- key: node-role.kubernetes.io/worker
operator: Exists
containers:
- name: apply
image: python:3.12.9-alpine3.20
command: ["python", "/scripts/longhorn_disk_tags_ensure.py"]
volumeMounts:
- name: longhorn-disk-tags-ensure-script
mountPath: /scripts
readOnly: true

View File

@ -2,11 +2,10 @@
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-settings-ensure-7
name: longhorn-settings-ensure-4
namespace: longhorn-system
spec:
backoffLimit: 0
activeDeadlineSeconds: 240
ttlSecondsAfterFinished: 3600
template:
spec:

View File

@ -1,100 +0,0 @@
#!/usr/bin/env python3
"""Reconcile Longhorn disk tags for the Titan longhorn storage classes.
The astreae/asteria storageclasses select Longhorn disks by tag. The current
nodes already have the right disk paths, but the tag fields can drift to empty
after node recovery. This job patches the live Longhorn Node CRs back to the
expected tags so PVC provisioning keeps working.
"""
from __future__ import annotations
import json
import os
import ssl
import urllib.request
LONGHORN_NS = "longhorn-system"
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
DESIRED_TAGS = {
"/mnt/astreae": "astreae",
"/mnt/asteria": "asteria",
}
def api_base() -> str:
host = os.environ.get("KUBERNETES_SERVICE_HOST")
port = os.environ.get("KUBERNETES_SERVICE_PORT", "443")
if not host:
raise SystemExit("missing KUBERNETES_SERVICE_HOST")
return f"https://{host}:{port}"
def token() -> str:
path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
with open(path, "r", encoding="utf-8") as fh:
return fh.read().strip()
def ca_context() -> ssl.SSLContext:
cafile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
return ssl.create_default_context(cafile=cafile)
def request_json(method: str, path: str, body: dict | None = None) -> dict:
req = urllib.request.Request(
f"{api_base()}{path}",
method=method,
headers={
"Authorization": f"Bearer {token()}",
"Content-Type": "application/merge-patch+json",
"Accept": "application/json",
},
data=None if body is None else json.dumps(body).encode("utf-8"),
)
with urllib.request.urlopen(req, context=ca_context(), timeout=20) as resp:
payload = resp.read()
return json.loads(payload) if payload else {}
def list_nodes() -> list[dict]:
data = request_json("GET", LONGHORN_API.format(namespace=LONGHORN_NS))
return data.get("items", [])
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None:
body = {"spec": {"disks": {disk_name: {"tags": [desired_tag]}}}}
request_json(
"PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
body=body,
)
def main() -> int:
changed = 0
skipped = 0
for node in list_nodes():
name = node.get("metadata", {}).get("name", "")
spec_disks = node.get("spec", {}).get("disks", {}) or {}
for disk_name, disk in spec_disks.items():
disk_path = disk.get("path")
desired_tag = DESIRED_TAGS.get(disk_path)
if not desired_tag:
continue
current_tags = disk.get("tags") or []
if current_tags == [desired_tag]:
skipped += 1
continue
print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {[desired_tag]!r}")
patch_disk_tags(name, disk_name, desired_tag)
changed += 1
print(f"done: changed={changed} skipped={skipped}")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@ -4,12 +4,11 @@ set -eu
# Longhorn blocks direct CR patches for some settings; use the internal API instead.
api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
curl_opts="-fsS --connect-timeout 3 --max-time 15"
wait_for_api() {
attempts=30
while [ "${attempts}" -gt 0 ]; do
if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
if curl -fsS "${api_base}" >/dev/null 2>&1; then
return 0
fi
attempts=$((attempts - 1))
@ -23,14 +22,14 @@ update_setting() {
name="$1"
value="$2"
current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
current="$(curl -fsS "${api_base}/${name}" || true)"
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
echo "Setting ${name} already set."
return 0
fi
echo "Setting ${name} -> ${value}"
curl ${curl_opts} -X PUT \
curl -fsS -X PUT \
-H "Content-Type: application/json" \
-d "{\"value\":\"${value}\"}" \
"${api_base}/${name}" >/dev/null
@ -41,7 +40,3 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
# Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
update_setting replica-auto-balance "best-effort"
update_setting concurrent-replica-rebuild-per-node-limit "2"
update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"

View File

@ -13,13 +13,13 @@ spec:
- objectName: "harbor-pull__dockerconfigjson"
secretPath: "kv/data/atlas/shared/harbor-pull"
secretKey: "dockerconfigjson"
- objectName: "longhorn-backup-b2__AWS_ACCESS_KEY_ID"
- objectName: "longhorn_backup__AWS_ACCESS_KEY_ID"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ACCESS_KEY_ID"
- objectName: "longhorn-backup-b2__AWS_SECRET_ACCESS_KEY"
- objectName: "longhorn_backup__AWS_SECRET_ACCESS_KEY"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_SECRET_ACCESS_KEY"
- objectName: "longhorn-backup-b2__AWS_ENDPOINTS"
- objectName: "longhorn_backup__AWS_ENDPOINTS"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ENDPOINTS"
secretObjects:
@ -31,9 +31,9 @@ spec:
- secretName: longhorn-backup-b2
type: Opaque
data:
- objectName: longhorn-backup-b2__AWS_ACCESS_KEY_ID
- objectName: longhorn_backup__AWS_ACCESS_KEY_ID
key: AWS_ACCESS_KEY_ID
- objectName: longhorn-backup-b2__AWS_SECRET_ACCESS_KEY
- objectName: longhorn_backup__AWS_SECRET_ACCESS_KEY
key: AWS_SECRET_ACCESS_KEY
- objectName: longhorn-backup-b2__AWS_ENDPOINTS
- objectName: longhorn_backup__AWS_ENDPOINTS
key: AWS_ENDPOINTS

View File

@ -26,16 +26,6 @@ spec:
- key: hardware
operator: In
values: ["rpi5", "rpi4"]
- weight: 90
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
containers:
- name: sync
image: alpine:3.20

View File

@ -78,7 +78,6 @@ spec:
- --upstream=http://longhorn-frontend.longhorn-system.svc.cluster.local
- --http-address=0.0.0.0:4180
- --skip-provider-button=true
- --approval-prompt=auto
- --skip-jwt-bearer-tokens=true
- --oidc-groups-claim=groups
- --cookie-domain=longhorn.bstein.dev

View File

@ -0,0 +1,17 @@
# NATS server configuration consumed by the nats StatefulSet (mounted
# at /etc/nats). Only JetStream persistence limits are customised.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nats-config
  namespace: nats
  labels:
    app: nats
    component: config
  annotations:
    description: "NATS JetStream configuration"
data:
  # store_dir matches the StatefulSet's "data" volume mount.
  nats.conf: |
    jetstream {
      store_dir: /data
      max_mem_store: 128MB
      max_file_store: 1GB
    }

View File

@ -0,0 +1,7 @@
# kustomize manifest listing the NATS resources; namespace first so the
# namespaced objects that follow have somewhere to land.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - configmap.yaml
  - service.yaml
  - statefulset.yaml

View File

@ -0,0 +1,4 @@
# Dedicated namespace for the NATS messaging service.
apiVersion: v1
kind: Namespace
metadata:
  name: nats

View File

@ -0,0 +1,17 @@
# ClusterIP service exposing the NATS client port (4222) and the HTTP
# monitoring endpoint (8222) to in-cluster consumers.
apiVersion: v1
kind: Service
metadata:
  name: nats
  namespace: nats
  labels:
    app: nats
spec:
  selector:
    app: nats
  ports:
    - name: client
      port: 4222
      targetPort: 4222
    - name: monitoring
      port: 8222
      targetPort: 8222

View File

@ -0,0 +1,54 @@
# Single-replica NATS server with JetStream persistence on a per-pod PVC.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: nats
  namespace: nats
  labels:
    app: nats
spec:
  serviceName: nats
  replicas: 1
  selector:
    matchLabels:
      app: nats
  template:
    metadata:
      labels:
        app: nats
    spec:
      # NOTE(review): no liveness/readiness probes are configured even
      # though the monitoring port is exposed — confirm this is intended.
      containers:
        - name: nats
          image: nats:2.10.18
          # Load the server configuration provided by the nats-config
          # ConfigMap mounted at /etc/nats.
          args:
            - "-c"
            - "/etc/nats/nats.conf"
          ports:
            - name: client
              containerPort: 4222
            - name: monitoring
              containerPort: 8222
          volumeMounts:
            - name: config
              mountPath: /etc/nats
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
      volumes:
        - name: config
          configMap:
            name: nats-config
  # Backs the JetStream store_dir (/data).
  # NOTE(review): no storageClassName is set, so the cluster default
  # storage class is used — confirm that is the intended class.
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 2Gi

View File

@ -25,7 +25,6 @@ spec:
serviceAccountName: postgres-vault
nodeSelector:
node-role.kubernetes.io/worker: "true"
hardware: rpi5
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
@ -36,17 +35,7 @@ spec:
values: ["true"]
- key: hardware
operator: In
values: ["rpi5"]
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-06"]
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: In
values: ["titan-05", "titan-07", "titan-08", "titan-11"]
values: ["rpi4", "rpi5"]
containers:
- name: postgres
image: postgres:15

Some files were not shown because too many files have changed in this diff Show More