Compare commits

..

873 Commits

Author SHA1 Message Date
jenkins
0ee0070d14 Roll Veles backend launch-scope hotfix 2026-06-09 18:35:21 -03:00
jenkins
adc9601228 Roll Veles auth-scoped app images 2026-06-09 18:00:57 -03:00
jenkins
9aec012c42 veles: roll sim-worker to 0.1.0-3 2026-06-09 17:28:19 -03:00
jenkins
622dbc650d veles: roll backend to 0.1.0-4 2026-06-09 16:45:27 -03:00
jenkins
6a6ef8a22b veles: roll backend to 0.1.0-3 2026-06-09 16:36:01 -03:00
jenkins
e8fe7f1146 veles: roll simulation-ready images to 0.1.0-2 2026-06-09 16:29:10 -03:00
jenkins
fc5edbaa83 veles: roll app images to 0.1.0-1 2026-06-09 15:56:17 -03:00
jenkins
16698074bf veles: fit backend within core quota 2026-06-09 15:50:43 -03:00
jenkins
531bc440d5 veles: promote managed app deployments 2026-06-09 15:47:33 -03:00
flux-bot
0d8571b7a6 chore(maintenance): automated image update 2026-06-09 17:15:08 +00:00
jenkins
083e9e1148 veles: align app ports and traffic gate 2026-06-09 12:54:34 -03:00
jenkins
6833c3fe61 veles: harden app infrastructure contract 2026-06-09 11:59:27 -03:00
jenkins
07073970cf veles: let postgres initialize data volume 2026-06-09 02:17:41 -03:00
flux-bot
249f20091f chore(maintenance): automated image update 2026-06-09 05:15:03 +00:00
jenkins
63c869bf42 longhorn: ensure engine image on oceanus 2026-06-09 02:06:34 -03:00
jenkins
8fcc61cae9 longhorn: validate oceanus csi registration 2026-06-09 01:57:59 -03:00
jenkins
7885a70ee1 longhorn: pin toleration jobs off titan-14 2026-06-09 01:55:02 -03:00
jenkins
42069b0f23 longhorn: ensure csi tolerates oceanus 2026-06-09 01:53:04 -03:00
jenkins
5db9dd54fc longhorn: enforce oceanus taint setting 2026-06-09 01:49:16 -03:00
jenkins
530f813ebd longhorn: run csi driver on oceanus 2026-06-09 01:47:03 -03:00
jenkins
599e973d68 veles: enable postgres on oceanus 2026-06-09 01:43:24 -03:00
jenkins
6a40f40932 keycloak: make veles realm job idempotent 2026-06-09 01:26:22 -03:00
jenkins
363e564002 keycloak: fix veles groups mapper 2026-06-09 01:18:30 -03:00
jenkins
2985a7d12c veles: replace secrets oneoff job 2026-06-09 01:06:18 -03:00
jenkins
4f7777522e veles: run secrets oneoff with bash 2026-06-09 01:02:18 -03:00
jenkins
832c025c80 veles: satisfy vault sync memory floor 2026-06-09 00:59:12 -03:00
jenkins
28356e89fc monitoring: keep nvidia exporter off oceanus 2026-06-09 00:55:11 -03:00
jenkins
ea6a10dd7f longhorn: set veles recurring job names 2026-06-09 00:49:03 -03:00
jenkins
654900b8a2 veles: stage atlas infrastructure 2026-06-09 00:46:46 -03:00
jenkins
e1d091eb14 nextcloud: run collabora on amd64 2026-06-08 14:51:00 -03:00
flux-bot
23225c366b chore(maintenance): automated image update 2026-06-08 17:14:32 +00:00
flux-bot
eb1163f1ab chore(maintenance): automated image update 2026-06-08 13:47:56 +00:00
flux-bot
48fce69f96 chore(maintenance): automated image update 2026-06-08 13:46:56 +00:00
flux-bot
d219995052 chore(maintenance): automated image update 2026-06-08 13:44:57 +00:00
flux-bot
0318c3fe08 chore(maintenance): automated image update 2026-06-08 13:40:55 +00:00
flux-bot
c09d88a06e chore(bstein-dev-home): automated image update 2026-06-08 09:44:29 +00:00
flux-bot
7c3aadd4c6 chore(bstein-dev-home): automated image update 2026-06-08 09:43:28 +00:00
flux-bot
e0d88aa265 chore(maintenance): automated image update 2026-06-08 05:14:27 +00:00
flux-bot
9667f0e606 chore(maintenance): automated image update 2026-06-08 01:49:16 +00:00
flux-bot
3fb72623e9 chore(maintenance): automated image update 2026-06-08 01:48:39 +00:00
flux-bot
d6acb9bf78 chore(maintenance): automated image update 2026-06-08 01:46:15 +00:00
flux-bot
312a299af5 chore(maintenance): automated image update 2026-06-08 01:41:37 +00:00
flux-bot
81efc1c723 chore(bstein-dev-home): automated image update 2026-06-07 21:52:07 +00:00
flux-bot
a903948ac6 chore(bstein-dev-home): automated image update 2026-06-07 21:49:49 +00:00
flux-bot
1c26fe1377 chore(maintenance): automated image update 2026-06-07 17:14:04 +00:00
flux-bot
d760defdf0 chore(maintenance): automated image update 2026-06-07 13:50:52 +00:00
flux-bot
4555b6e25a chore(maintenance): automated image update 2026-06-07 13:49:56 +00:00
flux-bot
3c00906357 chore(maintenance): automated image update 2026-06-07 13:46:51 +00:00
flux-bot
9f4fa3537f chore(maintenance): automated image update 2026-06-07 13:41:55 +00:00
jenkins
82486d1408 mailu: start postfix master without postlog service 2026-06-07 02:29:03 -03:00
jenkins
b8c844dbac mailu: auto-disable postfix postlog on rwx queue 2026-06-07 02:26:22 -03:00
jenkins
0532fe2634 mailu: fix postfix startup on rwx queue 2026-06-07 02:18:26 -03:00
flux-bot
2b0e2764c6 chore(maintenance): automated image update 2026-06-07 05:14:39 +00:00
flux-bot
f701ae1628 chore(maintenance): automated image update 2026-06-07 01:49:18 +00:00
flux-bot
0e211f599c chore(maintenance): automated image update 2026-06-07 01:44:19 +00:00
flux-bot
282da9cf49 chore(maintenance): automated image update 2026-06-06 17:14:11 +00:00
flux-bot
3d552432a4 chore(maintenance): automated image update 2026-06-06 13:48:55 +00:00
flux-bot
d97cdf2c2c chore(maintenance): automated image update 2026-06-06 13:48:51 +00:00
flux-bot
828af81405 chore(maintenance): automated image update 2026-06-06 13:45:50 +00:00
flux-bot
d6ec355cde chore(maintenance): automated image update 2026-06-06 13:41:50 +00:00
flux-bot
a5d297e8e0 chore(bstein-dev-home): automated image update 2026-06-06 09:47:21 +00:00
flux-bot
1146000c7d chore(bstein-dev-home): automated image update 2026-06-06 09:45:20 +00:00
flux-bot
6bcbab91d7 chore(maintenance): automated image update 2026-06-06 05:14:35 +00:00
flux-bot
6324292f3b chore(bstein-dev-home): automated image update 2026-06-05 21:46:40 +00:00
flux-bot
bf2d4e1a62 chore(bstein-dev-home): automated image update 2026-06-05 21:45:39 +00:00
flux-bot
c255749410 chore(maintenance): automated image update 2026-06-05 17:14:07 +00:00
jenkins
4fd8a00d4a monitoring(testing): cap history panel ranges 2026-06-05 13:22:29 -03:00
jenkins
75d002dc88 monitoring(testing): cap expensive dashboard queries 2026-06-05 13:15:12 -03:00
flux-bot
f93e3e6050 chore(maintenance): automated image update 2026-06-05 13:49:31 +00:00
flux-bot
ed5504b072 chore(maintenance): automated image update 2026-06-05 13:48:23 +00:00
flux-bot
22eb0cc3f4 chore(maintenance): automated image update 2026-06-05 13:46:30 +00:00
flux-bot
6b53caf57a chore(maintenance): automated image update 2026-06-05 13:42:22 +00:00
flux-bot
2712600a1e chore(bstein-dev-home): automated image update 2026-06-05 10:29:05 +00:00
flux-bot
c916a8c862 chore(bstein-dev-home): automated image update 2026-06-05 10:28:04 +00:00
flux-bot
7158c32c06 chore(maintenance): automated image update 2026-06-05 05:14:08 +00:00
jenkins
a2ecdef536 monitoring(testing): restore lesavka suite visibility 2026-06-05 01:04:56 -03:00
flux-bot
dce9d5c131 chore(maintenance): automated image update 2026-06-05 01:47:24 +00:00
flux-bot
ec2ab28cec chore(maintenance): automated image update 2026-06-05 01:46:27 +00:00
flux-bot
c13169e95b chore(maintenance): automated image update 2026-06-05 01:44:23 +00:00
flux-bot
2559752654 chore(maintenance): automated image update 2026-06-05 01:39:26 +00:00
flux-bot
18c7fd77b4 chore(maintenance): automated image update 2026-06-05 00:44:21 +00:00
flux-bot
9d138a65a9 chore(maintenance): automated image update 2026-06-05 00:44:16 +00:00
flux-bot
9e33541a24 chore(maintenance): automated image update 2026-06-05 00:42:16 +00:00
flux-bot
fe0339647b chore(maintenance): automated image update 2026-06-05 00:37:17 +00:00
jenkins
fd7ec39a15 test(titan-iac): split dashboard trigger checks 2026-06-04 21:09:53 -03:00
jenkins
82fe618be9 ci(data-prepper): allow healthy rpi workers for agents 2026-06-04 21:01:58 -03:00
jenkins
09e64c8ca4 ci(jenkins): refresh suite jobs twice daily 2026-06-04 20:38:02 -03:00
jenkins
f2ad8cca4c monitoring(testing): clean up dashboard health signals 2026-06-04 16:09:08 -03:00
flux-bot
bd6000f956 chore(maintenance): automated image update 2026-06-04 17:14:24 +00:00
flux-bot
97a31b1d23 chore(bstein-dev-home): automated image update 2026-06-04 09:43:44 +00:00
flux-bot
c175602541 chore(bstein-dev-home): automated image update 2026-06-04 09:42:45 +00:00
flux-bot
694a9eb87e chore(maintenance): automated image update 2026-06-04 01:47:27 +00:00
flux-bot
dc8551710a chore(maintenance): automated image update 2026-06-04 01:47:23 +00:00
flux-bot
847ed53214 chore(maintenance): automated image update 2026-06-04 01:45:22 +00:00
flux-bot
8fe590ab93 chore(maintenance): automated image update 2026-06-04 01:40:22 +00:00
flux-bot
41738a71bd chore(maintenance): automated image update 2026-06-03 17:14:19 +00:00
flux-bot
cc09315576 chore(bstein-dev-home): automated image update 2026-06-03 09:44:40 +00:00
flux-bot
ef14c57105 chore(bstein-dev-home): automated image update 2026-06-03 09:43:42 +00:00
flux-bot
e7c8453c0a chore(maintenance): automated image update 2026-06-03 01:36:59 +00:00
flux-bot
65db67b70a chore(maintenance): automated image update 2026-06-03 01:36:09 +00:00
flux-bot
4f79f09dad chore(maintenance): automated image update 2026-06-03 01:34:59 +00:00
flux-bot
bdf695691b chore(maintenance): automated image update 2026-06-03 01:33:08 +00:00
flux-bot
5f0d62fd95 chore(maintenance): automated image update 2026-06-02 17:15:09 +00:00
flux-bot
ab557b9985 chore(bstein-dev-home): automated image update 2026-06-02 09:39:40 +00:00
flux-bot
000e20861c chore(maintenance): automated image update 2026-06-02 01:35:20 +00:00
flux-bot
1528a35634 chore(maintenance): automated image update 2026-06-02 01:35:16 +00:00
flux-bot
c724faf33f chore(maintenance): automated image update 2026-06-02 01:34:15 +00:00
flux-bot
869fb6aca2 chore(maintenance): automated image update 2026-06-02 01:32:18 +00:00
flux-bot
0677bb7500 chore(maintenance): automated image update 2026-06-01 17:15:21 +00:00
flux-bot
82cfa87921 chore(maintenance): automated image update 2026-05-31 17:14:24 +00:00
flux-bot
4c6f00dd32 chore(maintenance): automated image update 2026-05-30 17:14:29 +00:00
flux-bot
d6a975423f chore(maintenance): automated image update 2026-05-30 04:43:08 +00:00
flux-bot
eb8052615d chore(maintenance): automated image update 2026-05-30 04:42:52 +00:00
flux-bot
7ec411e21c chore(maintenance): automated image update 2026-05-30 04:41:08 +00:00
flux-bot
79665b0185 chore(maintenance): automated image update 2026-05-30 04:39:51 +00:00
flux-bot
a42b5c5c19 chore(maintenance): automated image update 2026-05-29 17:14:43 +00:00
flux-bot
3c42a7759a chore(maintenance): automated image update 2026-05-29 01:53:00 +00:00
flux-bot
43218b7584 chore(maintenance): automated image update 2026-05-29 01:52:46 +00:00
flux-bot
faf3f5bc69 chore(maintenance): automated image update 2026-05-29 01:52:00 +00:00
flux-bot
625b9d3191 chore(maintenance): automated image update 2026-05-29 01:49:45 +00:00
flux-bot
47a492ac40 chore(maintenance): automated image update 2026-05-28 17:14:42 +00:00
flux-bot
bce683752c chore(bstein-dev-home): automated image update 2026-05-28 09:46:09 +00:00
flux-bot
6e9d25d93e chore(bstein-dev-home): automated image update 2026-05-28 09:44:47 +00:00
flux-bot
f403defecb chore(maintenance): automated image update 2026-05-28 01:32:10 +00:00
flux-bot
eba2170314 chore(maintenance): automated image update 2026-05-28 01:31:51 +00:00
flux-bot
50c53fcf59 chore(maintenance): automated image update 2026-05-28 01:30:10 +00:00
flux-bot
f1f6ef22dd chore(maintenance): automated image update 2026-05-28 01:28:51 +00:00
flux-bot
92db3b155c chore(maintenance): automated image update 2026-05-27 17:14:52 +00:00
flux-bot
f9811b88bf chore(bstein-dev-home): automated image update 2026-05-27 09:36:04 +00:00
flux-bot
df37013d8a chore(bstein-dev-home): automated image update 2026-05-27 09:35:05 +00:00
flux-bot
1e343fb135 chore(maintenance): automated image update 2026-05-26 17:15:09 +00:00
flux-bot
84b2f646fc chore(maintenance): automated image update 2026-05-26 01:32:11 +00:00
flux-bot
52305fa446 chore(maintenance): automated image update 2026-05-26 01:31:12 +00:00
flux-bot
8e900f89f9 chore(maintenance): automated image update 2026-05-26 01:30:11 +00:00
flux-bot
c9a8aa816e chore(maintenance): automated image update 2026-05-26 01:28:12 +00:00
flux-bot
7f1d430dca chore(maintenance): automated image update 2026-05-25 17:14:08 +00:00
flux-bot
76ddc811f9 chore(bstein-dev-home): automated image update 2026-05-25 09:35:17 +00:00
flux-bot
812db05a06 chore(bstein-dev-home): automated image update 2026-05-25 09:34:20 +00:00
flux-bot
eb44827abb chore(maintenance): automated image update 2026-05-25 01:31:19 +00:00
flux-bot
14c2b129d2 chore(maintenance): automated image update 2026-05-25 01:31:15 +00:00
flux-bot
1d28ab9aa4 chore(maintenance): automated image update 2026-05-25 01:30:14 +00:00
flux-bot
f959b98797 chore(maintenance): automated image update 2026-05-25 01:28:17 +00:00
flux-bot
9c4fcfffed chore(maintenance): automated image update 2026-05-24 17:14:17 +00:00
flux-bot
5100e37471 chore(bstein-dev-home): automated image update 2026-05-24 09:37:52 +00:00
flux-bot
39342e7910 chore(bstein-dev-home): automated image update 2026-05-24 09:36:52 +00:00
flux-bot
c3d37fc203 chore(maintenance): automated image update 2026-05-23 17:14:51 +00:00
flux-bot
2859e0f0dd chore(bstein-dev-home): automated image update 2026-05-23 09:38:17 +00:00
flux-bot
f62664c419 chore(bstein-dev-home): automated image update 2026-05-23 09:37:24 +00:00
flux-bot
fae4f2bbcd chore(maintenance): automated image update 2026-05-23 01:40:16 +00:00
flux-bot
0e00181cb4 chore(maintenance): automated image update 2026-05-23 01:37:55 +00:00
flux-bot
f50de4cd49 chore(maintenance): automated image update 2026-05-23 01:36:05 +00:00
jenkins
cf8baafed1 maintenance: document node recovery guardrails 2026-05-22 17:21:59 -03:00
jenkins
c7edc81239 maintenance: stabilize recovered worker nodes 2026-05-22 17:10:01 -03:00
jenkins
46c3e97688 maintenance: make titan-22 link keeper passive 2026-05-22 15:56:50 -03:00
jenkins
5bce6c4c04 openclaw: allow recovered workers while excluding hdd nodes 2026-05-22 15:33:28 -03:00
jenkins
ee5688f297 maintenance: track titan-22 link recovery 2026-05-22 15:25:41 -03:00
flux-bot
c54c7b4452 chore(maintenance): automated image update 2026-05-22 17:11:37 +00:00
jenkins
17dc9a6e52 scheduling: target hdd storage node exclusions 2026-05-22 14:02:17 -03:00
jenkins
155d7d020e scheduling: keep apps off longhorn storage nodes 2026-05-22 13:38:29 -03:00
jenkins
f383818f93 nextcloud: keep collabora off descheduler 2026-05-22 06:57:01 -03:00
jenkins
1fe125b8b3 game-stream(wolf): expose runtime sockets to app containers 2026-05-22 05:37:52 -03:00
jenkins
361a4decb3 game-stream(wolf): retain failed app containers 2026-05-22 05:28:38 -03:00
jenkins
2aea5f4ace game-stream(wolf): use manual Nvidia driver mount 2026-05-22 05:12:41 -03:00
jenkins
ce13ac054c game-stream(wolf): mount Nvidia driver volume 2026-05-22 05:08:57 -03:00
jenkins
a19a19fbd5 maintenance(titan-24): avoid unnecessary Docker restarts 2026-05-22 05:07:40 -03:00
jenkins
f1a72d64fd gpu(titan-24): populate Nvidia driver volume without exec 2026-05-22 05:05:02 -03:00
jenkins
ac9c481ce7 gpu(titan-24): fix Nvidia driver volume bootstrap 2026-05-22 05:02:59 -03:00
jenkins
2ff55289a8 gpu(titan-24): prepare Wolf Nvidia driver volume 2026-05-22 04:59:52 -03:00
jenkins
2d8405d299 crypto: throttle mining during recovery 2026-05-22 04:26:29 -03:00
jenkins
5e27384ea2 monitoring(gpu): show activity share by namespace 2026-05-22 04:22:51 -03:00
flux-bot
ec972a52f1 chore(bstein-dev-home): automated image update 2026-05-22 07:07:12 +00:00
flux-bot
10eed46e81 chore(bstein-dev-home): automated image update 2026-05-22 07:06:25 +00:00
jenkins
d21b61f6d9 monitoring(gpu): count monitored GPU pool devices 2026-05-22 03:23:36 -03:00
jenkins
b367c6dea3 monitoring: keep quality probe on worker nodes 2026-05-22 03:16:01 -03:00
jenkins
6388ef5c6d monitoring(gpu): add pool utilization counters 2026-05-22 03:09:10 -03:00
flux-bot
4ce5a67b94 chore(bstein-dev-home): automated image update 2026-05-22 06:08:50 +00:00
flux-bot
1375bac117 chore(bstein-dev-home): automated image update 2026-05-22 06:08:18 +00:00
jenkins
570b1212d7 monitoring(gpu): normalize utilization pie to pool capacity 2026-05-22 02:55:24 -03:00
jenkins
ea21e106cf keycloak(portal): allow groups scope 2026-05-22 02:48:10 -03:00
jenkins
1c50af1d72 ci(data-prepper): avoid titan-04 during recovery 2026-05-22 02:37:21 -03:00
jenkins
b5dc723e02 monitoring(gpu): hide zero-utilization namespaces 2026-05-22 02:35:51 -03:00
flux-bot
3f24fbdc6d chore(bstein-dev-home): automated image update 2026-05-22 05:33:39 +00:00
flux-bot
1cd9fd18f4 chore(bstein-dev-home): automated image update 2026-05-22 05:32:46 +00:00
flux-bot
e7ad2c3955 chore(maintenance): automated image update 2026-05-22 05:28:56 +00:00
jenkins
fd3da0e2ae monitoring(gpu): add process-level utilization attribution 2026-05-22 02:28:08 -03:00
jenkins
5513608b1a monitoring(gpu): remove ambiguous shared wording 2026-05-22 01:55:25 -03:00
jenkins
72e4dcd84b monitoring(gpu): attribute utilization to namespaces 2026-05-22 01:46:32 -03:00
jenkins
26af225f06 ci(data-prepper): allow recovered titan-04 agents 2026-05-22 01:40:44 -03:00
flux-bot
e368927a0e chore(maintenance): automated image update 2026-05-22 01:50:35 +00:00
flux-bot
825a7a7f37 chore(maintenance): automated image update 2026-05-22 01:50:24 +00:00
flux-bot
0719b5317f chore(maintenance): automated image update 2026-05-22 01:47:23 +00:00
flux-bot
f0ed508277 chore(maintenance): automated image update 2026-05-22 01:42:20 +00:00
flux-bot
ca2ffd52ab chore(bstein-dev-home): automated image update 2026-05-21 22:21:25 +00:00
flux-bot
a4a75a5dda chore(bstein-dev-home): automated image update 2026-05-21 21:40:21 +00:00
jenkins
b44915d158 ci(titan-iac): observe Sonar while project gate is baselined 2026-05-21 17:56:44 -03:00
jenkins
80bc7be00b game-stream: roll gatekeeper on firewall script changes 2026-05-21 17:22:21 -03:00
jenkins
0eba74d9b3 game-stream: gate Moonlight before node routing 2026-05-21 17:17:07 -03:00
flux-bot
0f84be5083 chore(bstein-dev-home): automated image update 2026-05-21 20:14:09 +00:00
flux-bot
19f477ccc8 chore(bstein-dev-home): automated image update 2026-05-21 20:12:06 +00:00
jenkins
3f6970aa1a game-stream: use official WolfManager image 2026-05-21 16:59:47 -03:00
flux-bot
0ff3342ea6 chore(maintenance): automated image update 2026-05-21 19:49:44 +00:00
jenkins
807a31679c ci(titan-iac): exclude privileged game-stream manifests from sonar 2026-05-21 16:28:14 -03:00
jenkins
f064c5b47b game-stream: avoid gatekeeper service env collision 2026-05-21 16:23:08 -03:00
jenkins
d89fec8ae5 game-stream: add Wolf portal access controls 2026-05-21 15:54:56 -03:00
jenkins
1332b611a3 vault-csi: tolerate busy-node probe delays 2026-05-21 15:52:48 -03:00
jenkins
4e82df6891 monitoring(gpu): show utilization with idle fallback 2026-05-21 15:26:02 -03:00
jenkins
6042d8f714 logging: make opensearch tune idempotent 2026-05-21 14:27:56 -03:00
flux-bot
c69ef0064f chore(maintenance): automated image update 2026-05-21 17:14:27 +00:00
jenkins
939231dd6a logging: tune opensearch for single-node recovery 2026-05-21 14:12:36 -03:00
jenkins
e3c05095f8 logging: trim active pod logs on constrained nodes 2026-05-21 13:28:37 -03:00
jenkins
d9955af899 monitoring(gpu): clarify reservation accounting 2026-05-21 13:04:58 -03:00
flux-bot
39db0471d7 chore(maintenance): automated image update 2026-05-21 10:08:55 +00:00
jenkins
323bf85c12 game-stream: pass Wolf OIDC token to Ariadne 2026-05-21 07:07:30 -03:00
flux-bot
38b140580b chore(bstein-dev-home): automated image update 2026-05-21 09:47:03 +00:00
flux-bot
74fb699bac chore(bstein-dev-home): automated image update 2026-05-21 09:44:52 +00:00
flux-bot
e3305b7ddd chore(maintenance): automated image update 2026-05-21 09:09:24 +00:00
flux-bot
4cb49d97f4 chore(maintenance): automated image update 2026-05-21 08:49:11 +00:00
jenkins
dfa53aec9e game-stream: point Wolf proxy at Ariadne service 2026-05-21 05:15:23 -03:00
jenkins
608386a820 logging(opensearch): place recovery pod on titan-05 2026-05-21 04:36:01 -03:00
jenkins
d94535d828 logging(opensearch): pin init container requests 2026-05-21 04:18:06 -03:00
jenkins
c3dcf60145 logging(opensearch): lower cpu request for rpi5 scheduling 2026-05-21 04:14:36 -03:00
jenkins
fb7dd5e5d3 logging(opensearch): patch rendered cpu limit 2026-05-21 03:53:35 -03:00
jenkins
df960fb519 logging(opensearch): set cpu limit above request 2026-05-21 03:49:08 -03:00
jenkins
9544b59380 logging(opensearch): raise heap for ingest pressure 2026-05-21 03:41:38 -03:00
jenkins
c75902a8ef logging(data-prepper): allow slow startup before liveness 2026-05-21 03:35:26 -03:00
jenkins
8fbe82eb5b game-stream: expose Wolf Moonlight ports 2026-05-21 03:30:40 -03:00
jenkins
409295f8cb logging(data-prepper): include bc in runtime image 2026-05-21 03:26:57 -03:00
jenkins
6240133fb4 sso(keycloak): suspend portal admin role ensure 2026-05-21 03:22:44 -03:00
jenkins
25ca8f92a3 agent(openclaw): rely on oauth for control ui auth 2026-05-21 03:15:18 -03:00
jenkins
85468110b3 sso(keycloak): grant portal admin client management 2026-05-21 02:43:19 -03:00
jenkins
ccf76f2c7d agent(openclaw): keep gateway state off jetsons 2026-05-21 02:42:53 -03:00
jenkins
c5dc6a6c80 agent(openclaw): persist gateway state 2026-05-21 02:39:53 -03:00
flux-bot
89345cfddc chore(maintenance): automated image update 2026-05-21 05:38:10 +00:00
jenkins
1470cea862 game-stream: deploy Wolf foundation 2026-05-21 02:07:17 -03:00
jenkins
9f61cff34e maintenance(titan-24): configure Docker NVIDIA runtime 2026-05-21 02:01:29 -03:00
jenkins
e0707b68c6 maintenance(titan-24): start Docker through host systemd 2026-05-21 01:58:27 -03:00
jenkins
b67120ef79 agent(openclaw): isolate oauth cookie state 2026-05-21 01:55:32 -03:00
jenkins
f3a1037dcd maintenance(titan-24): install Docker for Wolf 2026-05-21 01:54:31 -03:00
flux-bot
a408b0bd43 chore(maintenance): automated image update 2026-05-21 01:39:54 +00:00
flux-bot
66594b70c8 chore(maintenance): automated image update 2026-05-21 01:39:05 +00:00
flux-bot
830883777d chore(maintenance): automated image update 2026-05-21 01:37:55 +00:00
flux-bot
eed588cab0 chore(maintenance): automated image update 2026-05-21 01:35:05 +00:00
jenkins
ad99b399f6 comms: trim vault agent scheduling requests 2026-05-20 19:52:43 -03:00
jenkins
c51078c6a3 comms: avoid singleton rollout surge deadlocks 2026-05-20 19:43:48 -03:00
jenkins
3d1af76df7 comms: keep rpi5 workloads off control plane 2026-05-20 19:37:42 -03:00
jenkins
440aec861e comms(mas): keep auth service off control plane 2026-05-20 19:18:34 -03:00
jenkins
1fc7233267 agent(openclaw): trust oauth proxy identity 2026-05-20 18:35:08 -03:00
jenkins
af8a163e70 agent(openclaw): allow public control origin 2026-05-20 18:30:19 -03:00
jenkins
5ec561f620 monitoring(grafana): lower recovery scheduling requests 2026-05-20 18:16:04 -03:00
jenkins
eb4a197eb7 core(nodes): mark rpi4 spillover workers 2026-05-20 18:14:49 -03:00
jenkins
f010d0547f monitoring(grafana): keep off control plane spillover 2026-05-20 18:07:11 -03:00
jenkins
d822c93829 quality: cap placement preference weights 2026-05-20 17:49:09 -03:00
jenkins
5a547f6f01 quality: keep sonar off control plane 2026-05-20 17:45:56 -03:00
jenkins
6fae3edd67 quality: allow sonar to spill onto rpi4 workers 2026-05-20 17:43:39 -03:00
jenkins
ed81d52dd9 monitoring(grafana): avoid fragile placement and init pull 2026-05-20 17:25:39 -03:00
jenkins
8ce8b1aac2 agent(openclaw): expose oauth protected UI 2026-05-20 17:22:12 -03:00
jenkins
400077436b monitoring(grafana): harden scheduling and readiness 2026-05-20 17:00:33 -03:00
jenkins
f2ae3c1b0c monitoring(testing): make branch filter static 2026-05-20 15:10:24 -03:00
jenkins
974955ac83 monitoring(testing): backfill category health rollups 2026-05-20 14:39:07 -03:00
flux-bot
0ea80a8a19 chore(maintenance): automated image update 2026-05-20 17:15:40 +00:00
jenkins
1c6c3992cf monitoring(testing): reduce month-range query cost 2026-05-20 13:26:33 -03:00
jenkins
109698a2e3 monitoring(testing): attach branch labels to run rollups 2026-05-20 12:54:12 -03:00
jenkins
e380b65eb9 monitoring(testing): memoize dashboard freshness panels 2026-05-20 12:45:50 -03:00
jenkins
3a06d29387 monitoring(testing): record check health timelines 2026-05-20 12:14:49 -03:00
jenkins
b70afe2f03 monitoring(testing): memoize slow dashboard panels 2026-05-20 11:52:25 -03:00
jenkins
fe37f12e32 monitoring(testing): surface current gate health 2026-05-20 11:01:28 -03:00
flux-bot
f9641a22b8 chore(maintenance): automated image update 2026-05-20 11:15:51 +00:00
flux-bot
55d554f22b chore(maintenance): automated image update 2026-05-20 11:15:42 +00:00
flux-bot
238ceb5f9b chore(maintenance): automated image update 2026-05-20 11:14:44 +00:00
flux-bot
e7a3266143 chore(maintenance): automated image update 2026-05-20 11:12:41 +00:00
flux-bot
17ce769284 chore(maintenance): automated image update 2026-05-20 10:49:37 +00:00
flux-bot
ae47cd9de5 chore(maintenance): automated image update 2026-05-20 10:49:31 +00:00
flux-bot
c0bb270087 chore(maintenance): automated image update 2026-05-20 10:46:31 +00:00
flux-bot
8b1dd7cb2b chore(maintenance): automated image update 2026-05-20 10:42:30 +00:00
jenkins
f80044258f ci(data-prepper): keep recovery builds schedulable 2026-05-20 07:37:59 -03:00
flux-bot
c633079532 chore(bstein-dev-home): automated image update 2026-05-20 10:37:19 +00:00
flux-bot
6032f6daef chore(bstein-dev-home): automated image update 2026-05-20 10:36:12 +00:00
jenkins
bf2c0c5e4d ci(data-prepper): avoid unstable build nodes 2026-05-20 07:26:13 -03:00
flux-bot
47be1e7c70 chore(bstein-dev-home): automated image update 2026-05-20 10:18:15 +00:00
flux-bot
df56762342 chore(bstein-dev-home): automated image update 2026-05-20 10:16:09 +00:00
flux-bot
afb3955116 chore(maintenance): automated image update 2026-05-20 10:04:07 +00:00
jenkins
b261834537 ci(jenkins): soften agent spread constraints 2026-05-20 06:53:29 -03:00
jenkins
777f4abe69 fix(jenkins): remove deprecated cloud cap field 2026-05-20 06:40:44 -03:00
jenkins
3cb3a39b49 ops: restart jenkins for spread policy 2026-05-20 06:34:43 -03:00
jenkins
26629205fb ci: tighten agent spread and sweeper limits 2026-05-20 06:33:12 -03:00
flux-bot
a914b2fd05 chore(maintenance): automated image update 2026-05-20 09:25:28 +00:00
jenkins
4a197c870e ci(data-prepper): relax agent spread scheduling 2026-05-20 06:02:36 -03:00
jenkins
974dd84938 ci(jenkins): keep recovery agents schedulable 2026-05-20 05:53:42 -03:00
flux-bot
8fba90f8df chore(maintenance): automated image update 2026-05-20 08:41:55 +00:00
jenkins
178e523bc2 ops(traefik): keep ingress available during balancing 2026-05-20 05:23:49 -03:00
jenkins
0115f5f684 ci(titan-iac): relax agent scheduling 2026-05-20 04:58:40 -03:00
jenkins
269136bee9 ops(cert-manager): keep admission webhook available 2026-05-20 04:51:39 -03:00
jenkins
3676fe058f ops: keep jenkins controller on rpi5 2026-05-20 04:42:12 -03:00
jenkins
75cd2eb39f ops: restart jenkins for placement policy 2026-05-20 04:29:40 -03:00
jenkins
48e434a028 ops: harden ci placement and gpu idle reporting 2026-05-20 04:27:26 -03:00
flux-bot
d2202b6955 chore(maintenance): automated image update 2026-05-20 07:15:57 +00:00
flux-bot
8e706d9900 chore(maintenance): automated image update 2026-05-20 06:50:18 +00:00
jenkins
a30d0fffa4 triage: wire openclaw local diagnosis 2026-05-20 03:14:50 -03:00
flux-bot
878f9ed9b8 chore(maintenance): automated image update 2026-05-20 06:08:37 +00:00
flux-bot
e3112ccb6a chore(maintenance): automated image update 2026-05-20 05:47:28 +00:00
flux-bot
e7145094d9 chore(maintenance): automated image update 2026-05-20 05:39:26 +00:00
flux-bot
8511c90178 chore(maintenance): automated image update 2026-05-20 05:29:24 +00:00
jenkins
50e20a7805 mailu: allow tika slow startup 2026-05-20 02:23:36 -03:00
flux-bot
594e02a518 chore(maintenance): automated image update 2026-05-20 05:21:23 +00:00
jenkins
b04726b6e2 mailu: avoid unhealthy titan-14 placement 2026-05-20 02:13:54 -03:00
jenkins
36487543bb vault: prefer rpi5 for injector 2026-05-20 02:03:35 -03:00
flux-bot
265d9df2ac chore(maintenance): automated image update 2026-05-20 04:58:11 +00:00
jenkins
287699b3db maintenance: prefer rpi5 for ariadne 2026-05-20 01:57:03 -03:00
jenkins
54ec6ebdca quality: give sonarqube a startup probe 2026-05-20 01:40:15 -03:00
flux-bot
617186bd20 chore(maintenance): automated image update 2026-05-20 04:38:06 +00:00
flux-bot
8d45fa9e2b chore(maintenance): automated image update 2026-05-20 04:33:07 +00:00
flux-bot
63a1b7bb4f chore(maintenance): automated image update 2026-05-20 04:27:23 +00:00
jenkins
84ecb09328 quality: recreate sonarqube on pvc moves 2026-05-20 01:24:38 -03:00
jenkins
fe172e0cc6 quality: right-size sonarqube cpu request 2026-05-20 01:21:11 -03:00
flux-bot
9b3c7244cc chore(maintenance): automated image update 2026-05-20 04:20:02 +00:00
flux-bot
b879064146 chore(maintenance): automated image update 2026-05-20 04:08:01 +00:00
flux-bot
ae4ceef711 chore(maintenance): automated image update 2026-05-20 04:01:58 +00:00
jenkins
5230341d21 quality: keep sonarqube on rpi5 workers 2026-05-20 00:57:24 -03:00
jenkins
5bf7a5ac20 test(dashboards): expect memoized gate rollups 2026-05-20 00:36:31 -03:00
jenkins
8bf9e12b4e openclaw: allow gateway on jetson lane 2026-05-19 23:37:16 -03:00
jenkins
b648a66f9a jenkins: allow agents to fall back to rpi4 2026-05-19 23:30:34 -03:00
jenkins
dd20678c46 openclaw: route testing triage through ariadne 2026-05-19 23:30:29 -03:00
jenkins
2c4b1b9cc9 monitoring: hide idle gpu share during activity 2026-05-19 23:30:22 -03:00
jenkins
ea2071bbc5 monitoring(testing): memoize gate check health 2026-05-19 22:41:02 -03:00
jenkins
a32995b1a1 openclaw: fix gateway scratch permissions 2026-05-19 20:37:50 -03:00
jenkins
07a2dfbb92 jenkins: lower controller scheduling requests 2026-05-19 20:37:07 -03:00
jenkins
0a59d6f26e openclaw: tune triage kubectl guidance 2026-05-19 20:34:12 -03:00
jenkins
7688673072 openclaw: restart gateway on model changes 2026-05-19 20:29:35 -03:00
jenkins
1776a3266c jenkins: avoid HDD storage nodes for controller 2026-05-19 20:29:12 -03:00
jenkins
c667c97089 openclaw: use gpu-sized triage model 2026-05-19 20:28:26 -03:00
jenkins
44bfb5a68e openclaw: split gateway and inference placement 2026-05-19 20:23:42 -03:00
jenkins
aadc93e681 openclaw: move mvp inference to titan-24 2026-05-19 20:14:44 -03:00
jenkins
dd4aff8861 openclaw: keep mvp access internal 2026-05-19 20:10:15 -03:00
jenkins
3142d35403 openclaw: pin mvp model lane to titan-21 2026-05-19 19:54:11 -03:00
jenkins
887023eaeb ci(testing): treat optional supply-chain as non-blocking 2026-05-19 19:51:10 -03:00
jenkins
e8fb92a44f openclaw: use smaller local triage model 2026-05-19 19:45:10 -03:00
jenkins
3a8a53133c openclaw: pin arm64 gateway image 2026-05-19 19:30:23 -03:00
jenkins
1fcb12bcb8 openclaw: use ephemeral jetson state for mvp 2026-05-19 19:27:22 -03:00
jenkins
1bc58e10c0 openclaw: add testing triage workspace 2026-05-19 19:17:14 -03:00
jenkins
b7caf4cfec maintenance: document rpi reservation privileges 2026-05-19 18:52:45 -03:00
jenkins
2464c61339 ci(jenkins): avoid brittle agent nodes 2026-05-19 18:26:51 -03:00
jenkins
c923be8ff1 ops: fix descheduler policy for chart api 2026-05-19 18:06:30 -03:00
jenkins
4bd83a1aa8 cert-manager: harden webhook resources 2026-05-19 17:35:10 -03:00
jenkins
69ff5b8bb2 vault: use http health probes 2026-05-19 17:25:56 -03:00
jenkins
92c2cf2127 ci(data-prepper): relax agent placement 2026-05-19 17:09:06 -03:00
jenkins
baa8e96fcc quality: loosen sonarqube placement pressure 2026-05-19 16:41:03 -03:00
jenkins
37fe1d5d24 ci(jenkins): reduce default agent cpu request 2026-05-19 16:21:43 -03:00
jenkins
24920c8a56 ops: keep scavenger work out of scheduler headroom 2026-05-19 16:02:10 -03:00
jenkins
ba84082a1e ci(jenkins): tolerate missing workspace in post actions 2026-05-19 15:57:50 -03:00
jenkins
e3e8a046e4 ops: stage rpi reservations without auto restart 2026-05-19 15:51:05 -03:00
jenkins
8806739d3d harbor: lower redis bootstrap request 2026-05-19 15:13:16 -03:00
jenkins
bf908556bf ops: restart rpi agents through host namespace 2026-05-19 15:00:31 -03:00
jenkins
4be03e1514 harbor: keep bootstrap workloads on titan-11 2026-05-19 14:31:41 -03:00
jenkins
a8a17e7978 ops: enforce rpi kubelet reservations via systemd 2026-05-19 14:23:35 -03:00
jenkins
c982b86136 ci(jenkins): roll controller for plugin refresh 2026-05-19 14:07:34 -03:00
jenkins
10a5776c79 ops: roll rpi reservation daemonset 2026-05-19 13:57:10 -03:00
jenkins
399efa46e4 ci(jenkins): bump kubernetes plugin 2026-05-19 13:45:26 -03:00
jenkins
2d46a2b8fb ops: relax p2pool scheduling request 2026-05-19 13:45:02 -03:00
jenkins
0ae76bf1ca ops: tune crypto guardrails after rollout 2026-05-19 13:35:56 -03:00
jenkins
a3e14ce0f2 ops: add resource guardrails for rpi workers 2026-05-19 12:48:40 -03:00
jenkins
c75e0d1b88 monitoring(testing): roll up current test case state 2026-05-19 11:09:29 -03:00
jenkins
eb003f5b32 monitoring(testing): avoid nested f-string parser drift 2026-05-19 08:11:04 -03:00
jenkins
5a356e8aed monitoring: avoid titan-04 for quality gateway 2026-05-19 07:52:18 -03:00
jenkins
e29299a90d monitoring(testing): dedupe run counters by scrape target 2026-05-19 07:46:06 -03:00
flux-bot
c898e71242 chore(bstein-dev-home): automated image update 2026-05-19 09:34:45 +00:00
flux-bot
afcffc6903 chore(bstein-dev-home): automated image update 2026-05-19 09:33:35 +00:00
jenkins
8bf3d63bae monitoring(testing): prefer fresh coverage metrics 2026-05-19 06:31:04 -03:00
jenkins
5d80f882ae monitoring(testing): ignore stale replaced check states 2026-05-19 04:25:56 -03:00
jenkins
ba9b72312a monitoring(testing): derive gate health from raw checks 2026-05-19 03:59:55 -03:00
jenkins
813d057c6d monitoring(testing): clarify category panel window 2026-05-19 02:29:16 -03:00
jenkins
9789ff5338 test(dashboards): align testing row title 2026-05-18 23:42:43 -03:00
flux-bot
6ca3449f76 chore(maintenance): automated image update 2026-05-19 01:33:25 +00:00
flux-bot
84ebdd3e56 chore(maintenance): automated image update 2026-05-19 01:33:01 +00:00
flux-bot
d83bb17cdf chore(maintenance): automated image update 2026-05-19 01:31:25 +00:00
flux-bot
764df923a0 chore(maintenance): automated image update 2026-05-19 01:29:59 +00:00
jenkins
3102862ee9 monitoring(testing): clarify CI run and test history labels 2026-05-18 21:04:14 -03:00
jenkins
3d043424b4 monitoring(testing): filter test category rollups 2026-05-18 19:32:17 -03:00
jenkins
cc2a98b0a2 monitoring(testing): keep latest gate state current 2026-05-18 15:18:22 -03:00
jenkins
28f401cce1 monitoring(testing): count gate checks as boolean states 2026-05-18 14:53:15 -03:00
jenkins
0de90d622a monitoring(testing): clarify CI run health labels 2026-05-18 14:18:56 -03:00
flux-bot
17628a060f chore(maintenance): automated image update 2026-05-18 17:10:49 +00:00
jenkins
aa750f18b0 monitoring(overview): simplify test category lanes 2026-05-18 11:42:42 -03:00
flux-bot
968ab0ff6e chore(maintenance): automated image update 2026-05-18 12:00:56 +00:00
flux-bot
00c35d93ee chore(maintenance): automated image update 2026-05-18 11:59:55 +00:00
flux-bot
3f843f9a18 chore(maintenance): automated image update 2026-05-18 11:57:58 +00:00
flux-bot
30b8affe5b chore(maintenance): automated image update 2026-05-18 11:54:54 +00:00
flux-bot
4a38d0eef2 chore(maintenance): automated image update 2026-05-18 11:46:33 +00:00
flux-bot
9fa5bb6225 chore(bstein-dev-home): automated image update 2026-05-18 09:39:21 +00:00
flux-bot
7eed659692 chore(bstein-dev-home): automated image update 2026-05-18 09:37:22 +00:00
jenkins
6ed0a1f18e monitoring: persist quality gateway metrics 2026-05-18 03:59:31 -03:00
flux-bot
50b76f56d7 chore(maintenance): automated image update 2026-05-18 01:29:30 +00:00
flux-bot
a721546d61 chore(maintenance): automated image update 2026-05-18 01:28:37 +00:00
flux-bot
b0b500a4f6 chore(maintenance): automated image update 2026-05-18 01:27:29 +00:00
flux-bot
404e12c05d chore(maintenance): automated image update 2026-05-18 01:26:36 +00:00
jenkins
f48ed0cd7d monitoring(testing): restrict category panels to taxonomy 2026-05-17 18:26:57 -03:00
jenkins
ea3366c913 monitoring(overview): restore category timeline 2026-05-17 18:01:37 -03:00
jenkins
52bee00432 monitoring(overview): clean up category pass-rate panel 2026-05-17 16:47:17 -03:00
jenkins
eb39335d59 monitoring(testing): use latest check status in gate panels 2026-05-17 15:54:29 -03:00
jenkins
690c82e61b monitoring(vmalert): roll test pass-rate rule update 2026-05-17 15:32:24 -03:00
jenkins
3e8667d48a monitoring(testing): count failed test cases in pass-rate rollup 2026-05-17 15:27:21 -03:00
flux-bot
674f953e55 chore(maintenance): automated image update 2026-05-17 17:11:45 +00:00
flux-bot
bc54865f5c chore(bstein-dev-home): automated image update 2026-05-17 10:05:38 +00:00
flux-bot
2c91470411 chore(bstein-dev-home): automated image update 2026-05-17 10:02:34 +00:00
jenkins
65c6e123cf monitoring(testing): prefer fresh suite quality samples 2026-05-17 05:54:28 -03:00
flux-bot
4a32ad5fe5 chore(maintenance): automated image update 2026-05-17 07:00:02 +00:00
flux-bot
de1940ae00 chore(maintenance): automated image update 2026-05-17 06:58:04 +00:00
flux-bot
35eeb39bdc chore(maintenance): automated image update 2026-05-17 06:48:48 +00:00
flux-bot
fd490d69f5 chore(maintenance): automated image update 2026-05-17 06:43:47 +00:00
jenkins
2d88aab3a3 monitoring: clarify fresh suite signal 2026-05-17 00:28:07 -03:00
jenkins
a69a21f05d monitoring: use fresh quality snapshots 2026-05-16 20:57:56 -03:00
jenkins
8a13b9d4e7 monitoring: require worker for quality gateway 2026-05-16 19:52:11 -03:00
jenkins
cbf345cfcf monitoring: roll quality gateway without surge 2026-05-16 19:50:24 -03:00
jenkins
b5a79e8091 monitoring: keep quality gateway on stable workers 2026-05-16 19:46:56 -03:00
jenkins
c404a967d0 monitoring: attach build branch to quality rollups 2026-05-16 18:22:16 -03:00
jenkins
42ce51baad monitoring: make current gate rollup use latest checks 2026-05-16 18:07:21 -03:00
jenkins
7f209fbbc9 monitoring: refresh quality rollups every minute 2026-05-16 18:00:52 -03:00
jenkins
65298d7357 monitoring: keep quality check status without branch labels 2026-05-16 17:49:37 -03:00
jenkins
7dc03eefce monitoring: reload vmalert quality rules 2026-05-16 17:29:52 -03:00
jenkins
e0c92aa49d monitoring: count only canonical reporting suites 2026-05-16 17:26:20 -03:00
jenkins
588cc3aa14 monitoring: clarify quality gate dashboard tooltips 2026-05-16 17:03:58 -03:00
jenkins
ad86195436 monitoring: add typhon and category test telemetry 2026-05-16 15:38:26 -03:00
flux-bot
8d4ed6b584 chore(maintenance): automated image update 2026-05-16 17:10:16 +00:00
jenkins
df429a57f2 jenkins: reload casc for lesavka schedule 2026-05-16 13:51:55 -03:00
jenkins
c43f7d84e8 monitoring: polish testing dashboard telemetry 2026-05-16 13:48:01 -03:00
jenkins
7bb2b90a13 monitoring: fix gate health status query 2026-05-16 12:31:19 -03:00
jenkins
a739f14a86 monitoring: disable fan intensity legend safely 2026-05-16 12:17:33 -03:00
jenkins
02e8e633a5 monitoring: hide fan threshold legend 2026-05-16 12:14:22 -03:00
jenkins
160c960345 monitoring: add lesavka category test telemetry 2026-05-16 12:07:57 -03:00
jenkins
86034e0aac monitoring: clean overview power and gate panels 2026-05-16 06:38:55 -03:00
flux-bot
c18b43c294 chore(bstein-dev-home): automated image update 2026-05-16 09:34:55 +00:00
flux-bot
5210a700bf chore(bstein-dev-home): automated image update 2026-05-16 09:33:58 +00:00
jenkins
fbf768e90f monitoring: make overview history panels visible 2026-05-16 06:11:22 -03:00
jenkins
4238262ad3 monitoring: fix gpu share and overview legends 2026-05-16 05:58:59 -03:00
jenkins
3492b6026e monitoring: fix overview fan and gate timelines 2026-05-16 05:34:24 -03:00
jenkins
5eef2e9ba3 monitoring: refine overview timeline readability 2026-05-16 05:18:53 -03:00
jenkins
5d01b3a60d monitoring: trial overview health timelines 2026-05-16 05:08:09 -03:00
jenkins
2ede953580 monitoring: trial overview right rail layout 2026-05-16 03:31:04 -03:00
jenkins
1cfc846ffc monitoring: retire duplicate jobs dashboard 2026-05-16 03:04:27 -03:00
jenkins
8fb5831e00 monitoring: publish atlas testing dashboard 2026-05-16 02:56:52 -03:00
jenkins
b6c921b291 monitoring: clarify gitops and check timelines 2026-05-16 02:21:05 -03:00
jenkins
b4229c5a8f monitoring: refine overview and failure colors 2026-05-15 22:44:25 -03:00
flux-bot
f0520d652a chore(maintenance): automated image update 2026-05-16 01:31:40 +00:00
flux-bot
959a60b73a chore(maintenance): automated image update 2026-05-16 01:31:36 +00:00
flux-bot
1c4c24724f chore(maintenance): automated image update 2026-05-16 01:29:34 +00:00
flux-bot
29881676e8 chore(maintenance): automated image update 2026-05-16 01:27:34 +00:00
jenkins
4527f29e7e monitoring: clarify testing and gitops dashboards 2026-05-15 22:07:41 -03:00
jenkins
a2d5c9c83e monitoring(testing): use lane timelines for test health 2026-05-15 21:05:13 -03:00
jenkins
792ac2b946 monitoring(testing): clarify run and compliance history 2026-05-15 20:00:40 -03:00
jenkins
944a778c0a monitoring: clarify testing dashboard health trends 2026-05-15 19:52:46 -03:00
jenkins
2b9cb84383 monitoring: add gitops dashboard status 2026-05-15 19:37:03 -03:00
jenkins
045d144268 flux: decouple apps from traefik readiness 2026-05-15 16:43:31 -03:00
jenkins
b794e3b514 flux: serialize kustomization reconciles 2026-05-15 16:11:12 -03:00
jenkins
3a39d37995 flux: jitter kustomization intervals 2026-05-15 15:53:02 -03:00
jenkins
8d8b3fc821 flux: avoid child kustomization apply churn 2026-05-15 15:28:53 -03:00
jenkins
b18df4caad flux: calm bootstrap polling 2026-05-15 15:12:38 -03:00
jenkins
cf20efed66 flux: reduce bootstrap reconcile churn 2026-05-15 15:08:27 -03:00
jenkins
6adbe457c4 monitoring: tune testing dashboard and gate rollups 2026-05-15 14:26:06 -03:00
flux-bot
0c11a64d25 chore(maintenance): automated image update 2026-05-15 17:11:05 +00:00
jenkins
c79489d0b8 recovery: keep storage nodes as spillover only 2026-05-15 11:52:26 -03:00
flux-bot
67253315f0 chore(bstein-dev-home): automated image update 2026-05-15 09:35:00 +00:00
flux-bot
fa8ab0840b chore(bstein-dev-home): automated image update 2026-05-15 09:33:54 +00:00
flux-bot
bf5550762e chore(maintenance): automated image update 2026-05-15 01:31:39 +00:00
flux-bot
39e023e8f3 chore(maintenance): automated image update 2026-05-15 01:30:50 +00:00
flux-bot
fd0d748c33 chore(maintenance): automated image update 2026-05-15 01:29:38 +00:00
flux-bot
77956ab811 chore(maintenance): automated image update 2026-05-15 01:27:49 +00:00
flux-bot
3ea233abcb chore(maintenance): automated image update 2026-05-14 17:10:25 +00:00
flux-bot
93bc3dfbe5 chore(bstein-dev-home): automated image update 2026-05-14 09:34:15 +00:00
flux-bot
4ca62f6fb5 chore(bstein-dev-home): automated image update 2026-05-14 09:34:04 +00:00
flux-bot
6914b92e67 chore(maintenance): automated image update 2026-05-14 01:32:55 +00:00
flux-bot
613d496491 chore(maintenance): automated image update 2026-05-14 01:32:10 +00:00
flux-bot
570c077190 chore(maintenance): automated image update 2026-05-14 01:30:54 +00:00
flux-bot
b401a4e49f chore(maintenance): automated image update 2026-05-14 01:29:05 +00:00
flux-bot
559bdf2a72 chore(maintenance): automated image update 2026-05-13 17:12:37 +00:00
flux-bot
f3a7fe58c4 chore(bstein-dev-home): automated image update 2026-05-13 09:34:29 +00:00
flux-bot
46ab392e97 chore(bstein-dev-home): automated image update 2026-05-13 09:34:23 +00:00
flux-bot
352e136621 chore(maintenance): automated image update 2026-05-13 01:31:03 +00:00
flux-bot
1b265f43d5 chore(maintenance): automated image update 2026-05-13 01:30:09 +00:00
flux-bot
ecfead7193 chore(maintenance): automated image update 2026-05-13 01:29:03 +00:00
flux-bot
53f5968f8f chore(maintenance): automated image update 2026-05-13 01:27:07 +00:00
flux-bot
8dadb36b97 chore(maintenance): automated image update 2026-05-12 17:09:48 +00:00
flux-bot
74668938cc chore(bstein-dev-home): automated image update 2026-05-12 09:33:54 +00:00
flux-bot
9def813324 chore(bstein-dev-home): automated image update 2026-05-12 09:33:39 +00:00
jenkins
6811958b52 monitoring: align overview generator with restored layout 2026-05-12 04:19:36 -03:00
jenkins
d1cdb4fd13 monitoring: restore atlas overview dashboard 2026-05-12 04:00:26 -03:00
flux-bot
50580623db chore(maintenance): automated image update 2026-05-12 01:31:40 +00:00
flux-bot
7340762622 chore(maintenance): automated image update 2026-05-12 01:31:22 +00:00
flux-bot
2102a5ec76 chore(maintenance): automated image update 2026-05-12 01:30:22 +00:00
flux-bot
850ed8abf6 chore(maintenance): automated image update 2026-05-12 01:28:38 +00:00
flux-bot
05ba76ecaa chore(maintenance): automated image update 2026-05-11 21:07:59 +00:00
flux-bot
6db1c3f5da chore(maintenance): automated image update 2026-05-11 21:07:46 +00:00
flux-bot
63429fff1d chore(maintenance): automated image update 2026-05-11 21:06:45 +00:00
flux-bot
de3f7fea69 chore(maintenance): automated image update 2026-05-11 21:04:59 +00:00
flux-bot
1e5ef8dbd1 chore(maintenance): automated image update 2026-05-11 20:51:40 +00:00
flux-bot
385d21056a chore(bstein-dev-home): automated image update 2026-05-11 20:48:56 +00:00
flux-bot
3c5fa4bbe2 chore(bstein-dev-home): automated image update 2026-05-11 20:47:40 +00:00
jenkins
58adb757c4 monitoring(testing): show LOC compliance as positive percent 2026-05-11 17:36:13 -03:00
flux-bot
d01cfe9066 chore(maintenance): automated image update 2026-05-11 17:10:04 +00:00
flux-bot
d522af7bb7 chore(bstein-dev-home): automated image update 2026-05-11 16:30:19 +00:00
flux-bot
e6dd39b4c7 chore(bstein-dev-home): automated image update 2026-05-11 16:29:00 +00:00
flux-bot
4404454cb9 chore(bstein-dev-home): automated image update 2026-05-11 09:33:16 +00:00
flux-bot
59613d500f chore(bstein-dev-home): automated image update 2026-05-11 09:33:01 +00:00
flux-bot
afeae15443 chore(maintenance): automated image update 2026-05-11 04:54:34 +00:00
flux-bot
ba0155ad3b chore(maintenance): automated image update 2026-05-11 04:54:26 +00:00
flux-bot
2c048cdeda chore(maintenance): automated image update 2026-05-11 04:52:26 +00:00
flux-bot
f307c7f2af chore(maintenance): automated image update 2026-05-11 04:48:32 +00:00
jenkins
a90d84f796 monitoring(testing): use solid threshold bars 2026-05-11 01:01:46 -03:00
jenkins
dad9e4e8f2 monitoring: ignore availability scrape gaps 2026-05-10 16:38:05 -03:00
jenkins
eb57c1fe0f monitoring: count post-start availability gaps 2026-05-10 16:21:47 -03:00
jenkins
e7213d9d1c monitoring: fill pre-telemetry availability 2026-05-10 16:13:13 -03:00
jenkins
7b656dbaeb monitoring: restart vmalert on rule changes 2026-05-10 15:50:50 -03:00
jenkins
01af181442 monitoring: schedule availability rollup deterministically 2026-05-10 15:49:32 -03:00
jenkins
192a36cf8a monitoring: fix vmalert remote write endpoint 2026-05-10 15:47:34 -03:00
jenkins
7f7dde01de monitoring: precompute atlas availability rollup 2026-05-10 15:40:12 -03:00
jenkins
32ffe30145 monitoring: bound atlas availability query 2026-05-10 14:40:55 -03:00
flux-bot
521eda1c00 chore(maintenance): automated image update 2026-05-10 17:09:40 +00:00
flux-bot
49948621d0 chore(bstein-dev-home): automated image update 2026-05-10 09:34:40 +00:00
flux-bot
28b77781d1 chore(bstein-dev-home): automated image update 2026-05-10 09:33:40 +00:00
jenkins
adfbe4ed64 metis: fix pvc rollout and sentinel pulls 2026-05-10 04:36:31 -03:00
jenkins
92fbe0ebdf vaultwarden: avoid suspect workers 2026-05-10 04:27:48 -03:00
flux-bot
b0bd29696e chore(bstein-dev-home): automated image update 2026-05-10 07:13:19 +00:00
flux-bot
496b933c65 chore(bstein-dev-home): automated image update 2026-05-10 07:13:16 +00:00
jenkins
da7ee45366 postgres: avoid unstable nodes 2026-05-10 04:04:34 -03:00
flux-bot
ffdc4bef36 chore(maintenance): automated image update 2026-05-10 06:49:25 +00:00
flux-bot
3aaa96a673 chore(maintenance): automated image update 2026-05-10 06:49:19 +00:00
flux-bot
2f1eb38551 chore(maintenance): automated image update 2026-05-10 06:48:19 +00:00
flux-bot
cdda5be827 chore(bstein-dev-home): automated image update 2026-05-10 06:46:34 +00:00
flux-bot
52682b98f5 chore(maintenance): automated image update 2026-05-10 06:46:25 +00:00
flux-bot
749fa16fca chore(maintenance): automated image update 2026-05-10 06:46:16 +00:00
flux-bot
b0372c41c2 chore(bstein-dev-home): automated image update 2026-05-10 06:43:19 +00:00
jenkins
e96e8943c9 jenkins: keep ci agents on rpi5 workers 2026-05-10 03:25:09 -03:00
flux-bot
acfaa2c3c0 chore(maintenance): automated image update 2026-05-10 06:23:06 +00:00
jenkins
7fb0be3487 jenkins: spread ci agents and cap concurrency 2026-05-10 03:17:50 -03:00
jenkins
fd91537982 ci: avoid titan-06 for Jenkins agents 2026-05-10 02:24:40 -03:00
jenkins
a64d4cee56 jenkins: tolerate slow kubernetes agent exec 2026-05-10 02:17:30 -03:00
jenkins
ba3e24548a jenkins: schedule daily quality jobs 2026-05-09 23:18:32 -03:00
jenkins
4beb08f1cf scheduling: keep longhorn vault sync off storage nodes 2026-05-05 13:46:19 -03:00
jenkins
e2cbbd6963 scheduling: keep singleton apps off storage nodes 2026-05-05 13:37:04 -03:00
jenkins
c46764e80c recovery(atlas): stop post-outage control-plane churn 2026-05-05 10:42:28 -03:00
jenkins
b81053aaec ai(ollama): recover onto live jetson gpu pool 2026-05-05 06:42:15 -03:00
jenkins
9e659b790b recovery(post-outage): restore jellyfin and maintenance sync 2026-05-05 06:31:09 -03:00
jenkins
c07220253e maintenance(metis): run service on longhorn-ready workers 2026-05-05 06:19:15 -03:00
jenkins
39fb0e91e0 maintenance(metis): move runtime state to longhorn 2026-05-05 06:15:22 -03:00
jenkins
6243021ade maintenance(metis): recover on arm64 builders 2026-05-05 06:12:06 -03:00
4a6b54b4c3 logging: trim dated pod log rotations 2026-04-27 16:49:11 -03:00
6c816e9fad logging: trim constrained pod logs earlier 2026-04-27 16:42:02 -03:00
2b5c7ca10b logging: trim oversized rotated pod logs on constrained nodes 2026-04-27 16:31:57 -03:00
45b145667a longhorn: rerun settings ensure job 2026-04-27 16:16:51 -03:00
9fb8dd4839 stability: harden fluent-bit buffering and longhorn node-down recovery 2026-04-27 16:15:13 -03:00
flux-bot
6352e0d976 chore(maintenance): automated image update 2026-04-26 00:59:25 +00:00
flux-bot
d4ff5d482e chore(maintenance): automated image update 2026-04-26 00:59:05 +00:00
flux-bot
b303add71c chore(maintenance): automated image update 2026-04-26 00:57:30 +00:00
flux-bot
a42e61de61 chore(maintenance): automated image update 2026-04-26 00:55:05 +00:00
Codex
6eb0158c6c maintenance(metis): raise remote build timeout 2026-04-25 01:41:36 -03:00
Codex
0171ffad38 keycloak(metis): seed node intranet ips in vault 2026-04-24 22:18:58 -03:00
flux-bot
84934a6d1c chore(maintenance): automated image update 2026-04-24 21:39:36 +00:00
flux-bot
98a2ade86d chore(maintenance): automated image update 2026-04-24 21:39:18 +00:00
flux-bot
738a5184cb chore(maintenance): automated image update 2026-04-24 21:37:35 +00:00
flux-bot
488c2694e3 chore(maintenance): automated image update 2026-04-24 21:36:19 +00:00
flux-bot
015d99dc5f chore(maintenance): automated image update 2026-04-24 21:08:32 +00:00
flux-bot
b80745dc2d chore(maintenance): automated image update 2026-04-24 21:08:15 +00:00
jenkins
0fa1b38f95 recovery(metis): trim node vault password placeholders 2026-04-24 18:07:35 -03:00
flux-bot
49e714c88c chore(maintenance): automated image update 2026-04-24 21:07:32 +00:00
flux-bot
ff0b9762b1 chore(maintenance): automated image update 2026-04-24 21:05:15 +00:00
jenkins
ce36ff099b recovery(metis): rerun node password seeding job 2026-04-24 17:33:40 -03:00
jenkins
6c4a7dea29 recovery(metis): use atlas kv node secrets 2026-04-24 17:29:58 -03:00
jenkins
04a80c1168 recovery(metis): seed per-node vault password slots 2026-04-24 17:24:37 -03:00
flux-bot
8179bd85db chore(maintenance): automated image update 2026-04-24 20:19:26 +00:00
flux-bot
c08499b52d chore(maintenance): automated image update 2026-04-24 20:19:10 +00:00
flux-bot
eca9e494ad chore(maintenance): automated image update 2026-04-24 20:17:26 +00:00
flux-bot
ab0e68f9f3 chore(maintenance): automated image update 2026-04-24 20:15:10 +00:00
flux-bot
0566a47e35 chore(maintenance): automated image update 2026-04-24 17:50:13 +00:00
flux-bot
133597bfd0 chore(maintenance): automated image update 2026-04-24 17:49:55 +00:00
flux-bot
ccf318f977 chore(maintenance): automated image update 2026-04-24 17:48:12 +00:00
flux-bot
8affc052bf chore(maintenance): automated image update 2026-04-24 17:46:54 +00:00
flux-bot
0cf5043977 chore(maintenance): automated image update 2026-04-24 17:20:52 +00:00
flux-bot
f2ffc6c1ef chore(maintenance): automated image update 2026-04-24 17:19:09 +00:00
flux-bot
e7c770b10b chore(maintenance): automated image update 2026-04-24 17:17:52 +00:00
jenkins
0ac3c97f90 maintenance(metis): restore full helper image refs 2026-04-24 13:51:12 -03:00
flux-bot
3e5e37d65a chore(maintenance): automated image update 2026-04-24 16:11:02 +00:00
flux-bot
2acbcbff51 chore(maintenance): automated image update 2026-04-24 16:10:45 +00:00
flux-bot
70b382bc80 chore(maintenance): automated image update 2026-04-24 16:09:02 +00:00
flux-bot
d0191361d4 chore(maintenance): automated image update 2026-04-24 16:06:44 +00:00
flux-bot
59bb0bef78 chore(maintenance): automated image update 2026-04-24 15:56:37 +00:00
jenkins
4b456cf54a maintenance(metis): track arch-specific images 2026-04-24 12:55:47 -03:00
jenkins
91c6023d25 maintenance(metis): move ingress to recovery host 2026-04-24 10:51:09 -03:00
jenkins
85d15cd3e1 maintenance(metis): raise remote pod timeout for recovery builds 2026-04-24 00:01:43 -03:00
jenkins
c0a4cbf03e maintenance(metis): fix remote workspace permissions 2026-04-23 23:45:18 -03:00
jenkins
fad895efbb maintenance(metis): move build scratch to usb storage 2026-04-23 23:37:00 -03:00
jenkins
47b31ebcf4 monitoring(testing): collapse heavy drilldowns 2026-04-22 16:56:52 -03:00
jenkins
88d2225774 test(titan-iac): cover dashboard generator contract 2026-04-22 15:31:36 -03:00
jenkins
a1f6758b95 monitoring(grafana): refresh provisioned dashboards 2026-04-22 15:13:26 -03:00
jenkins
23146aaa8a monitoring(testing): clean canonical suite rows 2026-04-22 14:34:40 -03:00
jenkins
cc757ba082 ci(data-prepper): quote testcase metrics correctly 2026-04-22 13:28:35 -03:00
jenkins
c3c8b60671 ci(data-prepper): retrigger archive fix 2026-04-22 13:23:23 -03:00
jenkins
15792b1cf3 ci(data-prepper): archive junit without plugin dependency 2026-04-22 13:21:52 -03:00
jenkins
e75a5d5675 ci(data-prepper): keep validation labels portable 2026-04-22 13:13:56 -03:00
jenkins
4282810602 ci(data-prepper): retrigger quality publish 2026-04-22 13:07:37 -03:00
jenkins
8a58132dd4 ci(data-prepper): avoid xml parser in metrics publish 2026-04-22 13:04:47 -03:00
jenkins
be0d3e4300 ci(data-prepper): harden quality evidence helpers 2026-04-22 12:58:27 -03:00
jenkins
ba6848a67a ci(data-prepper): publish real testcase metrics 2026-04-22 12:48:36 -03:00
jenkins
23beb08e5e monitoring(testing): split quality trend panels 2026-04-22 12:42:33 -03:00
5d560d962d chore(metis): deploy scratch annotation sync 2026-04-22 04:28:08 -03:00
51ade59a46 fix(metis): keep sentinel rollouts moving on degraded nodes 2026-04-22 03:40:28 -03:00
7f91be27f9 chore(metis): deploy scratch sentinel fix 2026-04-22 03:33:54 -03:00
63cd159151 test(titan-iac): cover mailu sync scripts 2026-04-22 02:53:00 -03:00
443c70d01b monitoring(testing): promote atlas testing layout 2026-04-22 02:26:31 -03:00
flux-bot
9f0ea1683a chore(bstein-dev-home): automated image update 2026-04-22 05:01:25 +00:00
flux-bot
55df293e00 chore(bstein-dev-home): automated image update 2026-04-22 05:00:26 +00:00
3168ffe027 ci(titan-iac): feed coverage into sonar gate 2026-04-22 01:57:19 -03:00
abdefbbd05 ci(quality): enforce sonar and supply-chain gates 2026-04-22 01:29:54 -03:00
flux-bot
ead503d71e chore(bstein-dev-home): automated image update 2026-04-22 04:15:46 +00:00
flux-bot
f54bdf8483 chore(bstein-dev-home): automated image update 2026-04-22 04:14:49 +00:00
flux-bot
80cb4c257f chore(bstein-dev-home): automated image update 2026-04-22 04:06:45 +00:00
flux-bot
228e8a9772 chore(bstein-dev-home): automated image update 2026-04-22 04:05:50 +00:00
15c798b915 gitops(bstein-home): deploy current image tags on main 2026-04-22 00:53:06 -03:00
2ded2eb23d ci(titan-iac): apply supply-chain waiver ledger 2026-04-22 00:42:03 -03:00
flux-bot
e6bb015ef2 chore(maintenance): automated image update 2026-04-22 03:26:48 +00:00
flux-bot
ead7c276b4 chore(maintenance): automated image update 2026-04-22 03:11:42 +00:00
bfad9c19c5 deploy(bstein-home): target non-root frontend port 2026-04-22 00:01:50 -03:00
439a44bc85 ci(data-prepper): scan staged supply-chain inputs 2026-04-21 23:29:53 -03:00
flux-bot
13f179d842 chore(maintenance): automated image update 2026-04-22 02:09:28 +00:00
c0e5df30d5 ci(quality): use preloaded scanner image 2026-04-21 22:50:53 -03:00
flux-bot
79fbf2644b chore(maintenance): automated image update 2026-04-22 01:50:20 +00:00
0eca6adbbb ci(quality): pass sonar token as login 2026-04-21 22:17:55 -03:00
5801633b30 ci(quality): run sonar and supply-chain scans 2026-04-21 22:09:06 -03:00
fac139fd0e monitoring: rotate grafana dedupe job 2026-04-21 21:25:05 -03:00
jenkins
2df830f01b longhorn: bound settings sync curl calls and rerun job 2026-04-21 21:18:41 -03:00
flux-bot
26fab34de5 chore(maintenance): automated image update 2026-04-22 00:16:57 +00:00
jenkins
e29d0fe349 longhorn: rebalance replicas and cap rebuild pressure 2026-04-21 21:12:19 -03:00
jenkins
77f7620eca scheduling: de-prefer spillover nodes for non-longhorn services 2026-04-21 21:00:56 -03:00
fb0dd60954 jenkins: allow slow controller startup 2026-04-21 20:54:42 -03:00
jenkins
4401c26496 jenkins: de-prefer spillover longhorn nodes for controller and agents 2026-04-21 20:48:02 -03:00
9682a17a82 jenkins: avoid recursive volume ownership resets 2026-04-21 20:34:02 -03:00
55d87c0c14 ci(quality): bind sonarqube token credential in pipelines 2026-04-21 20:16:59 -03:00
379f20efc5 jenkins: prefer rpi5 without hard pin 2026-04-21 19:51:09 -03:00
7883593166 ci(jenkins): inject sonarqube token from vault 2026-04-21 19:43:08 -03:00
flux-bot
5509dd86d5 chore(maintenance): automated image update 2026-04-21 22:01:24 +00:00
06b27c9b9a ci(titan-iac): lower agent cpu request 2026-04-21 18:32:45 -03:00
flux-bot
a927affb1f chore(maintenance): automated image update 2026-04-21 21:22:18 +00:00
flux-bot
fab182e91e chore(maintenance): automated image update 2026-04-21 20:59:18 +00:00
d5be9e1ae9 ci(data-prepper): use mirrored base artifact 2026-04-21 16:56:25 -03:00
fb48d473d2 ci(data-prepper): report n/a coverage as complete 2026-04-21 16:32:42 -03:00
5e5cffbdc7 ci(data-prepper): allow arm64 worker scheduling 2026-04-21 15:33:42 -03:00
e1d804dbb0 ci(data-prepper): lower kaniko cpu request 2026-04-21 15:26:13 -03:00
flux-bot
2086427b72 chore(maintenance): automated image update 2026-04-21 17:56:42 +00:00
e811c0cabf ci(jenkins): require rpi5 controller placement 2026-04-21 14:12:14 -03:00
flux-bot
b68c002e2d chore(maintenance): automated image update 2026-04-21 17:05:21 +00:00
cb7e0238dc infra(ci): use harbor python utility images 2026-04-21 13:37:46 -03:00
flux-bot
043a2e75c8 chore(maintenance): automated image update 2026-04-21 16:30:12 +00:00
6ac375f82e ci(titan-iac): use harbor python runner 2026-04-21 13:18:31 -03:00
jenkins
8c1a26ead6 ci(titan-iac): use in-cluster victoria metrics dns 2026-04-21 12:30:06 -03:00
jenkins
d119f838e9 ci(titan-iac): harden quality metric publisher 2026-04-21 12:24:18 -03:00
jenkins
ae2356de6a monitoring(testing): render missing metric zero states 2026-04-21 11:46:15 -03:00
jenkins
c1ac36df17 monitoring(testing): link test metrics to build artifacts 2026-04-21 11:39:13 -03:00
jenkins
cc79f3ebcd ci(titan-iac): include primary branch in quality metrics 2026-04-21 11:08:59 -03:00
jenkins
1f991fc43d harbor: expand registry storage 2026-04-21 10:56:27 -03:00
jenkins
b62980b76d harbor: reduce vault injector bootstrap requests 2026-04-21 10:08:39 -03:00
jenkins
26da4945ea harbor: move registry bootstrap to titan-11 2026-04-21 09:55:29 -03:00
jenkins
d599a162a9 monitoring(testing): add branch evidence panels 2026-04-21 09:35:43 -03:00
jenkins
e53adc17b3 ci(data-prepper): archive full quality evidence 2026-04-21 09:24:09 -03:00
jenkins
7cd40d457d Merge remote-tracking branch 'origin/main' 2026-04-21 09:23:03 -03:00
flux-bot
d559d03bea chore(maintenance): automated image update 2026-04-21 06:32:37 +00:00
flux-bot
691dc3c71b chore(maintenance): automated image update 2026-04-21 06:27:29 +00:00
flux-bot
e81ecdd716 chore(maintenance): automated image update 2026-04-21 06:14:21 +00:00
flux-bot
74e385ad8b chore(maintenance): automated image update 2026-04-21 06:10:27 +00:00
flux-bot
fecd095717 chore(maintenance): automated image update 2026-04-21 06:03:10 +00:00
flux-bot
caa02806c0 chore(maintenance): automated image update 2026-04-21 06:00:02 +00:00
flux-bot
c6c6f90d26 chore(maintenance): automated image update 2026-04-21 05:54:02 +00:00
flux-bot
e4efb89466 chore(maintenance): automated image update 2026-04-21 05:52:01 +00:00
flux-bot
8584885ddd chore(maintenance): automated image update 2026-04-21 05:44:00 +00:00
flux-bot
6aeacaf872 chore(maintenance): automated image update 2026-04-21 05:42:00 +00:00
flux-bot
0146b92cc1 chore(maintenance): automated image update 2026-04-21 05:33:59 +00:00
flux-bot
981fca6cb4 chore(maintenance): automated image update 2026-04-21 05:26:59 +00:00
flux-bot
6dab28081d chore(maintenance): automated image update 2026-04-21 05:12:56 +00:00
flux-bot
6ebc475da2 chore(maintenance): automated image update 2026-04-21 05:05:56 +00:00
flux-bot
fff26ebacb chore(maintenance): automated image update 2026-04-21 04:57:54 +00:00
flux-bot
e3bebaa10b chore(maintenance): automated image update 2026-04-21 04:55:55 +00:00
flux-bot
df16f03e46 chore(maintenance): automated image update 2026-04-21 04:46:53 +00:00
flux-bot
b5243e8566 chore(maintenance): automated image update 2026-04-21 04:36:52 +00:00
flux-bot
4501bbf8f0 chore(maintenance): automated image update 2026-04-21 04:34:52 +00:00
flux-bot
5331d7149a chore(maintenance): automated image update 2026-04-21 04:24:51 +00:00
jenkins
c4b0389892 quality(titan-iac): widen enforced coverage contract 2026-04-20 21:39:53 -03:00
jenkins
387e104359 test(titan-iac): widen tracked quality coverage 2026-04-20 21:34:59 -03:00
jenkins
5ebc320843 ci(titan-iac): support direct script execution for metrics publish 2026-04-20 15:47:20 -03:00
jenkins
006f79658f ci(titan-iac): retrigger after titan-09 cordon 2026-04-20 15:36:51 -03:00
jenkins
9451bb9c61 test(titan-iac): raise quality gate coverage for quality runner 2026-04-20 15:29:46 -03:00
jenkins
655c26c589 quality(titan-iac): split metrics publisher and harden gate lint 2026-04-20 15:21:49 -03:00
jenkins
607d8c21fa monitoring(testing): fix missing-state queries and add test-case drilldowns 2026-04-20 13:45:01 -03:00
jenkins
b7f6cbd87c ci(titan-iac): enforce 30d build and artifact retention 2026-04-20 12:30:57 -03:00
jenkins
a07b49a05f monitoring(testing): fix atlas-jobs coverage and loc query expressions 2026-04-20 12:20:42 -03:00
jenkins
1d4227beec ci(data-prepper): add retention and archive quality artifacts 2026-04-20 10:55:13 -03:00
jenkins
57306201cf monitoring(testing): backfill placeholder test-case metrics across sparse suites 2026-04-20 09:13:34 -03:00
jenkins
7437ec5929 ci(titan-iac): emit placeholder test-case metric when junit has no cases 2026-04-20 09:10:04 -03:00
jenkins
710ec96990 test(titan-iac): update payload unit tests for per-test metric argument 2026-04-20 08:50:39 -03:00
jenkins
cb1c41c6ea ci(titan-iac): infer coverage/loc metrics from quality summary artifacts 2026-04-20 08:43:21 -03:00
jenkins
e8823197f8 monitoring(testing): align test selector with exported job label 2026-04-20 08:38:38 -03:00
jenkins
c5b1302ff6 monitoring(testing): add fallbacks for problematic-test trend queries 2026-04-20 08:37:26 -03:00
jenkins
f02db9801c monitoring(testing): add per-test metrics and flaky-test panels 2026-04-20 08:35:05 -03:00
jenkins
7d113291c9 monitoring(testing): split check trends into per-check success/failure panels 2026-04-20 08:07:30 -03:00
jenkins
47d5416dde ci(titan-iac): harden promote git workspace detection 2026-04-20 00:59:24 -03:00
codex
f2c4204bab monitoring(testing): fix suite all filter aliases and regex templating 2026-04-19 23:22:34 -03:00
codex
71cfdce862 jenkins: source streaming harbor creds from dedicated vault path 2026-04-19 23:02:30 -03:00
codex
d4112e5a74 ci(titan-iac): guard promote stage when workspace lacks .git 2026-04-19 22:58:58 -03:00
codex
6d2c72ff98 jenkins: keep streaming creds optional without vault hard dependency 2026-04-19 22:45:25 -03:00
codex
c8f7cd6ec2 jenkins(logging): split streaming harbor credentials 2026-04-19 22:40:56 -03:00
codex
bd85143aa0 jenkins: stop overriding push creds with harbor-pull secret 2026-04-19 22:36:18 -03:00
codex
cb992d1c53 maintenance(metis): raise remote timeout and improve progress 2026-04-19 22:34:16 -03:00
codex
7be6cfb9cb ci(titan-iac): install git in runner before promote stage 2026-04-19 22:33:22 -03:00
codex
b848e6b6d8 monitoring(dashboards): regenerate atlas-testing from generator 2026-04-19 22:29:20 -03:00
flux-bot
849bba8f5d chore(maintenance): automated image update 2026-04-20 01:19:35 +00:00
codex
86c492d8c1 ci: retrigger titan-iac after titan-18 cordon 2026-04-19 22:07:10 -03:00
codex
1ed8b7233d maintenance(metis): roll duplicate-build fix to 0.1.0-24 2026-04-19 22:03:04 -03:00
codex
ddabda06bf ci: fix data-prepper defaults and restore metrics publisher coverage 2026-04-19 21:57:40 -03:00
codex
881c724725 jenkins: revert sonar vault path injection blocking startup 2026-04-19 21:42:04 -03:00
codex
2db4952c39 jenkins(sonar): wire defaults and observe-mode toggles 2026-04-19 21:30:02 -03:00
codex
57432e01a3 maintenance(metis): export bastion ssh key for replacement readiness 2026-04-19 21:22:57 -03:00
codex
97bc0cea8c maintenance(metis): use inventory path available in remote runner pods 2026-04-19 21:18:30 -03:00
codex
e930aac039 ci(gate): enforce sonar and supply-chain checks across suites 2026-04-19 21:16:42 -03:00
flux-bot
13ec9b2d7d chore(maintenance): automated image update 2026-04-20 00:14:29 +00:00
d8f07c2b70 maintenance(metis): run vault-enabled metis service image 2026-04-19 21:14:19 -03:00
20a255252c maintenance(metis): add titan-16 replacement profile 2026-04-19 21:01:49 -03:00
376e68ec31 maintenance(metis): inject harbor creds into service runtime 2026-04-19 20:52:04 -03:00
flux-bot
7497f8d4e0 chore(maintenance): automated image update 2026-04-19 23:45:10 +00:00
b3270e7231 maintenance(metis): add titan-10 and titan-12 inventory profiles 2026-04-19 20:44:12 -03:00
1dce63fb9b monitoring(testing): render zero-state data for missing/sonar panels 2026-04-19 16:56:22 -03:00
96f3844677 quality(sonarqube): read exporter token from shared oidc vault path 2026-04-19 16:40:39 -03:00
65edbd9ed9 quality(sonarqube): inject exporter token from vault 2026-04-19 16:34:27 -03:00
29138b8a51 ci(metrics): publish canonical titan-iac gate checks 2026-04-19 16:29:07 -03:00
flux-bot
aede5aa899 chore(maintenance): automated image update 2026-04-19 19:19:49 +00:00
12293c9d11 test(ci): align publish_test_metrics unit tests with current API 2026-04-19 16:18:35 -03:00
2d0360be3b ci(metrics): use Pushgateway PUT for suite payload replacement 2026-04-19 16:10:20 -03:00
f9d7694f25 monitoring(testing): harden suite selector and success history query 2026-04-19 15:31:59 -03:00
9e3cc0f760 ci(jenkins): fix glue test VM URL and default SA observer RBAC 2026-04-19 15:06:13 -03:00
32410555cd monitoring: remove combined UPS draw series from history panels 2026-04-19 14:51:25 -03:00
347e7ccc84 monitoring: revert atlas overview dashboard to pre-quality changes 2026-04-19 14:43:41 -03:00
e47a877169 ci: resolve flux branch without Groovy dollar interpolation 2026-04-19 14:41:22 -03:00
592d037522 ci: fix titan-iac and data-prepper pipeline gate publishing 2026-04-19 14:33:26 -03:00
3ccc2a1100 quality: standardize suite checks and add SonarQube stack 2026-04-19 14:18:58 -03:00
9a20f4f854 monitoring(testing): redesign atlas testing dashboard and unify suite aliases 2026-04-18 17:47:06 -03:00
9a8c454123 tests(quality-gate): cover metrics publisher edge paths 2026-04-18 17:29:50 -03:00
flux-bot
e1f430455d chore(maintenance): automated image update 2026-04-18 19:36:24 +00:00
01fe20fe68 monitoring(metrics): normalize platform gate contract and pegasus suite name 2026-04-18 16:34:20 -03:00
2221a2d279 monitoring: alert on soteria backup job creation spikes 2026-04-17 01:09:25 -03:00
flux-bot
20305a7181 chore(maintenance): automated image update 2026-04-17 03:48:15 +00:00
10c813d583 maintenance(soteria): pause backup scheduler during backlog incident 2026-04-16 21:29:14 -03:00
1b041aa813 monitoring(dashboards): fix success-rate fallback expression 2026-04-16 20:02:26 -03:00
8f2b247b5f monitoring(dashboards): fallback idle panels to zero 2026-04-16 19:59:08 -03:00
1f3ce453fb maintenance(soteria): add startup probe and relax liveness 2026-04-16 19:54:07 -03:00
ff11f7ee65 monitoring(vm): raise kube-state-metrics scrape size cap 2026-04-16 19:47:56 -03:00
11d9c5eae3 monitoring(vm): avoid accelerator nodes for vmsingle 2026-04-16 19:39:35 -03:00
95dd0bbd56 monitoring(vm): auto-reload scrape config changes 2026-04-16 19:33:39 -03:00
72e7a39373 monitoring: fix grafana no-data scrape gaps 2026-04-16 19:30:31 -03:00
09d438e8b4 maintenance(titan-24): remove flux temp desktop automation 2026-04-15 22:58:37 -03:00
6752e4c0e5 maintenance(titan-24): keep helper retries armed 2026-04-15 22:50:41 -03:00
e7f3edb4bf maintenance(titan-24): tolerate unreachable helper jobs 2026-04-15 22:30:22 -03:00
c55d5ac3b5 maintenance(titan-24): add desktop helper and rootfs sweep 2026-04-15 22:25:11 -03:00
fb43b02b2a monitoring(soteria): tune PVC backup age thresholds for nightly cadence 2026-04-14 02:17:52 -03:00
55fa72d446 monitoring(overview): align enclosure fonts and shorten fan labels 2026-04-14 01:18:41 -03:00
496f7a12dd monitoring(overview): dedupe typhon series and map fans by port 2026-04-14 00:31:38 -03:00
6b75ae7dcc monitoring(overview): fix jenkins success/failure ranking with single-frame status labels 2026-04-13 23:13:45 -03:00
50a9bda808 typhon: register app and add v2-safe ble/control runtime toggles 2026-04-13 23:07:53 -03:00
c573012a7c monitoring(overview): globally sort jenkins rows across status frames 2026-04-13 23:03:38 -03:00
8ac428f816 monitoring(overview): derive jenkins top-6 in PromQL per panel 2026-04-13 22:38:40 -03:00
99e7dababd monitoring(overview): restore jenkins panel readability with top-6 stat rows 2026-04-13 22:13:08 -03:00
8db72c9475 monitoring(overview): replace jenkins tables with stat lists and fix links/colors 2026-04-13 22:07:24 -03:00
2db8e1423d monitoring(overview): fix jenkins row links, status color, and ordering 2026-04-13 20:58:09 -03:00
flux-bot
3e440ba7cd chore(maintenance): automated image update 2026-04-13 19:52:06 +00:00
e437f55d87 monitoring(overview): make jenkins success/failure panels scrollable lists 2026-04-13 16:24:19 -03:00
3bbd0a6f90 monitoring(jenkins): dedupe weather metrics and cap newest list rows 2026-04-13 14:29:44 -03:00
cf988e361b monitoring(overview): make jenkins success/failure lists readable 2026-04-13 14:25:19 -03:00
flux-bot
7f676fdc70 chore(maintenance): automated image update 2026-04-13 17:21:53 +00:00
flux-bot
f2830ce940 chore(maintenance): automated image update 2026-04-13 16:58:51 +00:00
a05a6a0e88 monitoring(overview): increase jenkins success/failure row legibility 2026-04-13 13:51:03 -03:00
30acfe39c4 maintenance(soteria): grant pod logs and roll out 0.1.0-32 2026-04-13 12:52:38 -03:00
flux-bot
ac62a43815 chore(maintenance): automated image update 2026-04-13 15:49:45 +00:00
4bcb1cc940 monitoring(overview): split jenkins weather into success/failure columns 2026-04-13 12:17:34 -03:00
d0abf9a70d monitoring: slightly reduce fan activity value font 2026-04-13 12:08:01 -03:00
flux-bot
69ab8805a9 chore(maintenance): automated image update 2026-04-13 15:06:41 +00:00
18666d5aec monitoring(jenkins): improve weather panel readability and layout 2026-04-13 11:52:40 -03:00
d847a731fb monitoring: increase ups current stat font size 2026-04-13 11:43:25 -03:00
9f9b00a6fb monitoring(jenkins): switch weather to single stat-list panel 2026-04-13 06:24:58 -03:00
28756ceda8 monitoring: align ups and climate cards to postgres two-stat pattern 2026-04-13 06:22:41 -03:00
56cca6df83 monitoring: rebuild split ups and climate cards from scratch 2026-04-13 06:12:29 -03:00
aa935984a8 monitoring: equalize split ups card heights and row spacing 2026-04-13 05:42:39 -03:00
a2172f56ec monitoring(overview): fix pvc backup health/age panel query 2026-04-13 05:33:28 -03:00
db701b89c2 monitoring(overview): add jenkins success and duration columns 2026-04-13 05:31:43 -03:00
ef352cbdc1 monitoring: prevent compact UPS card value clipping 2026-04-13 05:16:37 -03:00
f6b97ac82e monitoring: fix clipped values in compact split panel 2026-04-13 05:00:01 -03:00
0a28cf07c2 monitoring: force one-row value-only split panels 2026-04-13 04:51:03 -03:00
3dd0bc875d monitoring(jenkins): stop collapsing weather bars into one row 2026-04-13 04:32:13 -03:00
cf30f63fb4 typhon: schedule exporter on arm64 workers 2026-04-13 04:30:03 -03:00
2ae886ec74 monitoring: make split climate and ups panels value-only 2026-04-13 04:27:16 -03:00
4d10919ead monitoring(jenkins): render weather panels with exported job labels 2026-04-13 04:03:56 -03:00
c06ba41d0d monitoring: tighten split panel layout in overview 2026-04-13 03:53:06 -03:00
flux-bot
1ed1d6cf80 chore(maintenance): automated image update 2026-04-13 06:47:57 +00:00
f26d7afbbc monitoring: split climate and ups current panels 2026-04-13 03:35:50 -03:00
e5ffa94c1d maintenance(soteria): roll pvc-node pin fix and pod-read rbac 2026-04-13 03:31:57 -03:00
flux-bot
c2048fa594 chore(maintenance): automated image update 2026-04-13 06:30:55 +00:00
08cec8be77 maintenance(soteria): move restic vault path to shared scope 2026-04-13 03:00:57 -03:00
a6ff6122b0 maintenance(vault): roll sync pod after soteria secret mapping 2026-04-13 02:55:14 -03:00
0ffe1e1905 maintenance(jenkins): stabilize ariadne api token bootstrap 2026-04-13 02:55:10 -03:00
4e9b232a4f maintenance(soteria): source restic credentials from vault 2026-04-13 02:53:38 -03:00
b25422f1b4 maintenance(ariadne): restart to pick jenkins api creds 2026-04-13 02:45:29 -03:00
50c9852cff maintenance(jenkins): provision ariadne api user for weather collector 2026-04-13 02:41:20 -03:00
3d2f5c0778 monitoring(alerts): make soteria backup health rule driver-agnostic 2026-04-13 02:36:39 -03:00
flux-bot
206daf156a chore(maintenance): automated image update 2026-04-13 05:31:46 +00:00
f3e77ea994 Revert "monitoring(overview): recenter climate/ups cards and gate stale offline climate data"
This reverts commit 19d6ffcf2a4268fd414cbe5109aafd043d7bb514.
2026-04-13 02:26:09 -03:00
fbb4736d4a maintenance(soteria): roll pods after restic config switch 2026-04-13 02:24:05 -03:00
f02a782991 maintenance(soteria): enable restic encrypted backup mode 2026-04-13 02:23:01 -03:00
6f96f7b78f maintenance(soteria): fix duplicate b2 config keys 2026-04-13 02:21:25 -03:00
4fb0b371ff maintenance(soteria): switch to encrypted restic backups 2026-04-13 02:15:46 -03:00
flux-bot
4c671a5396 chore(maintenance): automated image update 2026-04-13 05:13:43 +00:00
flux-bot
3c675fd887 chore(maintenance): automated image update 2026-04-13 05:03:42 +00:00
2243072be2 maintenance(flux): update ariadne automation on main 2026-04-13 02:03:20 -03:00
19d6ffcf2a monitoring(overview): recenter climate/ups cards and gate stale offline climate data 2026-04-13 01:43:21 -03:00
53a20a8560 maintenance(soteria): avoid titan-10 scheduling 2026-04-13 01:16:59 -03:00
f1bb65cb73 monitoring(overview): center climate/ups cards and add UPS discharge risk coloring 2026-04-13 01:08:58 -03:00
0576de7a61 maintenance(soteria): roll snapshot-first backup fix image 2026-04-13 00:42:15 -03:00
c409c7ca80 monitoring(jobs): add jenkins build weather job list panels 2026-04-13 00:26:22 -03:00
f2aab54884 monitoring(overview): add fixed labels to canvas 2x2 stat cards 2026-04-13 00:21:56 -03:00
e6785f7db1 monitoring(overview): fix ups/climate 2x2 cards and dynamic climate axes 2026-04-13 00:18:06 -03:00
d514fb35e5 longhorn(core): restore b2 secret objects in vault sync 2026-04-12 23:54:35 -03:00
41a5add906 monitoring(climate): drop zero samples to unlock dynamic history scaling 2026-04-12 23:02:23 -03:00
00fe5e8a0f monitoring(testing): add coverage and code-smell infraction panels 2026-04-12 22:58:33 -03:00
3a148c63e4 monitoring(overview): rebalance climate row widths for current/history panels 2026-04-12 22:57:25 -03:00
f17fa41207 monitoring(overview): restore single-panel cards and dynamic climate axes 2026-04-12 22:53:46 -03:00
d642deb4f4 maintenance(soteria): fix prometheus scrape port to 8080 2026-04-12 22:36:51 -03:00
51e35b8643 monitoring(overview): stack ups current card into draw/runtime rows 2026-04-12 22:25:34 -03:00
e53933ece7 monitoring(overview): stack climate stats into explicit 2x2 rows 2026-04-12 22:19:37 -03:00
4efd28c956 Revert "monitoring(overview): force horizontal stat cards for climate/ups wrap"
This reverts commit 287c339aa0001c1daec161fd9fc73fbd4b267b48.
2026-04-12 22:14:59 -03:00
a1ab78b0c9 monitoring(grafana): mount and provision atlas-testing dashboard 2026-04-12 22:13:58 -03:00
287c339aa0 monitoring(overview): force horizontal stat cards for climate/ups wrap 2026-04-12 22:11:40 -03:00
dc1f1cbb7c monitoring(overview): split climate and ups stats into two-row query groups 2026-04-12 22:07:58 -03:00
4a10163b10 monitoring(overview): tune stat sizing for 2x2 climate/ups cards 2026-04-12 22:03:13 -03:00
f45217f98e monitoring(overview): simplify ups current card to draw/runtime 2026-04-12 21:36:42 -03:00
66da1b3aab monitoring(overview): shorten ups labels for readable stat rows 2026-04-12 21:32:48 -03:00
8d30fddd7d monitoring(overview): wrap ups and climate stats for narrow panels 2026-04-12 21:28:14 -03:00
a0f1149bbb monitoring(overview): restore readable two-row stats for ups and climate 2026-04-12 21:23:28 -03:00
d2672300a3 monitoring(jobs): switch cleanup stats to two-row layout 2026-04-12 20:38:52 -03:00
ed5a59f21d maintenance(soteria): set explicit b2 endpoint and bucket 2026-04-12 20:31:02 -03:00
66bd705971 monitoring: tune stat text sizing for climate and ups 2026-04-12 20:30:17 -03:00
4b78e67036 monitoring: use wide stat layout for ups and climate cards 2026-04-12 20:23:38 -03:00
3a4bdbd42f monitoring: switch ups/climate/fan stats to vertical orientation 2026-04-12 20:12:17 -03:00
e222344cd9 monitoring(jobs): add schedule fallback series for cold starts 2026-04-12 20:09:43 -03:00
a1257b65ff maintenance(ariadne): roll image to 0.1.0-103 for cleanup rollout 2026-04-12 20:06:03 -03:00
299a68ad95 monitoring(jobs): split testing dashboard and clean up job ops view 2026-04-12 20:06:03 -03:00
049a0deb04 maintenance(soteria): roll react ui image and wire b2 monitoring 2026-04-12 20:04:35 -03:00
7d3b12c774 monitoring: restore stat layout for ups/climate/fan rows 2026-04-12 19:56:12 -03:00
ac71b4621c monitoring: render ups/climate/fan panels as row tables 2026-04-12 19:46:39 -03:00
3271369e2d monitoring: set compact stat layout for climate and ups rows 2026-04-12 19:37:08 -03:00
931ee5944d monitoring: pack overview/power stats horizontally 2026-04-12 19:23:10 -03:00
08077f46c6 monitoring(atlas-power): force horizontal layout for stat rows 2026-04-12 19:06:07 -03:00
b9b9308500 maintenance(soteria): roll image to 0.1.0-22 for oauth2 headers 2026-04-12 18:55:09 -03:00
3096e0d7de monitoring(overview): tighten climate labels and drop duplicate temp line 2026-04-12 18:50:25 -03:00
9f5c9bfb86 maintenance(soteria): re-enable flux management for workload resources 2026-04-12 18:41:56 -03:00
6b0d6b017c monitoring(overview): tune climate row and restore ups card density 2026-04-12 18:35:15 -03:00
de3272e160 merge: atlas jobs ariadne schedule observability 2026-04-12 18:33:07 -03:00
8a413c0024 merge: lane2 jenkins cleanup activate 2026-04-12 18:33:00 -03:00
aa24e08744 merge: lane2 jenkins cleanup wiring 2026-04-12 18:32:48 -03:00
cb27592272 monitoring(overview): reflow UPS/climate rows and add jenkins weather 2026-04-12 18:14:54 -03:00
f67ca30f94 monitoring(climate): add C/F history and dedupe typhon series 2026-04-12 17:56:54 -03:00
b6b1e533ed monitoring(jobs): add Ariadne schedule inventory signals 2026-04-12 17:29:27 -03:00
58ccbfb130 monitoring: add humidity and dew point to climate panels 2026-04-12 17:28:15 -03:00
5bf01bb8e6 vault(auth): allow maintenance soteria oidc secret path 2026-04-12 17:23:41 -03:00
a20fd995a1 monitoring: switch climate dashboards to typhon metrics 2026-04-12 17:20:05 -03:00
e2fbff3315 typhon(flux): unblock reconcile by dropping vault dependsOn 2026-04-12 15:24:40 -03:00
c3ef14c269 typhon: add AC Infinity telemetry service and Flux wiring 2026-04-12 15:23:08 -03:00
82cab1ce2a maintenance(soteria): tighten oauth2 ingress and drill validation 2026-04-12 15:07:54 -03:00
c325744540 monitoring(alerts): watch soteria authz denial spikes 2026-04-12 15:07:54 -03:00
241a405c05 maintenance(soteria): harden ingress path and add backup alerts 2026-04-12 15:07:54 -03:00
6a44a56c38 maintenance(soteria): add serviceaccount and rbac manifests 2026-04-12 15:07:54 -03:00
091e743d0e maintenance(soteria): add protected UI, OIDC bootstrap, and backup health panel wiring 2026-04-12 15:07:53 -03:00
4864939eef maintenance(ariadne): activate jenkins workspace cleanup deletes 2026-04-12 15:01:35 -03:00
326 changed files with 32757 additions and 5093 deletions

374
Jenkinsfile vendored
View File

@ -7,14 +7,52 @@ pipeline {
apiVersion: v1 apiVersion: v1
kind: Pod kind: Pod
spec: spec:
serviceAccountName: "jenkins"
nodeSelector: nodeSelector:
hardware: rpi5
kubernetes.io/arch: arm64 kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-04
- titan-06
- titan-11
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers: containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python - name: python
image: python:3.12-slim image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
command: command:
- cat - cat
tty: true tty: true
@ -24,9 +62,21 @@ spec:
environment { environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1' PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1' PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan-iac' SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091' PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428' VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '0'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
} }
stages { stages {
stage('Checkout') { stage('Checkout') {
@ -36,7 +86,175 @@ spec:
} }
stage('Install deps') { stage('Install deps') {
steps { steps {
sh 'pip install --no-cache-dir -r ci/requirements.txt' sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
}
}
stage('Prepare local quality evidence') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml,services/game-stream/**"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
} }
} }
stage('Run quality gate') { stage('Run quality gate') {
@ -66,8 +284,96 @@ spec:
stage('Enforce quality gate') { stage('Enforce quality gate') {
steps { steps {
sh ''' sh '''
set -eu set -euo pipefail
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0 gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
''' '''
} }
} }
@ -76,7 +382,7 @@ spec:
script { script {
env.FLUX_BRANCH = sh( env.FLUX_BRANCH = sh(
returnStdout: true, returnStdout: true,
script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml''' script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
).trim() ).trim()
if (!env.FLUX_BRANCH) { if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml') error('Flux branch not found in gotk-sync.yaml')
@ -93,16 +399,28 @@ spec:
} }
} }
steps { steps {
container('jnlp') { withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) { sh '''
sh ''' set -euo pipefail
set +x if ! command -v git >/dev/null 2>&1; then
git config user.email "jenkins@bstein.dev" if command -v apk >/dev/null 2>&1; then
git config user.name "jenkins" apk add --no-cache git >/dev/null
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git elif command -v apt-get >/dev/null 2>&1; then
git push origin HEAD:${FLUX_BRANCH} apt-get update >/dev/null
''' apt-get install -y git >/dev/null
} fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
} }
} }
} }
@ -110,15 +428,23 @@ spec:
post { post {
always { always {
script { script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) { try {
try { if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml' try {
} catch (Throwable err) { junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
echo "junit step unavailable: ${err.class.simpleName}" } catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
} catch (Throwable err) {
if (err.class.simpleName == 'MissingContextVariableException') {
echo 'workspace unavailable; skipping post-build artifact collection'
} else {
throw err
} }
} }
} }
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
} }
} }
} }

View File

@ -6,14 +6,52 @@ pipeline {
apiVersion: v1 apiVersion: v1
kind: Pod kind: Pod
spec: spec:
serviceAccountName: "jenkins"
nodeSelector: nodeSelector:
hardware: rpi5
kubernetes.io/arch: arm64 kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-04
- titan-06
- titan-11
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers: containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python - name: python
image: python:3.12-slim image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
command: command:
- cat - cat
tty: true tty: true
@ -23,9 +61,21 @@ spec:
environment { environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1' PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1' PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan-iac' SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091' PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428' VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '0'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
} }
stages { stages {
stage('Checkout') { stage('Checkout') {
@ -35,7 +85,175 @@ spec:
} }
stage('Install deps') { stage('Install deps') {
steps { steps {
sh 'pip install --no-cache-dir -r ci/requirements.txt' sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
}
}
stage('Prepare local quality evidence') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml,services/game-stream/**"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
} }
} }
stage('Run quality gate') { stage('Run quality gate') {
@ -65,8 +283,96 @@ spec:
stage('Enforce quality gate') { stage('Enforce quality gate') {
steps { steps {
sh ''' sh '''
set -eu set -euo pipefail
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0 gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
''' '''
} }
} }
@ -75,7 +381,7 @@ spec:
script { script {
env.FLUX_BRANCH = sh( env.FLUX_BRANCH = sh(
returnStdout: true, returnStdout: true,
script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml''' script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
).trim() ).trim()
if (!env.FLUX_BRANCH) { if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml') error('Flux branch not found in gotk-sync.yaml')
@ -92,16 +398,28 @@ spec:
} }
} }
steps { steps {
container('jnlp') { withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) { sh '''
sh ''' set -euo pipefail
set +x if ! command -v git >/dev/null 2>&1; then
git config user.email "jenkins@bstein.dev" if command -v apk >/dev/null 2>&1; then
git config user.name "jenkins" apk add --no-cache git >/dev/null
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git elif command -v apt-get >/dev/null 2>&1; then
git push origin HEAD:${FLUX_BRANCH} apt-get update >/dev/null
''' apt-get install -y git >/dev/null
} fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
} }
} }
} }
@ -109,15 +427,23 @@ spec:
post { post {
always { always {
script { script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) { try {
try { if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml' try {
} catch (Throwable err) { junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
echo "junit step unavailable: ${err.class.simpleName}" } catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
} catch (Throwable err) {
if (err.class.simpleName == 'MissingContextVariableException') {
echo 'workspace unavailable; skipping post-build artifact collection'
} else {
throw err
} }
} }
} }
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
} }
} }
} }

View File

@ -6,30 +6,50 @@ from __future__ import annotations
import json import json
import os import os
from glob import glob from glob import glob
from pathlib import Path
import sys
import urllib.error import urllib.error
import urllib.request import urllib.request
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from ci.scripts import publish_test_metrics_quality as _quality_helpers
CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
_build_check_statuses = _quality_helpers._build_check_statuses
_combine_statuses = _quality_helpers._combine_statuses
_infer_sonarqube_status = _quality_helpers._infer_sonarqube_status
_infer_source_lines_over_500 = _quality_helpers._infer_source_lines_over_500
_infer_supply_chain_status = _quality_helpers._infer_supply_chain_status
_infer_workspace_coverage_percent = _quality_helpers._infer_workspace_coverage_percent
_load_optional_json = _quality_helpers._load_optional_json
_normalize_result_status = _quality_helpers._normalize_result_status
def _escape_label(value: str) -> str: def _escape_label(value: str) -> str:
"""Escape a Prometheus label value without changing its content."""
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"') return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
def _label_str(labels: dict[str, str]) -> str: def _label_str(labels: dict[str, str]) -> str:
"""Render a stable Prometheus label set from a mapping."""
parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val] parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
return "{" + ",".join(parts) + "}" if parts else "" return "{" + ",".join(parts) + "}" if parts else ""
def _read_text(url: str) -> str: def _read_text(url: str) -> str:
"""Fetch a plain-text response body from the given URL."""
with urllib.request.urlopen(url, timeout=10) as response: with urllib.request.urlopen(url, timeout=10) as response:
return response.read().decode("utf-8") return response.read().decode("utf-8")
def _post_text(url: str, payload: str) -> None: def _post_text(url: str, payload: str) -> None:
"""PUT a plain-text payload and fail on any 4xx/5xx response."""
request = urllib.request.Request( request = urllib.request.Request(
url, url,
data=payload.encode("utf-8"), data=payload.encode("utf-8"),
method="POST", method="PUT",
headers={"Content-Type": "text/plain"}, headers={"Content-Type": "text/plain"},
) )
with urllib.request.urlopen(request, timeout=10) as response: with urllib.request.urlopen(request, timeout=10) as response:
@ -38,6 +58,7 @@ def _post_text(url: str, payload: str) -> None:
def _parse_junit(path: str) -> dict[str, int]: def _parse_junit(path: str) -> dict[str, int]:
"""Parse a JUnit XML file into aggregate test counters."""
if not os.path.exists(path): if not os.path.exists(path):
return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0} return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
@ -64,6 +85,7 @@ def _parse_junit(path: str) -> dict[str, int]:
def _collect_junit_totals(pattern: str) -> dict[str, int]: def _collect_junit_totals(pattern: str) -> dict[str, int]:
"""Sum JUnit counters across every XML file matching the pattern."""
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0} totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
for path in sorted(glob(pattern)): for path in sorted(glob(pattern)):
parsed = _parse_junit(path) parsed = _parse_junit(path)
@ -72,7 +94,38 @@ def _collect_junit_totals(pattern: str) -> dict[str, int]:
return totals return totals
def _collect_junit_cases(pattern: str) -> list[tuple[str, str]]:
"""Collect individual JUnit test-case statuses for flaky-test trend panels."""
cases: list[tuple[str, str]] = []
for path in sorted(glob(pattern)):
if not os.path.exists(path):
continue
root = ET.parse(path).getroot()
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
suites = [elem for elem in root if elem.tag == "testsuite"]
else:
suites = []
for suite in suites:
for test_case in suite.findall("testcase"):
case_name = test_case.attrib.get("name", "").strip()
class_name = test_case.attrib.get("classname", "").strip()
if not case_name:
continue
full_name = f"{class_name}.{case_name}" if class_name else case_name
status = "passed"
if test_case.find("failure") is not None or test_case.find("error") is not None:
status = "failed"
elif test_case.find("skipped") is not None:
status = "skipped"
cases.append((full_name, status))
return cases
def _read_exit_code(path: str) -> int: def _read_exit_code(path: str) -> int:
"""Read the quality-gate exit code, defaulting to failure if missing."""
try: try:
with open(path, "r", encoding="utf-8") as handle: with open(path, "r", encoding="utf-8") as handle:
return int(handle.read().strip()) return int(handle.read().strip())
@ -81,6 +134,7 @@ def _read_exit_code(path: str) -> int:
def _load_summary(path: str) -> dict: def _load_summary(path: str) -> dict:
"""Load the JSON quality-gate summary, returning an empty mapping on error."""
try: try:
with open(path, "r", encoding="utf-8") as handle: with open(path, "r", encoding="utf-8") as handle:
return json.load(handle) return json.load(handle)
@ -88,7 +142,26 @@ def _load_summary(path: str) -> dict:
return {} return {}
def _summary_float(summary: dict, key: str) -> float:
"""Extract a float-like value from the summary, defaulting to 0.0."""
value = summary.get(key)
if isinstance(value, (int, float)):
return float(value)
return 0.0
def _summary_int(summary: dict, key: str) -> int:
"""Extract an int-like value from the summary, defaulting to 0."""
value = summary.get(key)
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
return 0
def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float: def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
"""Return the current counter value for a labeled metric if present."""
text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics") text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics")
for line in text.splitlines(): for line in text.splitlines():
if not line.startswith(metric + "{"): if not line.startswith(metric + "{"):
@ -109,20 +182,34 @@ def _build_payload(
suite: str, suite: str,
status: str, status: str,
tests: dict[str, int], tests: dict[str, int],
test_cases: list[tuple[str, str]],
ok_count: int, ok_count: int,
failed_count: int, failed_count: int,
branch: str, branch: str,
build_number: str, build_number: str,
jenkins_job: str,
summary: dict | None = None, summary: dict | None = None,
workspace_line_coverage_percent: float = 0.0,
source_files_total: int = 0,
source_lines_over_500: int = 0,
check_statuses: dict[str, str] | None = None,
) -> str: ) -> str:
"""Build the Pushgateway payload for the current suite run."""
passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0) passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0)
build_labels = _label_str( build_labels = _label_str(
{ {
"suite": suite, "suite": suite,
"branch": branch or "unknown", "branch": branch or "unknown",
"build_number": build_number or "unknown", "build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
} }
) )
test_case_base_labels = {
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
lines = [ lines = [
"# TYPE platform_quality_gate_runs_total counter", "# TYPE platform_quality_gate_runs_total counter",
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}', f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
@ -135,37 +222,85 @@ def _build_payload(
"# TYPE titan_iac_quality_gate_run_status gauge", "# TYPE titan_iac_quality_gate_run_status gauge",
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}', f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}', f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
"# TYPE platform_quality_gate_build_info gauge",
f"platform_quality_gate_build_info{build_labels} 1",
"# TYPE titan_iac_quality_gate_build_info gauge", "# TYPE titan_iac_quality_gate_build_info gauge",
f"titan_iac_quality_gate_build_info{build_labels} 1", f"titan_iac_quality_gate_build_info{build_labels} 1",
"# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {workspace_line_coverage_percent:.3f}',
"# TYPE platform_quality_gate_source_files_total gauge",
f'platform_quality_gate_source_files_total{{suite="{suite}"}} {source_files_total}',
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
] ]
results = summary.get("results", []) if isinstance(summary, dict) else [] if check_statuses:
if results:
lines.append("# TYPE titan_iac_quality_gate_checks_total gauge") lines.append("# TYPE titan_iac_quality_gate_checks_total gauge")
for result in results: for check_name in CANONICAL_CHECKS:
check_name = result.get("name") check_status = check_statuses.get(check_name, "not_applicable")
check_status = result.get("status")
if not check_name or not check_status:
continue
lines.append( lines.append(
f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(str(check_name))}",result="{_escape_label(str(check_status))}"}} 1' f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(check_name)}",result="{_escape_label(check_status)}"}} 1'
) )
lines.append("# TYPE platform_quality_gate_test_case_result gauge")
if test_cases:
for test_name, test_status in test_cases:
labels = {
**test_case_base_labels,
"test": test_name,
"status": test_status,
}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
else:
labels = {**test_case_base_labels, "test": "__no_test_cases__", "status": "skipped"}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
return "\n".join(lines) + "\n" return "\n".join(lines) + "\n"
def main() -> int: def main() -> int:
suite = os.getenv("SUITE_NAME", "titan-iac") """Publish the quality-gate metrics and print a compact run summary."""
suite = os.getenv("SUITE_NAME", "titan_iac")
pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091") pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091")
job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci") job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci")
junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml")) junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc")) exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json") summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
branch = os.getenv("BRANCH_NAME", os.getenv("GIT_BRANCH", "")) branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
if branch.startswith("origin/"):
branch = branch[len("origin/") :]
build_number = os.getenv("BUILD_NUMBER", "") build_number = os.getenv("BUILD_NUMBER", "")
jenkins_job = os.getenv("JOB_NAME", "titan-iac")
tests = _collect_junit_totals(junit_glob) tests = _collect_junit_totals(junit_glob)
test_cases = _collect_junit_cases(junit_glob)
exit_code = _read_exit_code(exit_code_path) exit_code = _read_exit_code(exit_code_path)
status = "ok" if exit_code == 0 else "failed" status = "ok" if exit_code == 0 else "failed"
summary = _load_summary(summary_path) summary = _load_summary(summary_path)
workspace_line_coverage_percent = _summary_float(summary, "workspace_line_coverage_percent")
if workspace_line_coverage_percent <= 0:
workspace_line_coverage_percent = _infer_workspace_coverage_percent(summary, "build/coverage-unit.xml")
source_files_total = _summary_int(summary, "source_files_total")
source_lines_over_500 = _summary_int(summary, "source_lines_over_500")
if source_lines_over_500 <= 0:
source_lines_over_500 = _infer_source_lines_over_500(summary)
sonarqube_report = _load_optional_json(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", "build/sonarqube-quality-gate.json"))
supply_chain_report = _load_optional_json(os.getenv("QUALITY_GATE_IRONBANK_REPORT", "build/ironbank-compliance.json"))
truthy = {"1", "true", "yes", "on"}
supply_chain_required = (
os.getenv("QUALITY_GATE_IRONBANK_REQUIRED", "0").strip().lower() in truthy
or os.getenv("PUBLISH_IMAGES", "false").strip().lower() in truthy
)
check_statuses = _build_check_statuses(
summary=summary,
tests=tests,
workspace_line_coverage_percent=workspace_line_coverage_percent,
source_lines_over_500=source_lines_over_500,
sonarqube_report=sonarqube_report,
supply_chain_report=supply_chain_report,
supply_chain_required=supply_chain_required,
)
ok_count = int( ok_count = int(
_fetch_existing_counter( _fetch_existing_counter(
@ -190,11 +325,17 @@ def main() -> int:
suite=suite, suite=suite,
status=status, status=status,
tests=tests, tests=tests,
test_cases=test_cases,
ok_count=ok_count, ok_count=ok_count,
failed_count=failed_count, failed_count=failed_count,
branch=branch, branch=branch,
build_number=build_number, build_number=build_number,
jenkins_job=jenkins_job,
summary=summary, summary=summary,
workspace_line_coverage_percent=workspace_line_coverage_percent,
source_files_total=source_files_total,
source_lines_over_500=source_lines_over_500,
check_statuses=check_statuses,
) )
push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}" push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}"
_post_text(push_url, payload) _post_text(push_url, payload)
@ -208,11 +349,14 @@ def main() -> int:
"tests_skipped": tests["skipped"], "tests_skipped": tests["skipped"],
"ok_count": ok_count, "ok_count": ok_count,
"failed_count": failed_count, "failed_count": failed_count,
"checks_recorded": len(summary.get("results", [])) if isinstance(summary, dict) else 0, "checks_recorded": len(check_statuses),
"workspace_line_coverage_percent": workspace_line_coverage_percent,
"source_files_total": source_files_total,
"source_lines_over_500": source_lines_over_500,
} }
print(json.dumps(summary, sort_keys=True)) print(json.dumps(summary, sort_keys=True))
return 0 return 0
if __name__ == "__main__": if __name__ == "__main__": # pragma: no cover
raise SystemExit(main()) raise SystemExit(main())

View File

@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""Quality/status helpers for publish_test_metrics."""
from __future__ import annotations
import json
from pathlib import Path
import xml.etree.ElementTree as ET
# Status strings that _normalize_result_status maps to "ok"
# (compared after strip() and lower()).
SUCCESS_STATUSES = {"ok", "pass", "passed", "success", "compliant"}
# Status strings that _normalize_result_status maps to "not_applicable".
NOT_APPLICABLE_STATUSES = {"not_applicable", "n/a", "na", "none", "skipped"}
# Status strings that _normalize_result_status maps to "failed".
FAILED_STATUSES = {"failed", "fail", "error", "errors", "warn", "warning", "red"}
# Names of the checks reported for a run; the metrics publisher iterates
# this list when emitting its per-check result lines.
CANONICAL_CHECKS = [
"tests",
"coverage",
"loc",
"docs_naming",
"gate_glue",
"sonarqube",
"supply_chain",
]
def _infer_workspace_coverage_percent(summary: dict, default_xml: str) -> float:
"""Infer workspace line coverage from quality summary coverage XML metadata."""
results = summary.get("results", []) if isinstance(summary, dict) else []
coverage_xml = default_xml
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() != "coverage":
continue
candidate = str(result.get("coverage_xml") or "").strip()
if candidate:
coverage_xml = candidate
break
xml_path = Path(coverage_xml)
if not xml_path.exists():
return 0.0
try:
root = ET.parse(xml_path).getroot()
line_rate = root.attrib.get("line-rate")
if line_rate is None:
return 0.0
return float(line_rate) * 100.0
except (ET.ParseError, OSError, ValueError):
return 0.0
def _infer_source_lines_over_500(summary: dict) -> int:
"""Infer over-limit source file count from hygiene issue payloads."""
results = summary.get("results", []) if isinstance(summary, dict) else []
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() not in {"hygiene", "loc", "smell"}:
continue
issues = result.get("issues")
if not isinstance(issues, list):
continue
return sum(1 for item in issues if isinstance(item, str) and item.startswith("file exceeds"))
return 0
def _normalize_result_status(value: str | None, default: str = "failed") -> str:
    """Map arbitrary check status text into a canonical result bucket.

    Args:
        value: Raw status, normally a string. Non-string values (for example
            a JSON number or boolean taken straight from a report) are
            converted with ``str()`` instead of raising, because callers pass
            report fields through without coercing them first.
        default: Bucket returned when ``value`` is empty/falsy or not a
            recognised status.

    Returns:
        ``"ok"``, ``"not_applicable"`` or ``"failed"`` for recognised
        statuses (matched case-insensitively after trimming whitespace),
        otherwise ``default``.
    """
    if not value:
        return default
    normalized = str(value).strip().lower()
    if normalized in SUCCESS_STATUSES:
        return "ok"
    if normalized in NOT_APPLICABLE_STATUSES:
        return "not_applicable"
    if normalized in FAILED_STATUSES:
        return "failed"
    return default
def _load_optional_json(path: str | None) -> dict:
"""Load an optional JSON report file, returning an empty object when absent."""
if not path:
return {}
candidate = Path(path)
if not candidate.exists():
return {}
try:
return json.loads(candidate.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return {}
def _combine_statuses(statuses: list[str]) -> str:
"""Roll up many check statuses into one canonical result."""
if not statuses:
return "not_applicable"
if any(status == "failed" for status in statuses):
return "failed"
if all(status == "not_applicable" for status in statuses):
return "not_applicable"
if all(status in {"ok", "not_applicable"} for status in statuses):
return "ok"
return "failed"
def _infer_sonarqube_status(report: dict) -> str:
"""Infer canonical SonarQube check status from its JSON report payload."""
if not report:
return "not_applicable"
status = (
report.get("projectStatus", {}).get("status")
or report.get("qualityGate", {}).get("status")
or report.get("status")
)
return _normalize_result_status(str(status) if status is not None else None, default="failed")
def _infer_supply_chain_status(report: dict, required: bool) -> str:
"""Infer canonical supply-chain status from IronBank/artifact report payload."""
if not report:
return "failed" if required else "not_applicable"
compliant = report.get("compliant")
if isinstance(compliant, bool):
if compliant:
return "ok"
return "failed" if required else "not_applicable"
status = report.get("status")
if status is None:
return "failed" if required else "not_applicable"
normalized = _normalize_result_status(str(status), default="failed")
if normalized == "failed" and not required:
return "not_applicable"
if normalized == "not_applicable" and required:
return "failed"
return normalized
def _build_check_statuses(
    summary: dict | None,
    tests: dict[str, int],
    workspace_line_coverage_percent: float,
    source_lines_over_500: int,
    sonarqube_report: dict,
    supply_chain_report: dict,
    supply_chain_required: bool,
) -> dict[str, str]:
    """Generate the canonical quality-check status map for dashboarding.

    Explicit results in ``summary["results"]`` win. Each check without an
    explicit result falls back to related result names and then to the raw
    measurements passed in.

    Args:
        summary: Optional gate summary; only its ``results`` list of
            ``{"name", "status"}`` mappings is read.
        tests: Test counters; ``tests``, ``failures`` and ``errors`` are read
            and treated as 0 when absent.
        workspace_line_coverage_percent: Line coverage; values ``<= 0`` mean
            "no coverage data", otherwise 95.0 is the pass threshold.
        source_lines_over_500: Count reported for the LOC check; 0 passes.
        sonarqube_report: SonarQube JSON payload (may be empty).
        supply_chain_report: Supply-chain JSON payload (may be empty).
        supply_chain_required: Whether a missing/failed supply-chain report
            fails the gate instead of being reported as not applicable.

    Returns:
        Mapping with keys ``tests``, ``coverage``, ``loc``, ``docs_naming``,
        ``gate_glue``, ``sonarqube`` and ``supply_chain``.
    """
    raw_results = summary.get("results") if isinstance(summary, dict) else None
    status_by_name: dict[str, str] = {}
    # ``or []`` tolerates a summary whose ``results`` key is present but null.
    for result in raw_results or []:
        if not isinstance(result, dict):
            continue
        check_name = str(result.get("name") or "").strip().lower()
        if not check_name:
            continue
        raw_status = result.get("status")
        # Coerce like the _infer_* helpers do, so a non-string status (bool,
        # int) is normalized instead of crashing on ``.strip()``.
        status_by_name[check_name] = _normalize_result_status(
            str(raw_status) if raw_status is not None else None,
            default="failed",
        )

    def _rollup(keys: list[str]) -> str | None:
        """Combine the statuses recorded under ``keys``; ``None`` when none exist."""
        found = [status_by_name[key] for key in keys if key in status_by_name]
        return _combine_statuses(found) if found else None

    tests_status = status_by_name.get("tests") or _rollup(
        ["unit", "integration", "e2e", "pytest", "test"]
    )
    if not tests_status:
        if tests.get("tests", 0) > 0:
            broken = tests.get("failures", 0) + tests.get("errors", 0)
            tests_status = "ok" if broken == 0 else "failed"
        else:
            tests_status = "not_applicable"

    coverage_status = status_by_name.get("coverage")
    if not coverage_status:
        if workspace_line_coverage_percent > 0:
            coverage_status = "ok" if workspace_line_coverage_percent >= 95.0 else "failed"
        else:
            coverage_status = "not_applicable"

    loc_status = status_by_name.get("loc")
    if not loc_status:
        loc_status = "ok" if source_lines_over_500 == 0 else "failed"

    docs_naming_status = (
        status_by_name.get("docs_naming")
        or _rollup(["docs", "hygiene", "smell", "lint", "naming"])
        or "not_applicable"
    )
    gate_glue_status = (
        status_by_name.get("gate_glue")
        or _rollup(["glue", "gate"])
        or "not_applicable"
    )

    sonarqube_status = status_by_name.get("sonarqube") or _infer_sonarqube_status(sonarqube_report)
    supply_chain_status = status_by_name.get("supply_chain") or _infer_supply_chain_status(
        supply_chain_report,
        required=supply_chain_required,
    )
    return {
        "tests": tests_status,
        "coverage": coverage_status,
        "loc": loc_status,
        "docs_naming": docs_naming_status,
        "gate_glue": gate_glue_status,
        "sonarqube": sonarqube_status,
        "supply_chain": supply_chain_status,
    }

View File

@ -0,0 +1,173 @@
"""Build a titan-iac supply-chain compliance report from Trivy evidence."""
from __future__ import annotations
import argparse
import datetime as dt
import json
from pathlib import Path
from typing import Any
FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
def _read_json(path: Path) -> dict[str, Any]:
"""Read a JSON object from disk for use as pipeline evidence."""
payload = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise ValueError(f"{path} must contain a JSON object")
return payload
def _parse_day(raw: str | None) -> dt.date | None:
"""Parse an ISO day while letting optional waiver dates stay optional."""
if not raw:
return None
return dt.date.fromisoformat(raw)
def _today(override: str | None = None) -> dt.date:
    """Return the policy day: the parsed ``override`` when given, else today's date."""
    pinned = _parse_day(override)
    if pinned:
        return pinned
    return dt.date.today()
def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
    """Return active ``(misconfiguration id, target)`` waivers and expired count.

    Args:
        path: Waiver JSON file, or ``None`` when no waivers are configured. A
            path that does not exist is treated the same as ``None``.
        policy_day: Day against which expiry dates are evaluated. A waiver
            whose expiry equals ``policy_day`` is still active.

    Returns:
        A pair ``(active, expired)``: the set of waived ``(id, target)`` pairs
        and the number of well-formed targets whose waiver has expired.

    Raises:
        ValueError: when the file is not a JSON object or an expiry date is not
            a valid ISO day.
    """
    if path is None or not path.exists():
        return set(), 0
    payload = _read_json(path)
    default_expires_at = payload.get("default_expires_at")
    active: set[tuple[str, str]] = set()
    expired = 0
    # ``or []`` tolerates a file whose ``misconfigurations`` key is null.
    for entry in payload.get("misconfigurations") or []:
        if not isinstance(entry, dict):
            continue
        misconfiguration_id = str(entry.get("id") or "").strip()
        if not misconfiguration_id:
            continue
        # The per-entry expiry wins; the file-level default is the fallback. With
        # neither present the waiver has no expiry.
        expires_at = _parse_day(str(entry.get("expires_at") or default_expires_at or ""))
        targets = entry.get("targets", [])
        if not isinstance(targets, list):
            continue
        # Only non-empty string targets can ever be waived, so count and
        # register the same filtered list on both the expired and active paths.
        valid_targets = [target for target in targets if isinstance(target, str) and target]
        if expires_at and expires_at < policy_day:
            expired += len(valid_targets)
            continue
        # Waivers are target-specific so a new unsafe manifest fails until it is
        # either fixed or deliberately accepted with a fresh expiration.
        active.update((misconfiguration_id, target) for target in valid_targets)
    return active, expired
def _iter_failed_misconfigurations(payload: dict[str, Any]):
"""Yield failed high/critical Trivy misconfiguration records."""
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
target = str(result.get("Target") or "")
for item in result.get("Misconfigurations") or []:
if not isinstance(item, dict):
continue
if item.get("Status") != "FAIL":
continue
if str(item.get("Severity") or "").upper() not in FAIL_SEVERITIES:
continue
yield target, item
def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
"""Count Trivy vulnerabilities at a specific severity."""
count = 0
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
for item in result.get("Vulnerabilities") or []:
if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
count += 1
return count
def _count_secrets(payload: dict[str, Any]) -> int:
"""Count detected secrets in the Trivy filesystem report."""
count = 0
for result in payload.get("Results", []):
if isinstance(result, dict):
count += len(result.get("Secrets") or [])
return count
def build_report(
    trivy_payload: dict[str, Any],
    waiver_path: Path | None = None,
    today_override: str | None = None,
) -> dict[str, Any]:
    """Assemble the compliance summary that the quality gate reads.

    The report is ``"ok"`` only when there are no critical vulnerabilities, no
    detected secrets and no unwaived high/critical misconfigurations. High
    vulnerabilities are counted but do not affect the status.

    Args:
        trivy_payload: Decoded Trivy JSON report.
        waiver_path: Optional waiver file of accepted misconfigurations.
        today_override: Optional ISO day used in place of today's date when
            evaluating waiver expiry.
    """
    active_waivers, expired_waivers = _load_waiver_pairs(waiver_path, _today(today_override))

    unwaived: list[dict[str, str]] = []
    waived_total = 0
    for target, finding in _iter_failed_misconfigurations(trivy_payload):
        finding_id = str(finding.get("ID") or "")
        if (finding_id, target) not in active_waivers:
            unwaived.append(
                {
                    "id": finding_id,
                    "target": target,
                    "severity": str(finding.get("Severity") or ""),
                    "title": str(finding.get("Title") or ""),
                }
            )
        else:
            waived_total += 1

    critical_total = _count_vulnerabilities(trivy_payload, "CRITICAL")
    high_total = _count_vulnerabilities(trivy_payload, "HIGH")
    secret_total = _count_secrets(trivy_payload)
    compliant = critical_total == 0 and secret_total == 0 and not unwaived

    report: dict[str, Any] = {}
    report["status"] = "ok" if compliant else "failed"
    report["compliant"] = compliant
    report["category"] = "artifact_security"
    report["scan_type"] = "filesystem"
    report["scanner"] = "trivy"
    report["critical_vulnerabilities"] = critical_total
    report["high_vulnerabilities"] = high_total
    report["high_vulnerability_policy"] = "observe"
    report["secrets"] = secret_total
    report["high_or_critical_misconfigurations"] = len(unwaived)
    report["waived_misconfigurations"] = waived_total
    report["expired_waivers"] = expired_waivers
    report["waiver_file"] = str(waiver_path) if waiver_path else ""
    # Cap the examples at the first 20 open findings.
    report["open_misconfiguration_examples"] = unwaived[:20]
    return report
def main(argv: list[str] | None = None) -> int:
    """Command-line entrypoint: read Trivy JSON, write the compliance report.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        Always ``0``; failures surface as exceptions.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    for flag, mandatory in (
        ("--trivy-json", True),
        ("--waivers", False),
        ("--output", True),
        ("--today", False),
    ):
        parser.add_argument(flag, required=mandatory)
    args = parser.parse_args(argv)

    report = build_report(
        _read_json(Path(args.trivy_json)),
        waiver_path=Path(args.waivers) if args.waivers else None,
        today_override=args.today,
    )

    destination = Path(args.output)
    # Create the output directory on demand.
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, indent=2, sort_keys=True)
    destination.write_text(serialized + "\n", encoding="utf-8")
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())

View File

@ -0,0 +1,108 @@
"""Glue checks for Ariadne schedules exported to VictoriaMetrics."""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
import requests
import yaml
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
    """Parse the ``config.yaml`` at ``CONFIG_PATH``; an empty document yields ``{}``."""
    parsed = yaml.safe_load(CONFIG_PATH.read_text(encoding="utf-8"))
    return parsed if parsed else {}
def _query(promql: str) -> list[dict]:
    """Run an instant query and return the ``data.result`` list.

    The server base URL comes from ``VM_URL`` (trailing slashes removed), with
    the in-cluster VictoriaMetrics service as the default. HTTP errors raise
    via ``raise_for_status``.
    """
    base_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
    endpoint = f"{base_url}/api/v1/query"
    response = requests.get(endpoint, params={"query": promql}, timeout=10)
    response.raise_for_status()
    data = response.json().get("data", {})
    return data.get("result", [])
def _expected_tasks() -> list[dict]:
    """Return the normalized ``ariadne_schedule_tasks`` entries from the config.

    Fails with an assertion when the config lists no tasks.
    """
    cfg = _load_config()
    tasks = []
    for entry in cfg.get("ariadne_schedule_tasks", []):
        tasks.append(_normalize_task(entry, cfg))
    assert tasks, "No Ariadne schedule tasks configured"
    return tasks
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_series_exist():
    """Every configured task must export a next-run timestamp series."""
    tasks = _expected_tasks()
    selector = _task_regex(tasks)
    samples = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
    reported = set()
    for sample in samples:
        reported.add(sample.get("metric", {}).get("task"))
    missing = []
    for entry in tasks:
        if entry["task"] not in reported:
            missing.append(entry["task"])
    assert not missing, f"Missing next-run metrics for: {', '.join(missing)}"
def test_ariadne_schedule_recent_success():
    """Tracked tasks must report a last success within their allowed age."""
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    samples = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')

    reported = set()
    for sample in samples:
        reported.add(sample.get("metric", {}).get("task"))
    missing = []
    for entry in tasks:
        if entry["task"] not in reported:
            missing.append(entry["task"])
    assert not missing, f"Missing last-success metrics for: {', '.join(missing)}"

    now = datetime.now(timezone.utc)
    # Age in hours of the last success per task; a later sample for the same
    # task overwrites an earlier one.
    age_by_task = {}
    for sample in samples:
        name = sample.get("metric", {}).get("task")
        succeeded_at = datetime.fromtimestamp(float(sample["value"][1]), tz=timezone.utc)
        age_by_task[name] = (now - succeeded_at).total_seconds() / 3600

    too_old = []
    for entry in tasks:
        name = entry["task"]
        if name not in age_by_task:
            continue
        if age_by_task[name] > float(entry["max_success_age_hours"]):
            too_old.append(f"{name} ({age_by_task[name]:.1f}h > {entry['max_success_age_hours']}h)")
    assert not too_old, "Ariadne schedules are stale: " + ", ".join(too_old)
def test_ariadne_schedule_last_status_present_and_boolean():
    """Tracked tasks must export a last-status series whose value is 0 or 1."""
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    samples = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')

    reported = set()
    for sample in samples:
        reported.add(sample.get("metric", {}).get("task"))
    missing = []
    for entry in tasks:
        if entry["task"] not in reported:
            missing.append(entry["task"])
    assert not missing, f"Missing last-status metrics for: {', '.join(missing)}"

    invalid = []
    for sample in samples:
        name = sample.get("metric", {}).get("task")
        reading = float(sample["value"][1])
        if reading in (0.0, 1.0):
            continue
        invalid.append(f"{name}={reading}")
    assert not invalid, f"Unexpected Ariadne last-status values: {', '.join(invalid)}"

View File

@ -1,3 +1,5 @@
"""Glue checks for the metrics the quality-gate publishes."""
from __future__ import annotations from __future__ import annotations
import os import os
@ -23,26 +25,63 @@ def _query(promql: str) -> list[dict]:
return payload.get("data", {}).get("result", []) return payload.get("data", {}).get("result", [])
def test_glue_metrics_present(): def _expected_tasks() -> list[dict]:
series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}') cfg = _load_config()
assert series, "No glue cronjob label series found" tasks = [
_normalize_task(item, cfg)
for item in cfg.get("ariadne_schedule_tasks", [])
]
assert tasks, "No Ariadne schedule tasks configured"
return tasks
def test_glue_metrics_success_join(): def _normalize_task(item: object, cfg: dict) -> dict:
query = ( if isinstance(item, str):
"kube_cronjob_status_last_successful_time " return {
'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}' "task": item,
) "check_last_success": True,
series = _query(query) "max_success_age_hours": cfg.get("max_success_age_hours", 48),
assert series, "No glue cronjob last success series found" }
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_metrics_present(): def test_ariadne_schedule_metrics_present():
cfg = _load_config() tasks = _expected_tasks()
expected = cfg.get("ariadne_schedule_tasks", []) selector = _task_regex(tasks)
if not expected: series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
return seen = {item.get("metric", {}).get("task") for item in series}
series = _query("ariadne_schedule_next_run_timestamp_seconds") missing = [item["task"] for item in tasks if item["task"] not in seen]
tasks = {item.get("metric", {}).get("task") for item in series}
missing = [task for task in expected if task not in tasks]
assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}" assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
def test_ariadne_schedule_success_and_status_metrics_present():
tasks = _tracked_tasks(_expected_tasks())
selector = _task_regex(tasks)
success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
success_tasks = {item.get("metric", {}).get("task") for item in success}
status_tasks = {item.get("metric", {}).get("task") for item in status}
expected = {item["task"] for item in tasks}
missing_success = sorted(expected - success_tasks)
missing_status = sorted(expected - status_tasks)
assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"

View File

@ -0,0 +1,407 @@
{
"version": 1,
"generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
"default_expires_at": "2026-05-22",
"ticket": "atlas-quality-wave-k8s-hardening",
"default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
"misconfigurations": [
{
"id": "DS-0002",
"targets": [
"dockerfiles/Dockerfile.ananke-node-helper"
]
},
{
"id": "KSV-0009",
"targets": [
"services/mailu/vip-controller.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml"
]
},
{
"id": "KSV-0010",
"targets": [
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0014",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/guest-register-deployment.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/actual-budget-deployment.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/deployment.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud-mail-sync/cronjob.yaml",
"services/nextcloud/collabora.yaml",
"services/nextcloud/cronjob.yaml",
"services/nextcloud/deployment.yaml",
"services/nextcloud/maintenance-cronjob.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/planka/deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vault/statefulset.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0017",
"targets": [
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0041",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"infrastructure/traefik/clusterrole.yaml",
"services/bstein-dev-home/rbac.yaml",
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/comms/mas-secrets-ensure-rbac.yaml",
"services/maintenance/soteria-rbac.yaml"
]
},
{
"id": "KSV-0047",
"targets": [
"services/monitoring/rbac.yaml"
]
},
{
"id": "KSV-0053",
"targets": [
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/ariadne-rbac.yaml"
]
},
{
"id": "KSV-0056",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/disable-k3s-traefik-rbac.yaml",
"services/maintenance/k3s-traefik-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0114",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0118",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/coredns-deployment.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud/collabora.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
"services/typhon/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0121",
"targets": [
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
]
}
]
}

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: ai-llm name: ai-llm
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/ai-llm path: ./services/ai-llm

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: bstein-dev-home-migrations name: bstein-dev-home-migrations
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/bstein-dev-home/oneoffs/migrations path: ./services/bstein-dev-home/oneoffs/migrations

View File

@ -13,14 +13,14 @@ spec:
git: git:
checkout: checkout:
ref: ref:
branch: feature/ariadne branch: main
commit: commit:
author: author:
email: ops@bstein.dev email: ops@bstein.dev
name: flux-bot name: flux-bot
messageTemplate: "chore(bstein-dev-home): automated image update" messageTemplate: "chore(bstein-dev-home): automated image update"
push: push:
branch: feature/ariadne branch: main
update: update:
strategy: Setters strategy: Setters
path: services/bstein-dev-home path: services/bstein-dev-home

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: bstein-dev-home name: bstein-dev-home
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/bstein-dev-home path: ./services/bstein-dev-home

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: comms name: comms
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
prune: true prune: true
@ -13,5 +15,3 @@ spec:
path: ./services/comms path: ./services/comms
targetNamespace: comms targetNamespace: comms
timeout: 2m timeout: 2m
dependsOn:
- name: traefik

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: crypto name: crypto
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/crypto path: ./services/crypto

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: finance name: finance
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/finance path: ./services/finance

View File

@ -0,0 +1,29 @@
# clusters/atlas/flux-system/applications/game-stream/kustomization.yaml
# Flux Kustomization that reconciles ./services/game-stream into the
# game-stream namespace every 10 minutes.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: game-stream
  namespace: flux-system
  annotations:
    # NOTE(review): per the Flux docs, IfNotPresent means this object is only
    # created when missing; later edits to this file are not re-applied.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  path: ./services/game-stream
  targetNamespace: game-stream
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciliation is held back until these Kustomizations are ready.
  dependsOn:
    - name: cert-manager
    - name: keycloak
    - name: traefik
    - name: vault
  # With wait disabled, only the resources listed here gate readiness.
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: oauth2-proxy-wolf
      namespace: game-stream
  wait: false
  timeout: 10m

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: gitea name: gitea
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/gitea path: ./services/gitea

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: harbor name: harbor
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/harbor path: ./services/harbor

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: health name: health
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/health path: ./services/health
@ -15,7 +17,6 @@ spec:
dependsOn: dependsOn:
- name: keycloak - name: keycloak
- name: postgres - name: postgres
- name: traefik
- name: vault - name: vault
healthChecks: healthChecks:
- apiVersion: apps/v1 - apiVersion: apps/v1

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: jellyfin name: jellyfin
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/jellyfin path: ./services/jellyfin

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: jenkins name: jenkins
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/jenkins path: ./services/jenkins
@ -14,7 +16,6 @@ spec:
targetNamespace: jenkins targetNamespace: jenkins
dependsOn: dependsOn:
- name: helm - name: helm
- name: traefik
healthChecks: healthChecks:
- apiVersion: apps/v1 - apiVersion: apps/v1
kind: Deployment kind: Deployment

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: keycloak name: keycloak
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
prune: true prune: true

View File

@ -21,10 +21,15 @@ resources:
- sui-metrics/kustomization.yaml - sui-metrics/kustomization.yaml
- openldap/kustomization.yaml - openldap/kustomization.yaml
- keycloak/kustomization.yaml - keycloak/kustomization.yaml
- quality/kustomization.yaml
- oauth2-proxy/kustomization.yaml - oauth2-proxy/kustomization.yaml
- mailu/kustomization.yaml - mailu/kustomization.yaml
- jenkins/kustomization.yaml - jenkins/kustomization.yaml
- ai-llm/kustomization.yaml - ai-llm/kustomization.yaml
- openclaw/kustomization.yaml
- game-stream/kustomization.yaml
- veles/kustomization.yaml
- typhon/kustomization.yaml
- nextcloud/kustomization.yaml - nextcloud/kustomization.yaml
- nextcloud-mail-sync/kustomization.yaml - nextcloud-mail-sync/kustomization.yaml
- outline/kustomization.yaml - outline/kustomization.yaml

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: mailu name: mailu
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
sourceRef: sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: monerod name: monerod
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/crypto/monerod path: ./services/crypto/monerod

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: nextcloud-mail-sync name: nextcloud-mail-sync
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
prune: true prune: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: nextcloud name: nextcloud
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/nextcloud path: ./services/nextcloud

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: oauth2-proxy name: oauth2-proxy
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
prune: true prune: true

View File

@ -0,0 +1,34 @@
# clusters/atlas/flux-system/applications/openclaw/kustomization.yaml
# Flux Kustomization that reconciles ./services/openclaw into the openclaw
# namespace every 10 minutes.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: openclaw
  namespace: flux-system
  annotations:
    # NOTE(review): per the Flux docs, IfNotPresent means this object is only
    # created when missing; later edits to this file are not re-applied.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  path: ./services/openclaw
  targetNamespace: openclaw
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # wait: true health-checks every reconciled resource, bounded by timeout.
  wait: true
  timeout: 30m
  # NOTE(review): the Flux Kustomization API states that healthChecks is
  # ignored when wait is true, so this list is informational only as written.
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: openclaw-ollama
      namespace: openclaw
    - apiVersion: apps/v1
      kind: Deployment
      name: openclaw
      namespace: openclaw
  # Reconciliation is held back until these Kustomizations are ready.
  dependsOn:
    - name: cert-manager
    - name: core
    - name: longhorn
    - name: traefik

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: openldap name: openldap
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
prune: true prune: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: outline name: outline
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/outline path: ./services/outline
@ -15,7 +17,6 @@ spec:
dependsOn: dependsOn:
- name: keycloak - name: keycloak
- name: mailu - name: mailu
- name: traefik
healthChecks: healthChecks:
- apiVersion: apps/v1 - apiVersion: apps/v1
kind: Deployment kind: Deployment

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: pegasus name: pegasus
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/pegasus path: ./services/pegasus

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: planka name: planka
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/planka path: ./services/planka
@ -15,7 +17,6 @@ spec:
dependsOn: dependsOn:
- name: keycloak - name: keycloak
- name: mailu - name: mailu
- name: traefik
healthChecks: healthChecks:
- apiVersion: apps/v1 - apiVersion: apps/v1
kind: Deployment kind: Deployment

View File

@ -0,0 +1,36 @@
# clusters/atlas/flux-system/applications/quality/kustomization.yaml
# Flux Kustomization that reconciles ./services/quality into the quality
# namespace every 10 minutes.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: quality
  namespace: flux-system
  annotations:
    # NOTE(review): per the Flux docs, IfNotPresent means this object is only
    # created when missing; later edits to this file are not re-applied.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  path: ./services/quality
  prune: true
  # No namespace is given here, so the reference resolves in this object's
  # own namespace (flux-system).
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: quality
  # Reconciliation is held back until these Kustomizations are ready.
  dependsOn:
    - name: cert-manager
    - name: keycloak
    - name: vault
    - name: postgres
  # With wait disabled, only the resources listed here gate readiness.
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube-exporter
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: oauth2-proxy-sonarqube
      namespace: quality
  wait: false
  timeout: 20m

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: sui-metrics name: sui-metrics
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/sui-metrics/overlays/atlas path: ./services/sui-metrics/overlays/atlas

View File

@ -0,0 +1,31 @@
# clusters/atlas/flux-system/applications/typhon/kustomization.yaml
# Flux Kustomization that reconciles ./services/typhon every 10 minutes.
# Note that the target namespace is "climate", not "typhon".
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: typhon
  namespace: flux-system
  annotations:
    # NOTE(review): per the Flux docs, IfNotPresent means this object is only
    # created when missing; later edits to this file are not re-applied.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  path: ./services/typhon
  prune: true
  # No namespace is given here, so the reference resolves in this object's
  # own namespace (flux-system).
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: climate
  # Reconciliation is held back until these Kustomizations are ready.
  dependsOn:
    - name: vault
    - name: vault-csi
    - name: monitoring
  # With wait disabled, only the resources listed here gate readiness.
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: typhon
      namespace: climate
    - apiVersion: v1
      kind: Service
      name: typhon
      namespace: climate
  wait: false
  timeout: 20m

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: vault name: vault
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
sourceRef: sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: vaultwarden name: vaultwarden
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
suspend: false suspend: false
@ -17,4 +19,3 @@ spec:
wait: true wait: true
dependsOn: dependsOn:
- name: helm - name: helm
- name: traefik

View File

@ -0,0 +1,29 @@
# clusters/atlas/flux-system/applications/veles/image-automation.yaml
# Staged for the first Veles image rollout. Add this file to the parent
# applications kustomization after the namespace exists and the Harbor repos
# have initial tags.
#
# Once enabled, Flux rewrites image setter markers under services/veles and
# commits the result to main as flux-bot.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: veles
  namespace: veles
spec:
  interval: 1m0s
  # Cross-namespace reference: this object lives in veles, while the
  # GitRepository lives in flux-system.
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  git:
    checkout:
      ref:
        branch: main
    commit:
      author:
        email: ops@bstein.dev
        name: flux-bot
      messageTemplate: "chore(veles): automated image update"
    # Commits are pushed straight back to the branch that was checked out.
    push:
      branch: main
  update:
    strategy: Setters
    path: services/veles

View File

@ -0,0 +1,28 @@
# clusters/atlas/flux-system/applications/veles/kustomization.yaml
# Flux Kustomization that reconciles ./services/veles into the veles
# namespace every 10 minutes.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: veles
  namespace: flux-system
  annotations:
    # NOTE(review): per the Flux docs, IfNotPresent means this object is only
    # created when missing; later edits to this file are not re-applied.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  path: ./services/veles
  targetNamespace: veles
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciliation is held back until these Kustomizations are ready.
  dependsOn:
    - name: cert-manager
    - name: core
    - name: keycloak
    - name: longhorn
    - name: traefik
    - name: vault
    - name: vault-csi
    - name: vault-injector
  # No healthChecks are listed and wait is off, so readiness of this object
  # does not reflect workload health.
  wait: false
  timeout: 20m

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: wallet-monero-temp name: wallet-monero-temp
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/crypto/wallet-monero-temp path: ./services/crypto/wallet-monero-temp

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: xmr-miner name: xmr-miner
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/crypto/xmr-miner path: ./services/crypto/xmr-miner

View File

@ -5966,6 +5966,9 @@ spec:
- args: - args:
- --events-addr=http://notification-controller.$(RUNTIME_NAMESPACE).svc.cluster.local./ - --events-addr=http://notification-controller.$(RUNTIME_NAMESPACE).svc.cluster.local./
- --watch-all-namespaces=true - --watch-all-namespaces=true
- --concurrent=1
- --requeue-dependency=5s
- --interval-jitter-percentage=30
- --log-level=info - --log-level=info
- --log-encoding=json - --log-encoding=json
- --enable-leader-election - --enable-leader-election

View File

@ -7,7 +7,7 @@ metadata:
name: flux-system name: flux-system
namespace: flux-system namespace: flux-system
spec: spec:
interval: 1m0s interval: 15m0s
ref: ref:
branch: main branch: main
secretRef: secretRef:
@ -20,7 +20,7 @@ metadata:
name: flux-system name: flux-system
namespace: flux-system namespace: flux-system
spec: spec:
interval: 10m0s interval: 1h0m0s
path: ./clusters/atlas/flux-system path: ./clusters/atlas/flux-system
prune: true prune: true
sourceRef: sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: cert-manager-cleanup name: cert-manager-cleanup
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
path: ./infrastructure/cert-manager/cleanup path: ./infrastructure/cert-manager/cleanup

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: cert-manager name: cert-manager
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
path: ./infrastructure/cert-manager path: ./infrastructure/cert-manager

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: core name: core
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./infrastructure/core path: ./infrastructure/core

View File

@ -0,0 +1,21 @@
# clusters/atlas/flux-system/platform/descheduler/kustomization.yaml
# Flux Kustomization that reconciles ./infrastructure/descheduler into
# kube-system every 30 minutes.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: descheduler
  namespace: flux-system
  annotations:
    # NOTE(review): per the Flux docs, IfNotPresent means this object is only
    # created when missing; later edits to this file are not re-applied.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 30m
  path: ./infrastructure/descheduler
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  targetNamespace: kube-system
  # Reconciliation is held back until these Kustomizations are ready.
  dependsOn:
    - name: helm
    - name: core
  # Health-check every reconciled resource before reporting Ready.
  wait: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: gitops-ui name: gitops-ui
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
timeout: 10m timeout: 10m
@ -16,5 +18,4 @@ spec:
targetNamespace: flux-system targetNamespace: flux-system
dependsOn: dependsOn:
- name: helm - name: helm
- name: traefik
wait: true wait: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: helm name: helm
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
sourceRef: sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
resources: resources:
- core/kustomization.yaml - core/kustomization.yaml
- helm/kustomization.yaml - helm/kustomization.yaml
- descheduler/kustomization.yaml
- resource-guardrails/kustomization.yaml
- cert-manager/kustomization.yaml - cert-manager/kustomization.yaml
- metallb/kustomization.yaml - metallb/kustomization.yaml
- traefik/kustomization.yaml - traefik/kustomization.yaml

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: logging name: logging
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/logging path: ./services/logging

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: longhorn-adopt name: longhorn-adopt
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
path: ./infrastructure/longhorn/adopt path: ./infrastructure/longhorn/adopt

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: longhorn-ui name: longhorn-ui
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./infrastructure/longhorn/ui-ingress path: ./infrastructure/longhorn/ui-ingress

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: longhorn name: longhorn
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
path: ./infrastructure/longhorn/core path: ./infrastructure/longhorn/core

View File

@ -13,14 +13,14 @@ spec:
git: git:
checkout: checkout:
ref: ref:
branch: feature/ariadne branch: main
commit: commit:
author: author:
email: ops@bstein.dev email: ops@bstein.dev
name: flux-bot name: flux-bot
messageTemplate: "chore(maintenance): automated image update" messageTemplate: "chore(maintenance): automated image update"
push: push:
branch: feature/ariadne branch: main
update: update:
strategy: Setters strategy: Setters
path: services/maintenance path: services/maintenance

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: maintenance name: maintenance
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/maintenance path: ./services/maintenance

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: metallb name: metallb
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
sourceRef: sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: monitoring name: monitoring
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./services/monitoring path: ./services/monitoring

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: postgres name: postgres
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./infrastructure/postgres path: ./infrastructure/postgres

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/platform/resource-guardrails/kustomization.yaml
# Flux Kustomization that reconciles ./infrastructure/resource-guardrails
# every 10 minutes. No targetNamespace is set, so manifests keep whatever
# namespaces they declare themselves.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: resource-guardrails
  namespace: flux-system
  annotations:
    # NOTE(review): per the Flux docs, IfNotPresent means this object is only
    # created when missing; later edits to this file are not re-applied.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  interval: 10m
  path: ./infrastructure/resource-guardrails
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  # Reconciliation is held back until the core Kustomization is ready.
  dependsOn:
    - name: core
  # Health-check every reconciled resource before reporting Ready.
  wait: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: traefik name: traefik
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 10m interval: 10m
path: ./infrastructure/traefik path: ./infrastructure/traefik

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: vault-csi name: vault-csi
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
sourceRef: sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata: metadata:
name: vault-injector name: vault-injector
namespace: flux-system namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec: spec:
interval: 30m interval: 30m
path: ./infrastructure/vault-injector path: ./infrastructure/vault-injector

View File

@ -2,4 +2,8 @@ FROM python:3.11-slim
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN pip install --no-cache-dir requests psycopg2-binary RUN pip install --no-cache-dir requests psycopg2-binary \
&& groupadd --system guest-tools \
&& useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
USER guest-tools

View File

@ -1,15 +1,12 @@
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source # Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
ENV DATA_PREPPER_PATH=/usr/share/data-prepper ENV DATA_PREPPER_PATH=/usr/share/data-prepper
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \ USER root
&& mkdir -p /var/log/data-prepper RUN apt-get update \
&& apt-get install -y --no-install-recommends bc \
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper && rm -rf /var/lib/apt/lists/*
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
USER 10001 USER 10001
WORKDIR /usr/share/data-prepper WORKDIR /usr/share/data-prepper

View File

@ -1,10 +1,13 @@
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20 FROM alpine:3.20
RUN apk add --no-cache ca-certificates RUN apk add --no-cache ca-certificates \
&& addgroup -S livekit-token \
&& adduser -S -D -H -u 65532 -G livekit-token livekit-token
COPY --from=base /lk-jwt-service /lk-jwt-service COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh RUN chmod 0755 /entrypoint.sh
USER livekit-token
ENTRYPOINT ["/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"] CMD ["/lk-jwt-service"]

View File

@ -29,10 +29,12 @@ FROM ${DEBIAN_IMAGE}
RUN set -eux; \ RUN set -eux; \
apt-get update; \ apt-get update; \
apt-get install -y --no-install-recommends ca-certificates; \ apt-get install -y --no-install-recommends ca-certificates; \
update-ca-certificates; rm -rf /var/lib/apt/lists/* update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
groupadd --system p2pool; \
useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
COPY --from=fetch /out/p2pool /usr/local/bin/p2pool COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
RUN /usr/local/bin/p2pool --version || true RUN /usr/local/bin/p2pool --version || true
EXPOSE 3333 EXPOSE 3333
USER p2pool
ENTRYPOINT ["/usr/local/bin/p2pool"] ENTRYPOINT ["/usr/local/bin/p2pool"]

View File

@ -26,9 +26,12 @@ RUN set -eux; \
curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \ curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \ tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \ install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
rm -f /opt/monero/monero.tar.bz2 rm -f /opt/monero/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
ENV PATH="/usr/local/bin:/usr/bin:/bin" ENV PATH="/usr/local/bin:/usr/bin:/bin"
RUN /usr/local/bin/monero-wallet-rpc --version || true RUN /usr/local/bin/monero-wallet-rpc --version || true
EXPOSE 18083 EXPOSE 18083
USER monero

View File

@ -23,10 +23,14 @@ RUN set -eux; \
mkdir -p /opt/monero; \ mkdir -p /opt/monero; \
tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \ tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
rm -f /tmp/monero.tar.bz2; \ rm -f /tmp/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
mkdir -p /data; \ mkdir -p /data; \
chown monero:monero /data; \
chmod 0770 /data chmod 0770 /data
ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \ ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
PATH="/opt/monero:${PATH}" PATH="/opt/monero:${PATH}"
USER monero
CMD ["/opt/monero/monerod", "--version"] CMD ["/opt/monero/monerod", "--version"]

View File

@ -1,10 +1,13 @@
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
FROM alpine:3.20 FROM alpine:3.20
RUN apk add --no-cache ca-certificates RUN apk add --no-cache ca-certificates \
&& addgroup -S oauth2-proxy \
&& adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh RUN chmod 0755 /entrypoint.sh
USER oauth2-proxy
ENTRYPOINT ["/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/oauth2-proxy"] CMD ["/bin/oauth2-proxy"]

View File

@ -1,10 +1,13 @@
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
FROM alpine:3.20 FROM alpine:3.20
RUN apk add --no-cache ca-certificates RUN apk add --no-cache ca-certificates \
&& addgroup -S pegasus \
&& adduser -S -D -H -u 65532 -G pegasus pegasus
COPY --from=base /pegasus /pegasus COPY --from=base /pegasus /pegasus
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh RUN chmod 0755 /entrypoint.sh
USER pegasus
ENTRYPOINT ["/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"]
CMD ["/pegasus"] CMD ["/pegasus"]

View File

@ -0,0 +1,48 @@
# dockerfiles/Dockerfile.quality-tools
# CI helper image bundling sonar-scanner and Trivy on Debian bookworm.
# Both tools are fetched as prebuilt linux/arm64 archives, so this image is
# arm64-only as written. The final image runs as the unprivileged
# quality-tools user (uid 65532) with /workspace as its working directory.
FROM debian:bookworm-slim

ARG SONAR_SCANNER_VERSION=8.0.1.6346
ARG TRIVY_VERSION=0.70.0
ENV TRIVY_CACHE_DIR=/opt/trivy-cache

# Run every RUN step under bash with pipefail so a failing stage in a
# pipeline (e.g. checksum verification) aborts the build.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        curl \
        git \
        jq \
        unzip \
    && rm -rf /var/lib/apt/lists/* \
    && groupadd --system quality-tools \
    && useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools

# sonar-scanner: download, verify against the published .sha256, unpack to /opt.
RUN set -eux; \
    scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
    base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
    curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
    curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
    printf '%s  %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
    unzip -q "/tmp/${scanner_zip}" -d /opt; \
    ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
    rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"

# Trivy: download the release tarball and verify it against the release's
# checksums file before unpacking, mirroring the sonar-scanner step. If the
# tarball has no entry in the checksums file, sha256sum gets empty input and
# fails, so the build stops rather than installing an unverified binary.
RUN set -eux; \
    trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
    trivy_sums="trivy_${TRIVY_VERSION}_checksums.txt"; \
    release_url="https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}"; \
    curl -fsSL "${release_url}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
    curl -fsSL "${release_url}/${trivy_sums}" -o "/tmp/${trivy_sums}"; \
    (cd /tmp && awk -v f="${trivy_tgz}" '$2 == f' "${trivy_sums}" | sha256sum -c -); \
    tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
    rm -f "/tmp/${trivy_tgz}" "/tmp/${trivy_sums}"; \
    trivy --version; \
    sonar-scanner -v

# Bake the vulnerability DB into the image and create the workspace.
# NOTE(review): the cache is left root-owned and only made readable (a+rX),
# so the quality-tools user cannot write to it at runtime; confirm callers
# do not need to update the DB or write scan cache there.
RUN set -eux; \
    mkdir -p "${TRIVY_CACHE_DIR}"; \
    trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
    chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
    mkdir -p /workspace; \
    chown quality-tools:quality-tools /workspace

WORKDIR /workspace
USER quality-tools

View File

@ -27,12 +27,53 @@ spec:
timeout: 10m timeout: 10m
values: values:
installCRDs: true installCRDs: true
replicaCount: 2
podDisruptionBudget:
enabled: true
minAvailable: 1
extraArgs: extraArgs:
- --acme-http01-solver-nameservers=1.1.1.1:53,8.8.8.8:53 - --acme-http01-solver-nameservers=1.1.1.1:53,8.8.8.8:53
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
nodeSelector: nodeSelector:
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
affinity: affinity:
nodeAffinity: nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution: requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms: nodeSelectorTerms:
- matchExpressions: - matchExpressions:
@ -42,10 +83,63 @@ spec:
- rpi5 - rpi5
- rpi4 - rpi4
webhook: webhook:
replicaCount: 2
podDisruptionBudget:
enabled: true
minAvailable: 1
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
livenessProbe:
failureThreshold: 8
initialDelaySeconds: 90
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
readinessProbe:
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 5
nodeSelector: nodeSelector:
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
affinity: affinity:
nodeAffinity: nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution: requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms: nodeSelectorTerms:
- matchExpressions: - matchExpressions:
@ -55,10 +149,51 @@ spec:
- rpi5 - rpi5
- rpi4 - rpi4
cainjector: cainjector:
replicaCount: 2
podDisruptionBudget:
enabled: true
minAvailable: 1
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
nodeSelector: nodeSelector:
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
affinity: affinity:
nodeAffinity: nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution: requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms: nodeSelectorTerms:
- matchExpressions: - matchExpressions:

View File

@ -10,6 +10,7 @@ data:
errors errors
cache 30 cache 30
hosts { hosts {
192.168.22.9 agent.bstein.dev
192.168.22.9 alerts.bstein.dev 192.168.22.9 alerts.bstein.dev
192.168.22.9 auth.bstein.dev 192.168.22.9 auth.bstein.dev
192.168.22.9 bstein.dev 192.168.22.9 bstein.dev
@ -28,6 +29,7 @@ data:
192.168.22.9 matrix.live.bstein.dev 192.168.22.9 matrix.live.bstein.dev
192.168.22.9 metrics.bstein.dev 192.168.22.9 metrics.bstein.dev
192.168.22.9 monero.bstein.dev 192.168.22.9 monero.bstein.dev
192.168.22.9 moonlight.bstein.dev
10.43.6.87 money.bstein.dev 10.43.6.87 money.bstein.dev
192.168.22.9 notes.bstein.dev 192.168.22.9 notes.bstein.dev
192.168.22.9 office.bstein.dev 192.168.22.9 office.bstein.dev
@ -40,6 +42,7 @@ data:
192.168.22.9 secret.bstein.dev 192.168.22.9 secret.bstein.dev
192.168.22.9 sso.bstein.dev 192.168.22.9 sso.bstein.dev
192.168.22.9 stream.bstein.dev 192.168.22.9 stream.bstein.dev
192.168.22.9 wolf.bstein.dev
192.168.22.9 tasks.bstein.dev 192.168.22.9 tasks.bstein.dev
192.168.22.9 vault.bstein.dev 192.168.22.9 vault.bstein.dev
fallthrough fallthrough

View File

@ -4,8 +4,12 @@ kind: Kustomization
resources: resources:
- ../modules/base - ../modules/base
- ../modules/profiles/atlas-ha - ../modules/profiles/atlas-ha
- node-prefer-noschedule-serviceaccount.yaml
- node-prefer-noschedule-rbac.yaml
- node-prefer-noschedule-cronjob.yaml
- coredns-custom.yaml - coredns-custom.yaml
- coredns-deployment.yaml - coredns-deployment.yaml
- ntp-sync-daemonset.yaml - ntp-sync-daemonset.yaml
- workload-profiles.yaml
- ../sources/cert-manager/letsencrypt.yaml - ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml - ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -0,0 +1,80 @@
# infrastructure/core/node-prefer-noschedule-cronjob.yaml
# Every minute, re-applies the node labels and taints listed in the script
# below. Each kubectl mutation is best-effort (`|| true`), and nodes that are
# not present in the cluster are skipped with a log line.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: node-prefer-noschedule
  namespace: kube-system
spec:
  schedule: "* * * * *"
  concurrencyPolicy: Replace
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        spec:
          restartPolicy: Never
          serviceAccountName: node-prefer-noschedule
          containers:
            - name: taint
              image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
              command: ["/usr/bin/env", "bash", "-ceu"]
              args:
                - |
                  # kubectl wrapper with a per-request timeout.
                  k() {
                    kubectl --request-timeout=10s "$@"
                  }

                  # Mark a node as a schedulable general worker of the given hardware class.
                  clear_worker() {
                    local node="${1}"
                    local hardware="${2}"
                    if ! k get node "${node}" >/dev/null 2>&1; then
                      echo "skipping missing node ${node}"
                      return 0
                    fi
                    k label node "${node}" node-role.kubernetes.io/worker=true "hardware=${hardware}" --overwrite=true || true
                    k label node "${node}" atlas.bstein.dev/spillover- || true
                    k taint node "${node}" node.kubernetes.io/unschedulable:NoSchedule- || true
                    k uncordon "${node}" || true
                  }

                  # node=hardware pairs, processed in order.
                  for entry in \
                    titan-04=rpi5 \
                    titan-05=rpi5 \
                    titan-07=rpi5 \
                    titan-08=rpi5 \
                    titan-11=rpi5 \
                    titan-12=rpi4 \
                    titan-14=rpi4 \
                    titan-18=rpi4 \
                    titan-22=amd64; do
                    clear_worker "${entry%%=*}" "${entry#*=}"
                  done

                  if k get node titan-22 >/dev/null 2>&1; then
                    k label node titan-22 atlas.bstein.dev/general-compute=last-resort --overwrite=true || true
                  fi

                  # titan-23: simulation labels and taint, worker role label removed.
                  if k get node titan-23 >/dev/null 2>&1; then
                    k label node titan-23 \
                      veles.bstein.dev/simulation=true \
                      veles.bstein.dev/node-pool=oceanus \
                      node-role.kubernetes.io/veles-sim=true \
                      longhorn-host=true \
                      hardware=oceanus \
                      --overwrite=true || true
                    k label node titan-23 node-role.kubernetes.io/worker- || true
                    k taint node titan-23 veles.bstein.dev/simulation=true:NoSchedule --overwrite=true || true
                  else
                    echo "skipping missing node titan-23"
                  fi

                  # Spillover nodes: labelled as Longhorn hosts and soft-tainted.
                  for node in titan-13 titan-15 titan-17 titan-19; do
                    if ! k get node "${node}" >/dev/null 2>&1; then
                      echo "skipping missing node ${node}"
                      continue
                    fi
                    k label node "${node}" atlas.bstein.dev/spillover=true longhorn-host=true --overwrite=true || true
                    k taint node "${node}" longhorn=true:PreferNoSchedule --overwrite=true || true
                    k taint node "${node}" atlas.bstein.dev/spillover=true:PreferNoSchedule --overwrite=true || true
                  done

View File

@ -0,0 +1,22 @@
# infrastructure/core/node-prefer-noschedule-rbac.yaml
# Cluster-wide read and patch access to Node objects, bound to the
# node-prefer-noschedule ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-prefer-noschedule
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-prefer-noschedule
subjects:
  - kind: ServiceAccount
    name: node-prefer-noschedule
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-prefer-noschedule

View File

@ -0,0 +1,6 @@
# infrastructure/core/node-prefer-noschedule-serviceaccount.yaml
# ServiceAccount named node-prefer-noschedule in the kube-system namespace.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: kube-system
  name: node-prefer-noschedule

View File

@ -0,0 +1,27 @@
# infrastructure/core/workload-profiles.yaml
# Named CPU/memory request and limit presets, stored as a YAML document under
# the profiles.yaml key. Everything below the `|` is literal payload text, so
# comments must stay outside of it.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: kube-system
  name: atlas-workload-profiles
data:
  profiles.yaml: |
    profiles:
      tiny:
        request: { cpu: 25m, memory: 64Mi }
        limit: { cpu: 200m, memory: 256Mi }
      light:
        request: { cpu: 50m, memory: 128Mi }
        limit: { cpu: 500m, memory: 512Mi }
      standard:
        request: { cpu: 250m, memory: 512Mi }
        limit: { cpu: "1", memory: 1Gi }
      heavy:
        request: { cpu: 500m, memory: 1Gi }
        limit: { cpu: 1500m, memory: 3Gi }
      ci:
        request: { cpu: 512m, memory: 512Mi }
        limit: { cpu: 1500m, memory: 2Gi }
      scavenger:
        request: { cpu: 10m, memory: 32Mi }
        limit: { cpu: 250m, memory: 256Mi }

View File

@ -0,0 +1,97 @@
# infrastructure/descheduler/helmrelease.yaml
# Flux HelmRelease for the descheduler chart, run as a CronJob every 20
# minutes with a single policy profile (atlas-rpi-balance).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: descheduler
  namespace: kube-system
spec:
  interval: 30m
  chart:
    spec:
      chart: descheduler
      version: 0.33.0
      sourceRef:
        kind: HelmRepository
        name: descheduler
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    kind: CronJob
    schedule: "*/20 * * * *"
    successfulJobsHistoryLimit: 1
    failedJobsHistoryLimit: 3
    priorityClassName: system-cluster-critical
    resources:
      requests:
        cpu: 50m
        memory: 96Mi
      limits:
        cpu: 200m
        memory: 256Mi
    # Placement: control-plane nodes only, preferring any host but titan-0a.
    nodeSelector:
      node-role.kubernetes.io/control-plane: "true"
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            preference:
              matchExpressions:
                - key: kubernetes.io/hostname
                  operator: NotIn
                  values: ["titan-0a"]
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
      - key: node-role.kubernetes.io/master
        operator: Exists
        effect: NoSchedule
    deschedulerPolicyAPIVersion: descheduler/v1alpha2
    deschedulerPolicy:
      # Eviction caps per descheduler run.
      maxNoOfPodsToEvictPerNode: 2
      maxNoOfPodsToEvictPerNamespace: 2
      profiles:
        - name: atlas-rpi-balance
          # Which plugins run at each extension point.
          plugins:
            balance:
              enabled:
                - RemovePodsViolatingTopologySpreadConstraint
                - LowNodeUtilization
            deschedule:
              enabled:
                - RemovePodsHavingTooManyRestarts
                - RemovePodsViolatingNodeTaints
                - RemovePodsViolatingNodeAffinity
          # Per-plugin arguments.
          pluginConfig:
            - name: DefaultEvictor
              args:
                nodeFit: true
                minPodAge: 10m
                ignorePvcPods: true
                evictLocalStoragePods: false
            - name: RemovePodsHavingTooManyRestarts
              args:
                podRestartThreshold: 12
                includingInitContainers: true
            - name: RemovePodsViolatingNodeAffinity
              args:
                nodeAffinityType:
                  - requiredDuringSchedulingIgnoredDuringExecution
            - name: RemovePodsViolatingTopologySpreadConstraint
            - name: RemovePodsViolatingNodeTaints
            - name: LowNodeUtilization
              args:
                thresholds:
                  cpu: 45
                  memory: 45
                  pods: 45
                targetThresholds:
                  cpu: 75
                  memory: 75
                  pods: 75

View File

@ -0,0 +1,5 @@
# infrastructure/descheduler/kustomization.yaml
# Kustomization that includes only the descheduler HelmRelease.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - helmrelease.yaml

View File

@ -26,6 +26,9 @@ spec:
cleanupOnFail: true cleanupOnFail: true
timeout: 15m timeout: 15m
values: values:
global:
nodeSelector:
longhorn-host: "true"
service: service:
ui: ui:
type: NodePort type: NodePort
@ -78,3 +81,23 @@ spec:
tag: v2.16.0 tag: v2.16.0
defaultSettings: defaultSettings:
systemManagedPodsImagePullPolicy: Always systemManagedPodsImagePullPolicy: Always
taintToleration: veles.bstein.dev/simulation=true:NoSchedule
longhornManager:
tolerations:
- key: veles.bstein.dev/simulation
operator: Equal
value: "true"
effect: NoSchedule
nodeSelector:
longhorn-host: "true"
longhornDriver:
tolerations:
- key: veles.bstein.dev/simulation
operator: Equal
value: "true"
effect: NoSchedule
nodeSelector:
longhorn-host: "true"
longhornUI:
nodeSelector:
longhorn-host: "true"

View File

@ -7,7 +7,9 @@ resources:
- secretproviderclass.yaml - secretproviderclass.yaml
- vault-sync-deployment.yaml - vault-sync-deployment.yaml
- helmrelease.yaml - helmrelease.yaml
- veles-recurring-jobs.yaml
- longhorn-settings-ensure-job.yaml - longhorn-settings-ensure-job.yaml
- longhorn-csi-toleration-ensure-job.yaml
- longhorn-disk-tags-ensure-job.yaml - longhorn-disk-tags-ensure-job.yaml
configMapGenerator: configMapGenerator:

View File

@ -0,0 +1,106 @@
# infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
# One-shot Job that:
#   1. adds the veles.bstein.dev/simulation=true:NoSchedule toleration to the
#      longhorn-csi-plugin DaemonSet and to every engine-image DaemonSet,
#   2. waits for driver.longhorn.io to appear in titan-23's CSINode,
#   3. waits for each engine-image pod on titan-23 to report ready.
# NOTE(review): a Job's pod template is immutable once created; if the
# previous Job object still exists when this rolls out, bump the name suffix.
apiVersion: batch/v1
kind: Job
metadata:
  name: longhorn-csi-toleration-ensure-4
  namespace: longhorn-system
spec:
  backoffLimit: 0
  activeDeadlineSeconds: 240
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      serviceAccountName: longhorn-service-account
      restartPolicy: Never
      nodeSelector:
        kubernetes.io/hostname: titan-11
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
      containers:
        - name: patch
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          # The script uses `set -o pipefail`, which is not a portable /bin/sh
          # option (dash rejects it on many releases), so run it under bash.
          command: ["/usr/bin/env", "bash", "-c"]
          args:
            - |
              set -euo pipefail
              ns="longhorn-system"
              ds="longhorn-csi-plugin"
              key="veles.bstein.dev/simulation"
              value="true"
              effect="NoSchedule"

              # Append the toleration to a DaemonSet unless it is already present.
              patch_daemonset() {
                target="$1"
                current="$(kubectl -n "${ns}" get daemonset "${target}" -o json)"
                if echo "${current}" | jq -e \
                  --arg key "${key}" \
                  --arg value "${value}" \
                  --arg effect "${effect}" \
                  '.spec.template.spec.tolerations[]? | select(.key == $key and .value == $value and .effect == $effect)' >/dev/null; then
                  echo "${target} already tolerates ${key}=${value}:${effect}"
                  return 0
                fi
                # A merge patch replaces the whole list, so send existing + new.
                patch="$(echo "${current}" | jq -c \
                  --arg key "${key}" \
                  --arg value "${value}" \
                  --arg effect "${effect}" \
                  '{
                    spec: {
                      template: {
                        spec: {
                          tolerations: ((.spec.template.spec.tolerations // []) + [
                            {key: $key, operator: "Equal", value: $value, effect: $effect}
                          ])
                        }
                      }
                    }
                  }')"
                kubectl -n "${ns}" patch daemonset "${target}" --type=merge -p "${patch}"
              }

              patch_daemonset "${ds}"
              engine_daemonsets="$(kubectl -n "${ns}" get daemonset -l longhorn.io/component=engine-image -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
              for engine_ds in ${engine_daemonsets}; do
                patch_daemonset "${engine_ds}"
              done

              # Wait up to 90 x 2s for the CSI driver to register on titan-23.
              csi_ready="false"
              for attempt in $(seq 1 90); do
                if kubectl get csinode titan-23 -o json | jq -e '.spec.drivers[]? | select(.name == "driver.longhorn.io")' >/dev/null; then
                  echo "driver.longhorn.io registered on titan-23"
                  csi_ready="true"
                  break
                fi
                sleep 2
              done
              if [ "${csi_ready}" != "true" ]; then
                echo "driver.longhorn.io did not register on titan-23 before timeout" >&2
                exit 1
              fi

              # Wait up to 90 x 2s per engine-image DaemonSet for a ready pod on titan-23.
              for engine_ds in ${engine_daemonsets}; do
                for attempt in $(seq 1 90); do
                  # Require at least one container status: a pod that has not
                  # reported any status yet must not count as ready.
                  if kubectl -n "${ns}" get pods -o json | jq -e \
                    --arg engine_ds "${engine_ds}" \
                    '.items[] | select(.spec.nodeName == "titan-23") | select(.metadata.ownerReferences[]?.name == $engine_ds) | select((.status.containerStatuses // []) | (length > 0 and all(.ready)))' >/dev/null; then
                    echo "${engine_ds} ready on titan-23"
                    break
                  fi
                  if [ "${attempt}" = "90" ]; then
                    echo "${engine_ds} did not become ready on titan-23 before timeout" >&2
                    exit 1
                  fi
                  sleep 2
                done
              done

View File

@ -2,7 +2,7 @@
apiVersion: batch/v1 apiVersion: batch/v1
kind: Job kind: Job
metadata: metadata:
name: longhorn-disk-tags-ensure-1 name: longhorn-disk-tags-ensure-3
namespace: longhorn-system namespace: longhorn-system
spec: spec:
backoffLimit: 0 backoffLimit: 0

View File

@ -2,15 +2,18 @@
apiVersion: batch/v1 apiVersion: batch/v1
kind: Job kind: Job
metadata: metadata:
name: longhorn-settings-ensure-4 name: longhorn-settings-ensure-10
namespace: longhorn-system namespace: longhorn-system
spec: spec:
backoffLimit: 0 backoffLimit: 0
activeDeadlineSeconds: 240
ttlSecondsAfterFinished: 3600 ttlSecondsAfterFinished: 3600
template: template:
spec: spec:
serviceAccountName: longhorn-service-account serviceAccountName: longhorn-service-account
restartPolicy: Never restartPolicy: Never
nodeSelector:
kubernetes.io/hostname: titan-11
volumes: volumes:
- name: longhorn-settings-ensure-script - name: longhorn-settings-ensure-script
configMap: configMap:

View File

@ -17,10 +17,28 @@ import urllib.request
LONGHORN_NS = "longhorn-system" LONGHORN_NS = "longhorn-system"
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes" LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
DESIRED_TAGS = { DESIRED_DISK_TAGS = {
"/mnt/astreae": "astreae", "/mnt/astreae": ["astreae"],
"/mnt/asteria": "asteria", "/mnt/asteria": ["asteria"],
"/mnt/veles": ["veles-oceanus", "veles-db", "veles-artifacts"],
"/mnt/veles-db": ["veles-oceanus", "veles-db"],
"/mnt/veles-artifacts": ["veles-oceanus", "veles-artifacts"],
} }
DESIRED_NODE_TAGS = {
"titan-23": ["veles-oceanus"],
}
DESIRED_NODE_DISKS = {
"titan-23": {
"veles-oceanus": {
"path": "/mnt/veles",
"allowScheduling": True,
"evictionRequested": False,
"storageReserved": 0,
"tags": ["veles-oceanus", "veles-db", "veles-artifacts"],
}
}
}
DISABLE_DEFAULT_DISK_NODES = {"titan-23"}
def api_base() -> str: def api_base() -> str:
@ -63,8 +81,30 @@ def list_nodes() -> list[dict]:
return data.get("items", []) return data.get("items", [])
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None: def merged_tags(current_tags: list[str], desired_tags: list[str]) -> list[str]:
body = {"spec": {"disks": {disk_name: {"tags": [desired_tag]}}}} return sorted(dict.fromkeys([*current_tags, *desired_tags]))
def patch_node_tags(node_name: str, desired_tags: list[str]) -> None:
body = {"spec": {"tags": desired_tags}}
request_json(
"PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
body=body,
)
def patch_disk_tags(node_name: str, disk_name: str, desired_tags: list[str]) -> None:
body = {"spec": {"disks": {disk_name: {"tags": desired_tags}}}}
request_json(
"PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
body=body,
)
def patch_disks(node_name: str, disks: dict) -> None:
body = {"spec": {"disks": disks}}
request_json( request_json(
"PATCH", "PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}", f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
@ -78,18 +118,52 @@ def main() -> int:
for node in list_nodes(): for node in list_nodes():
name = node.get("metadata", {}).get("name", "") name = node.get("metadata", {}).get("name", "")
desired_node_tags = DESIRED_NODE_TAGS.get(name)
if desired_node_tags:
current_node_tags = node.get("spec", {}).get("tags") or []
next_node_tags = merged_tags(current_node_tags, desired_node_tags)
if current_node_tags != next_node_tags:
print(f"patching {name} node tags={current_node_tags!r} -> {next_node_tags!r}")
patch_node_tags(name, next_node_tags)
changed += 1
else:
skipped += 1
spec_disks = node.get("spec", {}).get("disks", {}) or {} spec_disks = node.get("spec", {}).get("disks", {}) or {}
desired_disks = DESIRED_NODE_DISKS.get(name, {})
missing_disks = {
disk_name: disk_spec
for disk_name, disk_spec in desired_disks.items()
if disk_name not in spec_disks
}
if missing_disks:
print(f"adding {name} disks={sorted(missing_disks)}")
patch_disks(name, missing_disks)
changed += len(missing_disks)
spec_disks = {**spec_disks, **missing_disks}
if name in DISABLE_DEFAULT_DISK_NODES:
disable_patch = {}
for disk_name, disk in spec_disks.items():
disk_path = (disk.get("path") or "").rstrip("/")
if disk_path == "/var/lib/longhorn" and disk.get("allowScheduling", True):
disable_patch[disk_name] = {"allowScheduling": False}
if disable_patch:
print(f"disabling default Longhorn scheduling on {name} disks={sorted(disable_patch)}")
patch_disks(name, disable_patch)
changed += len(disable_patch)
for disk_name, disk in spec_disks.items(): for disk_name, disk in spec_disks.items():
disk_path = disk.get("path") disk_path = disk.get("path")
desired_tag = DESIRED_TAGS.get(disk_path) desired_disk_tags = DESIRED_DISK_TAGS.get(disk_path)
if not desired_tag: if not desired_disk_tags:
continue continue
current_tags = disk.get("tags") or [] current_tags = disk.get("tags") or []
if current_tags == [desired_tag]: if current_tags == desired_disk_tags:
skipped += 1 skipped += 1
continue continue
print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {[desired_tag]!r}") print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {desired_disk_tags!r}")
patch_disk_tags(name, disk_name, desired_tag) patch_disk_tags(name, disk_name, desired_disk_tags)
changed += 1 changed += 1
print(f"done: changed={changed} skipped={skipped}") print(f"done: changed={changed} skipped={skipped}")

View File

@ -4,11 +4,12 @@ set -eu
# Longhorn blocks direct CR patches for some settings; use the internal API instead. # Longhorn blocks direct CR patches for some settings; use the internal API instead.
api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings" api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
curl_opts="-fsS --connect-timeout 3 --max-time 15"
wait_for_api() { wait_for_api() {
attempts=30 attempts=30
while [ "${attempts}" -gt 0 ]; do while [ "${attempts}" -gt 0 ]; do
if curl -fsS "${api_base}" >/dev/null 2>&1; then if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
return 0 return 0
fi fi
attempts=$((attempts - 1)) attempts=$((attempts - 1))
@ -22,17 +23,32 @@ update_setting() {
name="$1" name="$1"
value="$2" value="$2"
current="$(curl -fsS "${api_base}/${name}" || true)" current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
echo "Setting ${name} already set." echo "Setting ${name} already set."
return 0 return 0
fi fi
echo "Setting ${name} -> ${value}" echo "Setting ${name} -> ${value}"
curl -fsS -X PUT \ out="$(mktemp)"
if curl ${curl_opts} -o "${out}" -X PUT \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d "{\"value\":\"${value}\"}" \ -d "{\"value\":\"${value}\"}" \
"${api_base}/${name}" >/dev/null "${api_base}/${name}"; then
rm -f "${out}"
return 0
fi
current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
echo "Setting ${name} stored; Longhorn will apply it when current state allows."
rm -f "${out}"
return 0
fi
cat "${out}" >&2 || true
rm -f "${out}"
return 1
} }
wait_for_api wait_for_api
@ -40,3 +56,8 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2" update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2" update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56" update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
update_setting taint-toleration "veles.bstein.dev/simulation=true:NoSchedule"
# Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
update_setting replica-auto-balance "best-effort"
update_setting concurrent-replica-rebuild-per-node-limit "2"
update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"

View File

@ -13,9 +13,27 @@ spec:
- objectName: "harbor-pull__dockerconfigjson" - objectName: "harbor-pull__dockerconfigjson"
secretPath: "kv/data/atlas/shared/harbor-pull" secretPath: "kv/data/atlas/shared/harbor-pull"
secretKey: "dockerconfigjson" secretKey: "dockerconfigjson"
- objectName: "longhorn-backup-b2__AWS_ACCESS_KEY_ID"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ACCESS_KEY_ID"
- objectName: "longhorn-backup-b2__AWS_SECRET_ACCESS_KEY"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_SECRET_ACCESS_KEY"
- objectName: "longhorn-backup-b2__AWS_ENDPOINTS"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ENDPOINTS"
secretObjects: secretObjects:
- secretName: longhorn-registry - secretName: longhorn-registry
type: kubernetes.io/dockerconfigjson type: kubernetes.io/dockerconfigjson
data: data:
- objectName: harbor-pull__dockerconfigjson - objectName: harbor-pull__dockerconfigjson
key: .dockerconfigjson key: .dockerconfigjson
- secretName: longhorn-backup-b2
type: Opaque
data:
- objectName: longhorn-backup-b2__AWS_ACCESS_KEY_ID
key: AWS_ACCESS_KEY_ID
- objectName: longhorn-backup-b2__AWS_SECRET_ACCESS_KEY
key: AWS_SECRET_ACCESS_KEY
- objectName: longhorn-backup-b2__AWS_ENDPOINTS
key: AWS_ENDPOINTS

View File

@ -26,6 +26,16 @@ spec:
- key: hardware - key: hardware
operator: In operator: In
values: ["rpi5", "rpi4"] values: ["rpi5", "rpi4"]
- weight: 90
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
containers: containers:
- name: sync - name: sync
image: alpine:3.20 image: alpine:3.20

View File

@ -0,0 +1,60 @@
# infrastructure/longhorn/core/veles-recurring-jobs.yaml
# Longhorn RecurringJobs for the Veles volumes: one backup and one snapshot
# job each for the postgres and artifacts groups.

# Postgres: backup at 05:30, keeping 7.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-postgres-backup
  namespace: longhorn-system
spec:
  name: veles-postgres-backup
  task: backup
  cron: "30 5 * * *"
  retain: 7
  concurrency: 1
  groups: [veles, veles-postgres]
---
# Postgres: snapshot every 30 minutes, keeping 8.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-postgres-snapshot
  namespace: longhorn-system
spec:
  name: veles-postgres-snapshot
  task: snapshot
  cron: "*/30 * * * *"
  retain: 8
  concurrency: 1
  groups: [veles, veles-postgres]
---
# Artifacts: backup at 05:45, keeping 7.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-artifacts-backup
  namespace: longhorn-system
spec:
  name: veles-artifacts-backup
  task: backup
  cron: "45 5 * * *"
  retain: 7
  concurrency: 1
  groups: [veles, veles-artifacts]
---
# Artifacts: snapshot at minute 15 of every sixth hour, keeping 8.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-artifacts-snapshot
  namespace: longhorn-system
spec:
  name: veles-artifacts-snapshot
  task: snapshot
  cron: "15 */6 * * *"
  retain: 8
  concurrency: 1
  groups: [veles, veles-artifacts]

View File

@ -3,3 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- scavenger.yaml - scavenger.yaml
- veles.yaml

View File

@ -0,0 +1,17 @@
# infrastructure/modules/base/priorityclass/veles.yaml
# Two PriorityClasses for Veles: veles-core (500) and veles-sim (50, never
# preempts). Neither is the cluster default.
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: veles-core
description: "For Veles core database, API, and controller workloads"
globalDefault: false
value: 500
---
kind: PriorityClass
apiVersion: scheduling.k8s.io/v1
metadata:
  name: veles-sim
description: "For Veles simulation jobs; lower than core and non-preempting"
globalDefault: false
preemptionPolicy: Never
value: 50

View File

@ -5,3 +5,6 @@ resources:
- asteria.yaml - asteria.yaml
- asteria-encrypted.yaml - asteria-encrypted.yaml
- astreae.yaml - astreae.yaml
- veles-oceanus-db.yaml
- veles-oceanus-artifacts.yaml
- veles-oceanus-policy.yaml

View File

@ -0,0 +1,21 @@
# infrastructure/modules/base/storageclass/veles-oceanus-artifacts.yaml
# Longhorn StorageClass for Veles artifact volumes: single replica,
# strict-local, on nodes tagged veles-oceanus and disks tagged
# veles-oceanus + veles-artifacts.
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: veles-oceanus-artifacts
  annotations:
    veles.bstein.dev/allowed-namespace: veles
provisioner: driver.longhorn.io
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
parameters:
  # Placement selectors (Longhorn node and disk tags).
  nodeSelector: veles-oceanus
  diskSelector: veles-oceanus,veles-artifacts
  # Replica layout.
  numberOfReplicas: "1"
  dataLocality: strict-local
  replicaAutoBalance: disabled
  staleReplicaTimeout: "30"
  # Volume content.
  fsType: ext4
  fromBackup: ""
  # Recurring jobs attached by name, not by group.
  recurringJobSelector: '[{"name":"veles-artifacts-backup","isGroup":false},{"name":"veles-artifacts-snapshot","isGroup":false}]'

View File

@ -0,0 +1,21 @@
# infrastructure/modules/base/storageclass/veles-oceanus-db.yaml
# Longhorn StorageClass for Veles database volumes: single replica,
# strict-local, on nodes tagged veles-oceanus and disks tagged
# veles-oceanus + veles-db.
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: veles-oceanus-db
  annotations:
    veles.bstein.dev/allowed-namespace: veles
provisioner: driver.longhorn.io
reclaimPolicy: Retain
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
parameters:
  # Placement selectors (Longhorn node and disk tags).
  nodeSelector: veles-oceanus
  diskSelector: veles-oceanus,veles-db
  # Replica layout.
  numberOfReplicas: "1"
  dataLocality: strict-local
  replicaAutoBalance: disabled
  staleReplicaTimeout: "30"
  # Volume content.
  fsType: ext4
  fromBackup: ""
  # Recurring jobs attached by name, not by group.
  recurringJobSelector: '[{"name":"veles-postgres-backup","isGroup":false},{"name":"veles-postgres-snapshot","isGroup":false}]'

View File

@ -0,0 +1,25 @@
# infrastructure/modules/base/storageclass/veles-oceanus-policy.yaml
# Admission policy on PersistentVolumeClaim create/update: a claim naming
# veles-oceanus-db or veles-oceanus-artifacts is accepted only in namespace
# veles. Claims with no storageClassName, or any other class, pass.
kind: ValidatingAdmissionPolicy
apiVersion: admissionregistration.k8s.io/v1
metadata:
  name: veles-oceanus-storage-namespace
spec:
  failurePolicy: Fail
  matchConstraints:
    resourceRules:
      - apiGroups:
          - ""
        apiVersions:
          - v1
        resources:
          - persistentvolumeclaims
        operations:
          - CREATE
          - UPDATE
  validations:
    - message: "Veles Oceanus storage classes are reserved for namespace veles"
      expression: "!has(object.spec.storageClassName) || !(object.spec.storageClassName in ['veles-oceanus-db', 'veles-oceanus-artifacts']) || object.metadata.namespace == 'veles'"
---
# Binding that enforces the policy above with the Deny action.
kind: ValidatingAdmissionPolicyBinding
apiVersion: admissionregistration.k8s.io/v1
metadata:
  name: veles-oceanus-storage-namespace
spec:
  policyName: veles-oceanus-storage-namespace
  validationActions: [Deny]

View File

@ -25,6 +25,7 @@ spec:
serviceAccountName: postgres-vault serviceAccountName: postgres-vault
nodeSelector: nodeSelector:
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
hardware: rpi5
affinity: affinity:
nodeAffinity: nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution: requiredDuringSchedulingIgnoredDuringExecution:
@ -35,7 +36,17 @@ spec:
values: ["true"] values: ["true"]
- key: hardware - key: hardware
operator: In operator: In
values: ["rpi4", "rpi5"] values: ["rpi5"]
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-06"]
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: In
values: ["titan-05", "titan-07", "titan-08", "titan-11"]
containers: containers:
- name: postgres - name: postgres
image: postgres:15 image: postgres:15

View File

@ -0,0 +1,5 @@
# infrastructure/resource-guardrails/kustomization.yaml
# Kustomization that includes only limitranges.yaml.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - limitranges.yaml

Some files were not shown because too many files have changed in this diff Show More