Compare commits

..

938 Commits

Author SHA1 Message Date
flux-bot
1b6d161cc8 chore(maintenance): automated image update 2026-06-13 17:14:20 +00:00
flux-bot
d0a7911b95 chore(maintenance): automated image update 2026-06-13 13:50:04 +00:00
flux-bot
d5cf8b16a2 chore(maintenance): automated image update 2026-06-13 13:49:12 +00:00
flux-bot
6272d1f081 chore(maintenance): automated image update 2026-06-13 13:46:03 +00:00
flux-bot
8a843c4936 chore(maintenance): automated image update 2026-06-13 13:41:09 +00:00
flux-bot
7f06fecdac chore(maintenance): automated image update 2026-06-13 05:14:33 +00:00
flux-bot
28bfd4bbeb chore(maintenance): automated image update 2026-06-13 01:54:14 +00:00
flux-bot
c3183ac051 chore(maintenance): automated image update 2026-06-13 01:54:08 +00:00
flux-bot
c823ffee55 chore(maintenance): automated image update 2026-06-13 01:51:09 +00:00
flux-bot
3c1a368360 chore(maintenance): automated image update 2026-06-13 01:46:10 +00:00
b2f1cff95b Deploy Veles 0.1.3 2026-06-12 16:50:05 -03:00
flux-bot
5b902a4d4e chore(maintenance): automated image update 2026-06-12 17:14:58 +00:00
flux-bot
ddccdbdc49 chore(maintenance): automated image update 2026-06-12 13:52:49 +00:00
flux-bot
b259c6d46b chore(maintenance): automated image update 2026-06-12 13:52:38 +00:00
flux-bot
2b9c972202 chore(maintenance): automated image update 2026-06-12 13:49:43 +00:00
flux-bot
ce4f291e08 chore(maintenance): automated image update 2026-06-12 13:44:38 +00:00
jenkins
3ba994be05 Deploy Veles 0.1.2 2026-06-12 03:37:33 -03:00
jenkins
413659558b Disable runtime Keycloak Mailu provider loading 2026-06-12 03:05:54 -03:00
jenkins
a6e9ff2b65 Raise Keycloak memory for provider augmentation 2026-06-12 02:54:54 -03:00
jenkins
9cfca159b3 Give Keycloak provider augmentation more startup grace 2026-06-12 02:43:03 -03:00
jenkins
d6c2aedd76 Stop copying Keycloak provider sources at runtime 2026-06-12 02:31:39 -03:00
jenkins
fb6fb35a12 Revert Keycloak image bump 2026-06-12 02:23:07 -03:00
flux-bot
b5101a1320 chore(maintenance): automated image update 2026-06-12 05:15:24 +00:00
jenkins
a521a7b914 Bump Keycloak for Veles registration flow 2026-06-12 02:02:02 -03:00
flux-bot
21ced4a417 chore(maintenance): automated image update 2026-06-12 01:51:07 +00:00
flux-bot
eeee467d20 chore(maintenance): automated image update 2026-06-12 01:50:54 +00:00
flux-bot
5c7f64accb chore(maintenance): automated image update 2026-06-12 01:47:56 +00:00
flux-bot
74c9b85c46 chore(maintenance): automated image update 2026-06-12 01:42:54 +00:00
jenkins
cff9aeb535 Deploy Veles 0.1.1 2026-06-11 18:53:40 -03:00
jenkins
c2c0cbe283 Deploy Veles UI polish build 2026-06-11 16:40:29 -03:00
jenkins
6b945f85b3 Deploy Veles sideboard worker image 2026-06-11 15:31:01 -03:00
jenkins
bfd3399db5 Deploy Veles ed03968 images 2026-06-11 15:15:26 -03:00
flux-bot
b365e1c493 chore(maintenance): automated image update 2026-06-11 17:14:58 +00:00
85d7713e30 veles: deploy numeric strategy versions 2026-06-11 12:14:40 -03:00
flux-bot
51e101bc31 chore(maintenance): automated image update 2026-06-11 13:49:27 +00:00
flux-bot
bfda6cd908 chore(maintenance): automated image update 2026-06-11 13:48:26 +00:00
flux-bot
e068749261 chore(maintenance): automated image update 2026-06-11 13:46:27 +00:00
flux-bot
07cb3fa61f chore(maintenance): automated image update 2026-06-11 13:41:28 +00:00
79d47cad60 veles: deploy authoring telemetry fix 2026-06-11 07:17:15 -03:00
4ed9dcc48d veles: deploy combat-context sim worker 2026-06-11 06:13:20 -03:00
f0fa9404af veles: deploy latest-frame table open 2026-06-11 05:50:36 -03:00
93f586efe6 veles: deploy fast table loading fix 2026-06-11 05:42:32 -03:00
678daecda5 veles: run database migrations on backend rollout 2026-06-11 05:16:26 -03:00
6029ff4d53 veles: deploy June 11 app images 2026-06-11 05:12:54 -03:00
flux-bot
ec6deb1370 chore(maintenance): automated image update 2026-06-11 05:15:23 +00:00
flux-bot
0d25683632 chore(maintenance): automated image update 2026-06-11 01:48:52 +00:00
flux-bot
0e222d0cbb chore(maintenance): automated image update 2026-06-11 01:47:59 +00:00
flux-bot
e87206a455 chore(maintenance): automated image update 2026-06-11 01:45:54 +00:00
flux-bot
7af32c84d6 chore(maintenance): automated image update 2026-06-11 01:40:58 +00:00
flux-bot
57edd366ac chore(maintenance): automated image update 2026-06-10 17:14:49 +00:00
93c9539fd2 Advance Veles sim worker strategy hotfix 2026-06-10 13:55:13 -03:00
aeb345d4e6 Advance Veles sim worker hotfix tag 2026-06-10 13:40:33 -03:00
dc2db00c79 Pin Veles sim worker on backend rollout 2026-06-10 13:34:00 -03:00
adf9603077 Deploy Veles backend d89de29 2026-06-10 12:33:32 -03:00
07fe56a4aa Deploy Veles 80817d4 fixes 2026-06-10 12:19:35 -03:00
jenkins
528c727874 maintenance: raise node inotify limits 2026-06-10 05:43:23 -03:00
jenkins
ad920ef9c9 jenkins: lengthen k8s exec websocket timeouts 2026-06-10 05:35:34 -03:00
jenkins
05e147d5d8 jenkins: trust ephemeral agent workspaces 2026-06-10 05:14:41 -03:00
jenkins
a34e72aa7f jenkins: enable veles pipeline 2026-06-10 04:55:58 -03:00
jenkins
1a75277373 jenkins: make default agents writable 2026-06-10 04:49:10 -03:00
jenkins
6883aec12e Roll Veles guide authoring images 2026-06-10 04:30:59 -03:00
flux-bot
e8fa38d611 chore(maintenance): automated image update 2026-06-10 05:14:40 +00:00
jenkins
9eeb7a0974 Roll Veles account logout UI 2026-06-09 22:56:58 -03:00
flux-bot
07a97a74d6 chore(maintenance): automated image update 2026-06-10 01:49:35 +00:00
flux-bot
5821650240 chore(maintenance): automated image update 2026-06-10 01:49:27 +00:00
flux-bot
5466037a8d chore(maintenance): automated image update 2026-06-10 01:47:24 +00:00
flux-bot
4ddb82947e chore(maintenance): automated image update 2026-06-10 01:42:27 +00:00
jenkins
3c55b4d69b Roll Veles frontend route guard fix 2026-06-09 21:10:49 -03:00
jenkins
000c9154a1 Roll Veles observatory UI 2026-06-09 20:57:58 -03:00
jenkins
27fb19a900 Roll Veles frontend mobile home 2026-06-09 19:35:48 -03:00
jenkins
1e92deaf23 Roll Veles frontend responsive home 2026-06-09 19:27:46 -03:00
jenkins
7a676e01a3 Roll Veles frontend home cleanup 2026-06-09 19:17:05 -03:00
jenkins
c682e5158b Roll Veles public stats app images 2026-06-09 19:08:22 -03:00
jenkins
0ee0070d14 Roll Veles backend launch-scope hotfix 2026-06-09 18:35:21 -03:00
jenkins
adc9601228 Roll Veles auth-scoped app images 2026-06-09 18:00:57 -03:00
jenkins
9aec012c42 veles: roll sim-worker to 0.1.0-3 2026-06-09 17:28:19 -03:00
jenkins
622dbc650d veles: roll backend to 0.1.0-4 2026-06-09 16:45:27 -03:00
jenkins
6a6ef8a22b veles: roll backend to 0.1.0-3 2026-06-09 16:36:01 -03:00
jenkins
e8fe7f1146 veles: roll simulation-ready images to 0.1.0-2 2026-06-09 16:29:10 -03:00
jenkins
fc5edbaa83 veles: roll app images to 0.1.0-1 2026-06-09 15:56:17 -03:00
jenkins
16698074bf veles: fit backend within core quota 2026-06-09 15:50:43 -03:00
jenkins
531bc440d5 veles: promote managed app deployments 2026-06-09 15:47:33 -03:00
flux-bot
0d8571b7a6 chore(maintenance): automated image update 2026-06-09 17:15:08 +00:00
jenkins
083e9e1148 veles: align app ports and traffic gate 2026-06-09 12:54:34 -03:00
jenkins
6833c3fe61 veles: harden app infrastructure contract 2026-06-09 11:59:27 -03:00
jenkins
07073970cf veles: let postgres initialize data volume 2026-06-09 02:17:41 -03:00
flux-bot
249f20091f chore(maintenance): automated image update 2026-06-09 05:15:03 +00:00
jenkins
63c869bf42 longhorn: ensure engine image on oceanus 2026-06-09 02:06:34 -03:00
jenkins
8fcc61cae9 longhorn: validate oceanus csi registration 2026-06-09 01:57:59 -03:00
jenkins
7885a70ee1 longhorn: pin toleration jobs off titan-14 2026-06-09 01:55:02 -03:00
jenkins
42069b0f23 longhorn: ensure csi tolerates oceanus 2026-06-09 01:53:04 -03:00
jenkins
5db9dd54fc longhorn: enforce oceanus taint setting 2026-06-09 01:49:16 -03:00
jenkins
530f813ebd longhorn: run csi driver on oceanus 2026-06-09 01:47:03 -03:00
jenkins
599e973d68 veles: enable postgres on oceanus 2026-06-09 01:43:24 -03:00
jenkins
6a40f40932 keycloak: make veles realm job idempotent 2026-06-09 01:26:22 -03:00
jenkins
363e564002 keycloak: fix veles groups mapper 2026-06-09 01:18:30 -03:00
jenkins
2985a7d12c veles: replace secrets oneoff job 2026-06-09 01:06:18 -03:00
jenkins
4f7777522e veles: run secrets oneoff with bash 2026-06-09 01:02:18 -03:00
jenkins
832c025c80 veles: satisfy vault sync memory floor 2026-06-09 00:59:12 -03:00
jenkins
28356e89fc monitoring: keep nvidia exporter off oceanus 2026-06-09 00:55:11 -03:00
jenkins
ea6a10dd7f longhorn: set veles recurring job names 2026-06-09 00:49:03 -03:00
jenkins
654900b8a2 veles: stage atlas infrastructure 2026-06-09 00:46:46 -03:00
jenkins
e1d091eb14 nextcloud: run collabora on amd64 2026-06-08 14:51:00 -03:00
flux-bot
23225c366b chore(maintenance): automated image update 2026-06-08 17:14:32 +00:00
flux-bot
eb1163f1ab chore(maintenance): automated image update 2026-06-08 13:47:56 +00:00
flux-bot
48fce69f96 chore(maintenance): automated image update 2026-06-08 13:46:56 +00:00
flux-bot
d219995052 chore(maintenance): automated image update 2026-06-08 13:44:57 +00:00
flux-bot
0318c3fe08 chore(maintenance): automated image update 2026-06-08 13:40:55 +00:00
flux-bot
c09d88a06e chore(bstein-dev-home): automated image update 2026-06-08 09:44:29 +00:00
flux-bot
7c3aadd4c6 chore(bstein-dev-home): automated image update 2026-06-08 09:43:28 +00:00
flux-bot
e0d88aa265 chore(maintenance): automated image update 2026-06-08 05:14:27 +00:00
flux-bot
9667f0e606 chore(maintenance): automated image update 2026-06-08 01:49:16 +00:00
flux-bot
3fb72623e9 chore(maintenance): automated image update 2026-06-08 01:48:39 +00:00
flux-bot
d6acb9bf78 chore(maintenance): automated image update 2026-06-08 01:46:15 +00:00
flux-bot
312a299af5 chore(maintenance): automated image update 2026-06-08 01:41:37 +00:00
flux-bot
81efc1c723 chore(bstein-dev-home): automated image update 2026-06-07 21:52:07 +00:00
flux-bot
a903948ac6 chore(bstein-dev-home): automated image update 2026-06-07 21:49:49 +00:00
flux-bot
1c26fe1377 chore(maintenance): automated image update 2026-06-07 17:14:04 +00:00
flux-bot
d760defdf0 chore(maintenance): automated image update 2026-06-07 13:50:52 +00:00
flux-bot
4555b6e25a chore(maintenance): automated image update 2026-06-07 13:49:56 +00:00
flux-bot
3c00906357 chore(maintenance): automated image update 2026-06-07 13:46:51 +00:00
flux-bot
9f4fa3537f chore(maintenance): automated image update 2026-06-07 13:41:55 +00:00
jenkins
82486d1408 mailu: start postfix master without postlog service 2026-06-07 02:29:03 -03:00
jenkins
b8c844dbac mailu: auto-disable postfix postlog on rwx queue 2026-06-07 02:26:22 -03:00
jenkins
0532fe2634 mailu: fix postfix startup on rwx queue 2026-06-07 02:18:26 -03:00
flux-bot
2b0e2764c6 chore(maintenance): automated image update 2026-06-07 05:14:39 +00:00
flux-bot
f701ae1628 chore(maintenance): automated image update 2026-06-07 01:49:18 +00:00
flux-bot
0e211f599c chore(maintenance): automated image update 2026-06-07 01:44:19 +00:00
flux-bot
282da9cf49 chore(maintenance): automated image update 2026-06-06 17:14:11 +00:00
flux-bot
3d552432a4 chore(maintenance): automated image update 2026-06-06 13:48:55 +00:00
flux-bot
d97cdf2c2c chore(maintenance): automated image update 2026-06-06 13:48:51 +00:00
flux-bot
828af81405 chore(maintenance): automated image update 2026-06-06 13:45:50 +00:00
flux-bot
d6ec355cde chore(maintenance): automated image update 2026-06-06 13:41:50 +00:00
flux-bot
a5d297e8e0 chore(bstein-dev-home): automated image update 2026-06-06 09:47:21 +00:00
flux-bot
1146000c7d chore(bstein-dev-home): automated image update 2026-06-06 09:45:20 +00:00
flux-bot
6bcbab91d7 chore(maintenance): automated image update 2026-06-06 05:14:35 +00:00
flux-bot
6324292f3b chore(bstein-dev-home): automated image update 2026-06-05 21:46:40 +00:00
flux-bot
bf2d4e1a62 chore(bstein-dev-home): automated image update 2026-06-05 21:45:39 +00:00
flux-bot
c255749410 chore(maintenance): automated image update 2026-06-05 17:14:07 +00:00
jenkins
4fd8a00d4a monitoring(testing): cap history panel ranges 2026-06-05 13:22:29 -03:00
jenkins
75d002dc88 monitoring(testing): cap expensive dashboard queries 2026-06-05 13:15:12 -03:00
flux-bot
f93e3e6050 chore(maintenance): automated image update 2026-06-05 13:49:31 +00:00
flux-bot
ed5504b072 chore(maintenance): automated image update 2026-06-05 13:48:23 +00:00
flux-bot
22eb0cc3f4 chore(maintenance): automated image update 2026-06-05 13:46:30 +00:00
flux-bot
6b53caf57a chore(maintenance): automated image update 2026-06-05 13:42:22 +00:00
flux-bot
2712600a1e chore(bstein-dev-home): automated image update 2026-06-05 10:29:05 +00:00
flux-bot
c916a8c862 chore(bstein-dev-home): automated image update 2026-06-05 10:28:04 +00:00
flux-bot
7158c32c06 chore(maintenance): automated image update 2026-06-05 05:14:08 +00:00
jenkins
a2ecdef536 monitoring(testing): restore lesavka suite visibility 2026-06-05 01:04:56 -03:00
flux-bot
dce9d5c131 chore(maintenance): automated image update 2026-06-05 01:47:24 +00:00
flux-bot
ec2ab28cec chore(maintenance): automated image update 2026-06-05 01:46:27 +00:00
flux-bot
c13169e95b chore(maintenance): automated image update 2026-06-05 01:44:23 +00:00
flux-bot
2559752654 chore(maintenance): automated image update 2026-06-05 01:39:26 +00:00
flux-bot
18c7fd77b4 chore(maintenance): automated image update 2026-06-05 00:44:21 +00:00
flux-bot
9d138a65a9 chore(maintenance): automated image update 2026-06-05 00:44:16 +00:00
flux-bot
9e33541a24 chore(maintenance): automated image update 2026-06-05 00:42:16 +00:00
flux-bot
fe0339647b chore(maintenance): automated image update 2026-06-05 00:37:17 +00:00
jenkins
fd7ec39a15 test(titan-iac): split dashboard trigger checks 2026-06-04 21:09:53 -03:00
jenkins
82fe618be9 ci(data-prepper): allow healthy rpi workers for agents 2026-06-04 21:01:58 -03:00
jenkins
09e64c8ca4 ci(jenkins): refresh suite jobs twice daily 2026-06-04 20:38:02 -03:00
jenkins
f2ad8cca4c monitoring(testing): clean up dashboard health signals 2026-06-04 16:09:08 -03:00
flux-bot
bd6000f956 chore(maintenance): automated image update 2026-06-04 17:14:24 +00:00
flux-bot
97a31b1d23 chore(bstein-dev-home): automated image update 2026-06-04 09:43:44 +00:00
flux-bot
c175602541 chore(bstein-dev-home): automated image update 2026-06-04 09:42:45 +00:00
flux-bot
694a9eb87e chore(maintenance): automated image update 2026-06-04 01:47:27 +00:00
flux-bot
dc8551710a chore(maintenance): automated image update 2026-06-04 01:47:23 +00:00
flux-bot
847ed53214 chore(maintenance): automated image update 2026-06-04 01:45:22 +00:00
flux-bot
8fe590ab93 chore(maintenance): automated image update 2026-06-04 01:40:22 +00:00
flux-bot
41738a71bd chore(maintenance): automated image update 2026-06-03 17:14:19 +00:00
flux-bot
cc09315576 chore(bstein-dev-home): automated image update 2026-06-03 09:44:40 +00:00
flux-bot
ef14c57105 chore(bstein-dev-home): automated image update 2026-06-03 09:43:42 +00:00
flux-bot
e7c8453c0a chore(maintenance): automated image update 2026-06-03 01:36:59 +00:00
flux-bot
65db67b70a chore(maintenance): automated image update 2026-06-03 01:36:09 +00:00
flux-bot
4f79f09dad chore(maintenance): automated image update 2026-06-03 01:34:59 +00:00
flux-bot
bdf695691b chore(maintenance): automated image update 2026-06-03 01:33:08 +00:00
flux-bot
5f0d62fd95 chore(maintenance): automated image update 2026-06-02 17:15:09 +00:00
flux-bot
ab557b9985 chore(bstein-dev-home): automated image update 2026-06-02 09:39:40 +00:00
flux-bot
000e20861c chore(maintenance): automated image update 2026-06-02 01:35:20 +00:00
flux-bot
1528a35634 chore(maintenance): automated image update 2026-06-02 01:35:16 +00:00
flux-bot
c724faf33f chore(maintenance): automated image update 2026-06-02 01:34:15 +00:00
flux-bot
869fb6aca2 chore(maintenance): automated image update 2026-06-02 01:32:18 +00:00
flux-bot
0677bb7500 chore(maintenance): automated image update 2026-06-01 17:15:21 +00:00
flux-bot
82cfa87921 chore(maintenance): automated image update 2026-05-31 17:14:24 +00:00
flux-bot
4c6f00dd32 chore(maintenance): automated image update 2026-05-30 17:14:29 +00:00
flux-bot
d6a975423f chore(maintenance): automated image update 2026-05-30 04:43:08 +00:00
flux-bot
eb8052615d chore(maintenance): automated image update 2026-05-30 04:42:52 +00:00
flux-bot
7ec411e21c chore(maintenance): automated image update 2026-05-30 04:41:08 +00:00
flux-bot
79665b0185 chore(maintenance): automated image update 2026-05-30 04:39:51 +00:00
flux-bot
a42b5c5c19 chore(maintenance): automated image update 2026-05-29 17:14:43 +00:00
flux-bot
3c42a7759a chore(maintenance): automated image update 2026-05-29 01:53:00 +00:00
flux-bot
43218b7584 chore(maintenance): automated image update 2026-05-29 01:52:46 +00:00
flux-bot
faf3f5bc69 chore(maintenance): automated image update 2026-05-29 01:52:00 +00:00
flux-bot
625b9d3191 chore(maintenance): automated image update 2026-05-29 01:49:45 +00:00
flux-bot
47a492ac40 chore(maintenance): automated image update 2026-05-28 17:14:42 +00:00
flux-bot
bce683752c chore(bstein-dev-home): automated image update 2026-05-28 09:46:09 +00:00
flux-bot
6e9d25d93e chore(bstein-dev-home): automated image update 2026-05-28 09:44:47 +00:00
flux-bot
f403defecb chore(maintenance): automated image update 2026-05-28 01:32:10 +00:00
flux-bot
eba2170314 chore(maintenance): automated image update 2026-05-28 01:31:51 +00:00
flux-bot
50c53fcf59 chore(maintenance): automated image update 2026-05-28 01:30:10 +00:00
flux-bot
f1f6ef22dd chore(maintenance): automated image update 2026-05-28 01:28:51 +00:00
flux-bot
92db3b155c chore(maintenance): automated image update 2026-05-27 17:14:52 +00:00
flux-bot
f9811b88bf chore(bstein-dev-home): automated image update 2026-05-27 09:36:04 +00:00
flux-bot
df37013d8a chore(bstein-dev-home): automated image update 2026-05-27 09:35:05 +00:00
flux-bot
1e343fb135 chore(maintenance): automated image update 2026-05-26 17:15:09 +00:00
flux-bot
84b2f646fc chore(maintenance): automated image update 2026-05-26 01:32:11 +00:00
flux-bot
52305fa446 chore(maintenance): automated image update 2026-05-26 01:31:12 +00:00
flux-bot
8e900f89f9 chore(maintenance): automated image update 2026-05-26 01:30:11 +00:00
flux-bot
c9a8aa816e chore(maintenance): automated image update 2026-05-26 01:28:12 +00:00
flux-bot
7f1d430dca chore(maintenance): automated image update 2026-05-25 17:14:08 +00:00
flux-bot
76ddc811f9 chore(bstein-dev-home): automated image update 2026-05-25 09:35:17 +00:00
flux-bot
812db05a06 chore(bstein-dev-home): automated image update 2026-05-25 09:34:20 +00:00
flux-bot
eb44827abb chore(maintenance): automated image update 2026-05-25 01:31:19 +00:00
flux-bot
14c2b129d2 chore(maintenance): automated image update 2026-05-25 01:31:15 +00:00
flux-bot
1d28ab9aa4 chore(maintenance): automated image update 2026-05-25 01:30:14 +00:00
flux-bot
f959b98797 chore(maintenance): automated image update 2026-05-25 01:28:17 +00:00
flux-bot
9c4fcfffed chore(maintenance): automated image update 2026-05-24 17:14:17 +00:00
flux-bot
5100e37471 chore(bstein-dev-home): automated image update 2026-05-24 09:37:52 +00:00
flux-bot
39342e7910 chore(bstein-dev-home): automated image update 2026-05-24 09:36:52 +00:00
flux-bot
c3d37fc203 chore(maintenance): automated image update 2026-05-23 17:14:51 +00:00
flux-bot
2859e0f0dd chore(bstein-dev-home): automated image update 2026-05-23 09:38:17 +00:00
flux-bot
f62664c419 chore(bstein-dev-home): automated image update 2026-05-23 09:37:24 +00:00
flux-bot
fae4f2bbcd chore(maintenance): automated image update 2026-05-23 01:40:16 +00:00
flux-bot
0e00181cb4 chore(maintenance): automated image update 2026-05-23 01:37:55 +00:00
flux-bot
f50de4cd49 chore(maintenance): automated image update 2026-05-23 01:36:05 +00:00
jenkins
cf8baafed1 maintenance: document node recovery guardrails 2026-05-22 17:21:59 -03:00
jenkins
c7edc81239 maintenance: stabilize recovered worker nodes 2026-05-22 17:10:01 -03:00
jenkins
46c3e97688 maintenance: make titan-22 link keeper passive 2026-05-22 15:56:50 -03:00
jenkins
5bce6c4c04 openclaw: allow recovered workers while excluding hdd nodes 2026-05-22 15:33:28 -03:00
jenkins
ee5688f297 maintenance: track titan-22 link recovery 2026-05-22 15:25:41 -03:00
flux-bot
c54c7b4452 chore(maintenance): automated image update 2026-05-22 17:11:37 +00:00
jenkins
17dc9a6e52 scheduling: target hdd storage node exclusions 2026-05-22 14:02:17 -03:00
jenkins
155d7d020e scheduling: keep apps off longhorn storage nodes 2026-05-22 13:38:29 -03:00
jenkins
f383818f93 nextcloud: keep collabora off descheduler 2026-05-22 06:57:01 -03:00
jenkins
1fe125b8b3 game-stream(wolf): expose runtime sockets to app containers 2026-05-22 05:37:52 -03:00
jenkins
361a4decb3 game-stream(wolf): retain failed app containers 2026-05-22 05:28:38 -03:00
jenkins
2aea5f4ace game-stream(wolf): use manual Nvidia driver mount 2026-05-22 05:12:41 -03:00
jenkins
ce13ac054c game-stream(wolf): mount Nvidia driver volume 2026-05-22 05:08:57 -03:00
jenkins
a19a19fbd5 maintenance(titan-24): avoid unnecessary Docker restarts 2026-05-22 05:07:40 -03:00
jenkins
f1a72d64fd gpu(titan-24): populate Nvidia driver volume without exec 2026-05-22 05:05:02 -03:00
jenkins
ac9c481ce7 gpu(titan-24): fix Nvidia driver volume bootstrap 2026-05-22 05:02:59 -03:00
jenkins
2ff55289a8 gpu(titan-24): prepare Wolf Nvidia driver volume 2026-05-22 04:59:52 -03:00
jenkins
2d8405d299 crypto: throttle mining during recovery 2026-05-22 04:26:29 -03:00
jenkins
5e27384ea2 monitoring(gpu): show activity share by namespace 2026-05-22 04:22:51 -03:00
flux-bot
ec972a52f1 chore(bstein-dev-home): automated image update 2026-05-22 07:07:12 +00:00
flux-bot
10eed46e81 chore(bstein-dev-home): automated image update 2026-05-22 07:06:25 +00:00
jenkins
d21b61f6d9 monitoring(gpu): count monitored GPU pool devices 2026-05-22 03:23:36 -03:00
jenkins
b367c6dea3 monitoring: keep quality probe on worker nodes 2026-05-22 03:16:01 -03:00
jenkins
6388ef5c6d monitoring(gpu): add pool utilization counters 2026-05-22 03:09:10 -03:00
flux-bot
4ce5a67b94 chore(bstein-dev-home): automated image update 2026-05-22 06:08:50 +00:00
flux-bot
1375bac117 chore(bstein-dev-home): automated image update 2026-05-22 06:08:18 +00:00
jenkins
570b1212d7 monitoring(gpu): normalize utilization pie to pool capacity 2026-05-22 02:55:24 -03:00
jenkins
ea21e106cf keycloak(portal): allow groups scope 2026-05-22 02:48:10 -03:00
jenkins
1c50af1d72 ci(data-prepper): avoid titan-04 during recovery 2026-05-22 02:37:21 -03:00
jenkins
b5dc723e02 monitoring(gpu): hide zero-utilization namespaces 2026-05-22 02:35:51 -03:00
flux-bot
3f24fbdc6d chore(bstein-dev-home): automated image update 2026-05-22 05:33:39 +00:00
flux-bot
1cd9fd18f4 chore(bstein-dev-home): automated image update 2026-05-22 05:32:46 +00:00
flux-bot
e7ad2c3955 chore(maintenance): automated image update 2026-05-22 05:28:56 +00:00
jenkins
fd3da0e2ae monitoring(gpu): add process-level utilization attribution 2026-05-22 02:28:08 -03:00
jenkins
5513608b1a monitoring(gpu): remove ambiguous shared wording 2026-05-22 01:55:25 -03:00
jenkins
72e4dcd84b monitoring(gpu): attribute utilization to namespaces 2026-05-22 01:46:32 -03:00
jenkins
26af225f06 ci(data-prepper): allow recovered titan-04 agents 2026-05-22 01:40:44 -03:00
flux-bot
e368927a0e chore(maintenance): automated image update 2026-05-22 01:50:35 +00:00
flux-bot
825a7a7f37 chore(maintenance): automated image update 2026-05-22 01:50:24 +00:00
flux-bot
0719b5317f chore(maintenance): automated image update 2026-05-22 01:47:23 +00:00
flux-bot
f0ed508277 chore(maintenance): automated image update 2026-05-22 01:42:20 +00:00
flux-bot
ca2ffd52ab chore(bstein-dev-home): automated image update 2026-05-21 22:21:25 +00:00
flux-bot
a4a75a5dda chore(bstein-dev-home): automated image update 2026-05-21 21:40:21 +00:00
jenkins
b44915d158 ci(titan-iac): observe Sonar while project gate is baselined 2026-05-21 17:56:44 -03:00
jenkins
80bc7be00b game-stream: roll gatekeeper on firewall script changes 2026-05-21 17:22:21 -03:00
jenkins
0eba74d9b3 game-stream: gate Moonlight before node routing 2026-05-21 17:17:07 -03:00
flux-bot
0f84be5083 chore(bstein-dev-home): automated image update 2026-05-21 20:14:09 +00:00
flux-bot
19f477ccc8 chore(bstein-dev-home): automated image update 2026-05-21 20:12:06 +00:00
jenkins
3f6970aa1a game-stream: use official WolfManager image 2026-05-21 16:59:47 -03:00
flux-bot
0ff3342ea6 chore(maintenance): automated image update 2026-05-21 19:49:44 +00:00
jenkins
807a31679c ci(titan-iac): exclude privileged game-stream manifests from sonar 2026-05-21 16:28:14 -03:00
jenkins
f064c5b47b game-stream: avoid gatekeeper service env collision 2026-05-21 16:23:08 -03:00
jenkins
d89fec8ae5 game-stream: add Wolf portal access controls 2026-05-21 15:54:56 -03:00
jenkins
1332b611a3 vault-csi: tolerate busy-node probe delays 2026-05-21 15:52:48 -03:00
jenkins
4e82df6891 monitoring(gpu): show utilization with idle fallback 2026-05-21 15:26:02 -03:00
jenkins
6042d8f714 logging: make opensearch tune idempotent 2026-05-21 14:27:56 -03:00
flux-bot
c69ef0064f chore(maintenance): automated image update 2026-05-21 17:14:27 +00:00
jenkins
939231dd6a logging: tune opensearch for single-node recovery 2026-05-21 14:12:36 -03:00
jenkins
e3c05095f8 logging: trim active pod logs on constrained nodes 2026-05-21 13:28:37 -03:00
jenkins
d9955af899 monitoring(gpu): clarify reservation accounting 2026-05-21 13:04:58 -03:00
flux-bot
39db0471d7 chore(maintenance): automated image update 2026-05-21 10:08:55 +00:00
jenkins
323bf85c12 game-stream: pass Wolf OIDC token to Ariadne 2026-05-21 07:07:30 -03:00
flux-bot
38b140580b chore(bstein-dev-home): automated image update 2026-05-21 09:47:03 +00:00
flux-bot
74fb699bac chore(bstein-dev-home): automated image update 2026-05-21 09:44:52 +00:00
flux-bot
e3305b7ddd chore(maintenance): automated image update 2026-05-21 09:09:24 +00:00
flux-bot
4cb49d97f4 chore(maintenance): automated image update 2026-05-21 08:49:11 +00:00
jenkins
dfa53aec9e game-stream: point Wolf proxy at Ariadne service 2026-05-21 05:15:23 -03:00
jenkins
608386a820 logging(opensearch): place recovery pod on titan-05 2026-05-21 04:36:01 -03:00
jenkins
d94535d828 logging(opensearch): pin init container requests 2026-05-21 04:18:06 -03:00
jenkins
c3dcf60145 logging(opensearch): lower cpu request for rpi5 scheduling 2026-05-21 04:14:36 -03:00
jenkins
fb7dd5e5d3 logging(opensearch): patch rendered cpu limit 2026-05-21 03:53:35 -03:00
jenkins
df960fb519 logging(opensearch): set cpu limit above request 2026-05-21 03:49:08 -03:00
jenkins
9544b59380 logging(opensearch): raise heap for ingest pressure 2026-05-21 03:41:38 -03:00
jenkins
c75902a8ef logging(data-prepper): allow slow startup before liveness 2026-05-21 03:35:26 -03:00
jenkins
8fbe82eb5b game-stream: expose Wolf Moonlight ports 2026-05-21 03:30:40 -03:00
jenkins
409295f8cb logging(data-prepper): include bc in runtime image 2026-05-21 03:26:57 -03:00
jenkins
6240133fb4 sso(keycloak): suspend portal admin role ensure 2026-05-21 03:22:44 -03:00
jenkins
25ca8f92a3 agent(openclaw): rely on oauth for control ui auth 2026-05-21 03:15:18 -03:00
jenkins
85468110b3 sso(keycloak): grant portal admin client management 2026-05-21 02:43:19 -03:00
jenkins
ccf76f2c7d agent(openclaw): keep gateway state off jetsons 2026-05-21 02:42:53 -03:00
jenkins
c5dc6a6c80 agent(openclaw): persist gateway state 2026-05-21 02:39:53 -03:00
flux-bot
89345cfddc chore(maintenance): automated image update 2026-05-21 05:38:10 +00:00
jenkins
1470cea862 game-stream: deploy Wolf foundation 2026-05-21 02:07:17 -03:00
jenkins
9f61cff34e maintenance(titan-24): configure Docker NVIDIA runtime 2026-05-21 02:01:29 -03:00
jenkins
e0707b68c6 maintenance(titan-24): start Docker through host systemd 2026-05-21 01:58:27 -03:00
jenkins
b67120ef79 agent(openclaw): isolate oauth cookie state 2026-05-21 01:55:32 -03:00
jenkins
f3a1037dcd maintenance(titan-24): install Docker for Wolf 2026-05-21 01:54:31 -03:00
flux-bot
a408b0bd43 chore(maintenance): automated image update 2026-05-21 01:39:54 +00:00
flux-bot
66594b70c8 chore(maintenance): automated image update 2026-05-21 01:39:05 +00:00
flux-bot
830883777d chore(maintenance): automated image update 2026-05-21 01:37:55 +00:00
flux-bot
eed588cab0 chore(maintenance): automated image update 2026-05-21 01:35:05 +00:00
jenkins
ad99b399f6 comms: trim vault agent scheduling requests 2026-05-20 19:52:43 -03:00
jenkins
c51078c6a3 comms: avoid singleton rollout surge deadlocks 2026-05-20 19:43:48 -03:00
jenkins
3d1af76df7 comms: keep rpi5 workloads off control plane 2026-05-20 19:37:42 -03:00
jenkins
440aec861e comms(mas): keep auth service off control plane 2026-05-20 19:18:34 -03:00
jenkins
1fc7233267 agent(openclaw): trust oauth proxy identity 2026-05-20 18:35:08 -03:00
jenkins
af8a163e70 agent(openclaw): allow public control origin 2026-05-20 18:30:19 -03:00
jenkins
5ec561f620 monitoring(grafana): lower recovery scheduling requests 2026-05-20 18:16:04 -03:00
jenkins
eb4a197eb7 core(nodes): mark rpi4 spillover workers 2026-05-20 18:14:49 -03:00
jenkins
f010d0547f monitoring(grafana): keep off control plane spillover 2026-05-20 18:07:11 -03:00
jenkins
d822c93829 quality: cap placement preference weights 2026-05-20 17:49:09 -03:00
jenkins
5a547f6f01 quality: keep sonar off control plane 2026-05-20 17:45:56 -03:00
jenkins
6fae3edd67 quality: allow sonar to spill onto rpi4 workers 2026-05-20 17:43:39 -03:00
jenkins
ed81d52dd9 monitoring(grafana): avoid fragile placement and init pull 2026-05-20 17:25:39 -03:00
jenkins
8ce8b1aac2 agent(openclaw): expose oauth protected UI 2026-05-20 17:22:12 -03:00
jenkins
400077436b monitoring(grafana): harden scheduling and readiness 2026-05-20 17:00:33 -03:00
jenkins
f2ae3c1b0c monitoring(testing): make branch filter static 2026-05-20 15:10:24 -03:00
jenkins
974955ac83 monitoring(testing): backfill category health rollups 2026-05-20 14:39:07 -03:00
flux-bot
0ea80a8a19 chore(maintenance): automated image update 2026-05-20 17:15:40 +00:00
jenkins
1c6c3992cf monitoring(testing): reduce month-range query cost 2026-05-20 13:26:33 -03:00
jenkins
109698a2e3 monitoring(testing): attach branch labels to run rollups 2026-05-20 12:54:12 -03:00
jenkins
e380b65eb9 monitoring(testing): memoize dashboard freshness panels 2026-05-20 12:45:50 -03:00
jenkins
3a06d29387 monitoring(testing): record check health timelines 2026-05-20 12:14:49 -03:00
jenkins
b70afe2f03 monitoring(testing): memoize slow dashboard panels 2026-05-20 11:52:25 -03:00
jenkins
fe37f12e32 monitoring(testing): surface current gate health 2026-05-20 11:01:28 -03:00
flux-bot
f9641a22b8 chore(maintenance): automated image update 2026-05-20 11:15:51 +00:00
flux-bot
55d554f22b chore(maintenance): automated image update 2026-05-20 11:15:42 +00:00
flux-bot
238ceb5f9b chore(maintenance): automated image update 2026-05-20 11:14:44 +00:00
flux-bot
e7a3266143 chore(maintenance): automated image update 2026-05-20 11:12:41 +00:00
flux-bot
17ce769284 chore(maintenance): automated image update 2026-05-20 10:49:37 +00:00
flux-bot
ae47cd9de5 chore(maintenance): automated image update 2026-05-20 10:49:31 +00:00
flux-bot
c0bb270087 chore(maintenance): automated image update 2026-05-20 10:46:31 +00:00
flux-bot
8b1dd7cb2b chore(maintenance): automated image update 2026-05-20 10:42:30 +00:00
jenkins
f80044258f ci(data-prepper): keep recovery builds schedulable 2026-05-20 07:37:59 -03:00
flux-bot
c633079532 chore(bstein-dev-home): automated image update 2026-05-20 10:37:19 +00:00
flux-bot
6032f6daef chore(bstein-dev-home): automated image update 2026-05-20 10:36:12 +00:00
jenkins
bf2c0c5e4d ci(data-prepper): avoid unstable build nodes 2026-05-20 07:26:13 -03:00
flux-bot
47be1e7c70 chore(bstein-dev-home): automated image update 2026-05-20 10:18:15 +00:00
flux-bot
df56762342 chore(bstein-dev-home): automated image update 2026-05-20 10:16:09 +00:00
flux-bot
afb3955116 chore(maintenance): automated image update 2026-05-20 10:04:07 +00:00
jenkins
b261834537 ci(jenkins): soften agent spread constraints 2026-05-20 06:53:29 -03:00
jenkins
777f4abe69 fix(jenkins): remove deprecated cloud cap field 2026-05-20 06:40:44 -03:00
jenkins
3cb3a39b49 ops: restart jenkins for spread policy 2026-05-20 06:34:43 -03:00
jenkins
26629205fb ci: tighten agent spread and sweeper limits 2026-05-20 06:33:12 -03:00
flux-bot
a914b2fd05 chore(maintenance): automated image update 2026-05-20 09:25:28 +00:00
jenkins
4a197c870e ci(data-prepper): relax agent spread scheduling 2026-05-20 06:02:36 -03:00
jenkins
974dd84938 ci(jenkins): keep recovery agents schedulable 2026-05-20 05:53:42 -03:00
flux-bot
8fba90f8df chore(maintenance): automated image update 2026-05-20 08:41:55 +00:00
jenkins
178e523bc2 ops(traefik): keep ingress available during balancing 2026-05-20 05:23:49 -03:00
jenkins
0115f5f684 ci(titan-iac): relax agent scheduling 2026-05-20 04:58:40 -03:00
jenkins
269136bee9 ops(cert-manager): keep admission webhook available 2026-05-20 04:51:39 -03:00
jenkins
3676fe058f ops: keep jenkins controller on rpi5 2026-05-20 04:42:12 -03:00
jenkins
75cd2eb39f ops: restart jenkins for placement policy 2026-05-20 04:29:40 -03:00
jenkins
48e434a028 ops: harden ci placement and gpu idle reporting 2026-05-20 04:27:26 -03:00
flux-bot
d2202b6955 chore(maintenance): automated image update 2026-05-20 07:15:57 +00:00
flux-bot
8e706d9900 chore(maintenance): automated image update 2026-05-20 06:50:18 +00:00
jenkins
a30d0fffa4 triage: wire openclaw local diagnosis 2026-05-20 03:14:50 -03:00
flux-bot
878f9ed9b8 chore(maintenance): automated image update 2026-05-20 06:08:37 +00:00
flux-bot
e3112ccb6a chore(maintenance): automated image update 2026-05-20 05:47:28 +00:00
flux-bot
e7145094d9 chore(maintenance): automated image update 2026-05-20 05:39:26 +00:00
flux-bot
8511c90178 chore(maintenance): automated image update 2026-05-20 05:29:24 +00:00
jenkins
50e20a7805 mailu: allow tika slow startup 2026-05-20 02:23:36 -03:00
flux-bot
594e02a518 chore(maintenance): automated image update 2026-05-20 05:21:23 +00:00
jenkins
b04726b6e2 mailu: avoid unhealthy titan-14 placement 2026-05-20 02:13:54 -03:00
jenkins
36487543bb vault: prefer rpi5 for injector 2026-05-20 02:03:35 -03:00
flux-bot
265d9df2ac chore(maintenance): automated image update 2026-05-20 04:58:11 +00:00
jenkins
287699b3db maintenance: prefer rpi5 for ariadne 2026-05-20 01:57:03 -03:00
jenkins
54ec6ebdca quality: give sonarqube a startup probe 2026-05-20 01:40:15 -03:00
flux-bot
617186bd20 chore(maintenance): automated image update 2026-05-20 04:38:06 +00:00
flux-bot
8d45fa9e2b chore(maintenance): automated image update 2026-05-20 04:33:07 +00:00
flux-bot
63a1b7bb4f chore(maintenance): automated image update 2026-05-20 04:27:23 +00:00
jenkins
84ecb09328 quality: recreate sonarqube on pvc moves 2026-05-20 01:24:38 -03:00
jenkins
fe172e0cc6 quality: right-size sonarqube cpu request 2026-05-20 01:21:11 -03:00
flux-bot
9b3c7244cc chore(maintenance): automated image update 2026-05-20 04:20:02 +00:00
flux-bot
b879064146 chore(maintenance): automated image update 2026-05-20 04:08:01 +00:00
flux-bot
ae4ceef711 chore(maintenance): automated image update 2026-05-20 04:01:58 +00:00
jenkins
5230341d21 quality: keep sonarqube on rpi5 workers 2026-05-20 00:57:24 -03:00
jenkins
5bf7a5ac20 test(dashboards): expect memoized gate rollups 2026-05-20 00:36:31 -03:00
jenkins
8bf9e12b4e openclaw: allow gateway on jetson lane 2026-05-19 23:37:16 -03:00
jenkins
b648a66f9a jenkins: allow agents to fall back to rpi4 2026-05-19 23:30:34 -03:00
jenkins
dd20678c46 openclaw: route testing triage through ariadne 2026-05-19 23:30:29 -03:00
jenkins
2c4b1b9cc9 monitoring: hide idle gpu share during activity 2026-05-19 23:30:22 -03:00
jenkins
ea2071bbc5 monitoring(testing): memoize gate check health 2026-05-19 22:41:02 -03:00
jenkins
a32995b1a1 openclaw: fix gateway scratch permissions 2026-05-19 20:37:50 -03:00
jenkins
07a2dfbb92 jenkins: lower controller scheduling requests 2026-05-19 20:37:07 -03:00
jenkins
0a59d6f26e openclaw: tune triage kubectl guidance 2026-05-19 20:34:12 -03:00
jenkins
7688673072 openclaw: restart gateway on model changes 2026-05-19 20:29:35 -03:00
jenkins
1776a3266c jenkins: avoid HDD storage nodes for controller 2026-05-19 20:29:12 -03:00
jenkins
c667c97089 openclaw: use gpu-sized triage model 2026-05-19 20:28:26 -03:00
jenkins
44bfb5a68e openclaw: split gateway and inference placement 2026-05-19 20:23:42 -03:00
jenkins
aadc93e681 openclaw: move mvp inference to titan-24 2026-05-19 20:14:44 -03:00
jenkins
dd4aff8861 openclaw: keep mvp access internal 2026-05-19 20:10:15 -03:00
jenkins
3142d35403 openclaw: pin mvp model lane to titan-21 2026-05-19 19:54:11 -03:00
jenkins
887023eaeb ci(testing): treat optional supply-chain as non-blocking 2026-05-19 19:51:10 -03:00
jenkins
e8fb92a44f openclaw: use smaller local triage model 2026-05-19 19:45:10 -03:00
jenkins
3a8a53133c openclaw: pin arm64 gateway image 2026-05-19 19:30:23 -03:00
jenkins
1fcb12bcb8 openclaw: use ephemeral jetson state for mvp 2026-05-19 19:27:22 -03:00
jenkins
1bc58e10c0 openclaw: add testing triage workspace 2026-05-19 19:17:14 -03:00
jenkins
b7caf4cfec maintenance: document rpi reservation privileges 2026-05-19 18:52:45 -03:00
jenkins
2464c61339 ci(jenkins): avoid brittle agent nodes 2026-05-19 18:26:51 -03:00
jenkins
c923be8ff1 ops: fix descheduler policy for chart api 2026-05-19 18:06:30 -03:00
jenkins
4bd83a1aa8 cert-manager: harden webhook resources 2026-05-19 17:35:10 -03:00
jenkins
69ff5b8bb2 vault: use http health probes 2026-05-19 17:25:56 -03:00
jenkins
92c2cf2127 ci(data-prepper): relax agent placement 2026-05-19 17:09:06 -03:00
jenkins
baa8e96fcc quality: loosen sonarqube placement pressure 2026-05-19 16:41:03 -03:00
jenkins
37fe1d5d24 ci(jenkins): reduce default agent cpu request 2026-05-19 16:21:43 -03:00
jenkins
24920c8a56 ops: keep scavenger work out of scheduler headroom 2026-05-19 16:02:10 -03:00
jenkins
ba84082a1e ci(jenkins): tolerate missing workspace in post actions 2026-05-19 15:57:50 -03:00
jenkins
e3e8a046e4 ops: stage rpi reservations without auto restart 2026-05-19 15:51:05 -03:00
jenkins
8806739d3d harbor: lower redis bootstrap request 2026-05-19 15:13:16 -03:00
jenkins
bf908556bf ops: restart rpi agents through host namespace 2026-05-19 15:00:31 -03:00
jenkins
4be03e1514 harbor: keep bootstrap workloads on titan-11 2026-05-19 14:31:41 -03:00
jenkins
a8a17e7978 ops: enforce rpi kubelet reservations via systemd 2026-05-19 14:23:35 -03:00
jenkins
c982b86136 ci(jenkins): roll controller for plugin refresh 2026-05-19 14:07:34 -03:00
jenkins
10a5776c79 ops: roll rpi reservation daemonset 2026-05-19 13:57:10 -03:00
jenkins
399efa46e4 ci(jenkins): bump kubernetes plugin 2026-05-19 13:45:26 -03:00
jenkins
2d46a2b8fb ops: relax p2pool scheduling request 2026-05-19 13:45:02 -03:00
jenkins
0ae76bf1ca ops: tune crypto guardrails after rollout 2026-05-19 13:35:56 -03:00
jenkins
a3e14ce0f2 ops: add resource guardrails for rpi workers 2026-05-19 12:48:40 -03:00
jenkins
c75e0d1b88 monitoring(testing): roll up current test case state 2026-05-19 11:09:29 -03:00
jenkins
eb003f5b32 monitoring(testing): avoid nested f-string parser drift 2026-05-19 08:11:04 -03:00
jenkins
5a356e8aed monitoring: avoid titan-04 for quality gateway 2026-05-19 07:52:18 -03:00
jenkins
e29299a90d monitoring(testing): dedupe run counters by scrape target 2026-05-19 07:46:06 -03:00
flux-bot
c898e71242 chore(bstein-dev-home): automated image update 2026-05-19 09:34:45 +00:00
flux-bot
afcffc6903 chore(bstein-dev-home): automated image update 2026-05-19 09:33:35 +00:00
jenkins
8bf3d63bae monitoring(testing): prefer fresh coverage metrics 2026-05-19 06:31:04 -03:00
jenkins
5d80f882ae monitoring(testing): ignore stale replaced check states 2026-05-19 04:25:56 -03:00
jenkins
ba9b72312a monitoring(testing): derive gate health from raw checks 2026-05-19 03:59:55 -03:00
jenkins
813d057c6d monitoring(testing): clarify category panel window 2026-05-19 02:29:16 -03:00
jenkins
9789ff5338 test(dashboards): align testing row title 2026-05-18 23:42:43 -03:00
flux-bot
6ca3449f76 chore(maintenance): automated image update 2026-05-19 01:33:25 +00:00
flux-bot
84ebdd3e56 chore(maintenance): automated image update 2026-05-19 01:33:01 +00:00
flux-bot
d83bb17cdf chore(maintenance): automated image update 2026-05-19 01:31:25 +00:00
flux-bot
764df923a0 chore(maintenance): automated image update 2026-05-19 01:29:59 +00:00
jenkins
3102862ee9 monitoring(testing): clarify CI run and test history labels 2026-05-18 21:04:14 -03:00
jenkins
3d043424b4 monitoring(testing): filter test category rollups 2026-05-18 19:32:17 -03:00
jenkins
cc2a98b0a2 monitoring(testing): keep latest gate state current 2026-05-18 15:18:22 -03:00
jenkins
28f401cce1 monitoring(testing): count gate checks as boolean states 2026-05-18 14:53:15 -03:00
jenkins
0de90d622a monitoring(testing): clarify CI run health labels 2026-05-18 14:18:56 -03:00
flux-bot
17628a060f chore(maintenance): automated image update 2026-05-18 17:10:49 +00:00
jenkins
aa750f18b0 monitoring(overview): simplify test category lanes 2026-05-18 11:42:42 -03:00
flux-bot
968ab0ff6e chore(maintenance): automated image update 2026-05-18 12:00:56 +00:00
flux-bot
00c35d93ee chore(maintenance): automated image update 2026-05-18 11:59:55 +00:00
flux-bot
3f843f9a18 chore(maintenance): automated image update 2026-05-18 11:57:58 +00:00
flux-bot
30b8affe5b chore(maintenance): automated image update 2026-05-18 11:54:54 +00:00
flux-bot
4a38d0eef2 chore(maintenance): automated image update 2026-05-18 11:46:33 +00:00
flux-bot
9fa5bb6225 chore(bstein-dev-home): automated image update 2026-05-18 09:39:21 +00:00
flux-bot
7eed659692 chore(bstein-dev-home): automated image update 2026-05-18 09:37:22 +00:00
jenkins
6ed0a1f18e monitoring: persist quality gateway metrics 2026-05-18 03:59:31 -03:00
flux-bot
50b76f56d7 chore(maintenance): automated image update 2026-05-18 01:29:30 +00:00
flux-bot
a721546d61 chore(maintenance): automated image update 2026-05-18 01:28:37 +00:00
flux-bot
b0b500a4f6 chore(maintenance): automated image update 2026-05-18 01:27:29 +00:00
flux-bot
404e12c05d chore(maintenance): automated image update 2026-05-18 01:26:36 +00:00
jenkins
f48ed0cd7d monitoring(testing): restrict category panels to taxonomy 2026-05-17 18:26:57 -03:00
jenkins
ea3366c913 monitoring(overview): restore category timeline 2026-05-17 18:01:37 -03:00
jenkins
52bee00432 monitoring(overview): clean up category pass-rate panel 2026-05-17 16:47:17 -03:00
jenkins
eb39335d59 monitoring(testing): use latest check status in gate panels 2026-05-17 15:54:29 -03:00
jenkins
690c82e61b monitoring(vmalert): roll test pass-rate rule update 2026-05-17 15:32:24 -03:00
jenkins
3e8667d48a monitoring(testing): count failed test cases in pass-rate rollup 2026-05-17 15:27:21 -03:00
flux-bot
674f953e55 chore(maintenance): automated image update 2026-05-17 17:11:45 +00:00
flux-bot
bc54865f5c chore(bstein-dev-home): automated image update 2026-05-17 10:05:38 +00:00
flux-bot
2c91470411 chore(bstein-dev-home): automated image update 2026-05-17 10:02:34 +00:00
jenkins
65c6e123cf monitoring(testing): prefer fresh suite quality samples 2026-05-17 05:54:28 -03:00
flux-bot
4a32ad5fe5 chore(maintenance): automated image update 2026-05-17 07:00:02 +00:00
flux-bot
de1940ae00 chore(maintenance): automated image update 2026-05-17 06:58:04 +00:00
flux-bot
35eeb39bdc chore(maintenance): automated image update 2026-05-17 06:48:48 +00:00
flux-bot
fd490d69f5 chore(maintenance): automated image update 2026-05-17 06:43:47 +00:00
jenkins
2d88aab3a3 monitoring: clarify fresh suite signal 2026-05-17 00:28:07 -03:00
jenkins
a69a21f05d monitoring: use fresh quality snapshots 2026-05-16 20:57:56 -03:00
jenkins
8a13b9d4e7 monitoring: require worker for quality gateway 2026-05-16 19:52:11 -03:00
jenkins
cbf345cfcf monitoring: roll quality gateway without surge 2026-05-16 19:50:24 -03:00
jenkins
b5a79e8091 monitoring: keep quality gateway on stable workers 2026-05-16 19:46:56 -03:00
jenkins
c404a967d0 monitoring: attach build branch to quality rollups 2026-05-16 18:22:16 -03:00
jenkins
42ce51baad monitoring: make current gate rollup use latest checks 2026-05-16 18:07:21 -03:00
jenkins
7f209fbbc9 monitoring: refresh quality rollups every minute 2026-05-16 18:00:52 -03:00
jenkins
65298d7357 monitoring: keep quality check status without branch labels 2026-05-16 17:49:37 -03:00
jenkins
7dc03eefce monitoring: reload vmalert quality rules 2026-05-16 17:29:52 -03:00
jenkins
e0c92aa49d monitoring: count only canonical reporting suites 2026-05-16 17:26:20 -03:00
jenkins
588cc3aa14 monitoring: clarify quality gate dashboard tooltips 2026-05-16 17:03:58 -03:00
jenkins
ad86195436 monitoring: add typhon and category test telemetry 2026-05-16 15:38:26 -03:00
flux-bot
8d4ed6b584 chore(maintenance): automated image update 2026-05-16 17:10:16 +00:00
jenkins
df429a57f2 jenkins: reload casc for lesavka schedule 2026-05-16 13:51:55 -03:00
jenkins
c43f7d84e8 monitoring: polish testing dashboard telemetry 2026-05-16 13:48:01 -03:00
jenkins
7bb2b90a13 monitoring: fix gate health status query 2026-05-16 12:31:19 -03:00
jenkins
a739f14a86 monitoring: disable fan intensity legend safely 2026-05-16 12:17:33 -03:00
jenkins
02e8e633a5 monitoring: hide fan threshold legend 2026-05-16 12:14:22 -03:00
jenkins
160c960345 monitoring: add lesavka category test telemetry 2026-05-16 12:07:57 -03:00
jenkins
86034e0aac monitoring: clean overview power and gate panels 2026-05-16 06:38:55 -03:00
flux-bot
c18b43c294 chore(bstein-dev-home): automated image update 2026-05-16 09:34:55 +00:00
flux-bot
5210a700bf chore(bstein-dev-home): automated image update 2026-05-16 09:33:58 +00:00
jenkins
fbf768e90f monitoring: make overview history panels visible 2026-05-16 06:11:22 -03:00
jenkins
4238262ad3 monitoring: fix gpu share and overview legends 2026-05-16 05:58:59 -03:00
jenkins
3492b6026e monitoring: fix overview fan and gate timelines 2026-05-16 05:34:24 -03:00
jenkins
5eef2e9ba3 monitoring: refine overview timeline readability 2026-05-16 05:18:53 -03:00
jenkins
5d01b3a60d monitoring: trial overview health timelines 2026-05-16 05:08:09 -03:00
jenkins
2ede953580 monitoring: trial overview right rail layout 2026-05-16 03:31:04 -03:00
jenkins
1cfc846ffc monitoring: retire duplicate jobs dashboard 2026-05-16 03:04:27 -03:00
jenkins
8fb5831e00 monitoring: publish atlas testing dashboard 2026-05-16 02:56:52 -03:00
jenkins
b6c921b291 monitoring: clarify gitops and check timelines 2026-05-16 02:21:05 -03:00
jenkins
b4229c5a8f monitoring: refine overview and failure colors 2026-05-15 22:44:25 -03:00
flux-bot
f0520d652a chore(maintenance): automated image update 2026-05-16 01:31:40 +00:00
flux-bot
959a60b73a chore(maintenance): automated image update 2026-05-16 01:31:36 +00:00
flux-bot
1c4c24724f chore(maintenance): automated image update 2026-05-16 01:29:34 +00:00
flux-bot
29881676e8 chore(maintenance): automated image update 2026-05-16 01:27:34 +00:00
jenkins
4527f29e7e monitoring: clarify testing and gitops dashboards 2026-05-15 22:07:41 -03:00
jenkins
a2d5c9c83e monitoring(testing): use lane timelines for test health 2026-05-15 21:05:13 -03:00
jenkins
792ac2b946 monitoring(testing): clarify run and compliance history 2026-05-15 20:00:40 -03:00
jenkins
944a778c0a monitoring: clarify testing dashboard health trends 2026-05-15 19:52:46 -03:00
jenkins
2b9cb84383 monitoring: add gitops dashboard status 2026-05-15 19:37:03 -03:00
jenkins
045d144268 flux: decouple apps from traefik readiness 2026-05-15 16:43:31 -03:00
jenkins
b794e3b514 flux: serialize kustomization reconciles 2026-05-15 16:11:12 -03:00
jenkins
3a39d37995 flux: jitter kustomization intervals 2026-05-15 15:53:02 -03:00
jenkins
8d8b3fc821 flux: avoid child kustomization apply churn 2026-05-15 15:28:53 -03:00
jenkins
b18df4caad flux: calm bootstrap polling 2026-05-15 15:12:38 -03:00
jenkins
cf20efed66 flux: reduce bootstrap reconcile churn 2026-05-15 15:08:27 -03:00
jenkins
6adbe457c4 monitoring: tune testing dashboard and gate rollups 2026-05-15 14:26:06 -03:00
flux-bot
0c11a64d25 chore(maintenance): automated image update 2026-05-15 17:11:05 +00:00
jenkins
c79489d0b8 recovery: keep storage nodes as spillover only 2026-05-15 11:52:26 -03:00
flux-bot
67253315f0 chore(bstein-dev-home): automated image update 2026-05-15 09:35:00 +00:00
flux-bot
fa8ab0840b chore(bstein-dev-home): automated image update 2026-05-15 09:33:54 +00:00
flux-bot
bf5550762e chore(maintenance): automated image update 2026-05-15 01:31:39 +00:00
flux-bot
39e023e8f3 chore(maintenance): automated image update 2026-05-15 01:30:50 +00:00
flux-bot
fd0d748c33 chore(maintenance): automated image update 2026-05-15 01:29:38 +00:00
flux-bot
77956ab811 chore(maintenance): automated image update 2026-05-15 01:27:49 +00:00
flux-bot
3ea233abcb chore(maintenance): automated image update 2026-05-14 17:10:25 +00:00
flux-bot
93bc3dfbe5 chore(bstein-dev-home): automated image update 2026-05-14 09:34:15 +00:00
flux-bot
4ca62f6fb5 chore(bstein-dev-home): automated image update 2026-05-14 09:34:04 +00:00
flux-bot
6914b92e67 chore(maintenance): automated image update 2026-05-14 01:32:55 +00:00
flux-bot
613d496491 chore(maintenance): automated image update 2026-05-14 01:32:10 +00:00
flux-bot
570c077190 chore(maintenance): automated image update 2026-05-14 01:30:54 +00:00
flux-bot
b401a4e49f chore(maintenance): automated image update 2026-05-14 01:29:05 +00:00
flux-bot
559bdf2a72 chore(maintenance): automated image update 2026-05-13 17:12:37 +00:00
flux-bot
f3a7fe58c4 chore(bstein-dev-home): automated image update 2026-05-13 09:34:29 +00:00
flux-bot
46ab392e97 chore(bstein-dev-home): automated image update 2026-05-13 09:34:23 +00:00
flux-bot
352e136621 chore(maintenance): automated image update 2026-05-13 01:31:03 +00:00
flux-bot
1b265f43d5 chore(maintenance): automated image update 2026-05-13 01:30:09 +00:00
flux-bot
ecfead7193 chore(maintenance): automated image update 2026-05-13 01:29:03 +00:00
flux-bot
53f5968f8f chore(maintenance): automated image update 2026-05-13 01:27:07 +00:00
flux-bot
8dadb36b97 chore(maintenance): automated image update 2026-05-12 17:09:48 +00:00
flux-bot
74668938cc chore(bstein-dev-home): automated image update 2026-05-12 09:33:54 +00:00
flux-bot
9def813324 chore(bstein-dev-home): automated image update 2026-05-12 09:33:39 +00:00
jenkins
6811958b52 monitoring: align overview generator with restored layout 2026-05-12 04:19:36 -03:00
jenkins
d1cdb4fd13 monitoring: restore atlas overview dashboard 2026-05-12 04:00:26 -03:00
flux-bot
50580623db chore(maintenance): automated image update 2026-05-12 01:31:40 +00:00
flux-bot
7340762622 chore(maintenance): automated image update 2026-05-12 01:31:22 +00:00
flux-bot
2102a5ec76 chore(maintenance): automated image update 2026-05-12 01:30:22 +00:00
flux-bot
850ed8abf6 chore(maintenance): automated image update 2026-05-12 01:28:38 +00:00
flux-bot
05ba76ecaa chore(maintenance): automated image update 2026-05-11 21:07:59 +00:00
flux-bot
6db1c3f5da chore(maintenance): automated image update 2026-05-11 21:07:46 +00:00
flux-bot
63429fff1d chore(maintenance): automated image update 2026-05-11 21:06:45 +00:00
flux-bot
de3f7fea69 chore(maintenance): automated image update 2026-05-11 21:04:59 +00:00
flux-bot
1e5ef8dbd1 chore(maintenance): automated image update 2026-05-11 20:51:40 +00:00
flux-bot
385d21056a chore(bstein-dev-home): automated image update 2026-05-11 20:48:56 +00:00
flux-bot
3c5fa4bbe2 chore(bstein-dev-home): automated image update 2026-05-11 20:47:40 +00:00
jenkins
58adb757c4 monitoring(testing): show LOC compliance as positive percent 2026-05-11 17:36:13 -03:00
flux-bot
d01cfe9066 chore(maintenance): automated image update 2026-05-11 17:10:04 +00:00
flux-bot
d522af7bb7 chore(bstein-dev-home): automated image update 2026-05-11 16:30:19 +00:00
flux-bot
e6dd39b4c7 chore(bstein-dev-home): automated image update 2026-05-11 16:29:00 +00:00
flux-bot
4404454cb9 chore(bstein-dev-home): automated image update 2026-05-11 09:33:16 +00:00
flux-bot
59613d500f chore(bstein-dev-home): automated image update 2026-05-11 09:33:01 +00:00
flux-bot
afeae15443 chore(maintenance): automated image update 2026-05-11 04:54:34 +00:00
flux-bot
ba0155ad3b chore(maintenance): automated image update 2026-05-11 04:54:26 +00:00
flux-bot
2c048cdeda chore(maintenance): automated image update 2026-05-11 04:52:26 +00:00
flux-bot
f307c7f2af chore(maintenance): automated image update 2026-05-11 04:48:32 +00:00
jenkins
a90d84f796 monitoring(testing): use solid threshold bars 2026-05-11 01:01:46 -03:00
jenkins
dad9e4e8f2 monitoring: ignore availability scrape gaps 2026-05-10 16:38:05 -03:00
jenkins
eb57c1fe0f monitoring: count post-start availability gaps 2026-05-10 16:21:47 -03:00
jenkins
e7213d9d1c monitoring: fill pre-telemetry availability 2026-05-10 16:13:13 -03:00
jenkins
7b656dbaeb monitoring: restart vmalert on rule changes 2026-05-10 15:50:50 -03:00
jenkins
01af181442 monitoring: schedule availability rollup deterministically 2026-05-10 15:49:32 -03:00
jenkins
192a36cf8a monitoring: fix vmalert remote write endpoint 2026-05-10 15:47:34 -03:00
jenkins
7f7dde01de monitoring: precompute atlas availability rollup 2026-05-10 15:40:12 -03:00
jenkins
32ffe30145 monitoring: bound atlas availability query 2026-05-10 14:40:55 -03:00
flux-bot
521eda1c00 chore(maintenance): automated image update 2026-05-10 17:09:40 +00:00
flux-bot
49948621d0 chore(bstein-dev-home): automated image update 2026-05-10 09:34:40 +00:00
flux-bot
28b77781d1 chore(bstein-dev-home): automated image update 2026-05-10 09:33:40 +00:00
jenkins
adfbe4ed64 metis: fix pvc rollout and sentinel pulls 2026-05-10 04:36:31 -03:00
jenkins
92fbe0ebdf vaultwarden: avoid suspect workers 2026-05-10 04:27:48 -03:00
flux-bot
b0bd29696e chore(bstein-dev-home): automated image update 2026-05-10 07:13:19 +00:00
flux-bot
496b933c65 chore(bstein-dev-home): automated image update 2026-05-10 07:13:16 +00:00
jenkins
da7ee45366 postgres: avoid unstable nodes 2026-05-10 04:04:34 -03:00
flux-bot
ffdc4bef36 chore(maintenance): automated image update 2026-05-10 06:49:25 +00:00
flux-bot
3aaa96a673 chore(maintenance): automated image update 2026-05-10 06:49:19 +00:00
flux-bot
2f1eb38551 chore(maintenance): automated image update 2026-05-10 06:48:19 +00:00
flux-bot
cdda5be827 chore(bstein-dev-home): automated image update 2026-05-10 06:46:34 +00:00
flux-bot
52682b98f5 chore(maintenance): automated image update 2026-05-10 06:46:25 +00:00
flux-bot
749fa16fca chore(maintenance): automated image update 2026-05-10 06:46:16 +00:00
flux-bot
b0372c41c2 chore(bstein-dev-home): automated image update 2026-05-10 06:43:19 +00:00
jenkins
e96e8943c9 jenkins: keep ci agents on rpi5 workers 2026-05-10 03:25:09 -03:00
flux-bot
acfaa2c3c0 chore(maintenance): automated image update 2026-05-10 06:23:06 +00:00
jenkins
7fb0be3487 jenkins: spread ci agents and cap concurrency 2026-05-10 03:17:50 -03:00
jenkins
fd91537982 ci: avoid titan-06 for Jenkins agents 2026-05-10 02:24:40 -03:00
jenkins
a64d4cee56 jenkins: tolerate slow kubernetes agent exec 2026-05-10 02:17:30 -03:00
jenkins
ba3e24548a jenkins: schedule daily quality jobs 2026-05-09 23:18:32 -03:00
jenkins
4beb08f1cf scheduling: keep longhorn vault sync off storage nodes 2026-05-05 13:46:19 -03:00
jenkins
e2cbbd6963 scheduling: keep singleton apps off storage nodes 2026-05-05 13:37:04 -03:00
jenkins
c46764e80c recovery(atlas): stop post-outage control-plane churn 2026-05-05 10:42:28 -03:00
jenkins
b81053aaec ai(ollama): recover onto live jetson gpu pool 2026-05-05 06:42:15 -03:00
jenkins
9e659b790b recovery(post-outage): restore jellyfin and maintenance sync 2026-05-05 06:31:09 -03:00
jenkins
c07220253e maintenance(metis): run service on longhorn-ready workers 2026-05-05 06:19:15 -03:00
jenkins
39fb0e91e0 maintenance(metis): move runtime state to longhorn 2026-05-05 06:15:22 -03:00
jenkins
6243021ade maintenance(metis): recover on arm64 builders 2026-05-05 06:12:06 -03:00
4a6b54b4c3 logging: trim dated pod log rotations 2026-04-27 16:49:11 -03:00
6c816e9fad logging: trim constrained pod logs earlier 2026-04-27 16:42:02 -03:00
2b5c7ca10b logging: trim oversized rotated pod logs on constrained nodes 2026-04-27 16:31:57 -03:00
45b145667a longhorn: rerun settings ensure job 2026-04-27 16:16:51 -03:00
9fb8dd4839 stability: harden fluent-bit buffering and longhorn node-down recovery 2026-04-27 16:15:13 -03:00
flux-bot
6352e0d976 chore(maintenance): automated image update 2026-04-26 00:59:25 +00:00
flux-bot
d4ff5d482e chore(maintenance): automated image update 2026-04-26 00:59:05 +00:00
flux-bot
b303add71c chore(maintenance): automated image update 2026-04-26 00:57:30 +00:00
flux-bot
a42e61de61 chore(maintenance): automated image update 2026-04-26 00:55:05 +00:00
Codex
6eb0158c6c maintenance(metis): raise remote build timeout 2026-04-25 01:41:36 -03:00
Codex
0171ffad38 keycloak(metis): seed node intranet ips in vault 2026-04-24 22:18:58 -03:00
flux-bot
84934a6d1c chore(maintenance): automated image update 2026-04-24 21:39:36 +00:00
flux-bot
98a2ade86d chore(maintenance): automated image update 2026-04-24 21:39:18 +00:00
flux-bot
738a5184cb chore(maintenance): automated image update 2026-04-24 21:37:35 +00:00
flux-bot
488c2694e3 chore(maintenance): automated image update 2026-04-24 21:36:19 +00:00
flux-bot
015d99dc5f chore(maintenance): automated image update 2026-04-24 21:08:32 +00:00
flux-bot
b80745dc2d chore(maintenance): automated image update 2026-04-24 21:08:15 +00:00
jenkins
0fa1b38f95 recovery(metis): trim node vault password placeholders 2026-04-24 18:07:35 -03:00
flux-bot
49e714c88c chore(maintenance): automated image update 2026-04-24 21:07:32 +00:00
flux-bot
ff0b9762b1 chore(maintenance): automated image update 2026-04-24 21:05:15 +00:00
jenkins
ce36ff099b recovery(metis): rerun node password seeding job 2026-04-24 17:33:40 -03:00
jenkins
6c4a7dea29 recovery(metis): use atlas kv node secrets 2026-04-24 17:29:58 -03:00
jenkins
04a80c1168 recovery(metis): seed per-node vault password slots 2026-04-24 17:24:37 -03:00
flux-bot
8179bd85db chore(maintenance): automated image update 2026-04-24 20:19:26 +00:00
flux-bot
c08499b52d chore(maintenance): automated image update 2026-04-24 20:19:10 +00:00
flux-bot
eca9e494ad chore(maintenance): automated image update 2026-04-24 20:17:26 +00:00
flux-bot
ab0e68f9f3 chore(maintenance): automated image update 2026-04-24 20:15:10 +00:00
flux-bot
0566a47e35 chore(maintenance): automated image update 2026-04-24 17:50:13 +00:00
flux-bot
133597bfd0 chore(maintenance): automated image update 2026-04-24 17:49:55 +00:00
flux-bot
ccf318f977 chore(maintenance): automated image update 2026-04-24 17:48:12 +00:00
flux-bot
8affc052bf chore(maintenance): automated image update 2026-04-24 17:46:54 +00:00
flux-bot
0cf5043977 chore(maintenance): automated image update 2026-04-24 17:20:52 +00:00
flux-bot
f2ffc6c1ef chore(maintenance): automated image update 2026-04-24 17:19:09 +00:00
flux-bot
e7c770b10b chore(maintenance): automated image update 2026-04-24 17:17:52 +00:00
jenkins
0ac3c97f90 maintenance(metis): restore full helper image refs 2026-04-24 13:51:12 -03:00
flux-bot
3e5e37d65a chore(maintenance): automated image update 2026-04-24 16:11:02 +00:00
flux-bot
2acbcbff51 chore(maintenance): automated image update 2026-04-24 16:10:45 +00:00
flux-bot
70b382bc80 chore(maintenance): automated image update 2026-04-24 16:09:02 +00:00
flux-bot
d0191361d4 chore(maintenance): automated image update 2026-04-24 16:06:44 +00:00
flux-bot
59bb0bef78 chore(maintenance): automated image update 2026-04-24 15:56:37 +00:00
jenkins
4b456cf54a maintenance(metis): track arch-specific images 2026-04-24 12:55:47 -03:00
jenkins
91c6023d25 maintenance(metis): move ingress to recovery host 2026-04-24 10:51:09 -03:00
jenkins
85d15cd3e1 maintenance(metis): raise remote pod timeout for recovery builds 2026-04-24 00:01:43 -03:00
jenkins
c0a4cbf03e maintenance(metis): fix remote workspace permissions 2026-04-23 23:45:18 -03:00
jenkins
fad895efbb maintenance(metis): move build scratch to usb storage 2026-04-23 23:37:00 -03:00
jenkins
47b31ebcf4 monitoring(testing): collapse heavy drilldowns 2026-04-22 16:56:52 -03:00
jenkins
88d2225774 test(titan-iac): cover dashboard generator contract 2026-04-22 15:31:36 -03:00
jenkins
a1f6758b95 monitoring(grafana): refresh provisioned dashboards 2026-04-22 15:13:26 -03:00
jenkins
23146aaa8a monitoring(testing): clean canonical suite rows 2026-04-22 14:34:40 -03:00
jenkins
cc757ba082 ci(data-prepper): quote testcase metrics correctly 2026-04-22 13:28:35 -03:00
jenkins
c3c8b60671 ci(data-prepper): retrigger archive fix 2026-04-22 13:23:23 -03:00
jenkins
15792b1cf3 ci(data-prepper): archive junit without plugin dependency 2026-04-22 13:21:52 -03:00
jenkins
e75a5d5675 ci(data-prepper): keep validation labels portable 2026-04-22 13:13:56 -03:00
jenkins
4282810602 ci(data-prepper): retrigger quality publish 2026-04-22 13:07:37 -03:00
jenkins
8a58132dd4 ci(data-prepper): avoid xml parser in metrics publish 2026-04-22 13:04:47 -03:00
jenkins
be0d3e4300 ci(data-prepper): harden quality evidence helpers 2026-04-22 12:58:27 -03:00
jenkins
ba6848a67a ci(data-prepper): publish real testcase metrics 2026-04-22 12:48:36 -03:00
jenkins
23beb08e5e monitoring(testing): split quality trend panels 2026-04-22 12:42:33 -03:00
5d560d962d chore(metis): deploy scratch annotation sync 2026-04-22 04:28:08 -03:00
51ade59a46 fix(metis): keep sentinel rollouts moving on degraded nodes 2026-04-22 03:40:28 -03:00
7f91be27f9 chore(metis): deploy scratch sentinel fix 2026-04-22 03:33:54 -03:00
63cd159151 test(titan-iac): cover mailu sync scripts 2026-04-22 02:53:00 -03:00
443c70d01b monitoring(testing): promote atlas testing layout 2026-04-22 02:26:31 -03:00
flux-bot
9f0ea1683a chore(bstein-dev-home): automated image update 2026-04-22 05:01:25 +00:00
flux-bot
55df293e00 chore(bstein-dev-home): automated image update 2026-04-22 05:00:26 +00:00
3168ffe027 ci(titan-iac): feed coverage into sonar gate 2026-04-22 01:57:19 -03:00
abdefbbd05 ci(quality): enforce sonar and supply-chain gates 2026-04-22 01:29:54 -03:00
flux-bot
ead503d71e chore(bstein-dev-home): automated image update 2026-04-22 04:15:46 +00:00
flux-bot
f54bdf8483 chore(bstein-dev-home): automated image update 2026-04-22 04:14:49 +00:00
flux-bot
80cb4c257f chore(bstein-dev-home): automated image update 2026-04-22 04:06:45 +00:00
flux-bot
228e8a9772 chore(bstein-dev-home): automated image update 2026-04-22 04:05:50 +00:00
15c798b915 gitops(bstein-home): deploy current image tags on main 2026-04-22 00:53:06 -03:00
2ded2eb23d ci(titan-iac): apply supply-chain waiver ledger 2026-04-22 00:42:03 -03:00
flux-bot
e6bb015ef2 chore(maintenance): automated image update 2026-04-22 03:26:48 +00:00
flux-bot
ead7c276b4 chore(maintenance): automated image update 2026-04-22 03:11:42 +00:00
bfad9c19c5 deploy(bstein-home): target non-root frontend port 2026-04-22 00:01:50 -03:00
439a44bc85 ci(data-prepper): scan staged supply-chain inputs 2026-04-21 23:29:53 -03:00
flux-bot
13f179d842 chore(maintenance): automated image update 2026-04-22 02:09:28 +00:00
c0e5df30d5 ci(quality): use preloaded scanner image 2026-04-21 22:50:53 -03:00
flux-bot
79fbf2644b chore(maintenance): automated image update 2026-04-22 01:50:20 +00:00
0eca6adbbb ci(quality): pass sonar token as login 2026-04-21 22:17:55 -03:00
5801633b30 ci(quality): run sonar and supply-chain scans 2026-04-21 22:09:06 -03:00
fac139fd0e monitoring: rotate grafana dedupe job 2026-04-21 21:25:05 -03:00
jenkins
2df830f01b longhorn: bound settings sync curl calls and rerun job 2026-04-21 21:18:41 -03:00
flux-bot
26fab34de5 chore(maintenance): automated image update 2026-04-22 00:16:57 +00:00
jenkins
e29d0fe349 longhorn: rebalance replicas and cap rebuild pressure 2026-04-21 21:12:19 -03:00
jenkins
77f7620eca scheduling: de-prefer spillover nodes for non-longhorn services 2026-04-21 21:00:56 -03:00
fb0dd60954 jenkins: allow slow controller startup 2026-04-21 20:54:42 -03:00
jenkins
4401c26496 jenkins: de-prefer spillover longhorn nodes for controller and agents 2026-04-21 20:48:02 -03:00
9682a17a82 jenkins: avoid recursive volume ownership resets 2026-04-21 20:34:02 -03:00
55d87c0c14 ci(quality): bind sonarqube token credential in pipelines 2026-04-21 20:16:59 -03:00
379f20efc5 jenkins: prefer rpi5 without hard pin 2026-04-21 19:51:09 -03:00
7883593166 ci(jenkins): inject sonarqube token from vault 2026-04-21 19:43:08 -03:00
flux-bot
5509dd86d5 chore(maintenance): automated image update 2026-04-21 22:01:24 +00:00
06b27c9b9a ci(titan-iac): lower agent cpu request 2026-04-21 18:32:45 -03:00
flux-bot
a927affb1f chore(maintenance): automated image update 2026-04-21 21:22:18 +00:00
flux-bot
fab182e91e chore(maintenance): automated image update 2026-04-21 20:59:18 +00:00
d5be9e1ae9 ci(data-prepper): use mirrored base artifact 2026-04-21 16:56:25 -03:00
fb48d473d2 ci(data-prepper): report n/a coverage as complete 2026-04-21 16:32:42 -03:00
5e5cffbdc7 ci(data-prepper): allow arm64 worker scheduling 2026-04-21 15:33:42 -03:00
e1d804dbb0 ci(data-prepper): lower kaniko cpu request 2026-04-21 15:26:13 -03:00
flux-bot
2086427b72 chore(maintenance): automated image update 2026-04-21 17:56:42 +00:00
e811c0cabf ci(jenkins): require rpi5 controller placement 2026-04-21 14:12:14 -03:00
flux-bot
b68c002e2d chore(maintenance): automated image update 2026-04-21 17:05:21 +00:00
cb7e0238dc infra(ci): use harbor python utility images 2026-04-21 13:37:46 -03:00
flux-bot
043a2e75c8 chore(maintenance): automated image update 2026-04-21 16:30:12 +00:00
6ac375f82e ci(titan-iac): use harbor python runner 2026-04-21 13:18:31 -03:00
jenkins
8c1a26ead6 ci(titan-iac): use in-cluster victoria metrics dns 2026-04-21 12:30:06 -03:00
jenkins
d119f838e9 ci(titan-iac): harden quality metric publisher 2026-04-21 12:24:18 -03:00
jenkins
ae2356de6a monitoring(testing): render missing metric zero states 2026-04-21 11:46:15 -03:00
jenkins
c1ac36df17 monitoring(testing): link test metrics to build artifacts 2026-04-21 11:39:13 -03:00
jenkins
cc79f3ebcd ci(titan-iac): include primary branch in quality metrics 2026-04-21 11:08:59 -03:00
jenkins
1f991fc43d harbor: expand registry storage 2026-04-21 10:56:27 -03:00
jenkins
b62980b76d harbor: reduce vault injector bootstrap requests 2026-04-21 10:08:39 -03:00
jenkins
26da4945ea harbor: move registry bootstrap to titan-11 2026-04-21 09:55:29 -03:00
jenkins
d599a162a9 monitoring(testing): add branch evidence panels 2026-04-21 09:35:43 -03:00
jenkins
e53adc17b3 ci(data-prepper): archive full quality evidence 2026-04-21 09:24:09 -03:00
jenkins
7cd40d457d Merge remote-tracking branch 'origin/main' 2026-04-21 09:23:03 -03:00
flux-bot
d559d03bea chore(maintenance): automated image update 2026-04-21 06:32:37 +00:00
flux-bot
691dc3c71b chore(maintenance): automated image update 2026-04-21 06:27:29 +00:00
flux-bot
e81ecdd716 chore(maintenance): automated image update 2026-04-21 06:14:21 +00:00
flux-bot
74e385ad8b chore(maintenance): automated image update 2026-04-21 06:10:27 +00:00
flux-bot
fecd095717 chore(maintenance): automated image update 2026-04-21 06:03:10 +00:00
flux-bot
caa02806c0 chore(maintenance): automated image update 2026-04-21 06:00:02 +00:00
flux-bot
c6c6f90d26 chore(maintenance): automated image update 2026-04-21 05:54:02 +00:00
flux-bot
e4efb89466 chore(maintenance): automated image update 2026-04-21 05:52:01 +00:00
flux-bot
8584885ddd chore(maintenance): automated image update 2026-04-21 05:44:00 +00:00
flux-bot
6aeacaf872 chore(maintenance): automated image update 2026-04-21 05:42:00 +00:00
flux-bot
0146b92cc1 chore(maintenance): automated image update 2026-04-21 05:33:59 +00:00
flux-bot
981fca6cb4 chore(maintenance): automated image update 2026-04-21 05:26:59 +00:00
flux-bot
6dab28081d chore(maintenance): automated image update 2026-04-21 05:12:56 +00:00
flux-bot
6ebc475da2 chore(maintenance): automated image update 2026-04-21 05:05:56 +00:00
flux-bot
fff26ebacb chore(maintenance): automated image update 2026-04-21 04:57:54 +00:00
flux-bot
e3bebaa10b chore(maintenance): automated image update 2026-04-21 04:55:55 +00:00
flux-bot
df16f03e46 chore(maintenance): automated image update 2026-04-21 04:46:53 +00:00
flux-bot
b5243e8566 chore(maintenance): automated image update 2026-04-21 04:36:52 +00:00
flux-bot
4501bbf8f0 chore(maintenance): automated image update 2026-04-21 04:34:52 +00:00
flux-bot
5331d7149a chore(maintenance): automated image update 2026-04-21 04:24:51 +00:00
jenkins
c4b0389892 quality(titan-iac): widen enforced coverage contract 2026-04-20 21:39:53 -03:00
jenkins
387e104359 test(titan-iac): widen tracked quality coverage 2026-04-20 21:34:59 -03:00
jenkins
5ebc320843 ci(titan-iac): support direct script execution for metrics publish 2026-04-20 15:47:20 -03:00
jenkins
006f79658f ci(titan-iac): retrigger after titan-09 cordon 2026-04-20 15:36:51 -03:00
jenkins
9451bb9c61 test(titan-iac): raise quality gate coverage for quality runner 2026-04-20 15:29:46 -03:00
jenkins
655c26c589 quality(titan-iac): split metrics publisher and harden gate lint 2026-04-20 15:21:49 -03:00
jenkins
607d8c21fa monitoring(testing): fix missing-state queries and add test-case drilldowns 2026-04-20 13:45:01 -03:00
jenkins
b7f6cbd87c ci(titan-iac): enforce 30d build and artifact retention 2026-04-20 12:30:57 -03:00
jenkins
a07b49a05f monitoring(testing): fix atlas-jobs coverage and loc query expressions 2026-04-20 12:20:42 -03:00
jenkins
1d4227beec ci(data-prepper): add retention and archive quality artifacts 2026-04-20 10:55:13 -03:00
jenkins
57306201cf monitoring(testing): backfill placeholder test-case metrics across sparse suites 2026-04-20 09:13:34 -03:00
jenkins
7437ec5929 ci(titan-iac): emit placeholder test-case metric when junit has no cases 2026-04-20 09:10:04 -03:00
jenkins
710ec96990 test(titan-iac): update payload unit tests for per-test metric argument 2026-04-20 08:50:39 -03:00
jenkins
cb1c41c6ea ci(titan-iac): infer coverage/loc metrics from quality summary artifacts 2026-04-20 08:43:21 -03:00
jenkins
e8823197f8 monitoring(testing): align test selector with exported job label 2026-04-20 08:38:38 -03:00
jenkins
c5b1302ff6 monitoring(testing): add fallbacks for problematic-test trend queries 2026-04-20 08:37:26 -03:00
jenkins
f02db9801c monitoring(testing): add per-test metrics and flaky-test panels 2026-04-20 08:35:05 -03:00
jenkins
7d113291c9 monitoring(testing): split check trends into per-check success/failure panels 2026-04-20 08:07:30 -03:00
jenkins
47d5416dde ci(titan-iac): harden promote git workspace detection 2026-04-20 00:59:24 -03:00
codex
f2c4204bab monitoring(testing): fix suite all filter aliases and regex templating 2026-04-19 23:22:34 -03:00
codex
71cfdce862 jenkins: source streaming harbor creds from dedicated vault path 2026-04-19 23:02:30 -03:00
codex
d4112e5a74 ci(titan-iac): guard promote stage when workspace lacks .git 2026-04-19 22:58:58 -03:00
codex
6d2c72ff98 jenkins: keep streaming creds optional without vault hard dependency 2026-04-19 22:45:25 -03:00
codex
c8f7cd6ec2 jenkins(logging): split streaming harbor credentials 2026-04-19 22:40:56 -03:00
codex
bd85143aa0 jenkins: stop overriding push creds with harbor-pull secret 2026-04-19 22:36:18 -03:00
codex
cb992d1c53 maintenance(metis): raise remote timeout and improve progress 2026-04-19 22:34:16 -03:00
codex
7be6cfb9cb ci(titan-iac): install git in runner before promote stage 2026-04-19 22:33:22 -03:00
codex
b848e6b6d8 monitoring(dashboards): regenerate atlas-testing from generator 2026-04-19 22:29:20 -03:00
flux-bot
849bba8f5d chore(maintenance): automated image update 2026-04-20 01:19:35 +00:00
codex
86c492d8c1 ci: retrigger titan-iac after titan-18 cordon 2026-04-19 22:07:10 -03:00
codex
1ed8b7233d maintenance(metis): roll duplicate-build fix to 0.1.0-24 2026-04-19 22:03:04 -03:00
codex
ddabda06bf ci: fix data-prepper defaults and restore metrics publisher coverage 2026-04-19 21:57:40 -03:00
codex
881c724725 jenkins: revert sonar vault path injection blocking startup 2026-04-19 21:42:04 -03:00
codex
2db4952c39 jenkins(sonar): wire defaults and observe-mode toggles 2026-04-19 21:30:02 -03:00
codex
57432e01a3 maintenance(metis): export bastion ssh key for replacement readiness 2026-04-19 21:22:57 -03:00
codex
97bc0cea8c maintenance(metis): use inventory path available in remote runner pods 2026-04-19 21:18:30 -03:00
codex
e930aac039 ci(gate): enforce sonar and supply-chain checks across suites 2026-04-19 21:16:42 -03:00
flux-bot
13ec9b2d7d chore(maintenance): automated image update 2026-04-20 00:14:29 +00:00
d8f07c2b70 maintenance(metis): run vault-enabled metis service image 2026-04-19 21:14:19 -03:00
20a255252c maintenance(metis): add titan-16 replacement profile 2026-04-19 21:01:49 -03:00
376e68ec31 maintenance(metis): inject harbor creds into service runtime 2026-04-19 20:52:04 -03:00
flux-bot
7497f8d4e0 chore(maintenance): automated image update 2026-04-19 23:45:10 +00:00
b3270e7231 maintenance(metis): add titan-10 and titan-12 inventory profiles 2026-04-19 20:44:12 -03:00
1dce63fb9b monitoring(testing): render zero-state data for missing/sonar panels 2026-04-19 16:56:22 -03:00
96f3844677 quality(sonarqube): read exporter token from shared oidc vault path 2026-04-19 16:40:39 -03:00
65edbd9ed9 quality(sonarqube): inject exporter token from vault 2026-04-19 16:34:27 -03:00
29138b8a51 ci(metrics): publish canonical titan-iac gate checks 2026-04-19 16:29:07 -03:00
flux-bot
aede5aa899 chore(maintenance): automated image update 2026-04-19 19:19:49 +00:00
12293c9d11 test(ci): align publish_test_metrics unit tests with current API 2026-04-19 16:18:35 -03:00
2d0360be3b ci(metrics): use Pushgateway PUT for suite payload replacement 2026-04-19 16:10:20 -03:00
f9d7694f25 monitoring(testing): harden suite selector and success history query 2026-04-19 15:31:59 -03:00
9e3cc0f760 ci(jenkins): fix glue test VM URL and default SA observer RBAC 2026-04-19 15:06:13 -03:00
32410555cd monitoring: remove combined UPS draw series from history panels 2026-04-19 14:51:25 -03:00
347e7ccc84 monitoring: revert atlas overview dashboard to pre-quality changes 2026-04-19 14:43:41 -03:00
e47a877169 ci: resolve flux branch without Groovy dollar interpolation 2026-04-19 14:41:22 -03:00
592d037522 ci: fix titan-iac and data-prepper pipeline gate publishing 2026-04-19 14:33:26 -03:00
3ccc2a1100 quality: standardize suite checks and add SonarQube stack 2026-04-19 14:18:58 -03:00
9a20f4f854 monitoring(testing): redesign atlas testing dashboard and unify suite aliases 2026-04-18 17:47:06 -03:00
9a8c454123 tests(quality-gate): cover metrics publisher edge paths 2026-04-18 17:29:50 -03:00
flux-bot
e1f430455d chore(maintenance): automated image update 2026-04-18 19:36:24 +00:00
01fe20fe68 monitoring(metrics): normalize platform gate contract and pegasus suite name 2026-04-18 16:34:20 -03:00
2221a2d279 monitoring: alert on soteria backup job creation spikes 2026-04-17 01:09:25 -03:00
flux-bot
20305a7181 chore(maintenance): automated image update 2026-04-17 03:48:15 +00:00
10c813d583 maintenance(soteria): pause backup scheduler during backlog incident 2026-04-16 21:29:14 -03:00
1b041aa813 monitoring(dashboards): fix success-rate fallback expression 2026-04-16 20:02:26 -03:00
8f2b247b5f monitoring(dashboards): fallback idle panels to zero 2026-04-16 19:59:08 -03:00
1f3ce453fb maintenance(soteria): add startup probe and relax liveness 2026-04-16 19:54:07 -03:00
ff11f7ee65 monitoring(vm): raise kube-state-metrics scrape size cap 2026-04-16 19:47:56 -03:00
11d9c5eae3 monitoring(vm): avoid accelerator nodes for vmsingle 2026-04-16 19:39:35 -03:00
95dd0bbd56 monitoring(vm): auto-reload scrape config changes 2026-04-16 19:33:39 -03:00
72e7a39373 monitoring: fix grafana no-data scrape gaps 2026-04-16 19:30:31 -03:00
09d438e8b4 maintenance(titan-24): remove flux temp desktop automation 2026-04-15 22:58:37 -03:00
6752e4c0e5 maintenance(titan-24): keep helper retries armed 2026-04-15 22:50:41 -03:00
e7f3edb4bf maintenance(titan-24): tolerate unreachable helper jobs 2026-04-15 22:30:22 -03:00
c55d5ac3b5 maintenance(titan-24): add desktop helper and rootfs sweep 2026-04-15 22:25:11 -03:00
fb43b02b2a monitoring(soteria): tune PVC backup age thresholds for nightly cadence 2026-04-14 02:17:52 -03:00
55fa72d446 monitoring(overview): align enclosure fonts and shorten fan labels 2026-04-14 01:18:41 -03:00
496f7a12dd monitoring(overview): dedupe typhon series and map fans by port 2026-04-14 00:31:38 -03:00
6b75ae7dcc monitoring(overview): fix jenkins success/failure ranking with single-frame status labels 2026-04-13 23:13:45 -03:00
50a9bda808 typhon: register app and add v2-safe ble/control runtime toggles 2026-04-13 23:07:53 -03:00
c573012a7c monitoring(overview): globally sort jenkins rows across status frames 2026-04-13 23:03:38 -03:00
8ac428f816 monitoring(overview): derive jenkins top-6 in PromQL per panel 2026-04-13 22:38:40 -03:00
99e7dababd monitoring(overview): restore jenkins panel readability with top-6 stat rows 2026-04-13 22:13:08 -03:00
8db72c9475 monitoring(overview): replace jenkins tables with stat lists and fix links/colors 2026-04-13 22:07:24 -03:00
2db8e1423d monitoring(overview): fix jenkins row links, status color, and ordering 2026-04-13 20:58:09 -03:00
flux-bot
3e440ba7cd chore(maintenance): automated image update 2026-04-13 19:52:06 +00:00
e437f55d87 monitoring(overview): make jenkins success/failure panels scrollable lists 2026-04-13 16:24:19 -03:00
3bbd0a6f90 monitoring(jenkins): dedupe weather metrics and cap newest list rows 2026-04-13 14:29:44 -03:00
cf988e361b monitoring(overview): make jenkins success/failure lists readable 2026-04-13 14:25:19 -03:00
flux-bot
7f676fdc70 chore(maintenance): automated image update 2026-04-13 17:21:53 +00:00
flux-bot
f2830ce940 chore(maintenance): automated image update 2026-04-13 16:58:51 +00:00
a05a6a0e88 monitoring(overview): increase jenkins success/failure row legibility 2026-04-13 13:51:03 -03:00
30acfe39c4 maintenance(soteria): grant pod logs and roll out 0.1.0-32 2026-04-13 12:52:38 -03:00
flux-bot
ac62a43815 chore(maintenance): automated image update 2026-04-13 15:49:45 +00:00
4bcb1cc940 monitoring(overview): split jenkins weather into success/failure columns 2026-04-13 12:17:34 -03:00
d0abf9a70d monitoring: slightly reduce fan activity value font 2026-04-13 12:08:01 -03:00
flux-bot
69ab8805a9 chore(maintenance): automated image update 2026-04-13 15:06:41 +00:00
18666d5aec monitoring(jenkins): improve weather panel readability and layout 2026-04-13 11:52:40 -03:00
d847a731fb monitoring: increase ups current stat font size 2026-04-13 11:43:25 -03:00
9f9b00a6fb monitoring(jenkins): switch weather to single stat-list panel 2026-04-13 06:24:58 -03:00
28756ceda8 monitoring: align ups and climate cards to postgres two-stat pattern 2026-04-13 06:22:41 -03:00
56cca6df83 monitoring: rebuild split ups and climate cards from scratch 2026-04-13 06:12:29 -03:00
aa935984a8 monitoring: equalize split ups card heights and row spacing 2026-04-13 05:42:39 -03:00
a2172f56ec monitoring(overview): fix pvc backup health/age panel query 2026-04-13 05:33:28 -03:00
db701b89c2 monitoring(overview): add jenkins success and duration columns 2026-04-13 05:31:43 -03:00
ef352cbdc1 monitoring: prevent compact UPS card value clipping 2026-04-13 05:16:37 -03:00
f6b97ac82e monitoring: fix clipped values in compact split panel 2026-04-13 05:00:01 -03:00
0a28cf07c2 monitoring: force one-row value-only split panels 2026-04-13 04:51:03 -03:00
3dd0bc875d monitoring(jenkins): stop collapsing weather bars into one row 2026-04-13 04:32:13 -03:00
cf30f63fb4 typhon: schedule exporter on arm64 workers 2026-04-13 04:30:03 -03:00
2ae886ec74 monitoring: make split climate and ups panels value-only 2026-04-13 04:27:16 -03:00
4d10919ead monitoring(jenkins): render weather panels with exported job labels 2026-04-13 04:03:56 -03:00
c06ba41d0d monitoring: tighten split panel layout in overview 2026-04-13 03:53:06 -03:00
flux-bot
1ed1d6cf80 chore(maintenance): automated image update 2026-04-13 06:47:57 +00:00
f26d7afbbc monitoring: split climate and ups current panels 2026-04-13 03:35:50 -03:00
e5ffa94c1d maintenance(soteria): roll pvc-node pin fix and pod-read rbac 2026-04-13 03:31:57 -03:00
flux-bot
c2048fa594 chore(maintenance): automated image update 2026-04-13 06:30:55 +00:00
08cec8be77 maintenance(soteria): move restic vault path to shared scope 2026-04-13 03:00:57 -03:00
a6ff6122b0 maintenance(vault): roll sync pod after soteria secret mapping 2026-04-13 02:55:14 -03:00
0ffe1e1905 maintenance(jenkins): stabilize ariadne api token bootstrap 2026-04-13 02:55:10 -03:00
4e9b232a4f maintenance(soteria): source restic credentials from vault 2026-04-13 02:53:38 -03:00
b25422f1b4 maintenance(ariadne): restart to pick jenkins api creds 2026-04-13 02:45:29 -03:00
50c9852cff maintenance(jenkins): provision ariadne api user for weather collector 2026-04-13 02:41:20 -03:00
3d2f5c0778 monitoring(alerts): make soteria backup health rule driver-agnostic 2026-04-13 02:36:39 -03:00
flux-bot
206daf156a chore(maintenance): automated image update 2026-04-13 05:31:46 +00:00
f3e77ea994 Revert "monitoring(overview): recenter climate/ups cards and gate stale offline climate data"
This reverts commit 19d6ffcf2a4268fd414cbe5109aafd043d7bb514.
2026-04-13 02:26:09 -03:00
fbb4736d4a maintenance(soteria): roll pods after restic config switch 2026-04-13 02:24:05 -03:00
f02a782991 maintenance(soteria): enable restic encrypted backup mode 2026-04-13 02:23:01 -03:00
6f96f7b78f maintenance(soteria): fix duplicate b2 config keys 2026-04-13 02:21:25 -03:00
4fb0b371ff maintenance(soteria): switch to encrypted restic backups 2026-04-13 02:15:46 -03:00
flux-bot
4c671a5396 chore(maintenance): automated image update 2026-04-13 05:13:43 +00:00
flux-bot
3c675fd887 chore(maintenance): automated image update 2026-04-13 05:03:42 +00:00
2243072be2 maintenance(flux): update ariadne automation on main 2026-04-13 02:03:20 -03:00
19d6ffcf2a monitoring(overview): recenter climate/ups cards and gate stale offline climate data 2026-04-13 01:43:21 -03:00
53a20a8560 maintenance(soteria): avoid titan-10 scheduling 2026-04-13 01:16:59 -03:00
f1bb65cb73 monitoring(overview): center climate/ups cards and add UPS discharge risk coloring 2026-04-13 01:08:58 -03:00
0576de7a61 maintenance(soteria): roll snapshot-first backup fix image 2026-04-13 00:42:15 -03:00
c409c7ca80 monitoring(jobs): add jenkins build weather job list panels 2026-04-13 00:26:22 -03:00
f2aab54884 monitoring(overview): add fixed labels to canvas 2x2 stat cards 2026-04-13 00:21:56 -03:00
e6785f7db1 monitoring(overview): fix ups/climate 2x2 cards and dynamic climate axes 2026-04-13 00:18:06 -03:00
d514fb35e5 longhorn(core): restore b2 secret objects in vault sync 2026-04-12 23:54:35 -03:00
41a5add906 monitoring(climate): drop zero samples to unlock dynamic history scaling 2026-04-12 23:02:23 -03:00
00fe5e8a0f monitoring(testing): add coverage and code-smell infraction panels 2026-04-12 22:58:33 -03:00
3a148c63e4 monitoring(overview): rebalance climate row widths for current/history panels 2026-04-12 22:57:25 -03:00
f17fa41207 monitoring(overview): restore single-panel cards and dynamic climate axes 2026-04-12 22:53:46 -03:00
d642deb4f4 maintenance(soteria): fix prometheus scrape port to 8080 2026-04-12 22:36:51 -03:00
51e35b8643 monitoring(overview): stack ups current card into draw/runtime rows 2026-04-12 22:25:34 -03:00
e53933ece7 monitoring(overview): stack climate stats into explicit 2x2 rows 2026-04-12 22:19:37 -03:00
4efd28c956 Revert "monitoring(overview): force horizontal stat cards for climate/ups wrap"
This reverts commit 287c339aa0001c1daec161fd9fc73fbd4b267b48.
2026-04-12 22:14:59 -03:00
a1ab78b0c9 monitoring(grafana): mount and provision atlas-testing dashboard 2026-04-12 22:13:58 -03:00
287c339aa0 monitoring(overview): force horizontal stat cards for climate/ups wrap 2026-04-12 22:11:40 -03:00
dc1f1cbb7c monitoring(overview): split climate and ups stats into two-row query groups 2026-04-12 22:07:58 -03:00
4a10163b10 monitoring(overview): tune stat sizing for 2x2 climate/ups cards 2026-04-12 22:03:13 -03:00
f45217f98e monitoring(overview): simplify ups current card to draw/runtime 2026-04-12 21:36:42 -03:00
66da1b3aab monitoring(overview): shorten ups labels for readable stat rows 2026-04-12 21:32:48 -03:00
8d30fddd7d monitoring(overview): wrap ups and climate stats for narrow panels 2026-04-12 21:28:14 -03:00
a0f1149bbb monitoring(overview): restore readable two-row stats for ups and climate 2026-04-12 21:23:28 -03:00
d2672300a3 monitoring(jobs): switch cleanup stats to two-row layout 2026-04-12 20:38:52 -03:00
ed5a59f21d maintenance(soteria): set explicit b2 endpoint and bucket 2026-04-12 20:31:02 -03:00
66bd705971 monitoring: tune stat text sizing for climate and ups 2026-04-12 20:30:17 -03:00
4b78e67036 monitoring: use wide stat layout for ups and climate cards 2026-04-12 20:23:38 -03:00
3a4bdbd42f monitoring: switch ups/climate/fan stats to vertical orientation 2026-04-12 20:12:17 -03:00
e222344cd9 monitoring(jobs): add schedule fallback series for cold starts 2026-04-12 20:09:43 -03:00
a1257b65ff maintenance(ariadne): roll image to 0.1.0-103 for cleanup rollout 2026-04-12 20:06:03 -03:00
299a68ad95 monitoring(jobs): split testing dashboard and clean up job ops view 2026-04-12 20:06:03 -03:00
049a0deb04 maintenance(soteria): roll react ui image and wire b2 monitoring 2026-04-12 20:04:35 -03:00
7d3b12c774 monitoring: restore stat layout for ups/climate/fan rows 2026-04-12 19:56:12 -03:00
ac71b4621c monitoring: render ups/climate/fan panels as row tables 2026-04-12 19:46:39 -03:00
3271369e2d monitoring: set compact stat layout for climate and ups rows 2026-04-12 19:37:08 -03:00
931ee5944d monitoring: pack overview/power stats horizontally 2026-04-12 19:23:10 -03:00
08077f46c6 monitoring(atlas-power): force horizontal layout for stat rows 2026-04-12 19:06:07 -03:00
b9b9308500 maintenance(soteria): roll image to 0.1.0-22 for oauth2 headers 2026-04-12 18:55:09 -03:00
3096e0d7de monitoring(overview): tighten climate labels and drop duplicate temp line 2026-04-12 18:50:25 -03:00
9f5c9bfb86 maintenance(soteria): re-enable flux management for workload resources 2026-04-12 18:41:56 -03:00
6b0d6b017c monitoring(overview): tune climate row and restore ups card density 2026-04-12 18:35:15 -03:00
de3272e160 merge: atlas jobs ariadne schedule observability 2026-04-12 18:33:07 -03:00
8a413c0024 merge: lane2 jenkins cleanup activate 2026-04-12 18:33:00 -03:00
aa24e08744 merge: lane2 jenkins cleanup wiring 2026-04-12 18:32:48 -03:00
cb27592272 monitoring(overview): reflow UPS/climate rows and add jenkins weather 2026-04-12 18:14:54 -03:00
f67ca30f94 monitoring(climate): add C/F history and dedupe typhon series 2026-04-12 17:56:54 -03:00
4864939eef maintenance(ariadne): activate jenkins workspace cleanup deletes 2026-04-12 15:01:35 -03:00
01ecb75c5b scripts: default cleanup verifier to maintenance kustomization 2026-04-12 15:01:11 -03:00
fa30ea0ac2 scripts: add jenkins cleanup rollout verifier 2026-04-12 12:32:20 -03:00
2509d8876a maintenance(ariadne): default jenkins cleanup to safe dry-run 2026-04-12 12:32:20 -03:00
318 changed files with 31598 additions and 5603 deletions

374
Jenkinsfile vendored
View File

@ -7,14 +7,52 @@ pipeline {
apiVersion: v1
kind: Pod
spec:
serviceAccountName: "jenkins"
nodeSelector:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-04
- titan-06
- titan-11
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: python:3.12-slim
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
command:
- cat
tty: true
@ -24,9 +62,21 @@ spec:
// Pipeline-wide environment shared by every stage.
// NOTE(review): this is a rendered diff hunk, so the two consecutive SUITE_NAME
// assignments are presumably the removed ('titan-iac') and added ('titan_iac')
// sides of one change rather than a real duplicate - confirm against the Jenkinsfile.
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan-iac'
SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
// Bound from the Jenkins credential with id 'sonarqube-token'.
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
// '0' is not one of the truthy values (1|true|yes|on) accepted by the
// 'Enforce quality gate' stage, so the SonarQube verdict is collected but not enforced.
QUALITY_GATE_SONARQUBE_ENFORCE = '0'
// Read by the SonarQube evidence script as the path of the JSON report it writes.
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
// Supply-chain report is evaluated ('1'), but with REQUIRED = '0' a non-passing
// status is only logged unless PUBLISH_IMAGES=true flips it to required.
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
// Read by the IronBank fallback script as the path of the JSON report it writes.
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
// Build-level options: serialize runs and rotate old builds and artifacts.
options {
// Never run two builds of this job at the same time.
disableConcurrentBuilds()
// Keep builds for 30 days (at most 200) and their artifacts for 30 days (at most 120 builds).
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -36,7 +86,175 @@ spec:
}
// Installs the tooling later stages rely on: git (only when it is not already
// on PATH, via apt-get) and the Python packages listed in ci/requirements.txt.
// NOTE(review): this is a rendered diff hunk - the one-line `sh 'pip install ...'`
// step is presumably the removed side and the multi-line script its replacement,
// not two steps that both run. Confirm against the Jenkinsfile.
stage('Install deps') {
steps {
sh 'pip install --no-cache-dir -r ci/requirements.txt'
// The apt lists are deleted after the install to keep the container layer small.
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
}
}
// Runs the quality gate with the "local" profile and stores its exit code in
// build/local-quality-gate.rc. A failing gate does not fail this stage.
stage('Prepare local quality evidence') {
  steps {
    sh '''
      set -eu
      mkdir -p build
      # `|| gate_exit=$?` keeps errexit from aborting so the code can be recorded.
      gate_exit=0
      python3 -m testing.quality_gate --profile local --build-dir build || gate_exit=$?
      printf '%s\n' "${gate_exit}" > build/local-quality-gate.rc
    '''
  }
}
// Runs sonar-scanner, then records the quality-gate verdict of THIS analysis as JSON.
//
// Files written:
//   build/sonar-scanner.log       - scanner console output
//   build/sonarqube-analysis.rc   - sonar-scanner exit code
//   $QUALITY_GATE_SONARQUBE_REPORT (default build/sonarqube-quality-gate.json)
//                                 - project_status payload, or {"status": "ERROR", ...}
// A failing scanner is recorded in the .rc file rather than failing the step.
stage('Collect SonarQube evidence') {
  steps {
    container('quality-tools') {
      sh '''#!/usr/bin/env bash
        set -euo pipefail
        mkdir -p build
        args=(
          "-Dsonar.host.url=${SONARQUBE_HOST_URL}"
          "-Dsonar.login=${SONARQUBE_TOKEN}"
          "-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
          "-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
          "-Dsonar.sources=."
          "-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml,services/game-stream/**"
          "-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
        )
        # Coverage is optional: forward the report only when it exists.
        if [ -f build/coverage-unit.xml ]; then
          args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
        fi
        # Record the scanner's own exit code (not tee's) without aborting the step.
        set +e
        sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
        rc=${PIPESTATUS[0]}
        set -e
        printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
      '''
    }
    sh '''
      set -eu
      mkdir -p build
      python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path

host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')


def api_get(path, params):
    """GET a SonarQube Web API endpoint and return the decoded JSON body."""
    url = f"{host}{path}?{urllib.parse.urlencode(params)}"
    request = urllib.request.Request(url, method="GET")
    if token:
        # The token is sent as the basic-auth user name with an empty password.
        encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
        request.add_header("Authorization", f"Basic {encoded}")
    with urllib.request.urlopen(request, timeout=12) as response:
        return json.loads(response.read().decode("utf-8"))


def read_task_id():
    """Return the ceTaskId the scanner wrote to report-task.txt, or '' if absent."""
    task_file = Path('.scannerwork/report-task.txt')
    if not task_file.exists():
        return ''
    for line in task_file.read_text(encoding='utf-8').splitlines():
        key, _, value = line.partition('=')
        if key == 'ceTaskId':
            return value.strip()
    return ''


def wait_for_task(task_id, timeout_s=180, poll_s=3):
    """Poll the background task until it is terminal.

    Returns (status, analysis_id); status is "TIMEOUT" when the deadline passes.
    """
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            task = api_get("/api/ce/task", {"id": task_id}).get("task", {})
        except Exception:  # noqa: BLE001 - API errors are retried until the deadline
            time.sleep(poll_s)
            continue
        status = str(task.get("status", "")).upper()
        if status in {"SUCCESS", "FAILED", "CANCELED"}:
            return status, str(task.get("analysisId") or "")
        time.sleep(poll_s)
    return "TIMEOUT", ""


payload = {
    "status": "ERROR",
    "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
    task_id = read_task_id()
    task_status, analysis_id = wait_for_task(task_id) if task_id else ("", "")
    if task_id and task_status != "SUCCESS":
        # The server never processed this report, so project_status would return
        # the verdict of an older analysis. Report the failure instead.
        payload = {
            "status": "ERROR",
            "ceTaskStatus": task_status,
            "note": f"SonarQube background task {task_id} ended as {task_status}; "
                    "no quality gate result exists for this analysis",
        }
    else:
        # Prefer the analysis produced by this run; fall back to the project key
        # when the scanner left no task id behind.
        params = {"analysisId": analysis_id} if analysis_id else {"projectKey": project_key}
        try:
            payload = api_get("/api/qualitygates/project_status", params)
        except Exception as exc:  # noqa: BLE001
            payload = {"status": "ERROR", "error": str(exc)}

Path(report_path).parent.mkdir(parents=True, exist_ok=True)
with open(report_path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, indent=2, sort_keys=True)
    handle.write("\\n")
PY
    '''
  }
}
// Scans the workspace with Trivy and produces build/ironbank-compliance.json.
//
// Step 1 (quality-tools container): run `trivy fs`; if it leaves no JSON output,
//   write a "failed" compliance report that carries Trivy's exit code.
// Step 2 (default container): when Trivy output exists, turn it into the report
//   with ci/scripts/supply_chain_report.py; otherwise write a fallback report
//   from IRONBANK_COMPLIANCE_STATUS / IRONBANK_COMPLIANT unless one already exists.
stage('Collect IronBank evidence') {
  steps {
    container('quality-tools') {
      sh '''#!/usr/bin/env bash
        set -euo pipefail
        mkdir -p build
        # Capture the exit code instead of letting errexit abort the step.
        trivy_rc=0
        trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL . || trivy_rc=$?
        if [ -s build/trivy-fs.json ]; then
          exit 0
        fi
        cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
      '''
    }
    sh '''
      set -eu
      mkdir -p build
      if [ ! -s build/trivy-fs.json ]; then
        python3 - <<'PY'
import json
import os
from pathlib import Path

TRUTHY = {"1", "true", "yes", "on"}

report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
# An existing report (for example the failure stub from the Trivy step) wins.
if not report_path.exists():
    declared_status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
    declared_compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
    payload = {"status": declared_status or "unknown"}
    # "compliant" is emitted only when the variable was actually set.
    if declared_compliant:
        payload["compliant"] = declared_compliant in TRUTHY
    payload["note"] = (
        "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
        "or write build/ironbank-compliance.json in image-building repos."
    )
    report_path.parent.mkdir(parents=True, exist_ok=True)
    report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
      else
        python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
      fi
    '''
  }
}
stage('Run quality gate') {
@ -66,8 +284,96 @@ spec:
// Final verdict: exits non-zero when any enforced check is not passing.
//
// Checks, in order (each failure sets fail=1; the script exits with that value):
//   1. build/quality-gate.rc must contain 0; a missing file counts as 1.
//   2. SonarQube: only when QUALITY_GATE_SONARQUBE_ENFORCE is truthy
//      (1|true|yes|on, case-insensitive; default 1). The status is read from
//      build/sonarqube-quality-gate.json (`status`, `projectStatus.status` or
//      `qualityGate.status`) and must be ok|pass|passed|success.
//   3. Supply chain: only when QUALITY_GATE_IRONBANK_ENFORCE is truthy. The
//      verdict comes from build/ironbank-compliance.json (`compliant` boolean
//      first, otherwise `status`/`result`/`compliance`). A non-passing or
//      not-applicable verdict fails the build only when
//      QUALITY_GATE_IRONBANK_REQUIRED is truthy or PUBLISH_IMAGES=true;
//      otherwise a non-passing verdict is just logged.
//
// NOTE(review): this is a rendered diff hunk - the `set -eu` / `test ...` pair
// right after sh ''' is presumably the removed side of the change and the
// script starting at `set -euo pipefail` its replacement. Confirm against the
// Jenkinsfile.
// NOTE(review): this step has no shebang, so `set -o pipefail` relies on the
// agent container's default sh accepting that option; the quality-tools steps
// in the evidence stages use `#!/usr/bin/env bash` explicitly. Confirm.
stage('Enforce quality gate') {
steps {
sh '''
set -eu
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
'''
}
}
@ -76,7 +382,7 @@ spec:
script {
env.FLUX_BRANCH = sh(
returnStdout: true,
script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml'''
script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
).trim()
if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml')
@ -93,16 +399,28 @@ spec:
}
}
steps {
container('jnlp') {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
}
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
}
}
}
@ -110,15 +428,23 @@ spec:
post {
always {
script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
try {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
} catch (Throwable err) {
if (err.class.simpleName == 'MissingContextVariableException') {
echo 'workspace unavailable; skipping post-build artifact collection'
} else {
throw err
}
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
}
}
}

View File

@ -6,14 +6,52 @@ pipeline {
apiVersion: v1
kind: Pod
spec:
serviceAccountName: "jenkins"
nodeSelector:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-04
- titan-06
- titan-11
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: python:3.12-slim
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
command:
- cat
tty: true
@ -23,9 +61,21 @@ spec:
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan-iac'
SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '0'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -35,7 +85,175 @@ spec:
}
stage('Install deps') {
steps {
sh 'pip install --no-cache-dir -r ci/requirements.txt'
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
}
}
stage('Prepare local quality evidence') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml,services/game-stream/**"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
}
}
stage('Run quality gate') {
@ -65,8 +283,96 @@ spec:
stage('Enforce quality gate') {
steps {
sh '''
set -eu
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
'''
}
}
@ -75,7 +381,7 @@ spec:
script {
env.FLUX_BRANCH = sh(
returnStdout: true,
script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml'''
script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
).trim()
if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml')
@ -92,16 +398,28 @@ spec:
}
}
steps {
container('jnlp') {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
}
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
git push origin HEAD:${FLUX_BRANCH}
'''
}
}
}
@ -109,15 +427,23 @@ spec:
post {
always {
script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
try {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
} catch (Throwable err) {
if (err.class.simpleName == 'MissingContextVariableException') {
echo 'workspace unavailable; skipping post-build artifact collection'
} else {
throw err
}
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
}
}
}

View File

@ -6,30 +6,50 @@ from __future__ import annotations
import json
import os
from glob import glob
from pathlib import Path
import sys
import urllib.error
import urllib.request
import xml.etree.ElementTree as ET
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from ci.scripts import publish_test_metrics_quality as _quality_helpers
CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
_build_check_statuses = _quality_helpers._build_check_statuses
_combine_statuses = _quality_helpers._combine_statuses
_infer_sonarqube_status = _quality_helpers._infer_sonarqube_status
_infer_source_lines_over_500 = _quality_helpers._infer_source_lines_over_500
_infer_supply_chain_status = _quality_helpers._infer_supply_chain_status
_infer_workspace_coverage_percent = _quality_helpers._infer_workspace_coverage_percent
_load_optional_json = _quality_helpers._load_optional_json
_normalize_result_status = _quality_helpers._normalize_result_status
def _escape_label(value: str) -> str:
"""Escape a Prometheus label value without changing its content."""
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
def _label_str(labels: dict[str, str]) -> str:
    """Render a mapping as a Prometheus label set such as ``{a="1",b="2"}``.

    Labels are emitted in the mapping's iteration order. Entries whose value is
    empty (falsy) are dropped, and an empty string is returned when nothing
    remains so that the metric name can be used without braces.
    """
    rendered = []
    for name, raw in labels.items():
        if not raw:
            continue
        rendered.append(f'{name}="{_escape_label(raw)}"')
    if not rendered:
        return ""
    return "{" + ",".join(rendered) + "}"
def _read_text(url: str) -> str:
"""Fetch a plain-text response body from the given URL."""
with urllib.request.urlopen(url, timeout=10) as response:
return response.read().decode("utf-8")
def _post_text(url: str, payload: str) -> None:
"""PUT a plain-text payload and fail on any 4xx/5xx response."""
request = urllib.request.Request(
url,
data=payload.encode("utf-8"),
method="POST",
method="PUT",
headers={"Content-Type": "text/plain"},
)
with urllib.request.urlopen(request, timeout=10) as response:
@ -38,6 +58,7 @@ def _post_text(url: str, payload: str) -> None:
def _parse_junit(path: str) -> dict[str, int]:
"""Parse a JUnit XML file into aggregate test counters."""
if not os.path.exists(path):
return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
@ -64,6 +85,7 @@ def _parse_junit(path: str) -> dict[str, int]:
def _collect_junit_totals(pattern: str) -> dict[str, int]:
"""Sum JUnit counters across every XML file matching the pattern."""
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
for path in sorted(glob(pattern)):
parsed = _parse_junit(path)
@ -72,7 +94,38 @@ def _collect_junit_totals(pattern: str) -> dict[str, int]:
return totals
def _collect_junit_cases(pattern: str) -> list[tuple[str, str]]:
"""Collect individual JUnit test-case statuses for flaky-test trend panels."""
cases: list[tuple[str, str]] = []
for path in sorted(glob(pattern)):
if not os.path.exists(path):
continue
root = ET.parse(path).getroot()
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
suites = [elem for elem in root if elem.tag == "testsuite"]
else:
suites = []
for suite in suites:
for test_case in suite.findall("testcase"):
case_name = test_case.attrib.get("name", "").strip()
class_name = test_case.attrib.get("classname", "").strip()
if not case_name:
continue
full_name = f"{class_name}.{case_name}" if class_name else case_name
status = "passed"
if test_case.find("failure") is not None or test_case.find("error") is not None:
status = "failed"
elif test_case.find("skipped") is not None:
status = "skipped"
cases.append((full_name, status))
return cases
def _read_exit_code(path: str) -> int:
"""Read the quality-gate exit code, defaulting to failure if missing."""
try:
with open(path, "r", encoding="utf-8") as handle:
return int(handle.read().strip())
@ -81,6 +134,7 @@ def _read_exit_code(path: str) -> int:
def _load_summary(path: str) -> dict:
"""Load the JSON quality-gate summary, returning an empty mapping on error."""
try:
with open(path, "r", encoding="utf-8") as handle:
return json.load(handle)
@ -88,7 +142,26 @@ def _load_summary(path: str) -> dict:
return {}
def _summary_float(summary: dict, key: str) -> float:
"""Extract a float-like value from the summary, defaulting to 0.0."""
value = summary.get(key)
if isinstance(value, (int, float)):
return float(value)
return 0.0
def _summary_int(summary: dict, key: str) -> int:
"""Extract an int-like value from the summary, defaulting to 0."""
value = summary.get(key)
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
return 0
def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
"""Return the current counter value for a labeled metric if present."""
text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics")
for line in text.splitlines():
if not line.startswith(metric + "{"):
@ -109,20 +182,34 @@ def _build_payload(
suite: str,
status: str,
tests: dict[str, int],
test_cases: list[tuple[str, str]],
ok_count: int,
failed_count: int,
branch: str,
build_number: str,
jenkins_job: str,
summary: dict | None = None,
workspace_line_coverage_percent: float = 0.0,
source_files_total: int = 0,
source_lines_over_500: int = 0,
check_statuses: dict[str, str] | None = None,
) -> str:
"""Build the Pushgateway payload for the current suite run."""
passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0)
build_labels = _label_str(
{
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
)
test_case_base_labels = {
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
lines = [
"# TYPE platform_quality_gate_runs_total counter",
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
@ -135,37 +222,85 @@ def _build_payload(
"# TYPE titan_iac_quality_gate_run_status gauge",
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
"# TYPE platform_quality_gate_build_info gauge",
f"platform_quality_gate_build_info{build_labels} 1",
"# TYPE titan_iac_quality_gate_build_info gauge",
f"titan_iac_quality_gate_build_info{build_labels} 1",
"# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {workspace_line_coverage_percent:.3f}',
"# TYPE platform_quality_gate_source_files_total gauge",
f'platform_quality_gate_source_files_total{{suite="{suite}"}} {source_files_total}',
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
]
results = summary.get("results", []) if isinstance(summary, dict) else []
if results:
if check_statuses:
lines.append("# TYPE titan_iac_quality_gate_checks_total gauge")
for result in results:
check_name = result.get("name")
check_status = result.get("status")
if not check_name or not check_status:
continue
for check_name in CANONICAL_CHECKS:
check_status = check_statuses.get(check_name, "not_applicable")
lines.append(
f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(str(check_name))}",result="{_escape_label(str(check_status))}"}} 1'
f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(check_name)}",result="{_escape_label(check_status)}"}} 1'
)
lines.append("# TYPE platform_quality_gate_test_case_result gauge")
if test_cases:
for test_name, test_status in test_cases:
labels = {
**test_case_base_labels,
"test": test_name,
"status": test_status,
}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
else:
labels = {**test_case_base_labels, "test": "__no_test_cases__", "status": "skipped"}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
return "\n".join(lines) + "\n"
def main() -> int:
suite = os.getenv("SUITE_NAME", "titan-iac")
"""Publish the quality-gate metrics and print a compact run summary."""
suite = os.getenv("SUITE_NAME", "titan_iac")
pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091")
job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci")
junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
branch = os.getenv("BRANCH_NAME", os.getenv("GIT_BRANCH", ""))
branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
if branch.startswith("origin/"):
branch = branch[len("origin/") :]
build_number = os.getenv("BUILD_NUMBER", "")
jenkins_job = os.getenv("JOB_NAME", "titan-iac")
tests = _collect_junit_totals(junit_glob)
test_cases = _collect_junit_cases(junit_glob)
exit_code = _read_exit_code(exit_code_path)
status = "ok" if exit_code == 0 else "failed"
summary = _load_summary(summary_path)
workspace_line_coverage_percent = _summary_float(summary, "workspace_line_coverage_percent")
if workspace_line_coverage_percent <= 0:
workspace_line_coverage_percent = _infer_workspace_coverage_percent(summary, "build/coverage-unit.xml")
source_files_total = _summary_int(summary, "source_files_total")
source_lines_over_500 = _summary_int(summary, "source_lines_over_500")
if source_lines_over_500 <= 0:
source_lines_over_500 = _infer_source_lines_over_500(summary)
sonarqube_report = _load_optional_json(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", "build/sonarqube-quality-gate.json"))
supply_chain_report = _load_optional_json(os.getenv("QUALITY_GATE_IRONBANK_REPORT", "build/ironbank-compliance.json"))
truthy = {"1", "true", "yes", "on"}
supply_chain_required = (
os.getenv("QUALITY_GATE_IRONBANK_REQUIRED", "0").strip().lower() in truthy
or os.getenv("PUBLISH_IMAGES", "false").strip().lower() in truthy
)
check_statuses = _build_check_statuses(
summary=summary,
tests=tests,
workspace_line_coverage_percent=workspace_line_coverage_percent,
source_lines_over_500=source_lines_over_500,
sonarqube_report=sonarqube_report,
supply_chain_report=supply_chain_report,
supply_chain_required=supply_chain_required,
)
ok_count = int(
_fetch_existing_counter(
@ -190,11 +325,17 @@ def main() -> int:
suite=suite,
status=status,
tests=tests,
test_cases=test_cases,
ok_count=ok_count,
failed_count=failed_count,
branch=branch,
build_number=build_number,
jenkins_job=jenkins_job,
summary=summary,
workspace_line_coverage_percent=workspace_line_coverage_percent,
source_files_total=source_files_total,
source_lines_over_500=source_lines_over_500,
check_statuses=check_statuses,
)
push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}"
_post_text(push_url, payload)
@ -208,11 +349,14 @@ def main() -> int:
"tests_skipped": tests["skipped"],
"ok_count": ok_count,
"failed_count": failed_count,
"checks_recorded": len(summary.get("results", [])) if isinstance(summary, dict) else 0,
"checks_recorded": len(check_statuses),
"workspace_line_coverage_percent": workspace_line_coverage_percent,
"source_files_total": source_files_total,
"source_lines_over_500": source_lines_over_500,
}
print(json.dumps(summary, sort_keys=True))
return 0
if __name__ == "__main__":
if __name__ == "__main__": # pragma: no cover
raise SystemExit(main())

View File

@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""Quality/status helpers for publish_test_metrics."""
from __future__ import annotations
import json
from pathlib import Path
import xml.etree.ElementTree as ET
# Status strings that _normalize_result_status maps to the "ok" bucket.
# Values are compared after stripping whitespace and lower-casing.
SUCCESS_STATUSES = {"ok", "pass", "passed", "success", "compliant"}
# Status strings that map to the "not_applicable" bucket.
NOT_APPLICABLE_STATUSES = {"not_applicable", "n/a", "na", "none", "skipped"}
# Status strings that map to the "failed" bucket; warnings count as failures.
FAILED_STATUSES = {"failed", "fail", "error", "errors", "warn", "warning", "red"}
# Names of the checks that _build_check_statuses reports on.
CANONICAL_CHECKS = [
    "tests",
    "coverage",
    "loc",
    "docs_naming",
    "gate_glue",
    "sonarqube",
    "supply_chain",
]
def _infer_workspace_coverage_percent(summary: dict, default_xml: str) -> float:
"""Infer workspace line coverage from quality summary coverage XML metadata."""
results = summary.get("results", []) if isinstance(summary, dict) else []
coverage_xml = default_xml
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() != "coverage":
continue
candidate = str(result.get("coverage_xml") or "").strip()
if candidate:
coverage_xml = candidate
break
xml_path = Path(coverage_xml)
if not xml_path.exists():
return 0.0
try:
root = ET.parse(xml_path).getroot()
line_rate = root.attrib.get("line-rate")
if line_rate is None:
return 0.0
return float(line_rate) * 100.0
except (ET.ParseError, OSError, ValueError):
return 0.0
def _infer_source_lines_over_500(summary: dict) -> int:
"""Infer over-limit source file count from hygiene issue payloads."""
results = summary.get("results", []) if isinstance(summary, dict) else []
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() not in {"hygiene", "loc", "smell"}:
continue
issues = result.get("issues")
if not isinstance(issues, list):
continue
return sum(1 for item in issues if isinstance(item, str) and item.startswith("file exceeds"))
return 0
def _normalize_result_status(value: str | None, default: str = "failed") -> str:
    """Map arbitrary check status text into canonical check result buckets.

    Returns ``"ok"``, ``"not_applicable"`` or ``"failed"`` when the stripped,
    lower-cased value is a member of the corresponding status set, and
    ``default`` for empty or unrecognised values. Non-string inputs (for
    example a JSON boolean or number in a ``status`` field) are converted with
    ``str()`` instead of raising ``AttributeError``.
    """
    if not value:
        return default
    # Callers pass raw JSON fields, so do not assume the value is a string.
    normalized = str(value).strip().lower()
    if normalized in SUCCESS_STATUSES:
        return "ok"
    if normalized in NOT_APPLICABLE_STATUSES:
        return "not_applicable"
    if normalized in FAILED_STATUSES:
        return "failed"
    return default
def _load_optional_json(path: str | None) -> dict:
"""Load an optional JSON report file, returning an empty object when absent."""
if not path:
return {}
candidate = Path(path)
if not candidate.exists():
return {}
try:
return json.loads(candidate.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return {}
def _combine_statuses(statuses: list[str]) -> str:
"""Roll up many check statuses into one canonical result."""
if not statuses:
return "not_applicable"
if any(status == "failed" for status in statuses):
return "failed"
if all(status == "not_applicable" for status in statuses):
return "not_applicable"
if all(status in {"ok", "not_applicable"} for status in statuses):
return "ok"
return "failed"
def _infer_sonarqube_status(report: dict) -> str:
    """Derive the canonical SonarQube check status from its JSON report.

    An empty report is ``not_applicable``. Otherwise the first truthy value
    among ``projectStatus.status``, ``qualityGate.status`` and the top-level
    ``status`` is normalised; a missing or unrecognised status counts as
    ``failed``.
    """
    if not report:
        return "not_applicable"
    for section in ("projectStatus", "qualityGate"):
        raw_status = report.get(section, {}).get("status")
        if raw_status:
            break
    else:
        # Neither nested section carried a status; fall back to the flat field.
        raw_status = report.get("status")
    status_text = None if raw_status is None else str(raw_status)
    return _normalize_result_status(status_text, default="failed")
def _infer_supply_chain_status(report: dict, required: bool) -> str:
    """Derive the canonical supply-chain status from a compliance report.

    A boolean ``compliant`` field takes precedence over ``status``. Anything
    other than a passing result becomes ``failed`` when the check is
    ``required`` and ``not_applicable`` when it is not; this includes an empty
    report, ``compliant: false`` and a missing ``status``.
    """
    # What every non-passing outcome collapses to for this run.
    non_passing = "failed" if required else "not_applicable"
    if not report:
        return non_passing
    compliant = report.get("compliant")
    if compliant is True:
        return "ok"
    if compliant is False:
        return non_passing
    status = report.get("status")
    if status is None:
        return non_passing
    normalized = _normalize_result_status(str(status), default="failed")
    if normalized == "failed":
        return non_passing
    if normalized == "not_applicable":
        return non_passing
    return normalized
def _build_check_statuses(
    summary: dict | None,
    tests: dict[str, int],
    workspace_line_coverage_percent: float,
    source_lines_over_500: int,
    sonarqube_report: dict,
    supply_chain_report: dict,
    supply_chain_required: bool,
) -> dict[str, str]:
    """Produce the canonical quality-check status map used for dashboarding.

    Statuses reported by name in ``summary["results"]`` win. Where a check
    is not reported directly, it is derived from related result names or
    from the numeric/report inputs.

    Returns:
        A mapping with the keys ``tests``, ``coverage``, ``loc``,
        ``docs_naming``, ``gate_glue``, ``sonarqube`` and ``supply_chain``.
    """
    entries = summary.get("results", []) if isinstance(summary, dict) else []
    reported: dict[str, str] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        label = str(entry.get("name") or "").strip().lower()
        if label:
            reported[label] = _normalize_result_status(entry.get("status"), default="failed")

    def _rollup(aliases):
        """Combine the statuses reported under any of ``aliases``; None if none were."""
        found = [reported[alias] for alias in aliases if alias in reported]
        return _combine_statuses(found) if found else None

    tests_status = reported.get("tests")
    if not tests_status:
        tests_status = _rollup(("unit", "integration", "e2e", "pytest", "test", "tests"))
        if tests_status is None:
            # No named test results: fall back to the raw counters.
            if tests["tests"] > 0:
                clean = (tests["failures"] + tests["errors"]) == 0
                tests_status = "ok" if clean else "failed"
            else:
                tests_status = "not_applicable"

    coverage_status = reported.get("coverage")
    if not coverage_status:
        if not workspace_line_coverage_percent > 0:
            coverage_status = "not_applicable"
        elif workspace_line_coverage_percent >= 95.0:
            coverage_status = "ok"
        else:
            coverage_status = "failed"

    loc_status = reported.get("loc")
    if not loc_status:
        loc_status = "failed" if source_lines_over_500 != 0 else "ok"

    docs_naming_status = reported.get("docs_naming")
    if not docs_naming_status:
        rolled = _rollup(("docs", "hygiene", "smell", "lint", "naming"))
        docs_naming_status = rolled if rolled is not None else "not_applicable"

    gate_glue_status = reported.get("gate_glue")
    if not gate_glue_status:
        rolled = _rollup(("gate_glue", "glue", "gate"))
        gate_glue_status = rolled if rolled is not None else "not_applicable"

    sonarqube_status = reported.get("sonarqube") or _infer_sonarqube_status(sonarqube_report)
    supply_chain_status = reported.get("supply_chain") or _infer_supply_chain_status(
        supply_chain_report,
        required=supply_chain_required,
    )

    return {
        "tests": tests_status,
        "coverage": coverage_status,
        "loc": loc_status,
        "docs_naming": docs_naming_status,
        "gate_glue": gate_glue_status,
        "sonarqube": sonarqube_status,
        "supply_chain": supply_chain_status,
    }

View File

@ -0,0 +1,173 @@
"""Build a titan-iac supply-chain compliance report from Trivy evidence."""
from __future__ import annotations
import argparse
import datetime as dt
import json
from pathlib import Path
from typing import Any
FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
def _read_json(path: Path) -> dict[str, Any]:
"""Read a JSON object from disk for use as pipeline evidence."""
payload = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise ValueError(f"{path} must contain a JSON object")
return payload
def _parse_day(raw: str | None) -> dt.date | None:
"""Parse an ISO day while letting optional waiver dates stay optional."""
if not raw:
return None
return dt.date.fromisoformat(raw)
def _today(override: str | None = None) -> dt.date:
    """Return the policy day: the parsed ``override`` when given, else today's date.

    The override exists so tests can pin expiry behavior.
    """
    pinned = _parse_day(override)
    if pinned:
        return pinned
    return dt.date.today()
def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
"""Return active ``(misconfiguration id, target)`` waivers and expired count."""
if path is None or not path.exists():
return set(), 0
payload = _read_json(path)
default_expires_at = payload.get("default_expires_at")
active: set[tuple[str, str]] = set()
expired = 0
for entry in payload.get("misconfigurations", []):
if not isinstance(entry, dict):
continue
misconfiguration_id = str(entry.get("id") or "").strip()
if not misconfiguration_id:
continue
expires_at = _parse_day(str(entry.get("expires_at") or default_expires_at or ""))
targets = entry.get("targets", [])
if not isinstance(targets, list):
continue
if expires_at and expires_at < policy_day:
expired += len(targets)
continue
# Waivers are target-specific so a new unsafe manifest fails until it is
# either fixed or deliberately accepted with a fresh expiration.
for target in targets:
if isinstance(target, str) and target:
active.add((misconfiguration_id, target))
return active, expired
def _iter_failed_misconfigurations(payload: dict[str, Any]):
"""Yield failed high/critical Trivy misconfiguration records."""
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
target = str(result.get("Target") or "")
for item in result.get("Misconfigurations") or []:
if not isinstance(item, dict):
continue
if item.get("Status") != "FAIL":
continue
if str(item.get("Severity") or "").upper() not in FAIL_SEVERITIES:
continue
yield target, item
def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
"""Count Trivy vulnerabilities at a specific severity."""
count = 0
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
for item in result.get("Vulnerabilities") or []:
if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
count += 1
return count
def _count_secrets(payload: dict[str, Any]) -> int:
"""Count detected secrets in the Trivy filesystem report."""
count = 0
for result in payload.get("Results", []):
if isinstance(result, dict):
count += len(result.get("Secrets") or [])
return count
def build_report(
    trivy_payload: dict[str, Any],
    waiver_path: Path | None = None,
    today_override: str | None = None,
) -> dict[str, Any]:
    """Summarize Trivy evidence into the compliance report read by the quality gate.

    Args:
        trivy_payload: Decoded Trivy JSON report.
        waiver_path: Optional waiver file for accepted misconfigurations.
        today_override: Optional ISO day used instead of today's date when
            evaluating waiver expiry.

    Returns:
        The report mapping. ``status`` is ``"ok"`` only when there are no
        critical vulnerabilities, no secrets and no unwaived failed
        misconfigurations. High vulnerabilities are counted but do not
        affect the status.
    """
    effective_day = _today(today_override)
    waived_pairs, lapsed_waivers = _load_waiver_pairs(waiver_path, effective_day)

    unresolved: list[dict[str, str]] = []
    waived_total = 0
    for target, finding in _iter_failed_misconfigurations(trivy_payload):
        rule_id = str(finding.get("ID") or "")
        if (rule_id, target) not in waived_pairs:
            unresolved.append(
                {
                    "id": rule_id,
                    "target": target,
                    "severity": str(finding.get("Severity") or ""),
                    "title": str(finding.get("Title") or ""),
                }
            )
        else:
            waived_total += 1

    critical_total = _count_vulnerabilities(trivy_payload, "CRITICAL")
    high_total = _count_vulnerabilities(trivy_payload, "HIGH")
    secret_total = _count_secrets(trivy_payload)

    if critical_total == 0 and secret_total == 0 and not unresolved:
        verdict = "ok"
    else:
        verdict = "failed"

    return {
        "status": verdict,
        "compliant": verdict == "ok",
        "category": "artifact_security",
        "scan_type": "filesystem",
        "scanner": "trivy",
        "critical_vulnerabilities": critical_total,
        "high_vulnerabilities": high_total,
        "high_vulnerability_policy": "observe",
        "secrets": secret_total,
        "high_or_critical_misconfigurations": len(unresolved),
        "waived_misconfigurations": waived_total,
        "expired_waivers": lapsed_waivers,
        "waiver_file": str(waiver_path) if waiver_path else "",
        # Only the first 20 open findings are embedded in the report.
        "open_misconfiguration_examples": unresolved[:20],
    }
def main(argv: list[str] | None = None) -> int:
    """CLI entrypoint: read the Trivy JSON, build the report and write it out.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Always ``0`` once the report has been written.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    for flag, extra in (
        ("--trivy-json", {"required": True}),
        ("--waivers", {}),
        ("--output", {"required": True}),
        ("--today", {}),
    ):
        cli.add_argument(flag, **extra)
    options = cli.parse_args(argv)

    evidence = _read_json(Path(options.trivy_json))
    waivers = Path(options.waivers) if options.waivers else None
    summary = build_report(evidence, waiver_path=waivers, today_override=options.today)

    destination = Path(options.output)
    # Create the output directory on demand.
    destination.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(summary, indent=2, sort_keys=True) + "\n"
    destination.write_text(rendered, encoding="utf-8")
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())

View File

@ -0,0 +1,108 @@
"""Glue checks for Ariadne schedules exported to VictoriaMetrics."""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
import requests
import yaml
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
    """Parse ``CONFIG_PATH`` with the safe YAML loader; an empty document yields ``{}``."""
    with CONFIG_PATH.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed or {}
def _query(promql: str) -> list[dict]:
    """Run an instant query against VictoriaMetrics and return its result list.

    The base URL comes from the ``VM_URL`` environment variable, with a
    built-in default. The request uses a 10 second timeout and raises on an
    HTTP error status. A response without ``data.result`` yields ``[]``.
    """
    base_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
    reply = requests.get(f"{base_url}/api/v1/query", params={"query": promql}, timeout=10)
    reply.raise_for_status()
    body = reply.json()
    data = body.get("data", {})
    return data.get("result", [])
def _expected_tasks() -> list[dict]:
    """Return the normalized ``ariadne_schedule_tasks`` entries from the config.

    Asserts that at least one task is configured.
    """
    config = _load_config()
    normalized = []
    for entry in config.get("ariadne_schedule_tasks", []):
        normalized.append(_normalize_task(entry, config))
    assert normalized, "No Ariadne schedule tasks configured"
    return normalized
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_series_exist():
    """Every configured task must expose a next-run timestamp series."""
    expected = _expected_tasks()
    pattern = _task_regex(expected)
    samples = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{pattern}"}}')
    reported = {sample.get("metric", {}).get("task") for sample in samples}
    absent = [entry["task"] for entry in expected if entry["task"] not in reported]
    assert not absent, f"Missing next-run metrics for: {', '.join(absent)}"
def test_ariadne_schedule_recent_success():
    """Tracked tasks must report a last-success timestamp within their age limit."""
    tracked = _tracked_tasks(_expected_tasks())
    pattern = _task_regex(tracked)
    samples = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{pattern}"}}')
    reported = {sample.get("metric", {}).get("task") for sample in samples}
    absent = [entry["task"] for entry in tracked if entry["task"] not in reported]
    assert not absent, f"Missing last-success metrics for: {', '.join(absent)}"

    now = datetime.now(timezone.utc)
    # Hours elapsed since each task's last reported success.
    hours_since_success = {}
    for sample in samples:
        name = sample.get("metric", {}).get("task")
        succeeded_at = datetime.fromtimestamp(float(sample["value"][1]), tz=timezone.utc)
        hours_since_success[name] = (now - succeeded_at).total_seconds() / 3600

    stale = []
    for entry in tracked:
        name = entry["task"]
        if name not in hours_since_success:
            continue
        age = hours_since_success[name]
        if age > float(entry["max_success_age_hours"]):
            stale.append(f"{name} ({age:.1f}h > {entry['max_success_age_hours']}h)")
    assert not stale, "Ariadne schedules are stale: " + ", ".join(stale)
def test_ariadne_schedule_last_status_present_and_boolean():
    """Tracked tasks must report a last-status series whose value is exactly 0 or 1."""
    tracked = _tracked_tasks(_expected_tasks())
    pattern = _task_regex(tracked)
    samples = _query(f'ariadne_schedule_last_status{{task=~"{pattern}"}}')
    reported = {sample.get("metric", {}).get("task") for sample in samples}
    absent = [entry["task"] for entry in tracked if entry["task"] not in reported]
    assert not absent, f"Missing last-status metrics for: {', '.join(absent)}"

    unexpected = []
    for sample in samples:
        name = sample.get("metric", {}).get("task")
        reading = float(sample["value"][1])
        if reading == 0.0 or reading == 1.0:
            continue
        unexpected.append(f"{name}={reading}")
    assert not unexpected, f"Unexpected Ariadne last-status values: {', '.join(unexpected)}"

View File

@ -1,3 +1,5 @@
"""Glue checks for the metrics the quality-gate publishes."""
from __future__ import annotations
import os
@ -23,26 +25,63 @@ def _query(promql: str) -> list[dict]:
return payload.get("data", {}).get("result", [])
def test_glue_metrics_present():
    """At least one cronjob label series carrying the glue label must exist."""
    glue_series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
    assert glue_series, "No glue cronjob label series found"
def _expected_tasks() -> list[dict]:
    """Return the normalized ``ariadne_schedule_tasks`` entries from the config.

    Asserts that at least one task is configured.
    """
    config = _load_config()
    normalized = []
    for entry in config.get("ariadne_schedule_tasks", []):
        normalized.append(_normalize_task(entry, config))
    assert normalized, "No Ariadne schedule tasks configured"
    return normalized
def test_glue_metrics_success_join():
    """Glue-labelled cronjobs must have a last-successful-time series."""
    glue_selector = 'kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
    # Restrict the last-success series to cronjobs that carry the glue label.
    joined = f"kube_cronjob_status_last_successful_time and on(namespace,cronjob) {glue_selector}"
    glue_series = _query(joined)
    assert glue_series, "No glue cronjob last success series found"
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_metrics_present():
cfg = _load_config()
expected = cfg.get("ariadne_schedule_tasks", [])
if not expected:
return
series = _query("ariadne_schedule_next_run_timestamp_seconds")
tasks = {item.get("metric", {}).get("task") for item in series}
missing = [task for task in expected if task not in tasks]
tasks = _expected_tasks()
selector = _task_regex(tasks)
series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
seen = {item.get("metric", {}).get("task") for item in series}
missing = [item["task"] for item in tasks if item["task"] not in seen]
assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
def test_ariadne_schedule_success_and_status_metrics_present():
    """Tracked tasks must expose both last-success and last-status series."""

    def _reported(samples):
        """Collect the ``task`` label from each returned series."""
        return {sample.get("metric", {}).get("task") for sample in samples}

    tracked = _tracked_tasks(_expected_tasks())
    pattern = _task_regex(tracked)
    success_samples = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{pattern}"}}')
    status_samples = _query(f'ariadne_schedule_last_status{{task=~"{pattern}"}}')
    with_success = _reported(success_samples)
    with_status = _reported(status_samples)
    wanted = {entry["task"] for entry in tracked}
    missing_success = sorted(wanted - with_success)
    missing_status = sorted(wanted - with_status)
    assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
    assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"

View File

@ -0,0 +1,407 @@
{
"version": 1,
"generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
"default_expires_at": "2026-05-22",
"ticket": "atlas-quality-wave-k8s-hardening",
"default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
"misconfigurations": [
{
"id": "DS-0002",
"targets": [
"dockerfiles/Dockerfile.ananke-node-helper"
]
},
{
"id": "KSV-0009",
"targets": [
"services/mailu/vip-controller.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml"
]
},
{
"id": "KSV-0010",
"targets": [
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0014",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/guest-register-deployment.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/actual-budget-deployment.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/deployment.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud-mail-sync/cronjob.yaml",
"services/nextcloud/collabora.yaml",
"services/nextcloud/cronjob.yaml",
"services/nextcloud/deployment.yaml",
"services/nextcloud/maintenance-cronjob.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/planka/deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vault/statefulset.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0017",
"targets": [
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0041",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"infrastructure/traefik/clusterrole.yaml",
"services/bstein-dev-home/rbac.yaml",
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/comms/mas-secrets-ensure-rbac.yaml",
"services/maintenance/soteria-rbac.yaml"
]
},
{
"id": "KSV-0047",
"targets": [
"services/monitoring/rbac.yaml"
]
},
{
"id": "KSV-0053",
"targets": [
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/ariadne-rbac.yaml"
]
},
{
"id": "KSV-0056",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/disable-k3s-traefik-rbac.yaml",
"services/maintenance/k3s-traefik-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0114",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0118",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/coredns-deployment.yaml",
"infrastructure/core/node-prefer-noschedule-cronjob.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vmalert-atlas-availability.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud/collabora.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
"services/typhon/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0121",
"targets": [
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
]
}
]
}

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: ai-llm
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/ai-llm

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: bstein-dev-home-migrations
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/bstein-dev-home/oneoffs/migrations

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: feature/ariadne
branch: main
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(bstein-dev-home): automated image update"
push:
branch: feature/ariadne
branch: main
update:
strategy: Setters
path: services/bstein-dev-home

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: bstein-dev-home
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/bstein-dev-home

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: comms
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true
@ -13,5 +15,3 @@ spec:
path: ./services/comms
targetNamespace: comms
timeout: 2m
dependsOn:
- name: traefik

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: crypto
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: finance
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/finance

View File

@ -0,0 +1,29 @@
# clusters/atlas/flux-system/applications/game-stream/kustomization.yaml
# Flux Kustomization that reconciles ./services/game-stream into the
# game-stream namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: game-stream
  namespace: flux-system
  annotations:
    # NOTE(review): IfNotPresent presumably makes the parent Kustomization
    # create this object once and skip later applies, so spec edits in Git
    # may not reach an already-existing object - confirm this is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  path: ./services/game-stream
  targetNamespace: game-stream
  interval: 10m
  timeout: 10m
  prune: true
  # wait is off; only the explicitly listed healthChecks gate readiness.
  wait: false
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: oauth2-proxy-wolf
      namespace: game-stream
  dependsOn:
    - name: cert-manager
    - name: keycloak
    - name: traefik
    - name: vault

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: gitea
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/gitea

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: harbor
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/harbor

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: health
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/health
@ -15,7 +17,6 @@ spec:
dependsOn:
- name: keycloak
- name: postgres
- name: traefik
- name: vault
healthChecks:
- apiVersion: apps/v1

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: jellyfin
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/jellyfin

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: jenkins
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/jenkins
@ -14,7 +16,6 @@ spec:
targetNamespace: jenkins
dependsOn:
- name: helm
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: keycloak
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true

View File

@ -21,10 +21,14 @@ resources:
- sui-metrics/kustomization.yaml
- openldap/kustomization.yaml
- keycloak/kustomization.yaml
- quality/kustomization.yaml
- oauth2-proxy/kustomization.yaml
- mailu/kustomization.yaml
- jenkins/kustomization.yaml
- ai-llm/kustomization.yaml
- openclaw/kustomization.yaml
- game-stream/kustomization.yaml
- veles/kustomization.yaml
- typhon/kustomization.yaml
- nextcloud/kustomization.yaml
- nextcloud-mail-sync/kustomization.yaml

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: mailu
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: monerod
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto/monerod

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: nextcloud-mail-sync
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: nextcloud
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/nextcloud

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: oauth2-proxy
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true

View File

@ -0,0 +1,34 @@
# clusters/atlas/flux-system/applications/openclaw/kustomization.yaml
# Flux Kustomization that reconciles ./services/openclaw into the openclaw
# namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: openclaw
  namespace: flux-system
  annotations:
    # NOTE(review): IfNotPresent presumably makes the parent Kustomization
    # create this object once and skip later applies, so spec edits in Git
    # may not reach an already-existing object - confirm this is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  path: ./services/openclaw
  targetNamespace: openclaw
  interval: 10m
  timeout: 30m
  prune: true
  wait: true
  # NOTE(review): Flux documents healthChecks as ignored while wait is true;
  # the entries below would only take effect if wait is switched off - confirm.
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: openclaw-ollama
      namespace: openclaw
    - apiVersion: apps/v1
      kind: Deployment
      name: openclaw
      namespace: openclaw
  dependsOn:
    - name: cert-manager
    - name: core
    - name: longhorn
    - name: traefik

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: openldap
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
prune: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: outline
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/outline
@ -15,7 +17,6 @@ spec:
dependsOn:
- name: keycloak
- name: mailu
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: pegasus
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/pegasus

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: planka
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/planka
@ -15,7 +17,6 @@ spec:
dependsOn:
- name: keycloak
- name: mailu
- name: traefik
healthChecks:
- apiVersion: apps/v1
kind: Deployment

View File

@ -0,0 +1,36 @@
# clusters/atlas/flux-system/applications/quality/kustomization.yaml
# Flux Kustomization that reconciles ./services/quality (SonarQube, its
# exporter and its oauth2-proxy) into the quality namespace.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: quality
  namespace: flux-system
  annotations:
    # NOTE(review): IfNotPresent presumably makes the parent Kustomization
    # create this object once and skip later applies, so spec edits in Git
    # may not reach an already-existing object - confirm this is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  # No namespace on sourceRef: the GitRepository is looked up in this
  # object's own namespace (flux-system).
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./services/quality
  targetNamespace: quality
  interval: 10m
  timeout: 20m
  prune: true
  # wait is off; only the explicitly listed healthChecks gate readiness.
  wait: false
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube-exporter
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: oauth2-proxy-sonarqube
      namespace: quality
  dependsOn:
    - name: cert-manager
    - name: keycloak
    - name: vault
    - name: postgres

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: sui-metrics
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/sui-metrics/overlays/atlas

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: typhon
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/typhon
@ -13,6 +15,7 @@ spec:
name: flux-system
targetNamespace: climate
dependsOn:
- name: vault
- name: vault-csi
- name: monitoring
healthChecks:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: vault
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: vaultwarden
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
suspend: false
@ -17,4 +19,3 @@ spec:
wait: true
dependsOn:
- name: helm
- name: traefik

View File

@ -0,0 +1,29 @@
# clusters/atlas/flux-system/applications/veles/image-automation.yaml
# Staged for the first Veles image rollout. Add this file to the parent
# applications kustomization after the namespace exists and the Harbor repos
# have initial tags.
#
# Rewrites image setters under services/veles and commits the result straight
# to main as flux-bot.
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
  name: veles
  namespace: veles
spec:
  interval: 1m0s
  # The Git source lives in flux-system, not in this object's namespace.
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  update:
    strategy: Setters
    path: services/veles
  git:
    # Checkout and push use the same branch, so updates land without a PR.
    checkout:
      ref:
        branch: main
    push:
      branch: main
    commit:
      messageTemplate: "chore(veles): automated image update"
      author:
        name: flux-bot
        email: ops@bstein.dev

View File

@ -0,0 +1,28 @@
# clusters/atlas/flux-system/applications/veles/kustomization.yaml
# Flux Kustomization that reconciles ./services/veles into the veles
# namespace once its platform dependencies are ready.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: veles
  namespace: flux-system
  annotations:
    # NOTE(review): IfNotPresent presumably makes the parent Kustomization
    # create this object once and skip later applies, so spec edits in Git
    # may not reach an already-existing object - confirm this is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  path: ./services/veles
  targetNamespace: veles
  interval: 10m
  timeout: 20m
  prune: true
  # No wait and no healthChecks: workload readiness is not checked here.
  wait: false
  dependsOn:
    - name: cert-manager
    - name: core
    - name: keycloak
    - name: longhorn
    - name: traefik
    - name: vault
    - name: vault-csi
    - name: vault-injector

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: wallet-monero-temp
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto/wallet-monero-temp

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: xmr-miner
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/crypto/xmr-miner

View File

@ -5966,6 +5966,9 @@ spec:
- args:
- --events-addr=http://notification-controller.$(RUNTIME_NAMESPACE).svc.cluster.local./
- --watch-all-namespaces=true
- --concurrent=1
- --requeue-dependency=5s
- --interval-jitter-percentage=30
- --log-level=info
- --log-encoding=json
- --enable-leader-election

View File

@ -7,7 +7,7 @@ metadata:
name: flux-system
namespace: flux-system
spec:
interval: 1m0s
interval: 15m0s
ref:
branch: main
secretRef:
@ -20,7 +20,7 @@ metadata:
name: flux-system
namespace: flux-system
spec:
interval: 10m0s
interval: 1h0m0s
path: ./clusters/atlas/flux-system
prune: true
sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: cert-manager-cleanup
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/cert-manager/cleanup

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: cert-manager
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/cert-manager

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: core
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/core

View File

@ -0,0 +1,21 @@
# clusters/atlas/flux-system/platform/descheduler/kustomization.yaml
# Flux Kustomization that reconciles ./infrastructure/descheduler into
# kube-system after the helm and core Kustomizations.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: descheduler
  namespace: flux-system
  annotations:
    # NOTE(review): IfNotPresent presumably makes the parent Kustomization
    # create this object once and skip later applies, so spec edits in Git
    # may not reach an already-existing object - confirm this is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  path: ./infrastructure/descheduler
  targetNamespace: kube-system
  interval: 30m
  prune: true
  wait: true
  dependsOn:
    - name: helm
    - name: core

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: gitops-ui
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
timeout: 10m
@ -16,5 +18,4 @@ spec:
targetNamespace: flux-system
dependsOn:
- name: helm
- name: traefik
wait: true

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: helm
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
resources:
- core/kustomization.yaml
- helm/kustomization.yaml
- descheduler/kustomization.yaml
- resource-guardrails/kustomization.yaml
- cert-manager/kustomization.yaml
- metallb/kustomization.yaml
- traefik/kustomization.yaml

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: logging
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/logging

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: longhorn-adopt
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/longhorn/adopt

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: longhorn-ui
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/longhorn/ui-ingress

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: longhorn
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/longhorn/core

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: feature/ariadne
branch: main
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(maintenance): automated image update"
push:
branch: feature/ariadne
branch: main
update:
strategy: Setters
path: services/maintenance

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: maintenance
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/maintenance

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: metallb
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: monitoring
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./services/monitoring

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: postgres
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/postgres

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/platform/resource-guardrails/kustomization.yaml
# Flux Kustomization that reconciles ./infrastructure/resource-guardrails
# after the core Kustomization. No targetNamespace is set, so the manifests
# keep whatever namespaces they declare themselves.
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: resource-guardrails
  namespace: flux-system
  annotations:
    # NOTE(review): IfNotPresent presumably makes the parent Kustomization
    # create this object once and skip later applies, so spec edits in Git
    # may not reach an already-existing object - confirm this is intended.
    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
  sourceRef:
    kind: GitRepository
    name: flux-system
    namespace: flux-system
  path: ./infrastructure/resource-guardrails
  interval: 10m
  prune: true
  wait: true
  dependsOn:
    - name: core

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: traefik
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 10m
path: ./infrastructure/traefik

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: vault-csi
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
sourceRef:

View File

@ -4,6 +4,8 @@ kind: Kustomization
metadata:
name: vault-injector
namespace: flux-system
annotations:
kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
spec:
interval: 30m
path: ./infrastructure/vault-injector

View File

@ -2,4 +2,8 @@ FROM python:3.11-slim
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN pip install --no-cache-dir requests psycopg2-binary
RUN pip install --no-cache-dir requests psycopg2-binary \
&& groupadd --system guest-tools \
&& useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
USER guest-tools

View File

@ -1,15 +1,12 @@
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
# Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
&& mkdir -p /var/log/data-prepper
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
USER root
RUN apt-get update \
&& apt-get install -y --no-install-recommends bc \
&& rm -rf /var/lib/apt/lists/*
USER 10001
WORKDIR /usr/share/data-prepper

View File

@ -1,10 +1,13 @@
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
RUN apk add --no-cache ca-certificates \
&& addgroup -S livekit-token \
&& adduser -S -D -H -u 65532 -G livekit-token livekit-token
COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER livekit-token
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"]

View File

@ -29,10 +29,12 @@ FROM ${DEBIAN_IMAGE}
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends ca-certificates; \
update-ca-certificates; rm -rf /var/lib/apt/lists/*
update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
groupadd --system p2pool; \
useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
RUN /usr/local/bin/p2pool --version || true
EXPOSE 3333
USER p2pool
ENTRYPOINT ["/usr/local/bin/p2pool"]

View File

@ -26,9 +26,12 @@ RUN set -eux; \
curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
rm -f /opt/monero/monero.tar.bz2
rm -f /opt/monero/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
ENV PATH="/usr/local/bin:/usr/bin:/bin"
RUN /usr/local/bin/monero-wallet-rpc --version || true
EXPOSE 18083
USER monero

View File

@ -23,10 +23,14 @@ RUN set -eux; \
mkdir -p /opt/monero; \
tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
rm -f /tmp/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
mkdir -p /data; \
chown monero:monero /data; \
chmod 0770 /data
ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
PATH="/opt/monero:${PATH}"
USER monero
CMD ["/opt/monero/monerod", "--version"]

View File

@ -1,10 +1,13 @@
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
RUN apk add --no-cache ca-certificates \
&& addgroup -S oauth2-proxy \
&& adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER oauth2-proxy
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/oauth2-proxy"]

View File

@ -1,10 +1,13 @@
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
RUN apk add --no-cache ca-certificates \
&& addgroup -S pegasus \
&& adduser -S -D -H -u 65532 -G pegasus pegasus
COPY --from=base /pegasus /pegasus
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER pegasus
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/pegasus"]

View File

@ -0,0 +1,48 @@
# dockerfiles/Dockerfile.quality-tools
# CI toolbox image: SonarScanner CLI and Trivy on Debian bookworm, with the
# Trivy vulnerability DB downloaded at build time, running as an unprivileged
# user. Both tool archives are the linux/arm64 (aarch64) builds.
FROM debian:bookworm-slim

ARG SONAR_SCANNER_VERSION=8.0.1.6346
ARG TRIVY_VERSION=0.70.0

ENV TRIVY_CACHE_DIR=/opt/trivy-cache

# pipefail makes a failure on the left side of a pipe (for example the
# checksum filters below) fail the build instead of being masked.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Base packages plus the fixed-UID account the image finally runs as.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        curl \
        git \
        jq \
        unzip \
    && rm -rf /var/lib/apt/lists/* \
    && groupadd --system quality-tools \
    && useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools

# SonarScanner CLI: download, verify against the published .sha256, unpack.
RUN set -eux; \
    scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
    base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
    curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
    curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
    printf '%s  %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
    unzip -q "/tmp/${scanner_zip}" -d /opt; \
    ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
    rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"

# Trivy: download, verify against the release checksums file, then install.
# The archive was previously extracted unverified, unlike SonarScanner above.
# If the checksums file has no line for the archive, sha256sum receives no
# input and fails, which aborts the build.
RUN set -eux; \
    trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
    trivy_sums="trivy_${TRIVY_VERSION}_checksums.txt"; \
    trivy_url="https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}"; \
    curl -fsSL "${trivy_url}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
    curl -fsSL "${trivy_url}/${trivy_sums}" -o "/tmp/${trivy_sums}"; \
    (cd /tmp && grep -E "[[:space:]]${trivy_tgz}\$" "${trivy_sums}" | sha256sum -c -); \
    tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
    rm -f "/tmp/${trivy_tgz}" "/tmp/${trivy_sums}"; \
    trivy --version; \
    sonar-scanner -v

# Download the vulnerability DB as root, make it world-readable, and give the
# runtime user a writable workspace.
RUN set -eux; \
    mkdir -p "${TRIVY_CACHE_DIR}"; \
    trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
    chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
    mkdir -p /workspace; \
    chown quality-tools:quality-tools /workspace

WORKDIR /workspace
USER quality-tools

View File

@ -27,12 +27,53 @@ spec:
timeout: 10m
values:
installCRDs: true
replicaCount: 2
podDisruptionBudget:
enabled: true
minAvailable: 1
extraArgs:
- --acme-http01-solver-nameservers=1.1.1.1:53,8.8.8.8:53
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -42,10 +83,63 @@ spec:
- rpi5
- rpi4
webhook:
replicaCount: 2
podDisruptionBudget:
enabled: true
minAvailable: 1
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
livenessProbe:
failureThreshold: 8
initialDelaySeconds: 90
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
readinessProbe:
failureThreshold: 8
initialDelaySeconds: 10
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 5
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -55,10 +149,51 @@ spec:
- rpi5
- rpi4
cainjector:
replicaCount: 2
podDisruptionBudget:
enabled: true
minAvailable: 1
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:

View File

@ -10,6 +10,7 @@ data:
errors
cache 30
hosts {
192.168.22.9 agent.bstein.dev
192.168.22.9 alerts.bstein.dev
192.168.22.9 auth.bstein.dev
192.168.22.9 bstein.dev
@ -28,6 +29,7 @@ data:
192.168.22.9 matrix.live.bstein.dev
192.168.22.9 metrics.bstein.dev
192.168.22.9 monero.bstein.dev
192.168.22.9 moonlight.bstein.dev
10.43.6.87 money.bstein.dev
192.168.22.9 notes.bstein.dev
192.168.22.9 office.bstein.dev
@ -40,6 +42,7 @@ data:
192.168.22.9 secret.bstein.dev
192.168.22.9 sso.bstein.dev
192.168.22.9 stream.bstein.dev
192.168.22.9 wolf.bstein.dev
192.168.22.9 tasks.bstein.dev
192.168.22.9 vault.bstein.dev
fallthrough

View File

@ -4,8 +4,12 @@ kind: Kustomization
resources:
- ../modules/base
- ../modules/profiles/atlas-ha
- node-prefer-noschedule-serviceaccount.yaml
- node-prefer-noschedule-rbac.yaml
- node-prefer-noschedule-cronjob.yaml
- coredns-custom.yaml
- coredns-deployment.yaml
- ntp-sync-daemonset.yaml
- workload-profiles.yaml
- ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -0,0 +1,80 @@
# infrastructure/core/node-prefer-noschedule-cronjob.yaml
# Every minute, re-assert the intended labels and taints on the named titan-*
# nodes. Each kubectl call is best-effort (|| true) so one failure does not
# abort the rest of the pass; nodes that do not exist are skipped.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: node-prefer-noschedule
  namespace: kube-system
spec:
  schedule: "* * * * *"
  # A run still active at the next tick is replaced rather than queued.
  concurrencyPolicy: Replace
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      # Never retry a failed pod; the next scheduled run is the retry.
      backoffLimit: 0
      template:
        spec:
          restartPolicy: Never
          serviceAccountName: node-prefer-noschedule
          containers:
            - name: taint
              image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
              command:
                - /usr/bin/env
                - bash
                - -ceu
                - |
                  k() {
                    kubectl --request-timeout=10s "$@"
                  }
                  clear_worker() {
                    local node="${1}"
                    local hardware="${2}"
                    if k get node "${node}" >/dev/null 2>&1; then
                      k label node "${node}" node-role.kubernetes.io/worker=true "hardware=${hardware}" --overwrite=true || true
                      k label node "${node}" atlas.bstein.dev/spillover- || true
                      k taint node "${node}" node.kubernetes.io/unschedulable:NoSchedule- || true
                      k uncordon "${node}" || true
                    else
                      echo "skipping missing node ${node}"
                    fi
                  }
                  clear_worker titan-04 rpi5
                  clear_worker titan-05 rpi5
                  clear_worker titan-07 rpi5
                  clear_worker titan-08 rpi5
                  clear_worker titan-11 rpi5
                  clear_worker titan-12 rpi4
                  clear_worker titan-14 rpi4
                  clear_worker titan-18 rpi4
                  clear_worker titan-22 amd64
                  if k get node titan-22 >/dev/null 2>&1; then
                    k label node titan-22 atlas.bstein.dev/general-compute=last-resort --overwrite=true || true
                  fi
                  if k get node titan-23 >/dev/null 2>&1; then
                    k label node titan-23 \
                      veles.bstein.dev/simulation=true \
                      veles.bstein.dev/node-pool=oceanus \
                      node-role.kubernetes.io/veles-sim=true \
                      longhorn-host=true \
                      hardware=oceanus \
                      --overwrite=true || true
                    k label node titan-23 node-role.kubernetes.io/worker- || true
                    k taint node titan-23 veles.bstein.dev/simulation=true:NoSchedule --overwrite=true || true
                  else
                    echo "skipping missing node titan-23"
                  fi
                  for node in titan-13 titan-15 titan-17 titan-19; do
                    if k get node "${node}" >/dev/null 2>&1; then
                      k label node "${node}" atlas.bstein.dev/spillover=true longhorn-host=true --overwrite=true || true
                      k taint node "${node}" longhorn=true:PreferNoSchedule --overwrite=true || true
                      k taint node "${node}" atlas.bstein.dev/spillover=true:PreferNoSchedule --overwrite=true || true
                    else
                      echo "skipping missing node ${node}"
                    fi
                  done

View File

@ -0,0 +1,22 @@
# infrastructure/core/node-prefer-noschedule-rbac.yaml
# Cluster-wide read and patch access to Node objects, bound to the
# node-prefer-noschedule ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: node-prefer-noschedule
rules:
  # Core API group ("") nodes only; no create, update or delete.
  - apiGroups:
      - ""
    resources:
      - nodes
    verbs:
      - get
      - list
      - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: node-prefer-noschedule
subjects:
  - kind: ServiceAccount
    name: node-prefer-noschedule
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: node-prefer-noschedule

View File

@ -0,0 +1,6 @@
# infrastructure/core/node-prefer-noschedule-serviceaccount.yaml
# ServiceAccount named node-prefer-noschedule in kube-system.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: kube-system
  name: node-prefer-noschedule

View File

@ -0,0 +1,27 @@
# infrastructure/core/workload-profiles.yaml
# ConfigMap holding a table of named CPU/memory request and limit presets.
# NOTE(review): nothing in this file shows what consumes profiles.yaml -
# presumably a reference for sizing workloads; confirm.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: kube-system
  name: atlas-workload-profiles
data:
  # Stored as an opaque string; the nested YAML is not parsed by Kubernetes.
  profiles.yaml: |
    profiles:
      tiny:
        request: { cpu: 25m, memory: 64Mi }
        limit: { cpu: 200m, memory: 256Mi }
      light:
        request: { cpu: 50m, memory: 128Mi }
        limit: { cpu: 500m, memory: 512Mi }
      standard:
        request: { cpu: 250m, memory: 512Mi }
        limit: { cpu: "1", memory: 1Gi }
      heavy:
        request: { cpu: 500m, memory: 1Gi }
        limit: { cpu: 1500m, memory: 3Gi }
      ci:
        request: { cpu: 512m, memory: 512Mi }
        limit: { cpu: 1500m, memory: 2Gi }
      scavenger:
        request: { cpu: 10m, memory: 32Mi }
        limit: { cpu: 250m, memory: 256Mi }

View File

@ -0,0 +1,97 @@
# infrastructure/descheduler/helmrelease.yaml
# Installs the descheduler chart as a CronJob in kube-system, running every
# 20 minutes with a single policy profile (atlas-rpi-balance).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: descheduler
  namespace: kube-system
spec:
  interval: 30m
  chart:
    spec:
      chart: descheduler
      version: "0.33.0"
      sourceRef:
        kind: HelmRepository
        name: descheduler
        namespace: flux-system
  install:
    remediation:
      retries: 3
  upgrade:
    remediation:
      retries: 3
  values:
    kind: CronJob
    schedule: "*/20 * * * *"
    successfulJobsHistoryLimit: 1
    failedJobsHistoryLimit: 3
    priorityClassName: system-cluster-critical
    resources:
      requests:
        cpu: 50m
        memory: 96Mi
      limits:
        cpu: 200m
        memory: 256Mi
    # Scheduling: control-plane nodes only, with a preference for any
    # control-plane node other than titan-0a.
    nodeSelector:
      node-role.kubernetes.io/control-plane: "true"
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            preference:
              matchExpressions:
                - key: kubernetes.io/hostname
                  operator: NotIn
                  values:
                    - titan-0a
    tolerations:
      - key: node-role.kubernetes.io/control-plane
        operator: Exists
        effect: NoSchedule
      - key: node-role.kubernetes.io/master
        operator: Exists
        effect: NoSchedule
    deschedulerPolicyAPIVersion: descheduler/v1alpha2
    deschedulerPolicy:
      # Per-run eviction caps.
      maxNoOfPodsToEvictPerNode: 2
      maxNoOfPodsToEvictPerNamespace: 2
      profiles:
        - name: atlas-rpi-balance
          # Which configured plugins actually run, by extension point.
          plugins:
            balance:
              enabled:
                - RemovePodsViolatingTopologySpreadConstraint
                - LowNodeUtilization
            deschedule:
              enabled:
                - RemovePodsHavingTooManyRestarts
                - RemovePodsViolatingNodeTaints
                - RemovePodsViolatingNodeAffinity
          pluginConfig:
            - name: DefaultEvictor
              args:
                nodeFit: true
                minPodAge: 10m
                ignorePvcPods: true
                evictLocalStoragePods: false
            - name: RemovePodsHavingTooManyRestarts
              args:
                podRestartThreshold: 12
                includingInitContainers: true
            - name: RemovePodsViolatingNodeAffinity
              args:
                nodeAffinityType:
                  - requiredDuringSchedulingIgnoredDuringExecution
            # The next two plugins are listed without args.
            - name: RemovePodsViolatingTopologySpreadConstraint
            - name: RemovePodsViolatingNodeTaints
            - name: LowNodeUtilization
              args:
                # Percentages for cpu, memory and pod count.
                thresholds:
                  cpu: 45
                  memory: 45
                  pods: 45
                targetThresholds:
                  cpu: 75
                  memory: 75
                  pods: 75

View File

@ -0,0 +1,5 @@
# infrastructure/descheduler/kustomization.yaml
# kustomize entry point for the descheduler directory; its only resource is
# the HelmRelease.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - helmrelease.yaml

View File

@ -26,6 +26,9 @@ spec:
cleanupOnFail: true
timeout: 15m
values:
global:
nodeSelector:
longhorn-host: "true"
service:
ui:
type: NodePort
@ -78,3 +81,23 @@ spec:
tag: v2.16.0
defaultSettings:
systemManagedPodsImagePullPolicy: Always
taintToleration: veles.bstein.dev/simulation=true:NoSchedule
longhornManager:
tolerations:
- key: veles.bstein.dev/simulation
operator: Equal
value: "true"
effect: NoSchedule
nodeSelector:
longhorn-host: "true"
longhornDriver:
tolerations:
- key: veles.bstein.dev/simulation
operator: Equal
value: "true"
effect: NoSchedule
nodeSelector:
longhorn-host: "true"
longhornUI:
nodeSelector:
longhorn-host: "true"

View File

@ -7,7 +7,9 @@ resources:
- secretproviderclass.yaml
- vault-sync-deployment.yaml
- helmrelease.yaml
- veles-recurring-jobs.yaml
- longhorn-settings-ensure-job.yaml
- longhorn-csi-toleration-ensure-job.yaml
- longhorn-disk-tags-ensure-job.yaml
configMapGenerator:

View File

@ -0,0 +1,106 @@
# infrastructure/longhorn/core/longhorn-csi-toleration-ensure-job.yaml
# One-shot Job that makes sure the Longhorn CSI plugin DaemonSet and every
# engine-image DaemonSet tolerate the Veles simulation taint, then waits until
# the CSI driver is registered and the engine-image pods are ready on the
# target node named in the script below.
apiVersion: batch/v1
kind: Job
metadata:
  name: longhorn-csi-toleration-ensure-4
  namespace: longhorn-system
spec:
  # No retries: a failed run is left for inspection until the TTL expires.
  backoffLimit: 0
  # NOTE(review): the script may wait up to 90 x 2s for CSI registration plus
  # 90 x 2s per engine-image DaemonSet, which can exceed this deadline; confirm
  # 240s is the intended overall cap.
  activeDeadlineSeconds: 240
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      serviceAccountName: longhorn-service-account
      restartPolicy: Never
      nodeSelector:
        kubernetes.io/hostname: titan-11
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
      containers:
        - name: patch
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          command: ["/bin/sh", "-c"]
          args:
            - |
              # NOTE(review): pipefail needs a shell that supports it; confirm
              # /bin/sh in this image does.
              set -euo pipefail
              ns="longhorn-system"
              ds="longhorn-csi-plugin"
              # Node whose CSI registration and engine-image pods are awaited.
              target_node="titan-23"
              key="veles.bstein.dev/simulation"
              value="true"
              effect="NoSchedule"

              # patch_daemonset NAME
              # Appends the key/value/effect toleration to the DaemonSet's pod
              # template unless an identical toleration is already present.
              patch_daemonset() {
                target="$1"
                current="$(kubectl -n "${ns}" get daemonset "${target}" -o json)"
                if echo "${current}" | jq -e \
                  --arg key "${key}" \
                  --arg value "${value}" \
                  --arg effect "${effect}" \
                  '.spec.template.spec.tolerations[]? | select(.key == $key and .value == $value and .effect == $effect)' >/dev/null; then
                  echo "${target} already tolerates ${key}=${value}:${effect}"
                  return 0
                fi
                # A merge patch replaces the whole tolerations list, so the
                # existing entries are carried over and the new one appended.
                patch="$(echo "${current}" | jq -c \
                  --arg key "${key}" \
                  --arg value "${value}" \
                  --arg effect "${effect}" \
                  '{
                    spec: {
                      template: {
                        spec: {
                          tolerations: ((.spec.template.spec.tolerations // []) + [
                            {key: $key, operator: "Equal", value: $value, effect: $effect}
                          ])
                        }
                      }
                    }
                  }')"
                kubectl -n "${ns}" patch daemonset "${target}" --type=merge -p "${patch}"
              }

              patch_daemonset "${ds}"
              engine_daemonsets="$(kubectl -n "${ns}" get daemonset -l longhorn.io/component=engine-image -o jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')"
              # Unquoted on purpose: one DaemonSet name per whitespace-separated word.
              for engine_ds in ${engine_daemonsets}; do
                patch_daemonset "${engine_ds}"
              done

              # Wait (up to 90 attempts, 2s apart) for the Longhorn CSI driver
              # to appear in the target node's CSINode object.
              csi_ready="false"
              for attempt in $(seq 1 90); do
                if kubectl get csinode "${target_node}" -o json | jq -e '.spec.drivers[]? | select(.name == "driver.longhorn.io")' >/dev/null; then
                  echo "driver.longhorn.io registered on ${target_node}"
                  csi_ready="true"
                  break
                fi
                sleep 2
              done
              if [ "${csi_ready}" != "true" ]; then
                echo "driver.longhorn.io did not register on ${target_node} before timeout" >&2
                exit 1
              fi

              # Wait for a pod owned by each engine-image DaemonSet to be ready
              # on the target node. "length > 0" guards against jq's `all`
              # being true for an empty list, which would report a pod with no
              # container statuses yet as ready.
              for engine_ds in ${engine_daemonsets}; do
                for attempt in $(seq 1 90); do
                  if kubectl -n "${ns}" get pods -o json | jq -e \
                    --arg node "${target_node}" \
                    --arg engine_ds "${engine_ds}" \
                    '.items[] | select(.spec.nodeName == $node) | select(.metadata.ownerReferences[]?.name == $engine_ds) | select([.status.containerStatuses[]?.ready] | length > 0 and all)' >/dev/null; then
                    echo "${engine_ds} ready on ${target_node}"
                    break
                  fi
                  if [ "${attempt}" = "90" ]; then
                    echo "${engine_ds} did not become ready on ${target_node} before timeout" >&2
                    exit 1
                  fi
                  sleep 2
                done
              done

View File

@ -2,7 +2,7 @@
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-disk-tags-ensure-1
name: longhorn-disk-tags-ensure-3
namespace: longhorn-system
spec:
backoffLimit: 0

View File

@ -2,15 +2,18 @@
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-settings-ensure-4
name: longhorn-settings-ensure-10
namespace: longhorn-system
spec:
backoffLimit: 0
activeDeadlineSeconds: 240
ttlSecondsAfterFinished: 3600
template:
spec:
serviceAccountName: longhorn-service-account
restartPolicy: Never
nodeSelector:
kubernetes.io/hostname: titan-11
volumes:
- name: longhorn-settings-ensure-script
configMap:

View File

@ -17,10 +17,28 @@ import urllib.request
LONGHORN_NS = "longhorn-system"
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
DESIRED_TAGS = {
"/mnt/astreae": "astreae",
"/mnt/asteria": "asteria",
DESIRED_DISK_TAGS = {
"/mnt/astreae": ["astreae"],
"/mnt/asteria": ["asteria"],
"/mnt/veles": ["veles-oceanus", "veles-db", "veles-artifacts"],
"/mnt/veles-db": ["veles-oceanus", "veles-db"],
"/mnt/veles-artifacts": ["veles-oceanus", "veles-artifacts"],
}
DESIRED_NODE_TAGS = {
"titan-23": ["veles-oceanus"],
}
DESIRED_NODE_DISKS = {
"titan-23": {
"veles-oceanus": {
"path": "/mnt/veles",
"allowScheduling": True,
"evictionRequested": False,
"storageReserved": 0,
"tags": ["veles-oceanus", "veles-db", "veles-artifacts"],
}
}
}
DISABLE_DEFAULT_DISK_NODES = {"titan-23"}
def api_base() -> str:
@ -63,8 +81,30 @@ def list_nodes() -> list[dict]:
return data.get("items", [])
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None:
body = {"spec": {"disks": {disk_name: {"tags": [desired_tag]}}}}
def merged_tags(current_tags: list[str], desired_tags: list[str]) -> list[str]:
return sorted(dict.fromkeys([*current_tags, *desired_tags]))
def patch_node_tags(node_name: str, desired_tags: list[str]) -> None:
body = {"spec": {"tags": desired_tags}}
request_json(
"PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
body=body,
)
def patch_disk_tags(node_name: str, disk_name: str, desired_tags: list[str]) -> None:
body = {"spec": {"disks": {disk_name: {"tags": desired_tags}}}}
request_json(
"PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
body=body,
)
def patch_disks(node_name: str, disks: dict) -> None:
body = {"spec": {"disks": disks}}
request_json(
"PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
@ -78,18 +118,52 @@ def main() -> int:
for node in list_nodes():
name = node.get("metadata", {}).get("name", "")
desired_node_tags = DESIRED_NODE_TAGS.get(name)
if desired_node_tags:
current_node_tags = node.get("spec", {}).get("tags") or []
next_node_tags = merged_tags(current_node_tags, desired_node_tags)
if current_node_tags != next_node_tags:
print(f"patching {name} node tags={current_node_tags!r} -> {next_node_tags!r}")
patch_node_tags(name, next_node_tags)
changed += 1
else:
skipped += 1
spec_disks = node.get("spec", {}).get("disks", {}) or {}
desired_disks = DESIRED_NODE_DISKS.get(name, {})
missing_disks = {
disk_name: disk_spec
for disk_name, disk_spec in desired_disks.items()
if disk_name not in spec_disks
}
if missing_disks:
print(f"adding {name} disks={sorted(missing_disks)}")
patch_disks(name, missing_disks)
changed += len(missing_disks)
spec_disks = {**spec_disks, **missing_disks}
if name in DISABLE_DEFAULT_DISK_NODES:
disable_patch = {}
for disk_name, disk in spec_disks.items():
disk_path = (disk.get("path") or "").rstrip("/")
if disk_path == "/var/lib/longhorn" and disk.get("allowScheduling", True):
disable_patch[disk_name] = {"allowScheduling": False}
if disable_patch:
print(f"disabling default Longhorn scheduling on {name} disks={sorted(disable_patch)}")
patch_disks(name, disable_patch)
changed += len(disable_patch)
for disk_name, disk in spec_disks.items():
disk_path = disk.get("path")
desired_tag = DESIRED_TAGS.get(disk_path)
if not desired_tag:
desired_disk_tags = DESIRED_DISK_TAGS.get(disk_path)
if not desired_disk_tags:
continue
current_tags = disk.get("tags") or []
if current_tags == [desired_tag]:
if current_tags == desired_disk_tags:
skipped += 1
continue
print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {[desired_tag]!r}")
patch_disk_tags(name, disk_name, desired_tag)
print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {desired_disk_tags!r}")
patch_disk_tags(name, disk_name, desired_disk_tags)
changed += 1
print(f"done: changed={changed} skipped={skipped}")

View File

@ -4,11 +4,12 @@ set -eu
# Longhorn blocks direct CR patches for some settings; use the internal API instead.
api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
curl_opts="-fsS --connect-timeout 3 --max-time 15"
wait_for_api() {
attempts=30
while [ "${attempts}" -gt 0 ]; do
if curl -fsS "${api_base}" >/dev/null 2>&1; then
if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
return 0
fi
attempts=$((attempts - 1))
@ -22,17 +23,32 @@ update_setting() {
name="$1"
value="$2"
current="$(curl -fsS "${api_base}/${name}" || true)"
current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
echo "Setting ${name} already set."
return 0
fi
echo "Setting ${name} -> ${value}"
curl -fsS -X PUT \
out="$(mktemp)"
if curl ${curl_opts} -o "${out}" -X PUT \
-H "Content-Type: application/json" \
-d "{\"value\":\"${value}\"}" \
"${api_base}/${name}" >/dev/null
"${api_base}/${name}"; then
rm -f "${out}"
return 0
fi
current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
echo "Setting ${name} stored; Longhorn will apply it when current state allows."
rm -f "${out}"
return 0
fi
cat "${out}" >&2 || true
rm -f "${out}"
return 1
}
wait_for_api
@ -40,3 +56,8 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
update_setting taint-toleration "veles.bstein.dev/simulation=true:NoSchedule"
# Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
update_setting replica-auto-balance "best-effort"
update_setting concurrent-replica-rebuild-per-node-limit "2"
update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"

View File

@ -13,9 +13,27 @@ spec:
- objectName: "harbor-pull__dockerconfigjson"
secretPath: "kv/data/atlas/shared/harbor-pull"
secretKey: "dockerconfigjson"
- objectName: "longhorn-backup-b2__AWS_ACCESS_KEY_ID"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ACCESS_KEY_ID"
- objectName: "longhorn-backup-b2__AWS_SECRET_ACCESS_KEY"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_SECRET_ACCESS_KEY"
- objectName: "longhorn-backup-b2__AWS_ENDPOINTS"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ENDPOINTS"
secretObjects:
- secretName: longhorn-registry
type: kubernetes.io/dockerconfigjson
data:
- objectName: harbor-pull__dockerconfigjson
key: .dockerconfigjson
- secretName: longhorn-backup-b2
type: Opaque
data:
- objectName: longhorn-backup-b2__AWS_ACCESS_KEY_ID
key: AWS_ACCESS_KEY_ID
- objectName: longhorn-backup-b2__AWS_SECRET_ACCESS_KEY
key: AWS_SECRET_ACCESS_KEY
- objectName: longhorn-backup-b2__AWS_ENDPOINTS
key: AWS_ENDPOINTS

View File

@ -26,6 +26,16 @@ spec:
- key: hardware
operator: In
values: ["rpi5", "rpi4"]
- weight: 90
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
containers:
- name: sync
image: alpine:3.20

View File

@ -0,0 +1,60 @@
# infrastructure/longhorn/core/veles-recurring-jobs.yaml
# Longhorn RecurringJob definitions for the Veles volumes: one backup and one
# snapshot schedule each for the postgres and artifacts groups.

# Postgres backup: cron "30 5 * * *" (once a day at 05:30), keeps 7.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-postgres-backup
  namespace: longhorn-system
spec:
  name: veles-postgres-backup
  cron: "30 5 * * *"
  task: backup
  groups:
    - veles
    - veles-postgres
  retain: 7
  concurrency: 1
---
# Postgres snapshot: every 30 minutes, keeps 8.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-postgres-snapshot
  namespace: longhorn-system
spec:
  name: veles-postgres-snapshot
  cron: "*/30 * * * *"
  task: snapshot
  groups:
    - veles
    - veles-postgres
  retain: 8
  concurrency: 1
---
# Artifacts backup: cron "45 5 * * *" (once a day at 05:45), keeps 7.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-artifacts-backup
  namespace: longhorn-system
spec:
  name: veles-artifacts-backup
  cron: "45 5 * * *"
  task: backup
  groups:
    - veles
    - veles-artifacts
  retain: 7
  concurrency: 1
---
# Artifacts snapshot: minute 15 of every sixth hour, keeps 8.
apiVersion: longhorn.io/v1beta2
kind: RecurringJob
metadata:
  name: veles-artifacts-snapshot
  namespace: longhorn-system
spec:
  name: veles-artifacts-snapshot
  cron: "15 */6 * * *"
  task: snapshot
  groups:
    - veles
    - veles-artifacts
  retain: 8
  concurrency: 1

View File

@ -3,3 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- scavenger.yaml
- veles.yaml

View File

@ -0,0 +1,17 @@
# infrastructure/modules/base/priorityclass/veles.yaml
# Two PriorityClasses for Veles; neither is the cluster-wide default.

# Higher of the two priorities (500).
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: veles-core
value: 500
globalDefault: false
description: "For Veles core database, API, and controller workloads"
---
# Lower priority (50); preemptionPolicy Never means pods of this class do not
# preempt other pods.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: veles-sim
value: 50
globalDefault: false
preemptionPolicy: Never
description: "For Veles simulation jobs; lower than core and non-preempting"

View File

@ -5,3 +5,6 @@ resources:
- asteria.yaml
- asteria-encrypted.yaml
- astreae.yaml
- veles-oceanus-db.yaml
- veles-oceanus-artifacts.yaml
- veles-oceanus-policy.yaml

View File

@ -0,0 +1,21 @@
# infrastructure/modules/base/storageclass/veles-oceanus-artifacts.yaml
# Longhorn StorageClass for Veles artifact volumes: a single strict-local
# replica, with the artifacts backup and snapshot recurring jobs attached by
# name.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: veles-oceanus-artifacts
  annotations:
    # The annotation value names the namespace this class is meant for.
    veles.bstein.dev/allowed-namespace: veles
provisioner: driver.longhorn.io
parameters:
  # Node and disk tags a replica must match.
  nodeSelector: veles-oceanus
  diskSelector: veles-oceanus,veles-artifacts
  fromBackup: ""
  numberOfReplicas: "1"
  staleReplicaTimeout: "30"
  fsType: ext4
  replicaAutoBalance: disabled
  dataLocality: strict-local
  recurringJobSelector: '[{"name":"veles-artifacts-backup","isGroup":false},{"name":"veles-artifacts-snapshot","isGroup":false}]'
reclaimPolicy: Retain
allowVolumeExpansion: true
volumeBindingMode: WaitForFirstConsumer

View File

@ -0,0 +1,21 @@
# infrastructure/modules/base/storageclass/veles-oceanus-db.yaml
# Longhorn StorageClass for Veles database volumes: a single strict-local
# replica, with the postgres backup and snapshot recurring jobs attached by
# name.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: veles-oceanus-db
  annotations:
    # The annotation value names the namespace this class is meant for.
    veles.bstein.dev/allowed-namespace: veles
provisioner: driver.longhorn.io
parameters:
  # Node and disk tags a replica must match.
  nodeSelector: veles-oceanus
  diskSelector: veles-oceanus,veles-db
  fromBackup: ""
  numberOfReplicas: "1"
  staleReplicaTimeout: "30"
  fsType: ext4
  replicaAutoBalance: disabled
  dataLocality: strict-local
  recurringJobSelector: '[{"name":"veles-postgres-backup","isGroup":false},{"name":"veles-postgres-snapshot","isGroup":false}]'
reclaimPolicy: Retain
allowVolumeExpansion: true
volumeBindingMode: WaitForFirstConsumer

View File

@ -0,0 +1,25 @@
# infrastructure/modules/base/storageclass/veles-oceanus-policy.yaml
# Admission policy that rejects PersistentVolumeClaims using either Veles
# Oceanus StorageClass unless the claim is in namespace "veles".
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicy
metadata:
  name: veles-oceanus-storage-namespace
spec:
  failurePolicy: Fail
  matchConstraints:
    resourceRules:
      # Core-group v1 PVCs, on create and update.
      - apiGroups:
          - ""
        apiVersions:
          - "v1"
        operations:
          - "CREATE"
          - "UPDATE"
        resources:
          - "persistentvolumeclaims"
  validations:
    # Passes when no class is set, the class is not a Veles Oceanus one, or the
    # claim's namespace is veles.
    - expression: "!has(object.spec.storageClassName) || !(object.spec.storageClassName in ['veles-oceanus-db', 'veles-oceanus-artifacts']) || object.metadata.namespace == 'veles'"
      message: "Veles Oceanus storage classes are reserved for namespace veles"
---
# Binds the policy above with the Deny action.
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingAdmissionPolicyBinding
metadata:
  name: veles-oceanus-storage-namespace
spec:
  policyName: veles-oceanus-storage-namespace
  validationActions:
    - Deny

View File

@ -25,6 +25,7 @@ spec:
serviceAccountName: postgres-vault
nodeSelector:
node-role.kubernetes.io/worker: "true"
hardware: rpi5
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
@ -35,7 +36,17 @@ spec:
values: ["true"]
- key: hardware
operator: In
values: ["rpi4", "rpi5"]
values: ["rpi5"]
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-06"]
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: In
values: ["titan-05", "titan-07", "titan-08", "titan-11"]
containers:
- name: postgres
image: postgres:15

View File

@ -0,0 +1,5 @@
# infrastructure/resource-guardrails/kustomization.yaml
# Kustomize entry point for the resource-guardrails directory; the only
# resource it pulls in is limitranges.yaml.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - limitranges.yaml

Some files were not shown because too many files have changed in this diff Show More