Compare commits
694 Commits
feature/ss
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cc51eb6d1e | |||
| aa608fbf0f | |||
| 436e56c5de | |||
| dda943ce16 | |||
| 043d1cbab3 | |||
| da94cc6f97 | |||
| 7c0a25a0eb | |||
| 7194cad0a8 | |||
| eb567fda06 | |||
| a9d74a066f | |||
| 19b52ac5e3 | |||
| 885e7b6489 | |||
| 8316e5dd15 | |||
| be82109d4e | |||
| 971848558a | |||
| 980c2cf1cc | |||
| 08ac598181 | |||
| 349a46ceab | |||
| 666dcb3faa | |||
| 769d3f41bf | |||
| 62e0a565f5 | |||
| 2a2179a138 | |||
| c1e94d56c8 | |||
| 244578cc01 | |||
| 0146e3dc95 | |||
| 48c379dc88 | |||
| 6001876409 | |||
| 2fe3d5b932 | |||
| 474c472b1d | |||
| 6578a8b08a | |||
| 44c22e3d00 | |||
| 2af817b9db | |||
| 2d90005076 | |||
| a10050e4c7 | |||
| b34f2abefd | |||
| 9409c037c9 | |||
| 3a2bb1bac9 | |||
| f43acaa554 | |||
| c5a7eece35 | |||
| 19d10ce585 | |||
| 7b1c891e70 | |||
| 67ca0d451d | |||
| 4b468b0f97 | |||
| 380aae3b2c | |||
| b9b25565a2 | |||
| 24b0ac78c4 | |||
| 23533e08ee | |||
| fc10eed704 | |||
| ca7a08e791 | |||
| 868075426c | |||
| 029e4d4ca6 | |||
| e97aaafed9 | |||
| 38c8d08ab4 | |||
| ba16f5119b | |||
| 2fe763189d | |||
| 832d5acf68 | |||
| 27e8a77044 | |||
| 65e50d1923 | |||
| e486245aaf | |||
| 34c91c6d08 | |||
| 9e06d7afc8 | |||
| 18e543d95a | |||
| 20364a262c | |||
| 8842662239 | |||
| 12fa7d02aa | |||
| 9bf822ec36 | |||
| ea8eda2c73 | |||
| 243d3112ce | |||
| 4bab34eae1 | |||
| 8bd4d9fc7a | |||
| 69d121aa07 | |||
| 79650616f1 | |||
| c4ad82f122 | |||
| 4e51cf6b6c | |||
| 51bf01a8fd | |||
| 4e6d4f43b2 | |||
| 58dab1ca79 | |||
| 113bcdeded | |||
| e05a949b9f | |||
| 0a10a2d861 | |||
|
|
6fead623fa | ||
|
|
ad01659cc4 | ||
| b04092b63c | |||
| e87fa4369c | |||
| 1b04e6cb00 | |||
| 5f32dff73b | |||
| dfb295e5f0 | |||
| a7f3d49fea | |||
| 246ed6617e | |||
| 1951291090 | |||
| 62a423f32c | |||
|
|
dedf566993 | ||
| 354275f3ad | |||
| 3f159c6c83 | |||
| 631bd09778 | |||
| b7792d30f1 | |||
| 241a8889ee | |||
| 864f1cab20 | |||
| dea70df209 | |||
| f649a6a9a2 | |||
| ca3cfaf1fc | |||
|
|
1682ccfb25 | ||
|
|
18a4c58338 | ||
| 92f4137e9c | |||
| cb7141dfb6 | |||
| cd45b7faba | |||
|
|
d03c846779 | ||
|
|
a00bab5ee7 | ||
| 975783a6b9 | |||
| c3b2c0cebb | |||
| d2ade61d88 | |||
| d74277a8bd | |||
| 31fbe48ca3 | |||
| 70feb1ef85 | |||
| 159c9cfe68 | |||
| b7f454b790 | |||
| 41b131c347 | |||
| 3b1e74d278 | |||
| d8ae9c5901 | |||
| 32851ca057 | |||
| 32125d7bab | |||
| a442ea6d5d | |||
| c0dd00c93d | |||
| 446115f07a | |||
| a2f4c51e1d | |||
|
|
4fcecc4707 | ||
|
|
1459027abc | ||
| 89935a579a | |||
|
|
b1aad04f3e | ||
| 2dc208e919 | |||
| 292d513e10 | |||
| 11ba37a4b2 | |||
| d6b9d64e70 | |||
| 67b9babc0e | |||
| c219019ad5 | |||
| 0ef14c67fd | |||
| 39fd7adb55 | |||
| 600c124ef2 | |||
|
|
5e4a974733 | ||
| f7fc152439 | |||
| bab914c58f | |||
| e24ff4782c | |||
| 9ecdf054d3 | |||
|
|
d9c8632b8d | ||
|
|
d325111f34 | ||
| adc711be62 | |||
| 66ce0caaf4 | |||
| 9ea338b121 | |||
| 270dc93966 | |||
| 0331e7ea99 | |||
|
|
f08d740d83 | ||
|
|
328241b7ac | ||
| c8662a624e | |||
| 689bf10995 | |||
| 37a203509b | |||
|
|
6c413d4a50 | ||
| 1616994b19 | |||
| ec834b7e0f | |||
| 8c90e0e527 | |||
| 6432472be7 | |||
| 72bd22e912 | |||
|
|
879a751429 | ||
| b0abb9bd6e | |||
| b27c80d5c0 | |||
| a61091c052 | |||
| 16d0a22163 | |||
| 2d09e7f965 | |||
| bf2d4cff90 | |||
| 3e4351ef19 | |||
| ff04341559 | |||
| d666e6a156 | |||
| b6e8c01e99 | |||
| 0d5e19e11a | |||
| dfa13e22cc | |||
| 65781aaca7 | |||
| 7bb1bd96fc | |||
| be7846572f | |||
| 0ac0f920ca | |||
| 33b5e2b678 | |||
| fff00dbe95 | |||
| 53e4b4036b | |||
| 28570a1f5c | |||
| 2c3ffdbf95 | |||
| fec7713049 | |||
| 352d4991f4 | |||
| 14d18048d5 | |||
| 7fd71f4bab | |||
|
|
f14be5d7ef | ||
| 10003ca0d7 | |||
| 5aac018a7b | |||
| 36f7de76e9 | |||
| 5f0bc3832d | |||
| cd6eaff7cb | |||
| 83b8e13661 | |||
| ec6b51cfd2 | |||
|
|
04465407d2 | ||
|
|
5a994f4d42 | ||
|
|
af9fcdeae9 | ||
|
|
39df6ff039 | ||
|
|
70e79f25b0 | ||
|
|
f471a30499 | ||
| ee154f1494 | |||
|
|
d0c69cd480 | ||
|
|
6e4e2bdc0c | ||
|
|
0b7d87cef4 | ||
|
|
a27bb0e198 | ||
|
|
cf2d0c5eff | ||
|
|
00eb4be529 | ||
|
|
8b1b824a29 | ||
|
|
a7f5a60190 | ||
|
|
eeb84e8e70 | ||
|
|
82312d0fbf | ||
| 292ec7359b | |||
|
|
473bebaf52 | ||
|
|
d07f14826b | ||
| e7d18be4ed | |||
|
|
437281f6a5 | ||
|
|
67643e3fad | ||
|
|
38d2dad28f | ||
|
|
82fceb11a4 | ||
|
|
8e6d9e1c37 | ||
|
|
a603b3726f | ||
|
|
e43340f2a1 | ||
|
|
115f86907f | ||
|
|
aaef2b7ab5 | ||
|
|
c24f2dafc1 | ||
|
|
d9c3ff8195 | ||
| b94b016b0f | |||
|
|
5ec4bb9c61 | ||
|
|
e2501bd3d0 | ||
|
|
bc2e1058d6 | ||
|
|
45352f79ba | ||
|
|
7b336c76a1 | ||
|
|
0127c62f51 | ||
|
|
ee6ef74982 | ||
| d521c66d60 | |||
|
|
c28444a233 | ||
|
|
8bdf60542d | ||
|
|
0758c2e06d | ||
|
|
00bcc0d4c2 | ||
|
|
60840d1171 | ||
| 3338efa58e | |||
| a988af3262 | |||
|
|
ef42dac97b | ||
|
|
df3f4a0c0b | ||
| fda986ab3d | |||
|
|
ca47e03953 | ||
|
|
3d4208f877 | ||
| 3d2e0ead1c | |||
| 18ac46d4b8 | |||
| 3cacbad4c0 | |||
| 3d633a5627 | |||
| 58d9cb616f | |||
|
|
3474df40d4 | ||
|
|
4c66b538a7 | ||
|
|
2475d4ca9d | ||
|
|
1d39015d33 | ||
|
|
e0bf10cad9 | ||
|
|
72e6a09bd0 | ||
|
|
b1fa40acc1 | ||
|
|
e3247f606f | ||
|
|
2dc680b8f8 | ||
|
|
8dedefb4b4 | ||
|
|
a18f7e98a2 | ||
|
|
62d16ae388 | ||
|
|
d3d680383b | ||
|
|
8545f2bc50 | ||
|
|
5ca247f143 | ||
|
|
4d566a7388 | ||
|
|
8913c5a5f2 | ||
|
|
25c4f3e07b | ||
|
|
8b7e21f0cc | ||
|
|
301909f92e | ||
|
|
0c27b48a1c | ||
|
|
71996fb199 | ||
|
|
7c9ee41180 | ||
| ce5b1d1353 | |||
| 820e624a0b | |||
|
|
cca3a756b3 | ||
|
|
1e815ce011 | ||
|
|
e5281ad4c0 | ||
|
|
1e8a67904c | ||
|
|
0290a5f715 | ||
| 9b5d8ac45c | |||
|
|
05c7642f5c | ||
|
|
efa893b134 | ||
|
|
7eba40a889 | ||
|
|
8b90b44dfd | ||
|
|
21800290ec | ||
| ec5e4ec4a3 | |||
|
|
af024aa16a | ||
|
|
da32ba1680 | ||
| 8788d40dc6 | |||
| d509dfaa22 | |||
| 156effebe3 | |||
| 8e3fe266aa | |||
| 3fc17b0c7c | |||
| d9695d32f6 | |||
| 0697d7b1b3 | |||
| d2f118ed32 | |||
| 5e35b5f7a2 | |||
| 94953ab0fe | |||
| ba2b9acbcc | |||
|
|
955bbcf58f | ||
|
|
62c0e32bc4 | ||
|
|
6dcbdcf704 | ||
|
|
c84af0b8df | ||
|
|
3891f1d063 | ||
|
|
beb923cf0e | ||
|
|
aa3db22eaf | ||
|
|
592435f760 | ||
|
|
d54115df55 | ||
|
|
75e2c745f7 | ||
|
|
71122fc200 | ||
|
|
41d38033b5 | ||
|
|
067134fa1b | ||
|
|
eb5256e6bc | ||
|
|
d3b1a925b8 | ||
|
|
6f4e5dbfe7 | ||
|
|
d9cda5b6af | ||
|
|
30b86a693f | ||
|
|
da16998d2e | ||
|
|
3a48569330 | ||
|
|
3a987c29ff | ||
|
|
66cb72947f | ||
|
|
1039590b14 | ||
|
|
298d261146 | ||
| 4721d44a33 | |||
| db4c3b7c51 | |||
| b0996e9a4f | |||
|
|
2138b93242 | ||
| 8b35ab0292 | |||
| 2e407e1962 | |||
| 5ae6b4b00c | |||
| ae1fd5b661 | |||
| 4e65f02fba | |||
| 88de0f7cee | |||
| 08716c6be6 | |||
| a0caeb407c | |||
| 6eeb551239 | |||
| 98b063f2dd | |||
| 698b2fd96b | |||
|
|
a9f6b04baa | ||
|
|
d8a3b5250e | ||
|
|
4484fed039 | ||
| 7cf5e7e39d | |||
| 4de4630911 | |||
| 6ac3b41b30 | |||
| 810e4c0efb | |||
| 5e4ed17942 | |||
| a41ac1548c | |||
| b87fe4899c | |||
| 0efc1ed6c4 | |||
| 439d824300 | |||
| 80a7ec26e2 | |||
| 0d4f14c397 | |||
| fb6ddce0c7 | |||
| 1fedb5ecbe | |||
| 0bb45bca83 | |||
| c846d2c1ba | |||
| 163f98c594 | |||
| 954d0d36b9 | |||
| 6db7521114 | |||
| 13891e794a | |||
| 1522b7a019 | |||
| 5c40efdbcc | |||
| 9ac66919d5 | |||
| c80f26625d | |||
| f5eec19e11 | |||
| b54da8e3e0 | |||
| 9f6824ad56 | |||
| 0d3c5eb976 | |||
| 9cdf244d98 | |||
| 36ae49f1fc | |||
| b8d8240383 | |||
| fe30570b62 | |||
| 8e9db51f9d | |||
| ea6e600007 | |||
| b8f2d00547 | |||
| 132074f0ff | |||
| 56b36330b2 | |||
| 557663f524 | |||
| 5fe8866623 | |||
| e2e7e58f32 | |||
| 95a7ac235f | |||
| 814d1ce211 | |||
| d996bda2c1 | |||
| 2bbbf019ff | |||
| 34fb371270 | |||
| 14864a3b8c | |||
| cfcda87f67 | |||
| cac8a3cdde | |||
| 3e0260b945 | |||
| a8be46b422 | |||
| a86d68ca74 | |||
| f527da9cdb | |||
| 8be01698a9 | |||
| 278b4541a2 | |||
| 7d999cc6c6 | |||
| cffe53edbe | |||
| 1b2243e2a8 | |||
| 34c42cfb62 | |||
| 84cd05b08a | |||
| 9ff88f7f13 | |||
| 901f3e797c | |||
| 4b0d8fb301 | |||
| c1f0ea421d | |||
| 67e422f56f | |||
| c7e81674b0 | |||
| cff3ed0759 | |||
| 7171e5a9ea | |||
| 776aea25f5 | |||
| fbdf53a9a8 | |||
| a0c3b9f953 | |||
| 61619ddf77 | |||
| ff3ed195ac | |||
| bb41c219f6 | |||
| 791108723e | |||
| c4ce7e3981 | |||
| 2c546f8eae | |||
| b09679a812 | |||
| 89316a5901 | |||
| 35816115f8 | |||
| 2802c1e8b6 | |||
| d943359606 | |||
| 21899b8a79 | |||
| bed3563ae6 | |||
| d5a19ca9c3 | |||
| f4b08b93eb | |||
| aaf7e23603 | |||
| 67203d1147 | |||
| 6935de7a6c | |||
| fe9132e45e | |||
| b6609a9706 | |||
| 73c829c81f | |||
| 979470eeb8 | |||
| da200235bb | |||
| ae3b0afbff | |||
| 0eb526c907 | |||
| c70054a30e | |||
| 084242746e | |||
| a5bec3e543 | |||
| 6e3faeb9fd | |||
| 0b15007e2c | |||
| 435ed5d426 | |||
| 1fb3d179ef | |||
| d7812623cd | |||
| 4874ccda4d | |||
| 8b8d2c4aa8 | |||
| 343d41ecc7 | |||
| a6ac0c363e | |||
| 0d27107411 | |||
| c9cb088198 | |||
| 7cd2f3c587 | |||
| 4c4c0867a7 | |||
| 9c2cb1b037 | |||
| 418d201da0 | |||
| f753f114c7 | |||
| 74f089dc21 | |||
| a9b94c87be | |||
| 792b7b1417 | |||
| 0ddbb5ec79 | |||
| e64ba4ca3c | |||
| 758610dff0 | |||
| b576da53c2 | |||
| f91459e55a | |||
| e729adc6ef | |||
| 96b93a1687 | |||
| 578ef5e830 | |||
| ebb300b939 | |||
| be10e01c2f | |||
| 5f1b61d25e | |||
| 0e3c8ef952 | |||
| 6997d5e202 | |||
| f9830c6678 | |||
| 1293ffe0a5 | |||
| 69d67b39a5 | |||
| 931e41a76f | |||
| f15b80872e | |||
| df3a56656d | |||
| 309931f7a5 | |||
| 6cf46cf789 | |||
| 16b7fcd120 | |||
| 8192dfeebe | |||
| 71bab17665 | |||
| 356dba3a33 | |||
| 268a1d9449 | |||
| acfab6a150 | |||
| 728f2cd2ee | |||
| ef5ac62544 | |||
| ee622cbb0b | |||
| a9c2d3c5e8 | |||
| 008130f8d0 | |||
| 376eae3fa1 | |||
| ba546bf63f | |||
| 84fa9e7dbc | |||
| 9a3c3a3d3e | |||
| 36d0df817a | |||
| cee565892b | |||
| b0ac30e719 | |||
| 343165b2fa | |||
| 3cf34b53e9 | |||
| c5b8396bd8 | |||
| 6028d82aa3 | |||
| 1cc1b9bea5 | |||
| 3274b9257c | |||
| 1a3d35094e | |||
| 9047dfa3b5 | |||
| 9dd2a72063 | |||
| 9eedcad520 | |||
| 64d0a70191 | |||
| cd60ebc982 | |||
| 928b2a8706 | |||
| 7b009caf97 | |||
| 86ea701ff0 | |||
| 6ec0414fcd | |||
| 33e35193fb | |||
| 1b4f46bb41 | |||
| 5eff31595e | |||
| 622c7acaa4 | |||
| 8f990031f1 | |||
| a9351bc737 | |||
| f4c6827c8c | |||
| 62fa6ef371 | |||
| 3e3061fe5b | |||
| 354a803ff4 | |||
| 368dd81c5e | |||
| e1bd962956 | |||
| d9fabbf353 | |||
| 55992ea48f | |||
| 42e987f4ee | |||
| 71a1a55a01 | |||
| f8ffa830b7 | |||
| 8535d50faa | |||
| dc62b4998b | |||
| 2f176d5a36 | |||
| 1fb7b27de4 | |||
| b07f32e7c8 | |||
| d9d31f7701 | |||
| 1eb7d58259 | |||
| 401df4d68c | |||
| 4406724da5 | |||
| 7c3006736c | |||
| 9f3d2db63d | |||
| beb646f78f | |||
| 4faa039a8e | |||
| ef504eea80 | |||
| 671d4d5dce | |||
| 9474ab97f2 | |||
| cf5d7dfa00 | |||
| 5cd196e043 | |||
| 8ad9f0a664 | |||
| f5231d282b | |||
| bb1bf3c017 | |||
| b1489a8dd9 | |||
| 5816d4f399 | |||
| d90950b82e | |||
| 66e7e6acc5 | |||
| 7817248eb9 | |||
| 9993b501a6 | |||
| a2b2c7db9d | |||
| 8db4b4f0b5 | |||
| 70a52dec06 | |||
| c759fb1dbb | |||
| c0d0e64bc6 | |||
| 5899c9acb3 | |||
| de6665c450 | |||
| e6210644c2 | |||
| c30f1fc587 | |||
| bf9a24681c | |||
| 69cee91dda | |||
| 2ccc33b105 | |||
| 760c9cbe6b | |||
| 76151a082c | |||
| c7fa52ab27 | |||
| 88f862e18a | |||
| 4dba510d6f | |||
| 9a9ecc4903 | |||
| a7998fc0bf | |||
| 72d49f88fe | |||
| fb992f0cff | |||
| 53da4c20ab | |||
| f9fa6dcbb4 | |||
| 2ecd274f28 | |||
| feb9d6997c | |||
| 9e6673d02e | |||
| d69545cdb5 | |||
| 756a1af2e6 | |||
| 74a2b3e28d | |||
| 84ccf35c44 | |||
| e885c7d6ce | |||
| 86c9951cc4 | |||
| 85c3d9c2f7 | |||
| cd14e70d02 | |||
| f5a3894c2b | |||
| 511403c4a6 | |||
| 8fed4a08c5 | |||
| 7f96daa7b8 | |||
| 139ca78c3d | |||
| 836ce605b6 | |||
| 88be97d860 | |||
| 35dcc5d66c | |||
| c1b771298a | |||
| e94ea272ce | |||
| 81e79fd19a | |||
| 3af97973e0 | |||
| 0733127039 | |||
| 82090c1953 | |||
| 6c8d3b24f2 | |||
| d898c71c08 | |||
| 52cc04dee9 | |||
| 98cdafb162 | |||
| 0b21c8f40d | |||
| e8d004c1b9 | |||
| c38f77302f | |||
| 4bb6c7e212 | |||
| e391a78f25 | |||
| 349a6cca3b | |||
| 71f533ca1f | |||
| 9652d9d3cf | |||
| 22e3004b0a | |||
| 9743064ad3 | |||
| 8a750ac3ab | |||
| eeeb69fb7a | |||
| 713fedfe73 | |||
| c98d24e91e | |||
| 4ff2f3e889 | |||
| bb9a4e6d8b | |||
| fb671865e5 | |||
| fb9578b624 | |||
| 4f1fb62ab3 | |||
| 98d67293bc | |||
| f6fc250fe1 | |||
| 393916ded9 | |||
| e92cfa7dba | |||
| d559aeb464 | |||
| 6ba509dbe1 | |||
| ab50780f49 | |||
| 9c16d0fbc0 | |||
| 89f4b0dbdf | |||
| 58c880d9ce | |||
| 92fbde08eb | |||
| 0aa16757e9 | |||
| 36fb225cbd | |||
| 16c62d5a4a | |||
| 1add32e683 | |||
| b1f9df4d83 | |||
| b8e50bb0a6 | |||
| 37302664c2 | |||
| 5683b3f941 | |||
| 9ec08e1dc2 | |||
| 6898641b0a | |||
| 35369d53d8 | |||
| 96a7c67674 | |||
| de3db3133b | |||
| 8d526e383f | |||
| bb2a3ba904 | |||
| 3384533acd | |||
| 4111fb079f | |||
| fd2ae6bdd5 | |||
| 8a358832f3 | |||
| c3541b72c3 | |||
| 55234f8536 | |||
| 50aec198a4 | |||
| cb5796cb71 | |||
| 5a9ceeab24 | |||
| b82195f2d7 | |||
| 1d894ea80f | |||
| 537d304b36 | |||
| e776f004c9 | |||
| 8fa38268d9 | |||
| 4a1c4766b8 | |||
| bcc15c3e0a | |||
| 0b5dcde3a3 | |||
| 46777f9ec9 | |||
| 98554e5fa4 | |||
| b97146f4d1 | |||
| 928b9379d8 | |||
| b710f45e5c | |||
| e6a3ae5f7b | |||
| 71fd00d845 | |||
| fa8ec588a8 | |||
| 47f0d1736e | |||
| 098a06e723 | |||
| bcef167b50 | |||
| fbde129d4c | |||
| 4332ded0c3 | |||
| bbe5ded0a6 | |||
| 4602656578 | |||
| 8ee7d046d2 | |||
| b7798db4f1 | |||
| c2bc8a9512 | |||
| 07fde43749 | |||
| 730b9775a3 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -6,3 +6,5 @@ __pycache__/
|
||||
*.py[cod]
|
||||
.pytest_cache
|
||||
.venv
|
||||
.venv-ci
|
||||
tmp/
|
||||
|
||||
77
Jenkinsfile
vendored
Normal file
77
Jenkinsfile
vendored
Normal file
@ -0,0 +1,77 @@
|
||||
// Mirror of ci/Jenkinsfile.titan-iac for multibranch discovery.
|
||||
pipeline {
|
||||
agent {
|
||||
kubernetes {
|
||||
defaultContainer 'python'
|
||||
yaml """
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
spec:
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
kubernetes.io/arch: arm64
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: python
|
||||
image: python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
"""
|
||||
}
|
||||
}
|
||||
environment {
|
||||
PIP_DISABLE_PIP_VERSION_CHECK = '1'
|
||||
PYTHONUNBUFFERED = '1'
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
steps {
|
||||
checkout scm
|
||||
}
|
||||
}
|
||||
stage('Install deps') {
|
||||
steps {
|
||||
sh 'pip install --no-cache-dir -r ci/requirements.txt'
|
||||
}
|
||||
}
|
||||
stage('Glue tests') {
|
||||
steps {
|
||||
sh 'pytest -q ci/tests/glue'
|
||||
}
|
||||
}
|
||||
stage('Resolve Flux branch') {
|
||||
steps {
|
||||
script {
|
||||
env.FLUX_BRANCH = sh(
|
||||
returnStdout: true,
|
||||
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
|
||||
).trim()
|
||||
if (!env.FLUX_BRANCH) {
|
||||
error('Flux branch not found in gotk-sync.yaml')
|
||||
}
|
||||
echo "Flux branch: ${env.FLUX_BRANCH}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Promote') {
|
||||
when {
|
||||
expression {
|
||||
def branch = env.BRANCH_NAME ?: (env.GIT_BRANCH ?: '').replaceFirst('origin/', '')
|
||||
return env.FLUX_BRANCH && branch == env.FLUX_BRANCH
|
||||
}
|
||||
}
|
||||
steps {
|
||||
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
|
||||
sh '''
|
||||
set +x
|
||||
git config user.email "jenkins@bstein.dev"
|
||||
git config user.name "jenkins"
|
||||
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
|
||||
git push origin HEAD:${FLUX_BRANCH}
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
76
ci/Jenkinsfile.titan-iac
Normal file
76
ci/Jenkinsfile.titan-iac
Normal file
@ -0,0 +1,76 @@
|
||||
pipeline {
|
||||
agent {
|
||||
kubernetes {
|
||||
defaultContainer 'python'
|
||||
yaml """
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
spec:
|
||||
nodeSelector:
|
||||
hardware: rpi5
|
||||
kubernetes.io/arch: arm64
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
containers:
|
||||
- name: python
|
||||
image: python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
"""
|
||||
}
|
||||
}
|
||||
environment {
|
||||
PIP_DISABLE_PIP_VERSION_CHECK = '1'
|
||||
PYTHONUNBUFFERED = '1'
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
steps {
|
||||
checkout scm
|
||||
}
|
||||
}
|
||||
stage('Install deps') {
|
||||
steps {
|
||||
sh 'pip install --no-cache-dir -r ci/requirements.txt'
|
||||
}
|
||||
}
|
||||
stage('Glue tests') {
|
||||
steps {
|
||||
sh 'pytest -q ci/tests/glue'
|
||||
}
|
||||
}
|
||||
stage('Resolve Flux branch') {
|
||||
steps {
|
||||
script {
|
||||
env.FLUX_BRANCH = sh(
|
||||
returnStdout: true,
|
||||
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
|
||||
).trim()
|
||||
if (!env.FLUX_BRANCH) {
|
||||
error('Flux branch not found in gotk-sync.yaml')
|
||||
}
|
||||
echo "Flux branch: ${env.FLUX_BRANCH}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Promote') {
|
||||
when {
|
||||
expression {
|
||||
def branch = env.BRANCH_NAME ?: (env.GIT_BRANCH ?: '').replaceFirst('origin/', '')
|
||||
return env.FLUX_BRANCH && branch == env.FLUX_BRANCH
|
||||
}
|
||||
}
|
||||
steps {
|
||||
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
|
||||
sh '''
|
||||
set +x
|
||||
git config user.email "jenkins@bstein.dev"
|
||||
git config user.name "jenkins"
|
||||
git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
|
||||
git push origin HEAD:${FLUX_BRANCH}
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
4
ci/requirements.txt
Normal file
4
ci/requirements.txt
Normal file
@ -0,0 +1,4 @@
|
||||
pytest==8.3.4
|
||||
kubernetes==30.1.0
|
||||
PyYAML==6.0.2
|
||||
requests==2.32.3
|
||||
16
ci/tests/glue/config.yaml
Normal file
16
ci/tests/glue/config.yaml
Normal file
@ -0,0 +1,16 @@
|
||||
max_success_age_hours: 48
|
||||
allow_suspended:
|
||||
- bstein-dev-home/vaultwarden-cred-sync
|
||||
- comms/othrys-room-reset
|
||||
- comms/pin-othrys-invite
|
||||
- comms/seed-othrys-room
|
||||
- finance/firefly-user-sync
|
||||
- health/wger-admin-ensure
|
||||
- health/wger-user-sync
|
||||
- mailu-mailserver/mailu-sync-nightly
|
||||
- nextcloud/nextcloud-mail-sync
|
||||
ariadne_schedule_tasks:
|
||||
- schedule.mailu_sync
|
||||
- schedule.nextcloud_sync
|
||||
- schedule.vaultwarden_sync
|
||||
- schedule.wger_admin
|
||||
46
ci/tests/glue/test_glue_cronjobs.py
Normal file
46
ci/tests/glue/test_glue_cronjobs.py
Normal file
@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from kubernetes import client, config
|
||||
|
||||
|
||||
CONFIG_PATH = Path(__file__).with_name("config.yaml")
|
||||
|
||||
|
||||
def _load_config() -> dict:
|
||||
with CONFIG_PATH.open("r", encoding="utf-8") as handle:
|
||||
return yaml.safe_load(handle) or {}
|
||||
|
||||
|
||||
def _load_kube():
|
||||
try:
|
||||
config.load_incluster_config()
|
||||
except config.ConfigException:
|
||||
config.load_kube_config()
|
||||
|
||||
|
||||
def test_glue_cronjobs_recent_success():
|
||||
cfg = _load_config()
|
||||
max_age_hours = int(cfg.get("max_success_age_hours", 48))
|
||||
allow_suspended = set(cfg.get("allow_suspended", []))
|
||||
|
||||
_load_kube()
|
||||
batch = client.BatchV1Api()
|
||||
cronjobs = batch.list_cron_job_for_all_namespaces(label_selector="atlas.bstein.dev/glue=true").items
|
||||
|
||||
assert cronjobs, "No glue cronjobs found with atlas.bstein.dev/glue=true"
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
for cronjob in cronjobs:
|
||||
name = f"{cronjob.metadata.namespace}/{cronjob.metadata.name}"
|
||||
if cronjob.spec.suspend:
|
||||
assert name in allow_suspended, f"{name} is suspended but not in allow_suspended"
|
||||
continue
|
||||
|
||||
last_success = cronjob.status.last_successful_time
|
||||
assert last_success is not None, f"{name} has no lastSuccessfulTime"
|
||||
age_hours = (now - last_success).total_seconds() / 3600
|
||||
assert age_hours <= max_age_hours, f"{name} last success {age_hours:.1f}h ago"
|
||||
48
ci/tests/glue/test_glue_metrics.py
Normal file
48
ci/tests/glue/test_glue_metrics.py
Normal file
@ -0,0 +1,48 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
|
||||
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
|
||||
CONFIG_PATH = Path(__file__).with_name("config.yaml")
|
||||
|
||||
|
||||
def _load_config() -> dict:
|
||||
with CONFIG_PATH.open("r", encoding="utf-8") as handle:
|
||||
return yaml.safe_load(handle) or {}
|
||||
|
||||
|
||||
def _query(promql: str) -> list[dict]:
|
||||
response = requests.get(f"{VM_URL}/api/v1/query", params={"query": promql}, timeout=10)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
return payload.get("data", {}).get("result", [])
|
||||
|
||||
|
||||
def test_glue_metrics_present():
|
||||
series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
|
||||
assert series, "No glue cronjob label series found"
|
||||
|
||||
|
||||
def test_glue_metrics_success_join():
|
||||
query = (
|
||||
"kube_cronjob_status_last_successful_time "
|
||||
'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
|
||||
)
|
||||
series = _query(query)
|
||||
assert series, "No glue cronjob last success series found"
|
||||
|
||||
|
||||
def test_ariadne_schedule_metrics_present():
|
||||
cfg = _load_config()
|
||||
expected = cfg.get("ariadne_schedule_tasks", [])
|
||||
if not expected:
|
||||
return
|
||||
series = _query("ariadne_schedule_next_run_timestamp_seconds")
|
||||
tasks = {item.get("metric", {}).get("task") for item in series}
|
||||
missing = [task for task in expected if task not in tasks]
|
||||
assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
|
||||
@ -1,13 +0,0 @@
|
||||
# clusters/atlas/applications/kustomization.yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- ../../services/crypto
|
||||
- ../../services/gitea
|
||||
- ../../services/jellyfin
|
||||
- ../../services/comms
|
||||
- ../../services/monitoring
|
||||
- ../../services/logging
|
||||
- ../../services/pegasus
|
||||
- ../../services/vault
|
||||
- ../../services/bstein-dev-home
|
||||
@ -0,0 +1,17 @@
|
||||
# clusters/atlas/flux-system/applications/bstein-dev-home-migrations/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: bstein-dev-home-migrations
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./services/bstein-dev-home/oneoffs/migrations
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
targetNamespace: bstein-dev-home
|
||||
wait: false
|
||||
suspend: true
|
||||
@ -3,7 +3,7 @@ apiVersion: image.toolkit.fluxcd.io/v1
|
||||
kind: ImageUpdateAutomation
|
||||
metadata:
|
||||
name: bstein-dev-home
|
||||
namespace: flux-system
|
||||
namespace: bstein-dev-home
|
||||
spec:
|
||||
interval: 1m0s
|
||||
sourceRef:
|
||||
@ -13,14 +13,14 @@ spec:
|
||||
git:
|
||||
checkout:
|
||||
ref:
|
||||
branch: main
|
||||
branch: feature/ariadne
|
||||
commit:
|
||||
author:
|
||||
email: ops@bstein.dev
|
||||
name: flux-bot
|
||||
messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}"
|
||||
messageTemplate: "chore(bstein-dev-home): automated image update"
|
||||
push:
|
||||
branch: main
|
||||
branch: feature/ariadne
|
||||
update:
|
||||
strategy: Setters
|
||||
path: services/bstein-dev-home
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# clusters/atlas/flux-system/applications/communication/kustomization.yaml
|
||||
# clusters/atlas/flux-system/applications/comms/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
# clusters/atlas/flux-system/applications/finance/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: finance
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./services/finance
|
||||
prune: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
targetNamespace: finance
|
||||
healthChecks:
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: actual-budget
|
||||
namespace: finance
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: firefly
|
||||
namespace: finance
|
||||
wait: false
|
||||
@ -13,11 +13,6 @@ spec:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
healthChecks:
|
||||
- apiVersion: helm.toolkit.fluxcd.io/v2
|
||||
kind: HelmRelease
|
||||
name: harbor
|
||||
namespace: harbor
|
||||
wait: false
|
||||
dependsOn:
|
||||
- name: core
|
||||
|
||||
@ -0,0 +1,25 @@
|
||||
# clusters/atlas/flux-system/applications/health/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: health
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./services/health
|
||||
prune: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
targetNamespace: health
|
||||
dependsOn:
|
||||
- name: keycloak
|
||||
- name: postgres
|
||||
- name: traefik
|
||||
- name: vault
|
||||
healthChecks:
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: wger
|
||||
namespace: health
|
||||
wait: false
|
||||
@ -12,10 +12,12 @@ resources:
|
||||
- pegasus/image-automation.yaml
|
||||
- bstein-dev-home/kustomization.yaml
|
||||
- bstein-dev-home/image-automation.yaml
|
||||
- bstein-dev-home-migrations/kustomization.yaml
|
||||
- harbor/kustomization.yaml
|
||||
- harbor/image-automation.yaml
|
||||
- jellyfin/kustomization.yaml
|
||||
- xmr-miner/kustomization.yaml
|
||||
- wallet-monero-temp/kustomization.yaml
|
||||
- sui-metrics/kustomization.yaml
|
||||
- openldap/kustomization.yaml
|
||||
- keycloak/kustomization.yaml
|
||||
@ -25,6 +27,7 @@ resources:
|
||||
- ai-llm/kustomization.yaml
|
||||
- nextcloud/kustomization.yaml
|
||||
- nextcloud-mail-sync/kustomization.yaml
|
||||
- postgres/kustomization.yaml
|
||||
- outline/kustomization.yaml
|
||||
- planka/kustomization.yaml
|
||||
- finance/kustomization.yaml
|
||||
- health/kustomization.yaml
|
||||
|
||||
@ -3,7 +3,7 @@ apiVersion: image.toolkit.fluxcd.io/v1
|
||||
kind: ImageUpdateAutomation
|
||||
metadata:
|
||||
name: pegasus
|
||||
namespace: flux-system
|
||||
namespace: jellyfin
|
||||
spec:
|
||||
interval: 1m0s
|
||||
sourceRef:
|
||||
|
||||
@ -0,0 +1,19 @@
|
||||
# clusters/atlas/flux-system/applications/wallet-monero-temp/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: wallet-monero-temp
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./services/crypto/wallet-monero-temp
|
||||
targetNamespace: crypto
|
||||
prune: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
dependsOn:
|
||||
- name: crypto
|
||||
- name: xmr-miner
|
||||
wait: true
|
||||
@ -1,3 +1,4 @@
|
||||
# clusters/atlas/flux-system/gotk-components.yaml
|
||||
---
|
||||
# This manifest was generated by flux. DO NOT EDIT.
|
||||
# Flux Version: v2.7.5
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
# clusters/atlas/flux-system/gotk-sync.yaml
|
||||
# This manifest was generated by flux. DO NOT EDIT.
|
||||
---
|
||||
apiVersion: source.toolkit.fluxcd.io/v1
|
||||
@ -8,7 +9,7 @@ metadata:
|
||||
spec:
|
||||
interval: 1m0s
|
||||
ref:
|
||||
branch: main
|
||||
branch: feature/ariadne
|
||||
secretRef:
|
||||
name: flux-system-gitea
|
||||
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
# clusters/atlas/flux-system/platform/cert-manager-cleanup/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: cert-manager-cleanup
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 30m
|
||||
path: ./infrastructure/cert-manager/cleanup
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
targetNamespace: cert-manager
|
||||
wait: true
|
||||
@ -0,0 +1,19 @@
|
||||
# clusters/atlas/flux-system/platform/cert-manager/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: cert-manager
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 30m
|
||||
path: ./infrastructure/cert-manager
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
targetNamespace: cert-manager
|
||||
dependsOn:
|
||||
- name: helm
|
||||
wait: true
|
||||
@ -4,11 +4,17 @@ kind: Kustomization
|
||||
resources:
|
||||
- core/kustomization.yaml
|
||||
- helm/kustomization.yaml
|
||||
- cert-manager/kustomization.yaml
|
||||
- metallb/kustomization.yaml
|
||||
- traefik/kustomization.yaml
|
||||
- gitops-ui/kustomization.yaml
|
||||
- monitoring/kustomization.yaml
|
||||
- logging/kustomization.yaml
|
||||
- maintenance/kustomization.yaml
|
||||
- maintenance/image-automation.yaml
|
||||
- longhorn-adopt/kustomization.yaml
|
||||
- longhorn/kustomization.yaml
|
||||
- longhorn-ui/kustomization.yaml
|
||||
- postgres/kustomization.yaml
|
||||
- ../platform/vault-csi/kustomization.yaml
|
||||
- ../platform/vault-injector/kustomization.yaml
|
||||
|
||||
@ -0,0 +1,17 @@
|
||||
# clusters/atlas/flux-system/platform/longhorn-adopt/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: longhorn-adopt
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 30m
|
||||
path: ./infrastructure/longhorn/adopt
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
targetNamespace: longhorn-system
|
||||
wait: true
|
||||
@ -15,4 +15,5 @@ spec:
|
||||
namespace: flux-system
|
||||
dependsOn:
|
||||
- name: core
|
||||
- name: longhorn
|
||||
wait: true
|
||||
|
||||
@ -0,0 +1,20 @@
|
||||
# clusters/atlas/flux-system/platform/longhorn/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: longhorn
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 30m
|
||||
path: ./infrastructure/longhorn/core
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
targetNamespace: longhorn-system
|
||||
dependsOn:
|
||||
- name: helm
|
||||
- name: longhorn-adopt
|
||||
wait: false
|
||||
@ -0,0 +1,26 @@
|
||||
# clusters/atlas/flux-system/platform/maintenance/image-automation.yaml
|
||||
apiVersion: image.toolkit.fluxcd.io/v1
|
||||
kind: ImageUpdateAutomation
|
||||
metadata:
|
||||
name: maintenance
|
||||
namespace: maintenance
|
||||
spec:
|
||||
interval: 1m0s
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
git:
|
||||
checkout:
|
||||
ref:
|
||||
branch: feature/ariadne
|
||||
commit:
|
||||
author:
|
||||
email: ops@bstein.dev
|
||||
name: flux-bot
|
||||
messageTemplate: "chore(maintenance): automated image update"
|
||||
push:
|
||||
branch: feature/ariadne
|
||||
update:
|
||||
strategy: Setters
|
||||
path: services/maintenance
|
||||
@ -8,6 +8,7 @@ spec:
|
||||
interval: 10m
|
||||
path: ./services/maintenance
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# clusters/atlas/flux-system/applications/postgres/kustomization.yaml
|
||||
# clusters/atlas/flux-system/platform/postgres/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
@ -6,7 +6,7 @@ metadata:
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./services/postgres
|
||||
path: ./infrastructure/postgres
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
@ -0,0 +1,16 @@
|
||||
# clusters/atlas/flux-system/platform/vault-injector/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: vault-injector
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 30m
|
||||
path: ./infrastructure/vault-injector
|
||||
targetNamespace: vault
|
||||
prune: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
wait: true
|
||||
@ -1,8 +0,0 @@
|
||||
# clusters/atlas/platform/kustomization.yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- ../../../infrastructure/modules/base
|
||||
- ../../../infrastructure/modules/profiles/atlas-ha
|
||||
- ../../../infrastructure/sources/cert-manager/letsencrypt.yaml
|
||||
- ../../../infrastructure/metallb
|
||||
5
dockerfiles/Dockerfile.comms-guest-tools
Normal file
5
dockerfiles/Dockerfile.comms-guest-tools
Normal file
@ -0,0 +1,5 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
|
||||
RUN pip install --no-cache-dir requests psycopg2-binary
|
||||
9
dockerfiles/Dockerfile.harbor-core-vault
Normal file
9
dockerfiles/Dockerfile.harbor-core-vault
Normal file
@ -0,0 +1,9 @@
|
||||
FROM registry.bstein.dev/infra/harbor-core:v2.14.1-arm64
|
||||
|
||||
USER root
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
USER harbor
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/harbor/entrypoint.sh"]
|
||||
9
dockerfiles/Dockerfile.harbor-jobservice-vault
Normal file
9
dockerfiles/Dockerfile.harbor-jobservice-vault
Normal file
@ -0,0 +1,9 @@
|
||||
FROM registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64
|
||||
|
||||
USER root
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
USER harbor
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/harbor/entrypoint.sh"]
|
||||
9
dockerfiles/Dockerfile.harbor-registry-vault
Normal file
9
dockerfiles/Dockerfile.harbor-registry-vault
Normal file
@ -0,0 +1,9 @@
|
||||
FROM registry.bstein.dev/infra/harbor-registry:v2.14.1-arm64
|
||||
|
||||
USER root
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
USER harbor
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/home/harbor/entrypoint.sh"]
|
||||
9
dockerfiles/Dockerfile.harbor-registryctl-vault
Normal file
9
dockerfiles/Dockerfile.harbor-registryctl-vault
Normal file
@ -0,0 +1,9 @@
|
||||
FROM registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64
|
||||
|
||||
USER root
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
USER harbor
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/home/harbor/start.sh"]
|
||||
10
dockerfiles/Dockerfile.livekit-token-vault
Normal file
10
dockerfiles/Dockerfile.livekit-token-vault
Normal file
@ -0,0 +1,10 @@
|
||||
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates
|
||||
COPY --from=base /lk-jwt-service /lk-jwt-service
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/lk-jwt-service"]
|
||||
10
dockerfiles/Dockerfile.oauth2-proxy-vault
Normal file
10
dockerfiles/Dockerfile.oauth2-proxy-vault
Normal file
@ -0,0 +1,10 @@
|
||||
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates
|
||||
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/bin/oauth2-proxy"]
|
||||
10
dockerfiles/Dockerfile.pegasus-vault
Normal file
10
dockerfiles/Dockerfile.pegasus-vault
Normal file
@ -0,0 +1,10 @@
|
||||
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates
|
||||
COPY --from=base /pegasus /pegasus
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/pegasus"]
|
||||
34
dockerfiles/vault-entrypoint.sh
Normal file
34
dockerfiles/vault-entrypoint.sh
Normal file
@ -0,0 +1,34 @@
|
||||
#!/bin/sh
|
||||
set -eu
|
||||
|
||||
if [ -n "${VAULT_ENV_FILE:-}" ]; then
|
||||
if [ -f "${VAULT_ENV_FILE}" ]; then
|
||||
# shellcheck disable=SC1090
|
||||
. "${VAULT_ENV_FILE}"
|
||||
else
|
||||
echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -n "${VAULT_COPY_FILES:-}" ]; then
|
||||
old_ifs="$IFS"
|
||||
IFS=','
|
||||
for pair in ${VAULT_COPY_FILES}; do
|
||||
src="${pair%%:*}"
|
||||
dest="${pair#*:}"
|
||||
if [ -z "${src}" ] || [ -z "${dest}" ]; then
|
||||
echo "Vault copy entry malformed: ${pair}" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -f "${src}" ]; then
|
||||
echo "Vault file not found: ${src}" >&2
|
||||
exit 1
|
||||
fi
|
||||
mkdir -p "$(dirname "${dest}")"
|
||||
cp "${src}" "${dest}"
|
||||
done
|
||||
IFS="$old_ifs"
|
||||
fi
|
||||
|
||||
exec "$@"
|
||||
@ -0,0 +1,40 @@
|
||||
# infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: cert-manager-cleanup-2
|
||||
namespace: cert-manager
|
||||
spec:
|
||||
backoffLimit: 1
|
||||
template:
|
||||
spec:
|
||||
serviceAccountName: cert-manager-cleanup
|
||||
restartPolicy: Never
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: node-role.kubernetes.io/worker
|
||||
operator: Exists
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values: ["arm64"]
|
||||
containers:
|
||||
- name: cleanup
|
||||
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
|
||||
command: ["/usr/bin/env", "bash"]
|
||||
args: ["/scripts/cert_manager_cleanup.sh"]
|
||||
volumeMounts:
|
||||
- name: script
|
||||
mountPath: /scripts
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: script
|
||||
configMap:
|
||||
name: cert-manager-cleanup-script
|
||||
defaultMode: 0555
|
||||
@ -0,0 +1,58 @@
|
||||
# infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: cert-manager-cleanup
|
||||
namespace: cert-manager
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: cert-manager-cleanup
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- pods
|
||||
- services
|
||||
- endpoints
|
||||
- configmaps
|
||||
- secrets
|
||||
- serviceaccounts
|
||||
verbs: ["get", "list", "watch", "delete"]
|
||||
- apiGroups: ["apps"]
|
||||
resources:
|
||||
- deployments
|
||||
- daemonsets
|
||||
- statefulsets
|
||||
- replicasets
|
||||
verbs: ["get", "list", "watch", "delete"]
|
||||
- apiGroups: ["batch"]
|
||||
resources:
|
||||
- jobs
|
||||
- cronjobs
|
||||
verbs: ["get", "list", "watch", "delete"]
|
||||
- apiGroups: ["rbac.authorization.k8s.io"]
|
||||
resources:
|
||||
- roles
|
||||
- rolebindings
|
||||
- clusterroles
|
||||
- clusterrolebindings
|
||||
verbs: ["get", "list", "watch", "delete"]
|
||||
- apiGroups: ["admissionregistration.k8s.io"]
|
||||
resources:
|
||||
- validatingwebhookconfigurations
|
||||
- mutatingwebhookconfigurations
|
||||
verbs: ["get", "list", "watch", "delete"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: cert-manager-cleanup
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: cert-manager-cleanup
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: cert-manager-cleanup
|
||||
namespace: cert-manager
|
||||
15
infrastructure/cert-manager/cleanup/kustomization.yaml
Normal file
15
infrastructure/cert-manager/cleanup/kustomization.yaml
Normal file
@ -0,0 +1,15 @@
|
||||
# infrastructure/cert-manager/cleanup/kustomization.yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- cert-manager-cleanup-rbac.yaml
|
||||
- cert-manager-cleanup-job.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: cert-manager-cleanup-script
|
||||
namespace: cert-manager
|
||||
files:
|
||||
- cert_manager_cleanup.sh=scripts/cert_manager_cleanup.sh
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
5
infrastructure/cert-manager/cleanup/namespace.yaml
Normal file
5
infrastructure/cert-manager/cleanup/namespace.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# infrastructure/cert-manager/cleanup/namespace.yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: cert-manager
|
||||
@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
namespace="cert-manager"
|
||||
selectors=(
|
||||
"app.kubernetes.io/name=cert-manager"
|
||||
"app.kubernetes.io/instance=cert-manager"
|
||||
"app.kubernetes.io/instance=certmanager-prod"
|
||||
)
|
||||
|
||||
delete_namespaced() {
|
||||
local selector="$1"
|
||||
kubectl -n "${namespace}" delete deployment,daemonset,statefulset,replicaset \
|
||||
--selector "${selector}" --ignore-not-found --wait=false
|
||||
kubectl -n "${namespace}" delete pod,service,endpoints,serviceaccount,configmap,secret \
|
||||
--selector "${selector}" --ignore-not-found --wait=false
|
||||
kubectl -n "${namespace}" delete role,rolebinding \
|
||||
--selector "${selector}" --ignore-not-found --wait=false
|
||||
kubectl -n "${namespace}" delete job,cronjob \
|
||||
--selector "${selector}" --ignore-not-found --wait=false
|
||||
}
|
||||
|
||||
delete_cluster_scoped() {
|
||||
local selector="$1"
|
||||
kubectl delete clusterrole,clusterrolebinding \
|
||||
--selector "${selector}" --ignore-not-found --wait=false
|
||||
kubectl delete mutatingwebhookconfiguration,validatingwebhookconfiguration \
|
||||
--selector "${selector}" --ignore-not-found --wait=false
|
||||
}
|
||||
|
||||
for selector in "${selectors[@]}"; do
|
||||
delete_namespaced "${selector}"
|
||||
delete_cluster_scoped "${selector}"
|
||||
done
|
||||
|
||||
kubectl delete mutatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false
|
||||
kubectl delete validatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false
|
||||
67
infrastructure/cert-manager/helmrelease.yaml
Normal file
67
infrastructure/cert-manager/helmrelease.yaml
Normal file
@ -0,0 +1,67 @@
|
||||
# infrastructure/cert-manager/helmrelease.yaml
|
||||
apiVersion: helm.toolkit.fluxcd.io/v2
|
||||
kind: HelmRelease
|
||||
metadata:
|
||||
name: cert-manager
|
||||
namespace: cert-manager
|
||||
spec:
|
||||
interval: 30m
|
||||
chart:
|
||||
spec:
|
||||
chart: cert-manager
|
||||
version: v1.17.0
|
||||
sourceRef:
|
||||
kind: HelmRepository
|
||||
name: jetstack
|
||||
namespace: flux-system
|
||||
install:
|
||||
crds: CreateReplace
|
||||
remediation: { retries: 3 }
|
||||
timeout: 10m
|
||||
upgrade:
|
||||
crds: CreateReplace
|
||||
remediation:
|
||||
retries: 3
|
||||
remediateLastFailure: true
|
||||
cleanupOnFail: true
|
||||
timeout: 10m
|
||||
values:
|
||||
installCRDs: true
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- rpi4
|
||||
webhook:
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- rpi4
|
||||
cainjector:
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- rpi4
|
||||
6
infrastructure/cert-manager/kustomization.yaml
Normal file
6
infrastructure/cert-manager/kustomization.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
# infrastructure/cert-manager/kustomization.yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- helmrelease.yaml
|
||||
5
infrastructure/cert-manager/namespace.yaml
Normal file
5
infrastructure/cert-manager/namespace.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# infrastructure/cert-manager/namespace.yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: cert-manager
|
||||
47
infrastructure/core/coredns-custom.yaml
Normal file
47
infrastructure/core/coredns-custom.yaml
Normal file
@ -0,0 +1,47 @@
|
||||
# infrastructure/core/coredns-custom.yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: coredns-custom
|
||||
namespace: kube-system
|
||||
data:
|
||||
bstein-dev.server: |
|
||||
bstein.dev:53 {
|
||||
errors
|
||||
cache 30
|
||||
hosts {
|
||||
192.168.22.9 alerts.bstein.dev
|
||||
192.168.22.9 auth.bstein.dev
|
||||
192.168.22.9 bstein.dev
|
||||
10.43.6.87 budget.bstein.dev
|
||||
192.168.22.9 call.live.bstein.dev
|
||||
192.168.22.9 cd.bstein.dev
|
||||
192.168.22.9 chat.ai.bstein.dev
|
||||
192.168.22.9 ci.bstein.dev
|
||||
192.168.22.9 cloud.bstein.dev
|
||||
192.168.22.9 health.bstein.dev
|
||||
192.168.22.9 kit.live.bstein.dev
|
||||
192.168.22.9 live.bstein.dev
|
||||
192.168.22.9 logs.bstein.dev
|
||||
192.168.22.9 longhorn.bstein.dev
|
||||
192.168.22.4 mail.bstein.dev
|
||||
192.168.22.9 matrix.live.bstein.dev
|
||||
192.168.22.9 metrics.bstein.dev
|
||||
192.168.22.9 monero.bstein.dev
|
||||
10.43.6.87 money.bstein.dev
|
||||
192.168.22.9 notes.bstein.dev
|
||||
192.168.22.9 office.bstein.dev
|
||||
192.168.22.9 pegasus.bstein.dev
|
||||
3.136.224.193 pm-bounces.bstein.dev
|
||||
3.150.68.49 pm-bounces.bstein.dev
|
||||
18.189.137.81 pm-bounces.bstein.dev
|
||||
192.168.22.9 registry.bstein.dev
|
||||
192.168.22.9 scm.bstein.dev
|
||||
192.168.22.9 secret.bstein.dev
|
||||
192.168.22.9 sso.bstein.dev
|
||||
192.168.22.9 stream.bstein.dev
|
||||
192.168.22.9 tasks.bstein.dev
|
||||
192.168.22.9 vault.bstein.dev
|
||||
fallthrough
|
||||
}
|
||||
}
|
||||
141
infrastructure/core/coredns-deployment.yaml
Normal file
141
infrastructure/core/coredns-deployment.yaml
Normal file
@ -0,0 +1,141 @@
|
||||
# infrastructure/core/coredns-deployment.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: coredns
|
||||
namespace: kube-system
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
kubernetes.io/name: CoreDNS
|
||||
spec:
|
||||
progressDeadlineSeconds: 600
|
||||
replicas: 2
|
||||
revisionHistoryLimit: 0
|
||||
selector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
maxSurge: 25%
|
||||
maxUnavailable: 1
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
k8s-app: kube-dns
|
||||
spec:
|
||||
containers:
|
||||
- name: coredns
|
||||
image: registry.bstein.dev/infra/coredns:1.12.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- -conf
|
||||
- /etc/coredns/Corefile
|
||||
ports:
|
||||
- containerPort: 53
|
||||
name: dns
|
||||
protocol: UDP
|
||||
- containerPort: 53
|
||||
name: dns-tcp
|
||||
protocol: TCP
|
||||
- containerPort: 9153
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 8080
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: 8181
|
||||
scheme: HTTP
|
||||
periodSeconds: 2
|
||||
timeoutSeconds: 1
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
resources:
|
||||
limits:
|
||||
memory: 170Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 70Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
add:
|
||||
- NET_BIND_SERVICE
|
||||
drop:
|
||||
- all
|
||||
readOnlyRootFilesystem: true
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/coredns
|
||||
readOnly: true
|
||||
- name: custom-config-volume
|
||||
mountPath: /etc/coredns/custom
|
||||
readOnly: true
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- rpi4
|
||||
- key: node-role.kubernetes.io/worker
|
||||
operator: In
|
||||
values:
|
||||
- "true"
|
||||
dnsPolicy: Default
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
priorityClassName: system-cluster-critical
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
serviceAccountName: coredns
|
||||
tolerations:
|
||||
- key: CriticalAddonsOnly
|
||||
operator: Exists
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: DoNotSchedule
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
- maxSkew: 1
|
||||
topologyKey: topology.kubernetes.io/zone
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: coredns
|
||||
defaultMode: 420
|
||||
items:
|
||||
- key: Corefile
|
||||
path: Corefile
|
||||
- key: NodeHosts
|
||||
path: NodeHosts
|
||||
- name: custom-config-volume
|
||||
configMap:
|
||||
name: coredns-custom
|
||||
optional: true
|
||||
defaultMode: 420
|
||||
@ -4,5 +4,8 @@ kind: Kustomization
|
||||
resources:
|
||||
- ../modules/base
|
||||
- ../modules/profiles/atlas-ha
|
||||
- coredns-custom.yaml
|
||||
- coredns-deployment.yaml
|
||||
- ntp-sync-daemonset.yaml
|
||||
- ../sources/cert-manager/letsencrypt.yaml
|
||||
- ../sources/cert-manager/letsencrypt-prod.yaml
|
||||
|
||||
50
infrastructure/core/ntp-sync-daemonset.yaml
Normal file
50
infrastructure/core/ntp-sync-daemonset.yaml
Normal file
@ -0,0 +1,50 @@
|
||||
# infrastructure/core/ntp-sync-daemonset.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: ntp-sync
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: ntp-sync
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ntp-sync
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: ntp-sync
|
||||
spec:
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: DoesNotExist
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: DoesNotExist
|
||||
containers:
|
||||
- name: ntp-sync
|
||||
image: public.ecr.aws/docker/library/busybox:1.36.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
command: ["/bin/sh", "-c"]
|
||||
args:
|
||||
- |
|
||||
set -eu
|
||||
while true; do
|
||||
ntpd -q -p pool.ntp.org || true
|
||||
sleep 300
|
||||
done
|
||||
securityContext:
|
||||
capabilities:
|
||||
add: ["SYS_TIME"]
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
resources:
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 16Mi
|
||||
limits:
|
||||
cpu: 50m
|
||||
memory: 64Mi
|
||||
15
infrastructure/longhorn/adopt/kustomization.yaml
Normal file
15
infrastructure/longhorn/adopt/kustomization.yaml
Normal file
@ -0,0 +1,15 @@
|
||||
# infrastructure/longhorn/adopt/kustomization.yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- longhorn-adopt-rbac.yaml
|
||||
- longhorn-helm-adopt-job.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: longhorn-helm-adopt-script
|
||||
namespace: longhorn-system
|
||||
files:
|
||||
- longhorn_helm_adopt.sh=scripts/longhorn_helm_adopt.sh
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
56
infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml
Normal file
56
infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml
Normal file
@ -0,0 +1,56 @@
|
||||
# infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: longhorn-helm-adopt
|
||||
namespace: longhorn-system
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: longhorn-helm-adopt
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources:
|
||||
- configmaps
|
||||
- services
|
||||
- serviceaccounts
|
||||
- secrets
|
||||
verbs: ["get", "list", "watch", "patch", "update"]
|
||||
- apiGroups: ["apps"]
|
||||
resources:
|
||||
- deployments
|
||||
- daemonsets
|
||||
verbs: ["get", "list", "watch", "patch", "update"]
|
||||
- apiGroups: ["batch"]
|
||||
resources:
|
||||
- jobs
|
||||
verbs: ["get", "list", "watch", "patch", "update"]
|
||||
- apiGroups: ["rbac.authorization.k8s.io"]
|
||||
resources:
|
||||
- roles
|
||||
- rolebindings
|
||||
- clusterroles
|
||||
- clusterrolebindings
|
||||
verbs: ["get", "list", "watch", "patch", "update"]
|
||||
- apiGroups: ["apiextensions.k8s.io"]
|
||||
resources:
|
||||
- customresourcedefinitions
|
||||
verbs: ["get", "list", "watch", "patch", "update"]
|
||||
- apiGroups: ["scheduling.k8s.io"]
|
||||
resources:
|
||||
- priorityclasses
|
||||
verbs: ["get", "list", "watch", "patch", "update"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: longhorn-helm-adopt
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: longhorn-helm-adopt
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: longhorn-helm-adopt
|
||||
namespace: longhorn-system
|
||||
40
infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml
Normal file
40
infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml
Normal file
@ -0,0 +1,40 @@
|
||||
# infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: longhorn-helm-adopt-2
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
backoffLimit: 1
|
||||
template:
|
||||
spec:
|
||||
serviceAccountName: longhorn-helm-adopt
|
||||
restartPolicy: Never
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: node-role.kubernetes.io/worker
|
||||
operator: Exists
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values: ["arm64"]
|
||||
containers:
|
||||
- name: adopt
|
||||
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
|
||||
command: ["/usr/bin/env", "bash"]
|
||||
args: ["/scripts/longhorn_helm_adopt.sh"]
|
||||
volumeMounts:
|
||||
- name: script
|
||||
mountPath: /scripts
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: script
|
||||
configMap:
|
||||
name: longhorn-helm-adopt-script
|
||||
defaultMode: 0555
|
||||
5
infrastructure/longhorn/adopt/namespace.yaml
Normal file
5
infrastructure/longhorn/adopt/namespace.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# infrastructure/longhorn/adopt/namespace.yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: longhorn-system
|
||||
52
infrastructure/longhorn/adopt/scripts/longhorn_helm_adopt.sh
Normal file
52
infrastructure/longhorn/adopt/scripts/longhorn_helm_adopt.sh
Normal file
@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
release_name="longhorn"
|
||||
release_namespace="longhorn-system"
|
||||
selector="app.kubernetes.io/instance=${release_name}"
|
||||
|
||||
annotate_and_label() {
|
||||
local scope="$1"
|
||||
local kind="$2"
|
||||
if [ "${scope}" = "namespaced" ]; then
|
||||
kubectl -n "${release_namespace}" annotate "${kind}" -l "${selector}" \
|
||||
meta.helm.sh/release-name="${release_name}" \
|
||||
meta.helm.sh/release-namespace="${release_namespace}" \
|
||||
--overwrite >/dev/null 2>&1 || true
|
||||
kubectl -n "${release_namespace}" label "${kind}" -l "${selector}" \
|
||||
app.kubernetes.io/managed-by=Helm --overwrite >/dev/null 2>&1 || true
|
||||
else
|
||||
kubectl annotate "${kind}" -l "${selector}" \
|
||||
meta.helm.sh/release-name="${release_name}" \
|
||||
meta.helm.sh/release-namespace="${release_namespace}" \
|
||||
--overwrite >/dev/null 2>&1 || true
|
||||
kubectl label "${kind}" -l "${selector}" \
|
||||
app.kubernetes.io/managed-by=Helm --overwrite >/dev/null 2>&1 || true
|
||||
fi
|
||||
}
|
||||
|
||||
namespaced_kinds=(
|
||||
configmap
|
||||
service
|
||||
serviceaccount
|
||||
deployment
|
||||
daemonset
|
||||
job
|
||||
role
|
||||
rolebinding
|
||||
)
|
||||
|
||||
cluster_kinds=(
|
||||
clusterrole
|
||||
clusterrolebinding
|
||||
customresourcedefinition
|
||||
priorityclass
|
||||
)
|
||||
|
||||
for kind in "${namespaced_kinds[@]}"; do
|
||||
annotate_and_label "namespaced" "${kind}"
|
||||
done
|
||||
|
||||
for kind in "${cluster_kinds[@]}"; do
|
||||
annotate_and_label "cluster" "${kind}"
|
||||
done
|
||||
80
infrastructure/longhorn/core/helmrelease.yaml
Normal file
80
infrastructure/longhorn/core/helmrelease.yaml
Normal file
@ -0,0 +1,80 @@
|
||||
# infrastructure/longhorn/core/helmrelease.yaml
|
||||
apiVersion: helm.toolkit.fluxcd.io/v2
|
||||
kind: HelmRelease
|
||||
metadata:
|
||||
name: longhorn
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
interval: 30m
|
||||
chart:
|
||||
spec:
|
||||
chart: longhorn
|
||||
version: 1.8.2
|
||||
sourceRef:
|
||||
kind: HelmRepository
|
||||
name: longhorn
|
||||
namespace: flux-system
|
||||
install:
|
||||
crds: Skip
|
||||
remediation: { retries: 3 }
|
||||
timeout: 15m
|
||||
upgrade:
|
||||
crds: Skip
|
||||
remediation:
|
||||
retries: 3
|
||||
remediateLastFailure: true
|
||||
cleanupOnFail: true
|
||||
timeout: 15m
|
||||
values:
|
||||
service:
|
||||
ui:
|
||||
type: NodePort
|
||||
nodePort: 30824
|
||||
privateRegistry:
|
||||
createSecret: false
|
||||
registrySecret: longhorn-registry
|
||||
image:
|
||||
pullPolicy: Always
|
||||
longhorn:
|
||||
engine:
|
||||
repository: registry.bstein.dev/infra/longhorn-engine
|
||||
tag: v1.8.2
|
||||
manager:
|
||||
repository: registry.bstein.dev/infra/longhorn-manager
|
||||
tag: v1.8.2
|
||||
ui:
|
||||
repository: registry.bstein.dev/infra/longhorn-ui
|
||||
tag: v1.8.2
|
||||
instanceManager:
|
||||
repository: registry.bstein.dev/infra/longhorn-instance-manager
|
||||
tag: v1.8.2
|
||||
shareManager:
|
||||
repository: registry.bstein.dev/infra/longhorn-share-manager
|
||||
tag: v1.8.2
|
||||
backingImageManager:
|
||||
repository: registry.bstein.dev/infra/longhorn-backing-image-manager
|
||||
tag: v1.8.2
|
||||
supportBundleKit:
|
||||
repository: registry.bstein.dev/infra/longhorn-support-bundle-kit
|
||||
tag: v0.0.56
|
||||
csi:
|
||||
attacher:
|
||||
repository: registry.bstein.dev/infra/longhorn-csi-attacher
|
||||
tag: v4.9.0
|
||||
provisioner:
|
||||
repository: registry.bstein.dev/infra/longhorn-csi-provisioner
|
||||
tag: v5.3.0
|
||||
nodeDriverRegistrar:
|
||||
repository: registry.bstein.dev/infra/longhorn-csi-node-driver-registrar
|
||||
tag: v2.14.0
|
||||
resizer:
|
||||
repository: registry.bstein.dev/infra/longhorn-csi-resizer
|
||||
tag: v1.13.2
|
||||
snapshotter:
|
||||
repository: registry.bstein.dev/infra/longhorn-csi-snapshotter
|
||||
tag: v8.2.0
|
||||
livenessProbe:
|
||||
repository: registry.bstein.dev/infra/longhorn-livenessprobe
|
||||
tag: v2.16.0
|
||||
defaultSettings:
|
||||
systemManagedPodsImagePullPolicy: Always
|
||||
18
infrastructure/longhorn/core/kustomization.yaml
Normal file
18
infrastructure/longhorn/core/kustomization.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
# infrastructure/longhorn/core/kustomization.yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- vault-serviceaccount.yaml
|
||||
- secretproviderclass.yaml
|
||||
- vault-sync-deployment.yaml
|
||||
- helmrelease.yaml
|
||||
- longhorn-settings-ensure-job.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: longhorn-settings-ensure-script
|
||||
files:
|
||||
- longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh
|
||||
|
||||
generatorOptions:
|
||||
disableNameSuffixHash: true
|
||||
@ -0,0 +1,36 @@
|
||||
# infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: longhorn-settings-ensure-4
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
backoffLimit: 0
|
||||
ttlSecondsAfterFinished: 3600
|
||||
template:
|
||||
spec:
|
||||
serviceAccountName: longhorn-service-account
|
||||
restartPolicy: Never
|
||||
volumes:
|
||||
- name: longhorn-settings-ensure-script
|
||||
configMap:
|
||||
name: longhorn-settings-ensure-script
|
||||
defaultMode: 0555
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values: ["arm64"]
|
||||
- key: node-role.kubernetes.io/worker
|
||||
operator: Exists
|
||||
containers:
|
||||
- name: apply
|
||||
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
|
||||
command: ["/scripts/longhorn_settings_ensure.sh"]
|
||||
volumeMounts:
|
||||
- name: longhorn-settings-ensure-script
|
||||
mountPath: /scripts
|
||||
readOnly: true
|
||||
5
infrastructure/longhorn/core/namespace.yaml
Normal file
5
infrastructure/longhorn/core/namespace.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# infrastructure/longhorn/core/namespace.yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: longhorn-system
|
||||
@ -0,0 +1,42 @@
|
||||
#!/usr/bin/env sh
set -eu

# Longhorn blocks direct CR patches for some settings; use the internal API instead.

api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"

# Poll the settings endpoint until it answers: 30 attempts, 2s apart.
wait_for_api() {
  remaining=30
  while [ "${remaining}" -gt 0 ]; do
    curl -fsS "${api_base}" >/dev/null 2>&1 && return 0
    remaining=$((remaining - 1))
    sleep 2
  done
  echo "Longhorn API not ready after retries." >&2
  return 1
}

# Idempotently apply one setting: skip when the API already reports the
# desired value, otherwise PUT the new value as JSON.
update_setting() {
  setting_name="$1"
  desired="$2"

  existing="$(curl -fsS "${api_base}/${setting_name}" || true)"
  if printf '%s\n' "${existing}" | grep -Fq "\"value\":\"${desired}\""; then
    echo "Setting ${setting_name} already set."
    return 0
  fi

  echo "Setting ${setting_name} -> ${desired}"
  curl -fsS -X PUT \
    -H "Content-Type: application/json" \
    -d "{\"value\":\"${desired}\"}" \
    "${api_base}/${setting_name}" >/dev/null
}

wait_for_api
update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v1.8.2"
update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
|
||||
21
infrastructure/longhorn/core/secretproviderclass.yaml
Normal file
21
infrastructure/longhorn/core/secretproviderclass.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
# infrastructure/longhorn/core/secretproviderclass.yaml
# Pulls the shared Harbor pull credential out of Vault via the Secrets Store
# CSI driver and mirrors it into a native dockerconfigjson Secret.
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  name: longhorn-vault
  namespace: longhorn-system
spec:
  provider: vault
  parameters:
    vaultAddress: "http://vault.vault.svc.cluster.local:8200"
    roleName: "longhorn"  # Vault k8s-auth role the mounting pod authenticates as
    objects: |
      - objectName: "harbor-pull__dockerconfigjson"
        secretPath: "kv/data/atlas/shared/harbor-pull"
        secretKey: "dockerconfigjson"
  # Sync the mounted object into a Secret usable as an imagePullSecret; the
  # sync only happens while some pod mounts this class (see
  # vault-sync-deployment.yaml).
  secretObjects:
    - secretName: longhorn-registry
      type: kubernetes.io/dockerconfigjson
      data:
        - objectName: harbor-pull__dockerconfigjson
          key: .dockerconfigjson
|
||||
6
infrastructure/longhorn/core/vault-serviceaccount.yaml
Normal file
6
infrastructure/longhorn/core/vault-serviceaccount.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
# infrastructure/longhorn/core/vault-serviceaccount.yaml
# ServiceAccount used by the longhorn-vault-sync Deployment; presumably bound
# to the Vault role "longhorn" named in secretproviderclass.yaml — confirm in
# the Vault auth config.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: longhorn-vault-sync
  namespace: longhorn-system
|
||||
45
infrastructure/longhorn/core/vault-sync-deployment.yaml
Normal file
45
infrastructure/longhorn/core/vault-sync-deployment.yaml
Normal file
@ -0,0 +1,45 @@
|
||||
# infrastructure/longhorn/core/vault-sync-deployment.yaml
# Minimal always-running pod whose only purpose is to mount the
# longhorn-vault SecretProviderClass; while it runs, the CSI driver keeps the
# synced longhorn-registry Secret present in the namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: longhorn-vault-sync
  namespace: longhorn-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: longhorn-vault-sync
  template:
    metadata:
      labels:
        app: longhorn-vault-sync
    spec:
      serviceAccountName: longhorn-vault-sync
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          # Soft preference for Raspberry Pi nodes; any worker is acceptable.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 80
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5", "rpi4"]
      containers:
        - name: sync
          image: alpine:3.20
          command: ["/bin/sh", "-c"]
          args:
            - "sleep infinity"  # idle forever; holding the mount is the job
          volumeMounts:
            - name: vault-secrets
              mountPath: /vault/secrets
              readOnly: true
      volumes:
        - name: vault-secrets
          csi:
            driver: secrets-store.csi.k8s.io
            readOnly: true
            volumeAttributes:
              secretProviderClass: longhorn-vault
|
||||
@ -2,6 +2,7 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- serviceaccount.yaml
|
||||
- oauth2-proxy-longhorn.yaml
|
||||
- middleware.yaml
|
||||
- ingress.yaml
|
||||
- oauth2-proxy-longhorn.yaml
|
||||
|
||||
@ -32,7 +32,18 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app: oauth2-proxy-longhorn
|
||||
annotations:
|
||||
vault.hashicorp.com/agent-inject: "true"
|
||||
vault.hashicorp.com/role: "longhorn"
|
||||
vault.hashicorp.com/agent-inject-secret-oidc-config: "kv/data/atlas/longhorn/oauth2-proxy"
|
||||
vault.hashicorp.com/agent-inject-template-oidc-config: |
|
||||
{{- with secret "kv/data/atlas/longhorn/oauth2-proxy" -}}
|
||||
client_id = "{{ .Data.data.client_id }}"
|
||||
client_secret = "{{ .Data.data.client_secret }}"
|
||||
cookie_secret = "{{ .Data.data.cookie_secret }}"
|
||||
{{- end -}}
|
||||
spec:
|
||||
serviceAccountName: longhorn-vault
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
@ -50,6 +61,7 @@ spec:
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- --provider=oidc
|
||||
- --config=/vault/secrets/oidc-config
|
||||
- --redirect-url=https://longhorn.bstein.dev/oauth2/callback
|
||||
- --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
|
||||
- --scope=openid profile email groups
|
||||
@ -69,22 +81,6 @@ spec:
|
||||
- --skip-jwt-bearer-tokens=true
|
||||
- --oidc-groups-claim=groups
|
||||
- --cookie-domain=longhorn.bstein.dev
|
||||
env:
|
||||
- name: OAUTH2_PROXY_CLIENT_ID
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: oauth2-proxy-longhorn-oidc
|
||||
key: client_id
|
||||
- name: OAUTH2_PROXY_CLIENT_SECRET
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: oauth2-proxy-longhorn-oidc
|
||||
key: client_secret
|
||||
- name: OAUTH2_PROXY_COOKIE_SECRET
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: oauth2-proxy-longhorn-oidc
|
||||
key: cookie_secret
|
||||
ports:
|
||||
- containerPort: 4180
|
||||
name: http
|
||||
|
||||
6
infrastructure/longhorn/ui-ingress/serviceaccount.yaml
Normal file
6
infrastructure/longhorn/ui-ingress/serviceaccount.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
# infrastructure/longhorn/ui-ingress/serviceaccount.yaml
# ServiceAccount for the oauth2-proxy-longhorn pod (its spec sets
# serviceAccountName: longhorn-vault) so the Vault agent injector can
# authenticate as it when rendering the oidc-config secret.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: longhorn-vault
  namespace: longhorn-system
|
||||
47
infrastructure/metallb/helmrelease.yaml
Normal file
47
infrastructure/metallb/helmrelease.yaml
Normal file
@ -0,0 +1,47 @@
|
||||
# infrastructure/metallb/helmrelease.yaml
# Flux HelmRelease for MetalLB, replacing the previously vendored rendered
# manifests; values below carry over the former controller/speaker flags.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: metallb
  namespace: metallb-system
spec:
  interval: 30m
  chart:
    spec:
      chart: metallb
      version: 0.15.3
      sourceRef:
        kind: HelmRepository
        name: metallb
        namespace: flux-system
  install:
    crds: CreateReplace
    remediation: { retries: 3 }
    timeout: 10m
  upgrade:
    crds: CreateReplace
    remediation:
      retries: 3
      remediateLastFailure: true
    cleanupOnFail: true
    timeout: 10m
  values:
    # Only Services with loadBalancerClass=metallb are handled.
    loadBalancerClass: metallb
    prometheus:
      metricsPort: 7472
    controller:
      logLevel: info
      webhookMode: enabled
      tlsMinVersion: VersionTLS12
      affinity:
        nodeAffinity:
          # Controller must land on a Raspberry Pi node.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi4
                      - rpi5
    speaker:
      logLevel: info
|
||||
@ -3,8 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- metallb-rendered.yaml
|
||||
- helmrelease.yaml
|
||||
- ippool.yaml
|
||||
patchesStrategicMerge:
|
||||
- patches/node-placement.yaml
|
||||
- patches/speaker-loglevel.yaml
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,27 +0,0 @@
|
||||
# infrastructure/metallb/patches/node-placement.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: metallb-controller
|
||||
namespace: metallb-system
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: controller
|
||||
args:
|
||||
- --port=7472
|
||||
- --log-level=info
|
||||
- --webhook-mode=enabled
|
||||
- --tls-min-version=VersionTLS12
|
||||
- --lb-class=metallb
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi4
|
||||
- rpi5
|
||||
@ -1,15 +0,0 @@
|
||||
# infrastructure/metallb/patches/speaker-loglevel.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: metallb-speaker
|
||||
namespace: metallb-system
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: speaker
|
||||
args:
|
||||
- --port=7472
|
||||
- --log-level=info
|
||||
- --lb-class=metallb
|
||||
@ -0,0 +1,24 @@
|
||||
# infrastructure/modules/base/storageclass/asteria-encrypted.yaml
# Longhorn StorageClass for encrypted volumes on "asteria"-tagged disks.
# The ${pvc.name}/${pvc.namespace} parameters make Longhorn resolve a
# per-PVC Secret in the PVC's own namespace (presumably holding the
# encryption passphrase — confirm against the Longhorn encryption docs).
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: asteria-encrypted
parameters:
  diskSelector: asteria
  fromBackup: ""
  numberOfReplicas: "2"
  staleReplicaTimeout: "30"
  fsType: "ext4"
  replicaAutoBalance: "least-effort"
  dataLocality: "disabled"
  encrypted: "true"
  csi.storage.k8s.io/provisioner-secret-name: ${pvc.name}
  csi.storage.k8s.io/provisioner-secret-namespace: ${pvc.namespace}
  csi.storage.k8s.io/node-publish-secret-name: ${pvc.name}
  csi.storage.k8s.io/node-publish-secret-namespace: ${pvc.namespace}
  csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}
  csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace}
provisioner: driver.longhorn.io
reclaimPolicy: Retain  # keep the Longhorn volume when the PVC is deleted
allowVolumeExpansion: true
volumeBindingMode: Immediate
|
||||
@ -3,4 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- asteria.yaml
|
||||
- asteria-encrypted.yaml
|
||||
- astreae.yaml
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# services/postgres/kustomization.yaml
|
||||
# infrastructure/postgres/kustomization.yaml
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
namespace: postgres
|
||||
@ -1,4 +1,4 @@
|
||||
# services/postgres/namespace.yaml
|
||||
# infrastructure/postgres/namespace.yaml
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
@ -1,4 +1,4 @@
|
||||
# services/postgres/secretproviderclass.yaml
|
||||
# infrastructure/postgres/secretproviderclass.yaml
|
||||
apiVersion: secrets-store.csi.x-k8s.io/v1
|
||||
kind: SecretProviderClass
|
||||
metadata:
|
||||
@ -11,5 +11,5 @@ spec:
|
||||
roleName: "postgres"
|
||||
objects: |
|
||||
- objectName: "postgres_password"
|
||||
secretPath: "kv/data/postgres"
|
||||
secretPath: "kv/data/atlas/postgres/postgres-db"
|
||||
secretKey: "POSTGRES_PASSWORD"
|
||||
23
infrastructure/postgres/service.yaml
Normal file
23
infrastructure/postgres/service.yaml
Normal file
@ -0,0 +1,23 @@
|
||||
# infrastructure/postgres/service.yaml
# Headless Service for the postgres StatefulSet; also exposes the
# postgres-exporter sidecar (port 9187, added in statefulset.yaml) and
# annotates it for Prometheus scraping.
apiVersion: v1
kind: Service
metadata:
  name: postgres-service
  namespace: postgres
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9187"
    prometheus.io/path: "/metrics"
spec:
  clusterIP: None  # headless: gives the StatefulSet stable per-pod DNS
  ports:
    - name: postgres
      port: 5432
      protocol: TCP
      targetPort: 5432
    - name: metrics
      port: 9187
      protocol: TCP
      targetPort: 9187
  selector:
    app: postgres
|
||||
@ -1,4 +1,4 @@
|
||||
# services/postgres/serviceaccount.yaml
|
||||
# infrastructure/postgres/serviceaccount.yaml
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
@ -1,4 +1,4 @@
|
||||
# services/postgres/statefulset.yaml
|
||||
# infrastructure/postgres/statefulset.yaml
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
@ -58,6 +58,23 @@ spec:
|
||||
- name: vault-secrets
|
||||
mountPath: /mnt/vault
|
||||
readOnly: true
|
||||
- name: postgres-exporter
|
||||
image: quay.io/prometheuscommunity/postgres-exporter:v0.15.0
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 9187
|
||||
protocol: TCP
|
||||
env:
|
||||
- name: DATA_SOURCE_URI
|
||||
value: "localhost:5432/postgres?sslmode=disable"
|
||||
- name: DATA_SOURCE_USER
|
||||
value: postgres
|
||||
- name: DATA_SOURCE_PASS_FILE
|
||||
value: /mnt/vault/postgres_password
|
||||
volumeMounts:
|
||||
- name: vault-secrets
|
||||
mountPath: /mnt/vault
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: vault-secrets
|
||||
csi:
|
||||
@ -1,10 +1,11 @@
|
||||
# infrastructure/sources/cert-manager/letsencrypt-prod.yaml
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt-prod
|
||||
spec:
|
||||
acme:
|
||||
email: brad.stein@gmail.com
|
||||
email: brad@bstein.dev
|
||||
server: https://acme-v02.api.letsencrypt.org/directory
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-prod-account-key
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
# infrastructure/sources/cert-manager/letsencrypt.yaml
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: ClusterIssuer
|
||||
metadata:
|
||||
name: letsencrypt
|
||||
spec:
|
||||
acme:
|
||||
email: brad.stein@gmail.com
|
||||
email: brad@bstein.dev
|
||||
server: https://acme-v02.api.letsencrypt.org/directory
|
||||
privateKeySecretRef:
|
||||
name: letsencrypt-account-key
|
||||
|
||||
9
infrastructure/sources/helm/ananace.yaml
Normal file
9
infrastructure/sources/helm/ananace.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
# infrastructure/sources/helm/ananace.yaml
# Flux HelmRepository for the ananace charts (used elsewhere in this repo for
# the Matrix/Synapse releases — confirm which charts consume it).
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: ananace
  namespace: flux-system
spec:
  interval: 1h
  url: https://ananace.gitlab.io/charts
|
||||
@ -2,15 +2,18 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- ananace.yaml
|
||||
- fluent-bit.yaml
|
||||
- grafana.yaml
|
||||
- hashicorp.yaml
|
||||
- jetstack.yaml
|
||||
- jenkins.yaml
|
||||
- mailu.yaml
|
||||
- metallb.yaml
|
||||
- opentelemetry.yaml
|
||||
- opensearch.yaml
|
||||
- harbor.yaml
|
||||
- longhorn.yaml
|
||||
- prometheus.yaml
|
||||
- victoria-metrics.yaml
|
||||
- secrets-store-csi.yaml
|
||||
|
||||
9
infrastructure/sources/helm/longhorn.yaml
Normal file
9
infrastructure/sources/helm/longhorn.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
# infrastructure/sources/helm/longhorn.yaml
# Flux HelmRepository for the official Longhorn chart (consumed by the
# longhorn HelmRelease in infrastructure/longhorn/core).
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: longhorn
  namespace: flux-system
spec:
  interval: 30m
  url: https://charts.longhorn.io
|
||||
9
infrastructure/sources/helm/metallb.yaml
Normal file
9
infrastructure/sources/helm/metallb.yaml
Normal file
@ -0,0 +1,9 @@
|
||||
# infrastructure/sources/helm/metallb.yaml
# Flux HelmRepository for MetalLB (consumed by the metallb HelmRelease).
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: metallb
  namespace: flux-system
spec:
  interval: 1h
  url: https://metallb.github.io/metallb
|
||||
3119
infrastructure/traefik/crds.yaml
Normal file
3119
infrastructure/traefik/crds.yaml
Normal file
File diff suppressed because it is too large
Load Diff
@ -27,6 +27,8 @@ items:
|
||||
creationTimestamp: null
|
||||
labels:
|
||||
app: traefik
|
||||
app.kubernetes.io/instance: traefik-kube-system
|
||||
app.kubernetes.io/name: traefik
|
||||
spec:
|
||||
containers:
|
||||
- args:
|
||||
|
||||
@ -5,6 +5,7 @@ metadata:
|
||||
name: traefik
|
||||
namespace: flux-system
|
||||
resources:
|
||||
- crds.yaml
|
||||
- deployment.yaml
|
||||
- serviceaccount.yaml
|
||||
- clusterrole.yaml
|
||||
|
||||
@ -3,9 +3,10 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: traefik
|
||||
namespace: kube-system
|
||||
namespace: traefik
|
||||
annotations:
|
||||
metallb.universe.tf/address-pool: communication-pool
|
||||
metallb.universe.tf/allow-shared-ip: traefik
|
||||
spec:
|
||||
type: LoadBalancer
|
||||
loadBalancerClass: metallb
|
||||
@ -20,5 +21,4 @@ spec:
|
||||
targetPort: websecure
|
||||
protocol: TCP
|
||||
selector:
|
||||
app.kubernetes.io/instance: traefik-kube-system
|
||||
app.kubernetes.io/name: traefik
|
||||
app: traefik
|
||||
|
||||
@ -17,4 +17,5 @@ spec:
|
||||
values:
|
||||
syncSecret:
|
||||
enabled: true
|
||||
enableSecretRotation: false
|
||||
enableSecretRotation: true
|
||||
rotationPollInterval: 2m
|
||||
|
||||
43
infrastructure/vault-injector/helmrelease.yaml
Normal file
43
infrastructure/vault-injector/helmrelease.yaml
Normal file
@ -0,0 +1,43 @@
|
||||
# infrastructure/vault-injector/helmrelease.yaml
# Deploys ONLY the Vault agent injector webhook from the hashicorp/vault
# chart; the Vault server and CSI provider are managed elsewhere, so both
# are disabled and the injector points at the existing in-cluster Vault.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: vault-injector
  namespace: vault
spec:
  interval: 30m
  chart:
    spec:
      chart: vault
      version: 0.31.0
      sourceRef:
        kind: HelmRepository
        name: hashicorp
        namespace: flux-system
  install:
    remediation: { retries: 3 }
    timeout: 10m
  upgrade:
    remediation:
      retries: 3
      remediateLastFailure: true
    cleanupOnFail: true
    timeout: 10m
  values:
    global:
      # Injected agents talk to the already-running Vault service.
      externalVaultAddr: http://vault.vault.svc.cluster.local:8200
      tlsDisable: true
    server:
      enabled: false  # Vault server managed outside this release
    csi:
      enabled: false  # Secrets Store CSI provider managed outside this release
    injector:
      enabled: true
      replicas: 1
      agentImage:
        repository: hashicorp/vault
        tag: "1.17.6"
      webhook:
        # Ignore webhook failures so pod creation is never blocked by an
        # unavailable injector.
        failurePolicy: Ignore
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
|
||||
5
infrastructure/vault-injector/kustomization.yaml
Normal file
5
infrastructure/vault-injector/kustomization.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# infrastructure/vault-injector/kustomization.yaml
# Standalone kustomization: just the injector HelmRelease.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrelease.yaml
|
||||
@ -1,8 +1,8 @@
|
||||
{
|
||||
"counts": {
|
||||
"helmrelease_host_hints": 7,
|
||||
"http_endpoints": 35,
|
||||
"services": 44,
|
||||
"workloads": 49
|
||||
"helmrelease_host_hints": 19,
|
||||
"http_endpoints": 45,
|
||||
"services": 47,
|
||||
"workloads": 74
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1880
knowledge/catalog/metrics.json
Normal file
1880
knowledge/catalog/metrics.json
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -17,6 +17,11 @@ flowchart LR
|
||||
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend
|
||||
wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"]
|
||||
svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend
|
||||
host_budget_bstein_dev["budget.bstein.dev"]
|
||||
svc_finance_actual_budget["finance/actual-budget (Service)"]
|
||||
host_budget_bstein_dev --> svc_finance_actual_budget
|
||||
wl_finance_actual_budget["finance/actual-budget (Deployment)"]
|
||||
svc_finance_actual_budget --> wl_finance_actual_budget
|
||||
host_call_live_bstein_dev["call.live.bstein.dev"]
|
||||
svc_comms_element_call["comms/element-call (Service)"]
|
||||
host_call_live_bstein_dev --> svc_comms_element_call
|
||||
@ -37,6 +42,11 @@ flowchart LR
|
||||
host_cloud_bstein_dev --> svc_nextcloud_nextcloud
|
||||
wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"]
|
||||
svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud
|
||||
host_health_bstein_dev["health.bstein.dev"]
|
||||
svc_health_wger["health/wger (Service)"]
|
||||
host_health_bstein_dev --> svc_health_wger
|
||||
wl_health_wger["health/wger (Deployment)"]
|
||||
svc_health_wger --> wl_health_wger
|
||||
host_kit_live_bstein_dev["kit.live.bstein.dev"]
|
||||
svc_comms_livekit_token_service["comms/livekit-token-service (Service)"]
|
||||
host_kit_live_bstein_dev --> svc_comms_livekit_token_service
|
||||
@ -47,15 +57,22 @@ flowchart LR
|
||||
wl_comms_livekit["comms/livekit (Deployment)"]
|
||||
svc_comms_livekit --> wl_comms_livekit
|
||||
host_live_bstein_dev["live.bstein.dev"]
|
||||
svc_comms_othrys_element_element_web["comms/othrys-element-element-web (Service)"]
|
||||
host_live_bstein_dev --> svc_comms_othrys_element_element_web
|
||||
wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"]
|
||||
svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web
|
||||
host_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"]
|
||||
host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||
wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"]
|
||||
svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse
|
||||
svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"]
|
||||
host_live_bstein_dev --> svc_comms_matrix_guest_register
|
||||
wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"]
|
||||
svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register
|
||||
svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"]
|
||||
host_live_bstein_dev --> svc_comms_matrix_authentication_service
|
||||
wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"]
|
||||
svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service
|
||||
host_logs_bstein_dev["logs.bstein.dev"]
|
||||
svc_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Service)"]
|
||||
host_logs_bstein_dev --> svc_logging_oauth2_proxy_logs
|
||||
wl_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Deployment)"]
|
||||
svc_logging_oauth2_proxy_logs --> wl_logging_oauth2_proxy_logs
|
||||
host_longhorn_bstein_dev["longhorn.bstein.dev"]
|
||||
svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"]
|
||||
host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn
|
||||
@ -65,21 +82,25 @@ flowchart LR
|
||||
svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"]
|
||||
host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front
|
||||
host_matrix_live_bstein_dev["matrix.live.bstein.dev"]
|
||||
svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"]
|
||||
host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service
|
||||
wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"]
|
||||
svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service
|
||||
host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||
host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||
svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"]
|
||||
host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register
|
||||
wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"]
|
||||
svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register
|
||||
host_monero_bstein_dev["monero.bstein.dev"]
|
||||
svc_crypto_monerod["crypto/monerod (Service)"]
|
||||
host_monero_bstein_dev --> svc_crypto_monerod
|
||||
wl_crypto_monerod["crypto/monerod (Deployment)"]
|
||||
svc_crypto_monerod --> wl_crypto_monerod
|
||||
host_money_bstein_dev["money.bstein.dev"]
|
||||
svc_finance_firefly["finance/firefly (Service)"]
|
||||
host_money_bstein_dev --> svc_finance_firefly
|
||||
wl_finance_firefly["finance/firefly (Deployment)"]
|
||||
svc_finance_firefly --> wl_finance_firefly
|
||||
host_notes_bstein_dev["notes.bstein.dev"]
|
||||
svc_outline_outline["outline/outline (Service)"]
|
||||
host_notes_bstein_dev --> svc_outline_outline
|
||||
wl_outline_outline["outline/outline (Deployment)"]
|
||||
svc_outline_outline --> wl_outline_outline
|
||||
host_office_bstein_dev["office.bstein.dev"]
|
||||
svc_nextcloud_collabora["nextcloud/collabora (Service)"]
|
||||
host_office_bstein_dev --> svc_nextcloud_collabora
|
||||
@ -110,6 +131,11 @@ flowchart LR
|
||||
host_stream_bstein_dev --> svc_jellyfin_jellyfin
|
||||
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"]
|
||||
svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin
|
||||
host_tasks_bstein_dev["tasks.bstein.dev"]
|
||||
svc_planka_planka["planka/planka (Service)"]
|
||||
host_tasks_bstein_dev --> svc_planka_planka
|
||||
wl_planka_planka["planka/planka (Deployment)"]
|
||||
svc_planka_planka --> wl_planka_planka
|
||||
host_vault_bstein_dev["vault.bstein.dev"]
|
||||
svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"]
|
||||
host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service
|
||||
@ -133,23 +159,30 @@ flowchart LR
|
||||
wl_comms_livekit_token_service
|
||||
svc_comms_livekit
|
||||
wl_comms_livekit
|
||||
svc_comms_othrys_element_element_web
|
||||
wl_comms_othrys_element_element_web
|
||||
svc_comms_othrys_synapse_matrix_synapse
|
||||
wl_comms_othrys_synapse_matrix_synapse
|
||||
svc_comms_matrix_authentication_service
|
||||
wl_comms_matrix_authentication_service
|
||||
svc_comms_matrix_guest_register
|
||||
wl_comms_matrix_guest_register
|
||||
svc_comms_matrix_authentication_service
|
||||
wl_comms_matrix_authentication_service
|
||||
end
|
||||
subgraph crypto[crypto]
|
||||
svc_crypto_monerod
|
||||
wl_crypto_monerod
|
||||
end
|
||||
subgraph finance[finance]
|
||||
svc_finance_actual_budget
|
||||
wl_finance_actual_budget
|
||||
svc_finance_firefly
|
||||
wl_finance_firefly
|
||||
end
|
||||
subgraph gitea[gitea]
|
||||
svc_gitea_gitea
|
||||
wl_gitea_gitea
|
||||
end
|
||||
subgraph health[health]
|
||||
svc_health_wger
|
||||
wl_health_wger
|
||||
end
|
||||
subgraph jellyfin[jellyfin]
|
||||
svc_jellyfin_pegasus
|
||||
wl_jellyfin_pegasus
|
||||
@ -160,6 +193,10 @@ flowchart LR
|
||||
svc_jenkins_jenkins
|
||||
wl_jenkins_jenkins
|
||||
end
|
||||
subgraph logging[logging]
|
||||
svc_logging_oauth2_proxy_logs
|
||||
wl_logging_oauth2_proxy_logs
|
||||
end
|
||||
subgraph longhorn_system[longhorn-system]
|
||||
svc_longhorn_system_oauth2_proxy_longhorn
|
||||
wl_longhorn_system_oauth2_proxy_longhorn
|
||||
@ -173,6 +210,14 @@ flowchart LR
|
||||
svc_nextcloud_collabora
|
||||
wl_nextcloud_collabora
|
||||
end
|
||||
subgraph outline[outline]
|
||||
svc_outline_outline
|
||||
wl_outline_outline
|
||||
end
|
||||
subgraph planka[planka]
|
||||
svc_planka_planka
|
||||
wl_planka_planka
|
||||
end
|
||||
subgraph sso[sso]
|
||||
svc_sso_oauth2_proxy
|
||||
wl_sso_oauth2_proxy
|
||||
|
||||
@ -70,6 +70,7 @@ WORKER_NODES = [
|
||||
"titan-13",
|
||||
"titan-14",
|
||||
"titan-15",
|
||||
"titan-16",
|
||||
"titan-17",
|
||||
"titan-18",
|
||||
"titan-19",
|
||||
@ -85,19 +86,17 @@ WORKER_TOTAL = len(WORKER_NODES)
|
||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||
# Namespaces considered infrastructure (excluded from workload counts)
|
||||
INFRA_NAMESPACES = [
|
||||
"kube-system",
|
||||
"longhorn-system",
|
||||
"metallb-system",
|
||||
INFRA_PATTERNS = [
|
||||
"kube-.*",
|
||||
".*-system",
|
||||
"traefik",
|
||||
"monitoring",
|
||||
"logging",
|
||||
"cert-manager",
|
||||
"flux-system",
|
||||
"traefik",
|
||||
"maintenance",
|
||||
"postgres",
|
||||
]
|
||||
INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$"
|
||||
INFRA_REGEX = f"^({'|'.join(INFRA_PATTERNS)})$"
|
||||
# Namespaces allowed on control plane without counting as workloads
|
||||
CP_ALLOWED_NS = INFRA_REGEX
|
||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||
@ -209,7 +208,66 @@ def namespace_ram_raw(scope_var):
|
||||
|
||||
|
||||
def namespace_gpu_usage_instant(scope_var):
    """Instantaneous per-namespace GPU usage.

    Thin alias over :func:`gpu_usage_by_namespace`, which attributes each
    node's GPU utilisation to namespaces by their request share.
    """
    expr = gpu_usage_by_namespace(scope_var)
    return expr
|
||||
|
||||
|
||||
def jetson_gpu_util_by_node():
    """PromQL: per-node GPU utilisation for Jetson boards.

    Reads the jetson exporter's gr3d (3D engine) frequency percentage and
    collapses duplicate series per node with ``max``.
    """
    metric = 'jetson_gr3d_freq_percent{node!=""}'
    return f"max by (node) ({metric})"
|
||||
|
||||
|
||||
def dcgm_gpu_util_by_node():
    """PromQL: per-node GPU utilisation from NVIDIA DCGM.

    DCGM carries the exporter pod name in the ``Hostname`` label, so we copy
    it into ``pod`` and pin ``namespace`` to "monitoring" before joining onto
    ``kube_pod_info`` to recover the ``node`` label, averaging per node.
    """
    with_pod = 'label_replace(DCGM_FI_DEV_GPU_UTIL, "pod", "$1", "Hostname", "(.*)")'
    with_ns = f'label_replace({with_pod}, "namespace", "monitoring", "", "")'
    pod_info = 'kube_pod_info{namespace="monitoring"}'
    return f"avg by (node) ({with_ns} * on(namespace,pod) group_left(node) {pod_info})"
|
||||
|
||||
|
||||
def gpu_util_by_node():
    """PromQL: GPU utilisation per node — DCGM series where present, Jetson
    series otherwise (PromQL ``or`` keeps the left side when both exist)."""
    sources = (dcgm_gpu_util_by_node(), jetson_gpu_util_by_node())
    return " or ".join(sources)
|
||||
|
||||
|
||||
def gpu_util_by_hostname():
    """Same as :func:`gpu_util_by_node` but with the ``node`` label mirrored
    into ``Hostname`` (the label DCGM-style panels expect)."""
    per_node = gpu_util_by_node()
    return f'label_replace({per_node}, "Hostname", "$1", "node", "(.*)")'
|
||||
|
||||
|
||||
def gpu_node_labels():
    """PromQL: node-label series matching any GPU-capable node —
    either a node advertising an ``accelerator`` label or a Jetson."""
    discrete = 'kube_node_labels{label_accelerator=~".+"}'
    jetson = 'kube_node_labels{label_jetson="true"}'
    return f"{discrete} or {jetson}"
|
||||
|
||||
|
||||
def gpu_requests_by_namespace_node(scope_var):
    """PromQL: nvidia.com/gpu* resource requests summed per (namespace, node).

    Joins container requests onto ``kube_pod_info`` to pick up the ``node``
    label, then restricts to GPU-capable nodes via :func:`gpu_node_labels`.
    ``scope_var`` is an extra label selector spliced into the requests metric.
    """
    requests = (
        f'kube_pod_container_resource_requests{{resource=~"nvidia.com/gpu.*",{scope_var}}}'
    )
    node_join = "* on(namespace,pod) group_left(node) kube_pod_info"
    gpu_nodes_only = f"* on(node) group_left() ({gpu_node_labels()})"
    return f"sum by (namespace,node) ({requests} {node_join} {gpu_nodes_only})"
|
||||
|
||||
|
||||
def gpu_usage_by_namespace(scope_var):
    """PromQL: node GPU utilisation attributed to namespaces.

    Each node's utilisation is split across namespaces in proportion to
    their GPU requests on that node; ``clamp_min(..., 1)`` guards against a
    zero denominator when a node has no requests.
    """
    per_ns_node = gpu_requests_by_namespace_node(scope_var)
    node_total = f"sum by (node) ({per_ns_node})"
    request_share = f"({per_ns_node}) / clamp_min({node_total}, 1)"
    weighted_util = f"* on(node) group_left() ({gpu_util_by_node()})"
    return f"sum by (namespace) ({request_share} {weighted_util})"
|
||||
|
||||
|
||||
def jetson_gpu_usage_by_namespace(scope_var):
    """PromQL: Jetson GPU utilisation attributed to namespaces.

    Same request-share weighting as :func:`gpu_usage_by_namespace`, but built
    from Jetson-only request and utilisation series.
    """
    per_ns_node = jetson_gpu_requests(scope_var)
    node_total = f"sum by (node) ({per_ns_node})"
    request_share = f"({per_ns_node}) / clamp_min({node_total}, 1)"
    return (
        f"sum by (namespace) ({request_share} "
        f"* on(node) group_left() {jetson_gpu_util_by_node()})"
    )
|
||||
|
||||
|
||||
def namespace_share_expr(resource_expr):
|
||||
@ -229,7 +287,7 @@ def namespace_gpu_share_expr(scope_var):
|
||||
usage = namespace_gpu_usage_instant(scope_var)
|
||||
total = f"(sum({usage}) or on() vector(0))"
|
||||
share = f"100 * ({usage}) / clamp_min({total}, 1)"
|
||||
idle = 'label_replace(vector(100), "namespace", "idle", "", "") and on() (' + total + " == 0)"
|
||||
idle = 'label_replace(vector(100), "namespace", "idle", "", "") * scalar(' + total + " == bool 0)"
|
||||
return f"({share}) or ({idle})"
|
||||
|
||||
|
||||
@ -319,6 +377,76 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
|
||||
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
|
||||
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
|
||||
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
|
||||
GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"'
|
||||
GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}"
|
||||
GLUE_FILTER = f"and on(namespace,cronjob) {GLUE_JOBS}"
|
||||
GLUE_LAST_SUCCESS = f"(kube_cronjob_status_last_successful_time {GLUE_FILTER})"
|
||||
GLUE_LAST_SCHEDULE = f"(kube_cronjob_status_last_schedule_time {GLUE_FILTER})"
|
||||
GLUE_SUSPENDED = f"(kube_cronjob_spec_suspend {GLUE_FILTER}) == 1"
|
||||
GLUE_ACTIVE = f"(kube_cronjob_status_active {GLUE_FILTER})"
|
||||
GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})"
|
||||
GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})"
|
||||
GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600"
|
||||
GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600"
|
||||
GLUE_STALE_WINDOW_SEC = 36 * 3600
|
||||
GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
|
||||
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)"
|
||||
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
|
||||
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
|
||||
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE})) or on() vector(0)"
|
||||
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE}) or on() vector(0)"
|
||||
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED}) or on() vector(0)"
|
||||
ARIADNE_TASK_ERRORS_RANGE = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[$__range]))'
|
||||
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||
ARIADNE_TASK_ERRORS_1H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[1h]))'
|
||||
ARIADNE_TASK_ERRORS_30D = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[30d]))'
|
||||
ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))'
|
||||
ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_total[1h]))'
|
||||
ARIADNE_TASK_ERRORS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
|
||||
ARIADNE_TASK_ERRORS_24H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||
ARIADNE_TASK_RUNS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total[1h]))'
|
||||
ARIADNE_TASK_ATTEMPTS_SERIES = 'sum(increase(ariadne_task_runs_total[$__interval]))'
|
||||
ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="error"}[$__interval]))'
|
||||
ARIADNE_TASK_WARNINGS_SERIES = (
|
||||
'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)'
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
|
||||
"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600"
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
|
||||
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
|
||||
)
|
||||
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||
ARIADNE_CI_COVERAGE = 'ariadne_ci_coverage_percent{repo="ariadne"}'
|
||||
ARIADNE_CI_TESTS = 'ariadne_ci_tests_total{repo="ariadne"}'
|
||||
ARIADNE_TEST_SUCCESS_RATE = (
|
||||
"100 * "
|
||||
'sum(max_over_time(ariadne_ci_tests_total{repo="ariadne",result="passed"}[30d])) '
|
||||
"/ clamp_min("
|
||||
'sum(max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"passed|failed|error"}[30d])), 1)'
|
||||
)
|
||||
ARIADNE_TEST_FAILURES_24H = (
|
||||
'sum by (result) (max_over_time(ariadne_ci_tests_total{repo="ariadne",result=~"failed|error"}[24h]))'
|
||||
)
|
||||
POSTGRES_CONN_USED = (
|
||||
'label_replace(sum(pg_stat_activity_count), "conn", "used", "__name__", ".*") '
|
||||
'or label_replace(max(pg_settings_max_connections), "conn", "max", "__name__", ".*")'
|
||||
)
|
||||
POSTGRES_CONN_HOTTEST = 'topk(1, sum by (datname) (pg_stat_activity_count))'
|
||||
ONEOFF_JOB_OWNER = (
|
||||
'label_replace(kube_job_owner{owner_kind="CronJob"}, "owner_name", "$1", "job_name", "(.*)")'
|
||||
)
|
||||
ONEOFF_JOB_PODS = f'(kube_pod_owner{{owner_kind="Job"}} unless on(namespace, owner_name) {ONEOFF_JOB_OWNER})'
|
||||
ONEOFF_JOB_POD_AGE_HOURS = (
|
||||
'((time() - kube_pod_start_time{pod!=""}) / 3600) '
|
||||
f'* on(namespace,pod) group_left(owner_name) {ONEOFF_JOB_PODS} '
|
||||
'* on(namespace,pod) group_left(phase) '
|
||||
'max by (namespace,pod,phase) (kube_pod_status_phase{phase=~"Running|Succeeded"})'
|
||||
)
|
||||
GLUE_LAST_SUCCESS_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SUCCESS}[$__range])) / 3600"
|
||||
GLUE_LAST_SCHEDULE_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SCHEDULE}[$__range])) / 3600"
|
||||
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
||||
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
||||
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
||||
@ -496,6 +624,7 @@ def timeseries_panel(
|
||||
grid,
|
||||
*,
|
||||
unit="none",
|
||||
max_value=None,
|
||||
legend=None,
|
||||
legend_display="table",
|
||||
legend_placement="bottom",
|
||||
@ -520,6 +649,8 @@ def timeseries_panel(
|
||||
"tooltip": {"mode": "multi"},
|
||||
},
|
||||
}
|
||||
if max_value is not None:
|
||||
panel["fieldConfig"]["defaults"]["max"] = max_value
|
||||
if legend:
|
||||
panel["targets"][0]["legendFormat"] = legend
|
||||
if legend_calcs:
|
||||
@ -671,13 +802,22 @@ def bargauge_panel(
|
||||
grid,
|
||||
*,
|
||||
unit="none",
|
||||
legend=None,
|
||||
links=None,
|
||||
limit=None,
|
||||
sort_order="desc",
|
||||
thresholds=None,
|
||||
decimals=None,
|
||||
instant=False,
|
||||
overrides=None,
|
||||
):
|
||||
"""Return a bar gauge panel with label-aware reduction."""
|
||||
cleaned_expr = expr.strip()
|
||||
if not cleaned_expr.startswith(("sort(", "sort_desc(")):
|
||||
if sort_order == "desc":
|
||||
expr = f"sort_desc({expr})"
|
||||
elif sort_order == "asc":
|
||||
expr = f"sort({expr})"
|
||||
panel = {
|
||||
"id": panel_id,
|
||||
"type": "bargauge",
|
||||
@ -685,7 +825,12 @@ def bargauge_panel(
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": grid,
|
||||
"targets": [
|
||||
{"expr": expr, "refId": "A", "legendFormat": "{{node}}", **({"instant": True} if instant else {})}
|
||||
{
|
||||
"expr": expr,
|
||||
"refId": "A",
|
||||
"legendFormat": legend or "{{node}}",
|
||||
**({"instant": True} if instant else {}),
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@ -715,6 +860,8 @@ def bargauge_panel(
|
||||
},
|
||||
},
|
||||
}
|
||||
if overrides:
|
||||
panel["fieldConfig"]["overrides"].extend(overrides)
|
||||
if decimals is not None:
|
||||
panel["fieldConfig"]["defaults"]["decimals"] = decimals
|
||||
if links:
|
||||
@ -723,7 +870,7 @@ def bargauge_panel(
|
||||
panel["transformations"] = [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {"fields": ["Value"], "order": "desc"},
|
||||
"options": {"fields": ["Value"], "order": sort_order},
|
||||
}
|
||||
]
|
||||
if limit:
|
||||
@ -763,6 +910,15 @@ def build_overview():
|
||||
{"color": "red", "value": 3},
|
||||
],
|
||||
}
|
||||
age_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 6},
|
||||
{"color": "orange", "value": 24},
|
||||
{"color": "red", "value": 48},
|
||||
],
|
||||
}
|
||||
|
||||
row1_stats = [
|
||||
{
|
||||
@ -965,7 +1121,7 @@ def build_overview():
|
||||
30,
|
||||
"Mail Sent (1d)",
|
||||
'max(postmark_outbound_sent{window="1d"})',
|
||||
{"h": 2, "w": 6, "x": 0, "y": 8},
|
||||
{"h": 3, "w": 4, "x": 0, "y": 8},
|
||||
unit="none",
|
||||
links=link_to("atlas-mail"),
|
||||
)
|
||||
@ -976,7 +1132,7 @@ def build_overview():
|
||||
"type": "stat",
|
||||
"title": "Mail Bounces (1d)",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 2, "w": 6, "x": 12, "y": 8},
|
||||
"gridPos": {"h": 3, "w": 4, "x": 8, "y": 8},
|
||||
"targets": [
|
||||
{
|
||||
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
|
||||
@ -1022,7 +1178,7 @@ def build_overview():
|
||||
32,
|
||||
"Mail Success Rate (1d)",
|
||||
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
|
||||
{"h": 2, "w": 6, "x": 6, "y": 8},
|
||||
{"h": 3, "w": 4, "x": 4, "y": 8},
|
||||
unit="percent",
|
||||
thresholds=mail_success_thresholds,
|
||||
decimals=1,
|
||||
@ -1034,13 +1190,38 @@ def build_overview():
|
||||
33,
|
||||
"Mail Limit Used (30d)",
|
||||
"max(postmark_sending_limit_used_percent)",
|
||||
{"h": 2, "w": 6, "x": 18, "y": 8},
|
||||
{"h": 3, "w": 4, "x": 12, "y": 8},
|
||||
unit="percent",
|
||||
thresholds=mail_limit_thresholds,
|
||||
decimals=1,
|
||||
links=link_to("atlas-mail"),
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
34,
|
||||
"Postgres Connections Used",
|
||||
POSTGRES_CONN_USED,
|
||||
{"h": 3, "w": 4, "x": 16, "y": 8},
|
||||
decimals=0,
|
||||
text_mode="name_and_value",
|
||||
legend="{{conn}}",
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
35,
|
||||
"Postgres Hottest Connections",
|
||||
POSTGRES_CONN_HOTTEST,
|
||||
{"h": 3, "w": 4, "x": 20, "y": 8},
|
||||
unit="none",
|
||||
decimals=0,
|
||||
text_mode="name_and_value",
|
||||
legend="{{datname}}",
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
|
||||
storage_panels = [
|
||||
(23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
|
||||
@ -1054,13 +1235,104 @@ def build_overview():
|
||||
panel_id,
|
||||
title,
|
||||
expr,
|
||||
{"h": 6, "w": 6, "x": 6 * idx, "y": 10},
|
||||
{"h": 3, "w": 6, "x": 6 * idx, "y": 11},
|
||||
unit=unit,
|
||||
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
|
||||
links=link_to("atlas-storage"),
|
||||
)
|
||||
)
|
||||
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
40,
|
||||
"One-off Job Pods (age hours)",
|
||||
ONEOFF_JOB_POD_AGE_HOURS,
|
||||
{"h": 6, "w": 6, "x": 0, "y": 14},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{pod}}",
|
||||
thresholds=age_thresholds,
|
||||
limit=8,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
{
|
||||
"id": 41,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts / Failures",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 6, "w": 6, "x": 6, "y": 14},
|
||||
"targets": [
|
||||
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
||||
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {"unit": "none"},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Attempts"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}
|
||||
],
|
||||
},
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Failures"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
"options": {
|
||||
"legend": {"displayMode": "table", "placement": "right"},
|
||||
"tooltip": {"mode": "multi"},
|
||||
},
|
||||
}
|
||||
)
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
42,
|
||||
"Ariadne Test Success Rate",
|
||||
ARIADNE_TEST_SUCCESS_RATE,
|
||||
{"h": 6, "w": 6, "x": 12, "y": 14},
|
||||
unit="percent",
|
||||
max_value=100,
|
||||
legend=None,
|
||||
legend_display="list",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
43,
|
||||
"Tests with Failures (24h)",
|
||||
ARIADNE_TEST_FAILURES_24H,
|
||||
{"h": 6, "w": 6, "x": 18, "y": 14},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{result}}",
|
||||
overrides=[
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "error"},
|
||||
"properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "yellow"}}],
|
||||
},
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "failed"},
|
||||
"properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}],
|
||||
},
|
||||
],
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 5},
|
||||
{"color": "red", "value": 10},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
cpu_scope = "$namespace_scope_cpu"
|
||||
gpu_scope = "$namespace_scope_gpu"
|
||||
ram_scope = "$namespace_scope_ram"
|
||||
@ -1070,9 +1342,9 @@ def build_overview():
|
||||
11,
|
||||
"Namespace CPU Share",
|
||||
namespace_cpu_share_expr(cpu_scope),
|
||||
{"h": 9, "w": 8, "x": 0, "y": 16},
|
||||
{"h": 9, "w": 8, "x": 0, "y": 20},
|
||||
links=namespace_scope_links("namespace_scope_cpu"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -1080,9 +1352,9 @@ def build_overview():
|
||||
12,
|
||||
"Namespace GPU Share",
|
||||
namespace_gpu_share_expr(gpu_scope),
|
||||
{"h": 9, "w": 8, "x": 8, "y": 16},
|
||||
{"h": 9, "w": 8, "x": 8, "y": 20},
|
||||
links=namespace_scope_links("namespace_scope_gpu"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -1090,9 +1362,9 @@ def build_overview():
|
||||
13,
|
||||
"Namespace RAM Share",
|
||||
namespace_ram_share_expr(ram_scope),
|
||||
{"h": 9, "w": 8, "x": 16, "y": 16},
|
||||
{"h": 9, "w": 8, "x": 16, "y": 20},
|
||||
links=namespace_scope_links("namespace_scope_ram"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
|
||||
@ -1102,7 +1374,7 @@ def build_overview():
|
||||
14,
|
||||
"Worker Node CPU",
|
||||
node_cpu_expr(worker_filter),
|
||||
{"h": 12, "w": 12, "x": 0, "y": 32},
|
||||
{"h": 12, "w": 12, "x": 0, "y": 36},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_calcs=["last"],
|
||||
@ -1116,7 +1388,7 @@ def build_overview():
|
||||
15,
|
||||
"Worker Node RAM",
|
||||
node_mem_expr(worker_filter),
|
||||
{"h": 12, "w": 12, "x": 12, "y": 32},
|
||||
{"h": 12, "w": 12, "x": 12, "y": 36},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_calcs=["last"],
|
||||
@ -1131,7 +1403,7 @@ def build_overview():
|
||||
16,
|
||||
"Control plane CPU",
|
||||
node_cpu_expr(CONTROL_ALL_REGEX),
|
||||
{"h": 10, "w": 12, "x": 0, "y": 44},
|
||||
{"h": 10, "w": 12, "x": 0, "y": 48},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_display="table",
|
||||
@ -1143,7 +1415,7 @@ def build_overview():
|
||||
17,
|
||||
"Control plane RAM",
|
||||
node_mem_expr(CONTROL_ALL_REGEX),
|
||||
{"h": 10, "w": 12, "x": 12, "y": 44},
|
||||
{"h": 10, "w": 12, "x": 12, "y": 48},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_display="table",
|
||||
@ -1156,7 +1428,7 @@ def build_overview():
|
||||
28,
|
||||
"Node Pod Share",
|
||||
'(sum(kube_pod_info{pod!="" , node!=""}) by (node) / clamp_min(sum(kube_pod_info{pod!="" , node!=""}), 1)) * 100',
|
||||
{"h": 10, "w": 12, "x": 0, "y": 54},
|
||||
{"h": 10, "w": 12, "x": 0, "y": 58},
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -1164,7 +1436,7 @@ def build_overview():
|
||||
29,
|
||||
"Top Nodes by Pod Count",
|
||||
'topk(12, sum(kube_pod_info{pod!="" , node!=""}) by (node))',
|
||||
{"h": 10, "w": 12, "x": 12, "y": 54},
|
||||
{"h": 10, "w": 12, "x": 12, "y": 58},
|
||||
unit="none",
|
||||
limit=12,
|
||||
decimals=0,
|
||||
@ -1186,7 +1458,7 @@ def build_overview():
|
||||
18,
|
||||
"Cluster Ingress Throughput",
|
||||
NET_INGRESS_EXPR,
|
||||
{"h": 7, "w": 8, "x": 0, "y": 25},
|
||||
{"h": 7, "w": 8, "x": 0, "y": 29},
|
||||
unit="Bps",
|
||||
legend="Ingress (Traefik)",
|
||||
legend_display="list",
|
||||
@ -1199,7 +1471,7 @@ def build_overview():
|
||||
19,
|
||||
"Cluster Egress Throughput",
|
||||
NET_EGRESS_EXPR,
|
||||
{"h": 7, "w": 8, "x": 8, "y": 25},
|
||||
{"h": 7, "w": 8, "x": 8, "y": 29},
|
||||
unit="Bps",
|
||||
legend="Egress (Traefik)",
|
||||
legend_display="list",
|
||||
@ -1212,7 +1484,7 @@ def build_overview():
|
||||
20,
|
||||
"Intra-Cluster Throughput",
|
||||
NET_INTERNAL_EXPR,
|
||||
{"h": 7, "w": 8, "x": 16, "y": 25},
|
||||
{"h": 7, "w": 8, "x": 16, "y": 29},
|
||||
unit="Bps",
|
||||
legend="Internal traffic",
|
||||
legend_display="list",
|
||||
@ -1226,7 +1498,7 @@ def build_overview():
|
||||
21,
|
||||
"Root Filesystem Usage",
|
||||
root_usage_expr(),
|
||||
{"h": 16, "w": 12, "x": 0, "y": 64},
|
||||
{"h": 16, "w": 12, "x": 0, "y": 68},
|
||||
unit="percent",
|
||||
legend="{{node}}",
|
||||
legend_calcs=["last"],
|
||||
@ -1241,7 +1513,7 @@ def build_overview():
|
||||
22,
|
||||
"Nodes Closest to Full Root Disks",
|
||||
f"topk(12, {root_usage_expr()})",
|
||||
{"h": 16, "w": 12, "x": 12, "y": 64},
|
||||
{"h": 16, "w": 12, "x": 12, "y": 68},
|
||||
unit="percent",
|
||||
thresholds=PERCENT_THRESHOLDS,
|
||||
links=link_to("atlas-storage"),
|
||||
@ -1727,7 +1999,7 @@ def build_storage_dashboard():
|
||||
stat_panel(
|
||||
31,
|
||||
"Maintenance Cron Freshness (s)",
|
||||
'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})',
|
||||
'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob="image-sweeper"})',
|
||||
{"h": 4, "w": 12, "x": 12, "y": 44},
|
||||
unit="s",
|
||||
thresholds={
|
||||
@ -2136,6 +2408,285 @@ def build_mail_dashboard():
|
||||
}
|
||||
|
||||
|
||||
def build_jobs_dashboard():
|
||||
panels = []
|
||||
age_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 6},
|
||||
{"color": "orange", "value": 24},
|
||||
{"color": "red", "value": 48},
|
||||
],
|
||||
}
|
||||
recent_error_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "orange", "value": 1},
|
||||
{"color": "yellow", "value": 6},
|
||||
{"color": "green", "value": 24},
|
||||
],
|
||||
}
|
||||
|
||||
task_error_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 3},
|
||||
{"color": "red", "value": 5},
|
||||
],
|
||||
}
|
||||
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
1,
|
||||
"Ariadne Task Errors (range)",
|
||||
ARIADNE_TASK_ERRORS_RANGE,
|
||||
{"h": 7, "w": 8, "x": 0, "y": 0},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
{
|
||||
"id": 2,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts / Failures",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 7, "w": 8, "x": 8, "y": 0},
|
||||
"targets": [
|
||||
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
||||
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {"unit": "none"},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Attempts"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}
|
||||
],
|
||||
},
|
||||
{
|
||||
"matcher": {"id": "byName", "options": "Failures"},
|
||||
"properties": [
|
||||
{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
"options": {
|
||||
"legend": {"displayMode": "table", "placement": "right"},
|
||||
"tooltip": {"mode": "multi"},
|
||||
},
|
||||
}
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
3,
|
||||
"One-off Job Pods (age hours)",
|
||||
ONEOFF_JOB_POD_AGE_HOURS,
|
||||
{"h": 7, "w": 8, "x": 16, "y": 0},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{pod}}",
|
||||
thresholds=age_thresholds,
|
||||
limit=12,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
4,
|
||||
"Glue Jobs Stale (>36h)",
|
||||
GLUE_STALE_COUNT,
|
||||
{"h": 4, "w": 4, "x": 0, "y": 7},
|
||||
unit="none",
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 2},
|
||||
{"color": "red", "value": 3},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
5,
|
||||
"Glue Jobs Missing Success",
|
||||
GLUE_MISSING_COUNT,
|
||||
{"h": 4, "w": 4, "x": 4, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
6,
|
||||
"Glue Jobs Suspended",
|
||||
GLUE_SUSPENDED_COUNT,
|
||||
{"h": 4, "w": 4, "x": 8, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
7,
|
||||
"Ariadne Task Errors (1h)",
|
||||
ARIADNE_TASK_ERRORS_1H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 12, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
8,
|
||||
"Ariadne Task Errors (24h)",
|
||||
ARIADNE_TASK_ERRORS_24H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 16, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
9,
|
||||
"Ariadne Task Runs (1h)",
|
||||
ARIADNE_TASK_RUNS_1H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 20, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
10,
|
||||
"Ariadne Schedule Last Error (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 17},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=recent_error_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
11,
|
||||
"Ariadne Schedule Last Success (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 17},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
12,
|
||||
"Glue Jobs Last Success (hours ago)",
|
||||
GLUE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 23},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
13,
|
||||
"Glue Jobs Last Schedule (hours ago)",
|
||||
GLUE_LAST_SCHEDULE_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 23},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
14,
|
||||
"Ariadne Task Errors (1h)",
|
||||
ARIADNE_TASK_ERRORS_1H,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 29},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
15,
|
||||
"Ariadne Task Errors (30d)",
|
||||
ARIADNE_TASK_ERRORS_30D,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 29},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
16,
|
||||
"Ariadne Access Requests",
|
||||
ARIADNE_ACCESS_REQUESTS,
|
||||
{"h": 6, "w": 8, "x": 0, "y": 11},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{status}}",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
17,
|
||||
"Ariadne CI Coverage (%)",
|
||||
ARIADNE_CI_COVERAGE,
|
||||
{"h": 6, "w": 4, "x": 8, "y": 11},
|
||||
unit="percent",
|
||||
decimals=1,
|
||||
instant=True,
|
||||
legend="{{branch}}",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
18,
|
||||
"Ariadne CI Tests (latest)",
|
||||
ARIADNE_CI_TESTS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 11},
|
||||
unit="none",
|
||||
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
"uid": "atlas-jobs",
|
||||
"title": "Atlas Jobs",
|
||||
"folderUid": PRIVATE_FOLDER,
|
||||
"editable": True,
|
||||
"panels": panels,
|
||||
"time": {"from": "now-7d", "to": "now"},
|
||||
"annotations": {"list": []},
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": ["atlas", "jobs", "glue"],
|
||||
}
|
||||
|
||||
|
||||
def build_gpu_dashboard():
|
||||
panels = []
|
||||
gpu_scope = "$namespace_scope_gpu"
|
||||
@ -2146,7 +2697,7 @@ def build_gpu_dashboard():
|
||||
namespace_gpu_share_expr(gpu_scope),
|
||||
{"h": 8, "w": 12, "x": 0, "y": 0},
|
||||
links=namespace_scope_links("namespace_scope_gpu"),
|
||||
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||
description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
@ -2165,7 +2716,7 @@ def build_gpu_dashboard():
|
||||
timeseries_panel(
|
||||
3,
|
||||
"GPU Util by Node",
|
||||
'sum by (Hostname) (DCGM_FI_DEV_GPU_UTIL{pod!=""})',
|
||||
gpu_util_by_hostname(),
|
||||
{"h": 8, "w": 12, "x": 0, "y": 8},
|
||||
unit="percent",
|
||||
legend="{{Hostname}}",
|
||||
@ -2229,6 +2780,10 @@ DASHBOARDS = {
|
||||
"builder": build_mail_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml",
|
||||
},
|
||||
"atlas-jobs": {
|
||||
"builder": build_jobs_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-jobs.yaml",
|
||||
},
|
||||
"atlas-gpu": {
|
||||
"builder": build_gpu_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user