diff --git a/clusters/atlas/flux-system/platform/descheduler/kustomization.yaml b/clusters/atlas/flux-system/platform/descheduler/kustomization.yaml
new file mode 100644
index 00000000..f1466000
--- /dev/null
+++ b/clusters/atlas/flux-system/platform/descheduler/kustomization.yaml
@@ -0,0 +1,21 @@
+# clusters/atlas/flux-system/platform/descheduler/kustomization.yaml
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: descheduler
+  namespace: flux-system
+  annotations:
+    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
+spec:
+  interval: 30m
+  path: ./infrastructure/descheduler
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: flux-system
+    namespace: flux-system
+  targetNamespace: kube-system
+  dependsOn:
+    - name: helm
+    - name: core
+  wait: true
diff --git a/clusters/atlas/flux-system/platform/kustomization.yaml b/clusters/atlas/flux-system/platform/kustomization.yaml
index 6e75b040..1d0d8715 100644
--- a/clusters/atlas/flux-system/platform/kustomization.yaml
+++ b/clusters/atlas/flux-system/platform/kustomization.yaml
@@ -4,6 +4,8 @@ kind: Kustomization
 resources:
   - core/kustomization.yaml
   - helm/kustomization.yaml
+  - descheduler/kustomization.yaml
+  - resource-guardrails/kustomization.yaml
   - cert-manager/kustomization.yaml
   - metallb/kustomization.yaml
   - traefik/kustomization.yaml
diff --git a/clusters/atlas/flux-system/platform/resource-guardrails/kustomization.yaml b/clusters/atlas/flux-system/platform/resource-guardrails/kustomization.yaml
new file mode 100644
index 00000000..90260edb
--- /dev/null
+++ b/clusters/atlas/flux-system/platform/resource-guardrails/kustomization.yaml
@@ -0,0 +1,19 @@
+# clusters/atlas/flux-system/platform/resource-guardrails/kustomization.yaml
+apiVersion: kustomize.toolkit.fluxcd.io/v1
+kind: Kustomization
+metadata:
+  name: resource-guardrails
+  namespace: flux-system
+  annotations:
+    kustomize.toolkit.fluxcd.io/ssa: IfNotPresent
+spec:
+  interval: 10m
+  path: ./infrastructure/resource-guardrails
+  prune: true
+  sourceRef:
+    kind: GitRepository
+    name: flux-system
+    namespace: flux-system
+  dependsOn:
+    - name: core
+  wait: true
diff --git a/infrastructure/core/kustomization.yaml b/infrastructure/core/kustomization.yaml
index d34e023c..dae83588 100644
--- a/infrastructure/core/kustomization.yaml
+++ b/infrastructure/core/kustomization.yaml
@@ -10,5 +10,6 @@ resources:
   - coredns-custom.yaml
   - coredns-deployment.yaml
   - ntp-sync-daemonset.yaml
+  - workload-profiles.yaml
   - ../sources/cert-manager/letsencrypt.yaml
   - ../sources/cert-manager/letsencrypt-prod.yaml
diff --git a/infrastructure/core/workload-profiles.yaml b/infrastructure/core/workload-profiles.yaml
new file mode 100644
index 00000000..16dc15bf
--- /dev/null
+++ b/infrastructure/core/workload-profiles.yaml
@@ -0,0 +1,27 @@
+# infrastructure/core/workload-profiles.yaml
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: atlas-workload-profiles
+  namespace: kube-system
+data:
+  profiles.yaml: |
+    profiles:
+      tiny:
+        request: { cpu: 25m, memory: 64Mi }
+        limit: { cpu: 200m, memory: 256Mi }
+      light:
+        request: { cpu: 50m, memory: 128Mi }
+        limit: { cpu: 500m, memory: 512Mi }
+      standard:
+        request: { cpu: 250m, memory: 512Mi }
+        limit: { cpu: "1", memory: 1Gi }
+      heavy:
+        request: { cpu: 500m, memory: 1Gi }
+        limit: { cpu: 1500m, memory: 3Gi }
+      ci:
+        request: { cpu: 512m, memory: 512Mi }
+        limit: { cpu: 1500m, memory: 2Gi }
+      scavenger:
+        request: { cpu: 10m, memory: 32Mi }
+        limit: { cpu: 250m, memory: 256Mi }
diff --git a/infrastructure/descheduler/helmrelease.yaml b/infrastructure/descheduler/helmrelease.yaml
new file mode 100644
index 00000000..a7890068
--- /dev/null
+++ b/infrastructure/descheduler/helmrelease.yaml
@@ -0,0 +1,100 @@
+# infrastructure/descheduler/helmrelease.yaml
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: descheduler
+  namespace: kube-system
+spec:
+  interval: 30m
+  install:
+    remediation:
+      retries: 3
+  upgrade:
+    remediation:
+      retries: 3
+  chart:
+    spec:
+      chart: descheduler
+      version: 0.33.0
+      sourceRef:
+        kind: HelmRepository
+        name: descheduler
+        namespace: flux-system
+  values:
+    kind: CronJob
+    schedule: "*/20 * * * *"
+    successfulJobsHistoryLimit: 1
+    failedJobsHistoryLimit: 3
+    resources:
+      requests:
+        cpu: 50m
+        memory: 96Mi
+      limits:
+        cpu: 200m
+        memory: 256Mi
+    deschedulerPolicyAPIVersion: descheduler/v1alpha2
+    deschedulerPolicy:
+      maxNoOfPodsToEvictPerNode: 2
+      maxNoOfPodsToEvictPerNamespace: 2
+      profiles:
+        - name: atlas-rpi-balance
+          pluginConfig:
+            - name: DefaultEvictor
+              args:
+                nodeFit: true
+                minPodAge: 10m
+                # NOTE(review): chart 0.33.0 presumably ships descheduler v0.33,
+                # which predates DefaultEvictor.podProtections; confirm. PVC pods
+                # are shielded via ignorePvcPods. Local-storage and system-critical
+                # pods stay protected while the evict* overrides are left unset.
+                ignorePvcPods: true
+            - name: RemovePodsHavingTooManyRestarts
+              args:
+                podRestartThreshold: 12
+                includingInitContainers: true
+            - name: RemovePodsViolatingNodeAffinity
+              args:
+                nodeAffinityType:
+                  - requiredDuringSchedulingIgnoredDuringExecution
+            - name: RemovePodsViolatingTopologySpreadConstraint
+            - name: RemovePodsViolatingNodeTaints
+            - name: LowNodeUtilization
+              args:
+                thresholds:
+                  cpu: 45
+                  memory: 45
+                  pods: 45
+                targetThresholds:
+                  cpu: 75
+                  memory: 75
+                  pods: 75
+          plugins:
+            balance:
+              enabled:
+                - RemovePodsViolatingTopologySpreadConstraint
+                - LowNodeUtilization
+            deschedule:
+              enabled:
+                - RemovePodsHavingTooManyRestarts
+                - RemovePodsViolatingNodeTaints
+                - RemovePodsViolatingNodeAffinity
+    priorityClassName: system-cluster-critical
+    nodeSelector:
+      node-role.kubernetes.io/worker: "true"
+    affinity:
+      nodeAffinity:
+        preferredDuringSchedulingIgnoredDuringExecution:
+          - weight: 100
+            preference:
+              matchExpressions:
+                - key: hardware
+                  operator: In
+                  values:
+                    - rpi5
+    tolerations:
+      - key: node-role.kubernetes.io/control-plane
+        operator: Exists
+        effect: NoSchedule
+      - key: node-role.kubernetes.io/master
+        operator: Exists
+        effect: NoSchedule
diff --git a/infrastructure/descheduler/kustomization.yaml b/infrastructure/descheduler/kustomization.yaml
new file mode 100644
index 00000000..47358065
--- /dev/null
+++ b/infrastructure/descheduler/kustomization.yaml
@@ -0,0 +1,5 @@
+# infrastructure/descheduler/kustomization.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - helmrelease.yaml
diff --git a/infrastructure/resource-guardrails/kustomization.yaml b/infrastructure/resource-guardrails/kustomization.yaml
new file mode 100644
index 00000000..38b06d82
--- /dev/null
+++ b/infrastructure/resource-guardrails/kustomization.yaml
@@ -0,0 +1,5 @@
+# infrastructure/resource-guardrails/kustomization.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - limitranges.yaml
diff --git a/infrastructure/resource-guardrails/limitranges.yaml b/infrastructure/resource-guardrails/limitranges.yaml
new file mode 100644
index 00000000..24762aae
--- /dev/null
+++ b/infrastructure/resource-guardrails/limitranges.yaml
@@ -0,0 +1,184 @@
+# infrastructure/resource-guardrails/limitranges.yaml
+apiVersion: v1
+kind: List
+items:
+  # The first item anchors the baseline container defaults (&defaultCompute);
+  # every other namespace aliases it except jenkins, which sets larger values.
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: ai
+    spec: &defaultCompute
+      limits:
+        - type: Container
+          defaultRequest:
+            cpu: 50m
+            memory: 96Mi
+          default:
+            cpu: 500m
+            memory: 512Mi
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: bstein-dev-home
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: cert-manager
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: climate
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: comms
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: crypto
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: finance
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: gitea
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: harbor
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: health
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: jellyfin
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: jenkins
+    spec:
+      limits:
+        - type: Container
+          defaultRequest:
+            cpu: 100m
+            memory: 256Mi
+          default:
+            cpu: 1500m
+            memory: 2Gi
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: logging
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: mailu-mailserver
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: maintenance
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: metallb-system
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: monitoring
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: nextcloud
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: outline
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: planka
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: postgres
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: quality
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: sso
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: sui-metrics
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: traefik
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: vault
+    spec: *defaultCompute
+  - apiVersion: v1
+    kind: LimitRange
+    metadata:
+      name: atlas-default-compute
+      namespace: vaultwarden
+    spec: *defaultCompute
diff --git a/infrastructure/sources/helm/descheduler.yaml b/infrastructure/sources/helm/descheduler.yaml
new file mode 100644
index 00000000..57e71acf
--- /dev/null
+++ b/infrastructure/sources/helm/descheduler.yaml
@@ -0,0 +1,9 @@
+# infrastructure/sources/helm/descheduler.yaml
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: HelmRepository
+metadata:
+  name: descheduler
+  namespace: flux-system
+spec:
+  interval: 1h
+  url: https://kubernetes-sigs.github.io/descheduler/
diff --git a/infrastructure/sources/helm/kustomization.yaml b/infrastructure/sources/helm/kustomization.yaml
index e45f58f8..b789dcc3 100644
--- a/infrastructure/sources/helm/kustomization.yaml
+++ b/infrastructure/sources/helm/kustomization.yaml
@@ -3,6 +3,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
   - ananace.yaml
+  - descheduler.yaml
   - fluent-bit.yaml
   - grafana.yaml
   - hashicorp.yaml
diff --git a/services/crypto/monerod/deployment.yaml b/services/crypto/monerod/deployment.yaml
index 1ea85a0d..74a72bf2 100644
--- a/services/crypto/monerod/deployment.yaml
+++ b/services/crypto/monerod/deployment.yaml
@@ -4,14 +4,18 @@ kind: Deployment
 metadata:
   name: monerod
   namespace: crypto
-  labels: { app: monerod }
+  labels:
+    app: monerod
+    atlas.bstein.dev/workload-profile: heavy
 spec:
   replicas: 1
   strategy: { type: Recreate }
   selector: { matchLabels: { app: monerod } }
   template:
     metadata:
-      labels: { app: monerod }
+      labels:
+        app: monerod
+        atlas.bstein.dev/workload-profile: heavy
     spec:
       securityContext:
         fsGroup: 1000
@@ -41,6 +45,7 @@ spec:
                   - key: kubernetes.io/hostname
                     operator: NotIn
                     values: ["titan-12","titan-13","titan-15","titan-17","titan-19"]
+      terminationGracePeriodSeconds: 120
       containers:
         - name: monerod
           image: registry.bstein.dev/crypto/monerod:0.18.4.1
@@ -83,7 +88,13 @@ spec:
             periodSeconds: 20
             timeoutSeconds: 20
             failureThreshold: 36
-          terminationGracePeriodSeconds: 120
+          resources:
+            requests:
+              cpu: 250m
+              memory: 1Gi
+            limits:
+              cpu: 1500m
+              memory: 3Gi
           lifecycle:
             preStop:
               exec:
diff --git a/services/crypto/xmr-miner/deployment.yaml b/services/crypto/xmr-miner/deployment.yaml
index 820c2ce5..31db7ce9 100644
--- a/services/crypto/xmr-miner/deployment.yaml
+++ b/services/crypto/xmr-miner/deployment.yaml
@@ -4,14 +4,18 @@ kind: Deployment
 metadata:
   name: monero-p2pool
   namespace: crypto
-  labels: { app: monero-p2pool }
+  labels:
+    app: monero-p2pool
+    atlas.bstein.dev/workload-profile: light
 spec:
   replicas: 1
   selector:
     matchLabels: { app: monero-p2pool }
   template:
     metadata:
-      labels: { app: monero-p2pool }
+      labels:
+        app: monero-p2pool
+        atlas.bstein.dev/workload-profile: light
       annotations:
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "crypto"
@@ -87,6 +91,13 @@ spec:
             tcpSocket: { port: 3333 }
             initialDelaySeconds: 10
             periodSeconds: 10
+          resources:
+            requests:
+              cpu: 100m
+              memory: 128Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
           volumeMounts:
             - { name: p2pool-bin, mountPath: /opt/p2pool }
       volumes:
diff --git a/services/crypto/xmr-miner/xmrig-daemonset.yaml b/services/crypto/xmr-miner/xmrig-daemonset.yaml
index a1ee2ae8..f36f832e 100644
--- a/services/crypto/xmr-miner/xmrig-daemonset.yaml
+++ b/services/crypto/xmr-miner/xmrig-daemonset.yaml
@@ -4,14 +4,18 @@ kind: DaemonSet
 metadata:
   name: monero-xmrig
   namespace: crypto
-  labels: { app: monero-xmrig }
+  labels:
+    app: monero-xmrig
+    atlas.bstein.dev/workload-profile: scavenger
 spec:
   selector:
     matchLabels: { app: monero-xmrig }
   updateStrategy: { type: RollingUpdate }
   template:
     metadata:
-      labels: { app: monero-xmrig }
+      labels:
+        app: monero-xmrig
+        atlas.bstein.dev/workload-profile: scavenger
     spec:
       priorityClassName: scavenger
       nodeSelector:
@@ -47,3 +51,10 @@ spec:
                 --donate-level N \
                 --cpu-priority 1 \
                 --threads "${THR}" ${EXTRA}
+          resources:
+            requests:
+              cpu: 10m
+              memory: 32Mi
+            limits:
+              cpu: 250m
+              memory: 256Mi
diff --git a/services/gitea/deployment.yaml b/services/gitea/deployment.yaml
index be5be132..75b27fb9 100644
--- a/services/gitea/deployment.yaml
+++ b/services/gitea/deployment.yaml
@@ -6,6 +6,7 @@ metadata:
   namespace: gitea
   labels:
     app: gitea
+    atlas.bstein.dev/workload-profile: heavy
 spec:
   replicas: 1
   selector:
@@ -20,6 +21,7 @@ spec:
     metadata:
       labels:
         app: gitea
+        atlas.bstein.dev/workload-profile: heavy
       annotations:
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/agent-init-first: "true"
@@ -197,6 +199,13 @@ spec:
               value: "true"
             - name: SSH_PORT
               value: "2242"
+          resources:
+            requests:
+              cpu: 500m
+              memory: 1Gi
+            limits:
+              cpu: 1500m
+              memory: 2Gi
           volumeMounts:
             - name: gitea-data
               mountPath: /data
diff --git a/services/harbor/helmrelease.yaml b/services/harbor/helmrelease.yaml
index 26419cc9..0f62598e 100644
--- a/services/harbor/helmrelease.yaml
+++ b/services/harbor/helmrelease.yaml
@@ -77,10 +77,16 @@ spec:
       internal:
         nodeSelector:
           ananke.bstein.dev/harbor-bootstrap: "true"
-          kubernetes.io/hostname: titan-11
         image:
           repository: registry.bstein.dev/infra/harbor-redis
           tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-redis:tag"}
+        resources:
+          requests:
+            cpu: 50m
+            memory: 96Mi
+          limits:
+            cpu: 250m
+            memory: 256Mi
         affinity:
           nodeAffinity:
             requiredDuringSchedulingIgnoredDuringExecution:
@@ -114,10 +120,16 @@ spec:
     core:
       nodeSelector:
         ananke.bstein.dev/harbor-bootstrap: "true"
-        kubernetes.io/hostname: titan-11
       image:
         repository: registry.bstein.dev/infra/harbor-core
         tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-core:tag"}
+      resources:
+        requests:
+          cpu: 100m
+          memory: 256Mi
+        limits:
+          cpu: 750m
+          memory: 1Gi
       serviceAccountName: harbor-vault-sync
       automountServiceAccountToken: true
       existingSecret: harbor-core
@@ -180,10 +192,16 @@ spec:
     jobservice:
       nodeSelector:
         ananke.bstein.dev/harbor-bootstrap: "true"
-        kubernetes.io/hostname: titan-11
       image:
         repository: registry.bstein.dev/infra/harbor-jobservice
         tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-jobservice:tag"}
+      resources:
+        requests:
+          cpu: 50m
+          memory: 128Mi
+        limits:
+          cpu: 500m
+          memory: 512Mi
       serviceAccountName: harbor-vault-sync
       automountServiceAccountToken: true
       existingSecret: harbor-jobservice
@@ -227,10 +245,16 @@ spec:
     portal:
       nodeSelector:
         ananke.bstein.dev/harbor-bootstrap: "true"
-        kubernetes.io/hostname: titan-11
       image:
         repository: registry.bstein.dev/infra/harbor-portal
         tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-portal:tag"}
+      resources:
+        requests:
+          cpu: 25m
+          memory: 64Mi
+        limits:
+          cpu: 200m
+          memory: 128Mi
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
@@ -255,7 +279,14 @@ spec:
     registry:
       nodeSelector:
         ananke.bstein.dev/harbor-bootstrap: "true"
-        kubernetes.io/hostname: titan-11
       registry:
+        # NOTE(review): harbor-helm takes resources per container (registry.registry / registry.controller); confirm no existing key here.
+        resources:
+          requests:
+            cpu: 100m
+            memory: 256Mi
+          limits:
+            cpu: 1
+            memory: 1Gi
         image:
           repository: registry.bstein.dev/infra/harbor-registry
@@ -338,10 +369,16 @@ spec:
     nginx:
       nodeSelector:
         ananke.bstein.dev/harbor-bootstrap: "true"
-        kubernetes.io/hostname: titan-11
       image:
         repository: registry.bstein.dev/infra/harbor-nginx
         tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-nginx:tag"}
+      resources:
+        requests:
+          cpu: 25m
+          memory: 64Mi
+        limits:
+          cpu: 200m
+          memory: 128Mi
       affinity:
         nodeAffinity:
           requiredDuringSchedulingIgnoredDuringExecution:
diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml
index ea50685c..8ffc2494 100644
--- a/services/jellyfin/deployment.yaml
+++ b/services/jellyfin/deployment.yaml
@@ -6,6 +6,7 @@ metadata:
   namespace: jellyfin
   labels:
     app: jellyfin
+    atlas.bstein.dev/workload-profile: heavy
 spec:
   replicas: 1
   strategy:
@@ -20,6 +21,7 @@ spec:
     metadata:
       labels:
         app: jellyfin
+        atlas.bstein.dev/workload-profile: heavy
       annotations:
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "pegasus"
@@ -134,6 +136,9 @@ spec:
             requests:
               cpu: "500m"
               memory: 1Gi
+            limits:
+              cpu: "1500m"
+              memory: 3Gi
           volumeMounts:
             - name: jellyfin-vault-entrypoint
               mountPath: /entrypoint.sh
diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml
index 877e0e53..2de9681d 100644
--- a/services/jenkins/configmap-jcasc.yaml
+++ b/services/jenkins/configmap-jcasc.yaml
@@ -474,7 +474,7 @@ data:
               plainText
       clouds:
         - kubernetes:
-            containerCapStr: "4"
+            containerCapStr: "3"
             connectTimeout: "20"
             readTimeout: "90"
             jenkinsUrl: "http://jenkins.jenkins.svc.cluster.local:8080"
diff --git a/services/maintenance/kustomization.yaml b/services/maintenance/kustomization.yaml
index 64e33ddb..444d0fbe 100644
--- a/services/maintenance/kustomization.yaml
+++ b/services/maintenance/kustomization.yaml
@@ -32,6 +32,7 @@ resources:
   - disable-k3s-traefik-daemonset.yaml
   - oneoffs/k3s-traefik-cleanup-job.yaml
   - node-nofile-daemonset.yaml
+  - rpi-resource-reservation-daemonset.yaml
   - metis-sentinel-amd64-daemonset.yaml
   - metis-sentinel-arm64-daemonset.yaml
   - k3s-agent-restart-daemonset.yaml
@@ -84,3 +85,9 @@ configMapGenerator:
       - node_image_sweeper.sh=scripts/node_image_sweeper.sh
     options:
       disableNameSuffixHash: true
+  - name: rpi-resource-reservation-script
+    namespace: maintenance
+    files:
+      - rpi_resource_reservation.sh=scripts/rpi_resource_reservation.sh
+    options:
+      disableNameSuffixHash: true
diff --git a/services/maintenance/rpi-resource-reservation-daemonset.yaml b/services/maintenance/rpi-resource-reservation-daemonset.yaml
new file mode 100644
index 00000000..298a1781
--- /dev/null
+++ b/services/maintenance/rpi-resource-reservation-daemonset.yaml
@@ -0,0 +1,71 @@
+# services/maintenance/rpi-resource-reservation-daemonset.yaml
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: rpi-resource-reservation
+  namespace: maintenance
+spec:
+  selector:
+    matchLabels:
+      app: rpi-resource-reservation
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: rpi-resource-reservation
+    spec:
+      serviceAccountName: node-nofile
+      nodeSelector:
+        node-role.kubernetes.io/worker: "true"
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: hardware
+                    operator: In
+                    values:
+                      - rpi4
+                      - rpi5
+      tolerations:
+        - key: node.kubernetes.io/unschedulable
+          operator: Exists
+          effect: NoSchedule
+        - key: node.kubernetes.io/not-ready
+          operator: Exists
+          effect: NoExecute
+        - key: node.kubernetes.io/unreachable
+          operator: Exists
+          effect: NoExecute
+      containers:
+        - name: reservation
+          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
+          command: ["/usr/bin/env", "bash"]
+          args: ["/scripts/rpi_resource_reservation.sh"]
+          resources:
+            requests:
+              cpu: 10m
+              memory: 32Mi
+            limits:
+              cpu: 100m
+              memory: 96Mi
+          # Root + privileged: the mounted script writes k3s config under
+          # /host/etc and chroots into /host to run systemctl.
+          securityContext:
+            privileged: true
+            runAsUser: 0
+          volumeMounts:
+            - name: host-root
+              mountPath: /host
+            - name: script
+              mountPath: /scripts
+              readOnly: true
+      volumes:
+        - name: host-root
+          hostPath:
+            path: /
+        - name: script
+          configMap:
+            name: rpi-resource-reservation-script
+            defaultMode: 0555
diff --git a/services/maintenance/scripts/rpi_resource_reservation.sh b/services/maintenance/scripts/rpi_resource_reservation.sh
new file mode 100644
index 00000000..dda7e7bf
--- /dev/null
+++ b/services/maintenance/scripts/rpi_resource_reservation.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Writes kubelet reservation/eviction args into the host's k3s config.yaml.d,
+# restarts k3s-agent after a random 30-449s delay if the file changed, and
+# then sleeps forever. Expects the host root filesystem mounted at /host.
+set -euo pipefail
+
+host_root="/host"
+unit="k3s-agent"
+unit_file="${host_root}/etc/systemd/system/${unit}.service"
+config_dir="${host_root}/etc/rancher/k3s/config.yaml.d"
+config_file="${config_dir}/90-atlas-rpi-reservations.yaml"
+
+if [ ! -f "${unit_file}" ]; then
+  echo "k3s-agent unit not found; this guardrail only manages worker agents"
+  sleep infinity
+fi
+
+tmp_file="$(mktemp)"
+cat > "${tmp_file}" <<'EOF'
+# Managed by Flux via services/maintenance/scripts/rpi_resource_reservation.sh.
+# Keep RPi workers below saturation so kubelet and the OS keep enough headroom
+# to evict or recover before the board wedges.
+kubelet-arg+:
+  - "system-reserved=cpu=250m,memory=384Mi,ephemeral-storage=1Gi"
+  - "kube-reserved=cpu=150m,memory=256Mi,ephemeral-storage=1Gi"
+  - "eviction-hard=memory.available<512Mi,nodefs.available<10%,imagefs.available<10%"
+  - "eviction-soft=memory.available<768Mi,nodefs.available<15%,imagefs.available<15%"
+  - "eviction-soft-grace-period=memory.available=1m,nodefs.available=2m,imagefs.available=2m"
+  - "eviction-max-pod-grace-period=60"
+EOF
+
+changed=0
+if [ ! -f "${config_file}" ] || ! cmp -s "${tmp_file}" "${config_file}"; then
+  mkdir -p "${config_dir}"
+  install -m 0644 "${tmp_file}" "${config_file}"
+  changed=1
+fi
+rm -f "${tmp_file}"
+
+if [ "${changed}" -eq 1 ]; then
+  delay="$(( (RANDOM % 420) + 30 ))"
+  echo "updated ${config_file}; restarting ${unit} after ${delay}s"
+  sleep "${delay}"
+  chroot "${host_root}" /bin/systemctl daemon-reload
+  chroot "${host_root}" /bin/systemctl restart "${unit}"
+else
+  echo "${config_file} already up to date"
+fi
+
+sleep infinity