diff --git a/services/maintenance/kustomization.yaml b/services/maintenance/kustomization.yaml
index b5e03a34..ac1d924a 100644
--- a/services/maintenance/kustomization.yaml
+++ b/services/maintenance/kustomization.yaml
@@ -38,6 +38,7 @@ resources:
   - metis-sentinel-arm64-daemonset.yaml
   - k3s-agent-restart-daemonset.yaml
   - titan-24-docker-daemonset.yaml
+  - titan-22-link-keeper-daemonset.yaml
   - node-image-sweeper-serviceaccount.yaml
   - node-image-sweeper-daemonset.yaml
   - metis-service.yaml
diff --git a/services/maintenance/titan-22-link-keeper-daemonset.yaml b/services/maintenance/titan-22-link-keeper-daemonset.yaml
new file mode 100644
index 00000000..49d6bd84
--- /dev/null
+++ b/services/maintenance/titan-22-link-keeper-daemonset.yaml
@@ -0,0 +1,119 @@
+# services/maintenance/titan-22-link-keeper-daemonset.yaml
+#
+# Link watchdog for the enp5s0 interface on node titan-22.
+#
+# One privileged pod, pinned to titan-22, loops forever: it enters the host's
+# namespaces with nsenter, brings enp5s0 up, switches EEE off and, while
+# ethtool reports no link, escalates: autoneg with an explicit advertise mask,
+# then forced 2.5G, then forced 1G. Each pass ends with a status dump to the
+# pod log and a 15 s sleep.
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: titan-22-link-keeper
+  namespace: maintenance
+  labels:
+    app: titan-22-link-keeper
+spec:
+  selector:
+    matchLabels:
+      app: titan-22-link-keeper
+  template:
+    metadata:
+      labels:
+        app: titan-22-link-keeper
+    spec:
+      # The script never calls the Kubernetes API, so do not mount a service
+      # account token into this privileged, host-namespace pod.
+      automountServiceAccountToken: false
+      # With hostPID, PID 1 is the host's init: the nsenter target below.
+      hostNetwork: true
+      hostPID: true
+      # The node selector limits this DaemonSet to the single node titan-22.
+      nodeSelector:
+        kubernetes.io/hostname: titan-22
+      # Tolerate the cordon, not-ready, unreachable, resource-pressure and
+      # network-unavailable taints; the NoExecute entries set no
+      # tolerationSeconds, so those taints never evict the pod.
+      tolerations:
+        - key: node.kubernetes.io/unschedulable
+          operator: Exists
+          effect: NoSchedule
+        - key: node.kubernetes.io/not-ready
+          operator: Exists
+          effect: NoExecute
+        - key: node.kubernetes.io/unreachable
+          operator: Exists
+          effect: NoExecute
+        - key: node.kubernetes.io/unreachable
+          operator: Exists
+          effect: NoSchedule
+        - key: node.kubernetes.io/disk-pressure
+          operator: Exists
+          effect: NoSchedule
+        - key: node.kubernetes.io/memory-pressure
+          operator: Exists
+          effect: NoSchedule
+        - key: node.kubernetes.io/pid-pressure
+          operator: Exists
+          effect: NoSchedule
+        - key: node.kubernetes.io/network-unavailable
+          operator: Exists
+          effect: NoSchedule
+      containers:
+        - name: link-keeper
+          # Used only as a carrier for bash and nsenter; kubectl is never run.
+          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
+          imagePullPolicy: IfNotPresent
+          # Runs as root in a privileged container so nsenter may join the
+          # host's namespaces.
+          securityContext:
+            privileged: true
+            runAsUser: 0
+          # Explicit requests keep the pod out of the BestEffort QoS class,
+          # the first candidates for eviction under node pressure. No limits.
+          resources:
+            requests:
+              cpu: 10m
+              memory: 16Mi
+          # The outer loop runs in the container. Each pass runs one /bin/sh
+          # script inside the mount, UTS, IPC, network and PID namespaces of
+          # host PID 1, so ip and ethtool act on the host's enp5s0.
+          # Every step is best-effort (|| true); only set -u is enabled.
+          # Advertise mask 0x80000000002f, per the ethtool(8) link-mode table:
+          #   0x800000000000 2500baseT/Full | 0x020 1000baseT/Full
+          #   | 0x00f 10baseT and 100baseT, half and full duplex.
+          command:
+            - /bin/bash
+            - -lc
+            - |
+              set -u
+              while true; do
+                nsenter -t 1 -m -u -i -n -p -- /bin/sh -lc '
+                  date -Is
+                  ip link set enp5s0 up || true
+                  ethtool --set-eee enp5s0 eee off || true
+
+                  if ! ethtool enp5s0 | grep -q "Link detected: yes"; then
+                    ethtool -s enp5s0 advertise 0x80000000002f autoneg on || ethtool -s enp5s0 autoneg on || true
+                    sleep 3
+                  fi
+
+                  if ! ethtool enp5s0 | grep -q "Link detected: yes"; then
+                    echo "link still down after autoneg; trying forced 2.5G"
+                    ethtool -s enp5s0 speed 2500 duplex full autoneg off || true
+                    sleep 4
+                  fi
+
+                  if ! ethtool enp5s0 | grep -q "Link detected: yes"; then
+                    echo "link still down after 2.5G; trying forced 1G"
+                    ethtool -s enp5s0 speed 1000 duplex full autoneg off || true
+                    sleep 4
+                  fi
+
+                  ethtool --show-eee enp5s0 || true
+                  ethtool enp5s0 | sed -n "1,45p" || true
+                  ip -br addr show enp5s0 || true
+                '
+                sleep 15
+              done