#!/usr/bin/env bash
#
# Install (or upgrade) hecate on an apt-based host.
#
# Usage: sudo ./scripts/install.sh [--no-start] [--skip-deps]
#
#   --no-start   install and enable units but do not (re)start them
#   --skip-deps  do not install apt packages / kubectl
#
# Environment overrides:
#   HECATE_ENABLE_BOOTSTRAP       1 | 0 | auto (default: auto)
#   HECATE_MANAGE_NUT             1 to write NUT + udev configuration (default: 1)
#   HECATE_NUT_UPS_NAME           UPS name; inferred from the config when empty
#   HECATE_NUT_VENDOR_ID          USB vendor id of the UPS (default: 0764)
#   HECATE_NUT_PRODUCT_ID         USB product id of the UPS (default: 0601)
#   HECATE_NUT_MONITOR_USER       upsd monitor user (default: monuser)
#   HECATE_NUT_MONITOR_PASSWORD   upsd monitor password
#   HECATE_FORCE_CONFIG_TEMPLATE  coordinator | peer | example; overwrites the
#                                 installed config with that template
set -euo pipefail

if [[ "${EUID}" -ne 0 ]]; then
  echo "Run as root: sudo ./scripts/install.sh" >&2
  exit 1
fi

REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
BIN_DIR="/usr/local/bin"
CONF_DIR="/etc/hecate"
STATE_DIR="/var/lib/hecate"
SYSTEMD_DIR="/etc/systemd/system"
LIB_DIR="/usr/local/lib/hecate"
CONFIG_FILE="${CONF_DIR}/hecate.yaml"

START_NOW=1
INSTALL_DEPS=1
ENABLE_BOOTSTRAP="${HECATE_ENABLE_BOOTSTRAP:-auto}"
MANAGE_NUT="${HECATE_MANAGE_NUT:-1}"
NUT_UPS_NAME="${HECATE_NUT_UPS_NAME:-}"
NUT_VENDOR_ID="${HECATE_NUT_VENDOR_ID:-0764}"
NUT_PRODUCT_ID="${HECATE_NUT_PRODUCT_ID:-0601}"
NUT_MONITOR_USER="${HECATE_NUT_MONITOR_USER:-monuser}"
# NOTE(review): a well-known default password ends up in /etc/nut; kept for
# backward compatibility, but hosts should set HECATE_NUT_MONITOR_PASSWORD.
NUT_MONITOR_PASSWORD="${HECATE_NUT_MONITOR_PASSWORD:-hecateupsmon}"
FORCE_CONFIG_TEMPLATE="${HECATE_FORCE_CONFIG_TEMPLATE:-}"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --no-start)
      START_NOW=0
      shift
      ;;
    --skip-deps)
      INSTALL_DEPS=0
      shift
      ;;
    *)
      echo "Unknown argument: $1" >&2
      exit 1
      ;;
  esac
done

# Print an installer warning on stderr so it is not mixed into stdout.
warn() {
  echo "[install] warning: $*" >&2
}

# Decide NUT_UPS_NAME: keep an explicit value, else infer it from the first
# "target: <name>@localhost" in the config, else fall back to "pyrphoros".
resolve_nut_ups_name() {
  if [[ -n "${NUT_UPS_NAME}" ]]; then
    return 0
  fi

  if [[ -f "${CONFIG_FILE}" ]]; then
    local target=""
    # "|| true": with `set -e -o pipefail` a grep that matches nothing would
    # otherwise abort the whole installer before the default below is reached.
    target="$(
      grep -Eo 'target:[[:space:]]*[A-Za-z0-9._-]+@localhost' "${CONFIG_FILE}" \
        | head -n 1 \
        | sed -E 's/^target:[[:space:]]*//'
    )" || true
    if [[ -n "${target}" ]]; then
      NUT_UPS_NAME="${target%@localhost}"
      echo "[install] inferred NUT UPS name from config: ${NUT_UPS_NAME}"
      return 0
    fi
  fi

  NUT_UPS_NAME="pyrphoros"
  echo "[install] defaulting NUT UPS name to ${NUT_UPS_NAME}"
}

# Print the value of the first "role:" key in the config.
# Prints "coordinator" when the config is missing or has no role.
read_hecate_role() {
  if [[ ! -f "${CONFIG_FILE}" ]]; then
    echo "coordinator"
    return 0
  fi

  local role
  role="$(awk '/^[[:space:]]*role:[[:space:]]*/ {print $2; exit}' "${CONFIG_FILE}" 2>/dev/null || true)"
  if [[ -z "${role}" ]]; then
    role="coordinator"
  fi
  echo "${role}"
}

# Print the value of an unindented "key: value" line from the config.
# $1 - key name. Prints nothing when the key (or the file) is missing.
migration_yaml_lookup() {
  local key="$1"
  awk -F': *' -v k="${key}" '$1 == k {print $2; exit}' "${CONFIG_FILE}" 2>/dev/null || true
}

# Print the first list item under the top-level "control_planes:" key.
first_control_plane_name() {
  awk '
    /^control_planes:[[:space:]]*$/ {in_list=1; next}
    in_list && /^[[:space:]]*-[[:space:]]*/ {
      gsub(/^[[:space:]]*-[[:space:]]*/, "", $0)
      print $0
      exit
    }
    in_list && /^[^[:space:]]/ {in_list=0}
  ' "${CONFIG_FILE}" 2>/dev/null || true
}

# Print the value of the first indented "<node>: <value>" line in the config.
# $1 - node name.
# NOTE(review): the lookup is not scoped to ssh_node_hosts, so a same-named
# entry in an earlier mapping (e.g. ssh_node_users) would win — confirm the
# key order of real configs.
lookup_node_host() {
  local node="$1"
  awk -F': *' -v n="${node}" '
    {
      key = $1
      # Only indented keys qualify; the indent width itself does not matter.
      if (sub(/^[[:space:]]+/, "", key) && key == n) { print $2; exit }
    }
  ' "${CONFIG_FILE}" 2>/dev/null || true
}

# Return 0 when kubectl can talk to a cluster using kubeconfig $1.
kubeconfig_works() {
  KUBECONFIG="$1" kubectl version --request-timeout=5s >/dev/null 2>&1
}

# Make sure hecate has a working kubeconfig. Tries, in order: the existing
# file, the local k3s kubeconfig, and the k3s kubeconfig of the first control
# plane fetched over ssh. Never fails the install; it only warns.
ensure_hecate_kubeconfig() {
  local kubeconfig_path kubeconfig_dir
  kubeconfig_path="$(migration_yaml_lookup "kubeconfig")"
  if [[ -z "${kubeconfig_path}" ]]; then
    kubeconfig_path="/etc/hecate/kubeconfig"
  fi

  # Only create the directory when it is missing, so the mode of an existing
  # directory is left alone.
  kubeconfig_dir="$(dirname "${kubeconfig_path}")"
  if [[ ! -d "${kubeconfig_dir}" ]]; then
    install -d -m 0750 "${kubeconfig_dir}"
  fi

  if [[ -s "${kubeconfig_path}" ]] && kubeconfig_works "${kubeconfig_path}"; then
    return 0
  fi

  if [[ -r /etc/rancher/k3s/k3s.yaml ]]; then
    install -m 0600 /etc/rancher/k3s/k3s.yaml "${kubeconfig_path}"
    echo "[install] refreshed kubeconfig from local /etc/rancher/k3s/k3s.yaml"
    if kubeconfig_works "${kubeconfig_path}"; then
      return 0
    fi
  fi

  local cp_name cp_host ssh_user ssh_port ssh_cfg ssh_key
  cp_name="$(first_control_plane_name)"
  if [[ -z "${cp_name}" ]]; then
    warn "cannot infer control plane name; kubeconfig bootstrap skipped"
    return 0
  fi
  cp_host="$(lookup_node_host "${cp_name}")"
  if [[ -z "${cp_host}" ]]; then
    cp_host="${cp_name}"
  fi

  ssh_user="$(migration_yaml_lookup "ssh_user")"
  ssh_port="$(migration_yaml_lookup "ssh_port")"
  ssh_cfg="$(migration_yaml_lookup "ssh_config_file")"
  ssh_key="$(migration_yaml_lookup "ssh_identity_file")"
  if [[ -z "${ssh_port}" ]]; then
    ssh_port="2277"
  fi

  local target="${cp_host}"
  if [[ -n "${ssh_user}" ]]; then
    target="${ssh_user}@${cp_host}"
  fi

  local ssh_args=(
    -o BatchMode=yes
    -o ConnectTimeout=8
    -o StrictHostKeyChecking=accept-new
  )
  if [[ -n "${ssh_cfg}" && -f "${ssh_cfg}" ]]; then
    ssh_args+=(-F "${ssh_cfg}")
  fi
  if [[ -n "${ssh_key}" && -f "${ssh_key}" ]]; then
    ssh_args+=(-i "${ssh_key}")
  fi
  ssh_args+=(-p "${ssh_port}")

  local remote_cfg
  if remote_cfg="$(ssh "${ssh_args[@]}" "${target}" "sudo cat /etc/rancher/k3s/k3s.yaml" 2>/dev/null)" \
    && [[ -n "${remote_cfg}" ]]; then
    # The kubeconfig holds cluster credentials: create it owner-only instead
    # of tightening the mode after the content is already on disk.
    (
      umask 077
      printf '%s\n' "${remote_cfg}" > "${kubeconfig_path}"
    )
    # k3s writes a loopback server address; point it at the control plane.
    sed -Ei "s#server:[[:space:]]*https://127\\.0\\.0\\.1:6443#server: https://${cp_host}:6443#g" \
      "${kubeconfig_path}" || true
    chmod 0600 "${kubeconfig_path}"
    echo "[install] bootstrapped kubeconfig from control plane ${cp_name} (${cp_host})"
    if kubeconfig_works "${kubeconfig_path}"; then
      return 0
    fi
  else
    warn "failed to fetch kubeconfig from ${cp_name} (${cp_host})"
  fi

  warn "kubeconfig at ${kubeconfig_path} is still not validated; local startup fallback may fail"
}

# Apply in-place migrations to an existing config. Each step is guarded so
# that re-running the installer does not change an already-migrated file.
# Patterns accept any indentation width for nested keys and list items.
migrate_hecate_config() {
  local cfg="${CONFIG_FILE}"
  if [[ ! -f "${cfg}" ]]; then
    return 0
  fi

  local changed=0

  # "([^0-9]|$)" keeps values such as 3000 or 1.105 from being rewritten.
  if grep -Eq 'default_budget_seconds:[[:space:]]*300([^0-9]|$)' "${cfg}"; then
    sed -Ei 's/(default_budget_seconds:[[:space:]]*)300([^0-9]|$)/\11380\2/' "${cfg}"
    echo "[install] migrated default_budget_seconds 300 -> 1380 in ${cfg}"
    changed=1
  fi

  if grep -Eq 'runtime_safety_factor:[[:space:]]*1\.10([^0-9]|$)' "${cfg}"; then
    sed -Ei 's/(runtime_safety_factor:[[:space:]]*)1\.10([^0-9]|$)/\11.25\2/' "${cfg}"
    echo "[install] migrated runtime_safety_factor 1.10 -> 1.25 in ${cfg}"
    changed=1
  fi

  if grep -Eq '^ssh_node_users:[[:space:]]*$' "${cfg}" \
    && grep -Eq '^[[:space:]]+titan-24:[[:space:]]*tethys[[:space:]]*$' "${cfg}"; then
    sed -Ei 's/^([[:space:]]+)titan-24:[[:space:]]*tethys[[:space:]]*$/\1titan-24: atlas/' "${cfg}"
    echo "[install] migrated ssh_node_users titan-24 override to atlas"
    changed=1
  fi

  # New keys are inserted after a sibling key and reuse its indentation (\1).
  if grep -Eq '^[[:space:]]+command_timeout_seconds:[[:space:]]*[0-9]+' "${cfg}" \
    && ! grep -Eq '^[[:space:]]+startup_guard_max_age_seconds:[[:space:]]*[0-9]+' "${cfg}"; then
    sed -Ei 's/^([[:space:]]+)(command_timeout_seconds:[[:space:]]*[0-9]+.*)$/\1\2\n\1startup_guard_max_age_seconds: 900/' "${cfg}"
    echo "[install] added coordination.startup_guard_max_age_seconds=900"
    changed=1
  fi

  local default_restore_cp cp_escaped
  default_restore_cp="$(first_control_plane_name)"
  if [[ -z "${default_restore_cp}" ]]; then
    default_restore_cp="titan-0a"
  fi
  # The name comes from the config file; escape sed replacement metacharacters.
  cp_escaped="$(printf '%s' "${default_restore_cp}" | sed -e 's|[\\&/]|\\&|g')"

  if grep -Eq '^[[:space:]]+api_poll_seconds:[[:space:]]*[0-9]+' "${cfg}" \
    && ! grep -Eq '^[[:space:]]+auto_etcd_restore_on_api_failure:[[:space:]]*(true|false)' "${cfg}"; then
    sed -Ei 's/^([[:space:]]+)(api_poll_seconds:[[:space:]]*[0-9]+.*)$/\1\2\n\1auto_etcd_restore_on_api_failure: true\n\1etcd_restore_control_plane: '"${cp_escaped}"'/' "${cfg}"
    echo "[install] added startup.auto_etcd_restore_on_api_failure + startup.etcd_restore_control_plane defaults"
    changed=1
  fi

  local role
  role="$(read_hecate_role)"

  # Initialised to empty: under `set -u` a bare `local name` stays unset and
  # the -n tests below would abort the installer for any other role.
  local inventory_block=""
  local managed_block=""
  if [[ "${role}" == "coordinator" || "${role}" == "peer" ]]; then
    # Both roles use the same inventory.
    inventory_block='ssh_node_hosts:
  titan-db: 192.168.22.10
  titan-0a: 192.168.22.11
  titan-0b: 192.168.22.12
  titan-0c: 192.168.22.13
  titan-04: 192.168.22.30
  titan-05: 192.168.22.31
  titan-06: 192.168.22.32
  titan-07: 192.168.22.33
  titan-08: 192.168.22.34
  titan-09: 192.168.22.35
  titan-10: 192.168.22.36
  titan-11: 192.168.22.37
  titan-12: 192.168.22.40
  titan-13: 192.168.22.41
  titan-14: 192.168.22.42
  titan-15: 192.168.22.43
  titan-17: 192.168.22.45
  titan-18: 192.168.22.46
  titan-19: 192.168.22.47
  titan-20: 192.168.22.20
  titan-21: 192.168.22.21
  titan-22: 192.168.22.22
  titan-24: 192.168.22.26'
    managed_block='ssh_managed_nodes:
  - titan-db
  - titan-0a
  - titan-0b
  - titan-0c
  - titan-04
  - titan-05
  - titan-06
  - titan-07
  - titan-08
  - titan-09
  - titan-10
  - titan-11
  - titan-12
  - titan-13
  - titan-14
  - titan-15
  - titan-17
  - titan-18
  - titan-19
  - titan-20
  - titan-21
  - titan-22
  - titan-24'
  fi

  # Replacement text is handed to perl through the environment so it is never
  # parsed as perl code or as part of the regex.
  if [[ -n "${inventory_block}" ]]; then
    if grep -Eq '^ssh_node_hosts:[[:space:]]*\{\}[[:space:]]*$' "${cfg}"; then
      HECATE_BLOCK="${inventory_block}" perl -0777 -pi -e \
        's/^ssh_node_hosts:[ \t]*\{\}[ \t]*\n/$ENV{HECATE_BLOCK}\n/m' "${cfg}"
      echo "[install] hydrated ssh_node_hosts inventory for role=${role}"
      changed=1
    fi
  fi

  if [[ -n "${managed_block}" ]]; then
    if grep -Eq '^ssh_managed_nodes:[[:space:]]*\[\][[:space:]]*$' "${cfg}"; then
      HECATE_BLOCK="${managed_block}" perl -0777 -pi -e \
        's/^ssh_managed_nodes:[ \t]*\[\][ \t]*\n/$ENV{HECATE_BLOCK}\n/m' "${cfg}"
      echo "[install] hydrated ssh_managed_nodes inventory for role=${role}"
      changed=1
    fi
    if grep -Eq '^ssh_managed_nodes:[[:space:]]*$' "${cfg}" \
      && { ! grep -Eq '^[[:space:]]*-[[:space:]]+titan-04[[:space:]]*$' "${cfg}" \
        || ! grep -Eq '^[[:space:]]*-[[:space:]]+titan-21[[:space:]]*$' "${cfg}"; }; then
      # [^\n]* and no /s modifier: each alternative consumes exactly one list
      # item, so the match stops at the end of the list instead of running to
      # the end of the file.
      HECATE_BLOCK="${managed_block}" perl -0777 -pi -e \
        's/^ssh_managed_nodes:[ \t]*\n(?:[ \t]*-[ \t][^\n]*\n)*/$ENV{HECATE_BLOCK}\n/m' "${cfg}"
      echo "[install] refreshed ssh_managed_nodes coverage for role=${role}"
      changed=1
    fi
  fi

  if [[ "${role}" == "peer" ]]; then
    # Legacy two-node peer list (titan-db + titan-24) -> full inventory.
    if grep -Eq '^ssh_managed_nodes:[[:space:]]*$' "${cfg}" \
      && grep -Eq '^[[:space:]]*-[[:space:]]+titan-db[[:space:]]*$' "${cfg}" \
      && grep -Eq '^[[:space:]]*-[[:space:]]+titan-24[[:space:]]*$' "${cfg}" \
      && ! grep -Eq '^[[:space:]]*-[[:space:]]+titan-0a[[:space:]]*$' "${cfg}"; then
      HECATE_BLOCK="${managed_block}" perl -0777 -pi -e \
        's/^ssh_managed_nodes:[ \t]*\n[ \t]*-[ \t]+titan-db[ \t]*\n[ \t]*-[ \t]+titan-24[ \t]*\n/$ENV{HECATE_BLOCK}\n/m' "${cfg}"
      echo "[install] expanded peer ssh_managed_nodes for bootstrap fallback coverage"
      changed=1
    fi

    # Full bootstrap path list, one entry per line. The perl snippets below
    # re-emit it using the indentation of the existing local_bootstrap_paths
    # key, so the key may be top-level or nested.
    local bootstrap_paths='infrastructure/core
clusters/atlas/flux-system
infrastructure/sources/helm
infrastructure/metallb
infrastructure/traefik
infrastructure/vault-csi
infrastructure/vault-injector
services/vault
infrastructure/postgres
services/gitea'

    if ! grep -Eq '^[[:space:]]*-[[:space:]]+services/gitea[[:space:]]*$' "${cfg}"; then
      HECATE_PATHS="${bootstrap_paths}" perl -0777 -pi -e '
        s{^([ \t]*)local_bootstrap_paths:[ \t]*\n(?:[ \t]*-[ \t][^\n]*\n)*}
         {my $i = $1; "${i}local_bootstrap_paths:\n" . join("", map { "$i  - $_\n" } split(/\n/, $ENV{HECATE_PATHS}))}me
      ' "${cfg}"
      echo "[install] refreshed peer local_bootstrap_paths for full fallback bootstrap parity"
      changed=1
    fi

    # Expand only a list whose sole entry is infrastructure/core. The negative
    # lookahead rejects lists with further items; without it an already
    # expanded list would match again and gain duplicates on every run.
    if perl -0777 -ne '
        $found = 1 if m{^[ \t]*local_bootstrap_paths:[ \t]*\n[ \t]*-[ \t]+infrastructure/core[ \t]*\n(?![ \t]*-[ \t])}m;
        END { exit($found ? 0 : 1) }
      ' "${cfg}"; then
      HECATE_PATHS="${bootstrap_paths}" perl -0777 -pi -e '
        s{^([ \t]*)local_bootstrap_paths:[ \t]*\n[ \t]*-[ \t]+infrastructure/core[ \t]*\n(?![ \t]*-[ \t])}
         {my $i = $1; "${i}local_bootstrap_paths:\n" . join("", map { "$i  - $_\n" } split(/\n/, $ENV{HECATE_PATHS}))}me
      ' "${cfg}"
      echo "[install] expanded peer local_bootstrap_paths for full fallback bootstrap parity"
      changed=1
    fi
  fi

  if [[ "${changed}" -eq 1 ]]; then
    chmod 0640 "${cfg}" || true
  fi
}

# Install whichever of the given apt packages are not installed yet.
# Arguments: package names.
ensure_apt_packages() {
  local missing=()
  local pkg
  for pkg in "$@"; do
    if ! dpkg -s "${pkg}" >/dev/null 2>&1; then
      missing+=("${pkg}")
    fi
  done

  if [[ ${#missing[@]} -eq 0 ]]; then
    return 0
  fi

  echo "[install] apt install: ${missing[*]}"
  export DEBIAN_FRONTEND=noninteractive
  apt-get update -y
  apt-get install -y "${missing[@]}"
}

# Make kubectl available: try the distro package first, then fall back to the
# upstream release binary for this architecture.
install_kubectl_if_missing() {
  if command -v kubectl >/dev/null 2>&1; then
    return 0
  fi

  # Best effort: the package is not available on every release.
  ensure_apt_packages kubernetes-client || true
  if command -v kubectl >/dev/null 2>&1; then
    return 0
  fi

  echo "[install] installing kubectl via upstream binary"
  local arch
  arch="$(uname -m)"
  case "${arch}" in
    x86_64) arch="amd64" ;;
    aarch64 | arm64) arch="arm64" ;;
    *)
      echo "Unsupported arch for kubectl install: ${arch}" >&2
      return 1
      ;;
  esac

  local version
  version="$(curl -fsSL https://dl.k8s.io/release/stable.txt)"
  if [[ -z "${version}" ]]; then
    echo "Could not determine the stable kubectl version" >&2
    return 1
  fi

  # Download to a temp file and install in one step, so a failed transfer
  # never leaves a truncated kubectl on PATH.
  local tmp
  tmp="$(mktemp)"
  if ! curl -fsSL -o "${tmp}" "https://dl.k8s.io/release/${version}/bin/linux/${arch}/kubectl"; then
    rm -f -- "${tmp}"
    echo "Failed to download kubectl ${version} (${arch})" >&2
    return 1
  fi
  install -m 0755 "${tmp}" /usr/local/bin/kubectl
  rm -f -- "${tmp}"
}

# Install build and runtime dependencies unless --skip-deps was given.
ensure_dependencies() {
  if [[ "${INSTALL_DEPS}" -eq 0 ]]; then
    echo "[install] skipping dependency installation"
    return 0
  fi

  if ! command -v apt-get >/dev/null 2>&1; then
    echo "This installer currently supports apt-based hosts only." >&2
    exit 1
  fi

  ensure_apt_packages ca-certificates curl git openssh-client jq nut-client nut-server nut-monitor golang-go
  install_kubectl_if_missing
}

# Restrict a NUT file that contains the monitor password:
# mode 0640, group nut when that group exists, root otherwise.
nut_restrict_file() {
  local path="$1"
  chmod 0640 "${path}"
  if getent group nut >/dev/null 2>&1; then
    chown root:nut "${path}"
  else
    chown root:root "${path}"
  fi
}

# Write NUT (nut.conf, ups.conf, upsd.users, upsmon.conf) and udev
# configuration for the UPS, then restart the NUT services.
#
# NOTE(review): the heredoc bodies below, the permission handling on
# upsmon.conf and the two udevadm calls were lost from the copy of this script
# that was reviewed and are RECONSTRUCTED from the surrounding code and stock
# NUT conventions. Diff them against the last known-good install.sh (or a
# working host's /etc/nut) before shipping.
configure_nut() {
  if [[ "${MANAGE_NUT}" != "1" ]]; then
    echo "[install] skipping NUT configuration (HECATE_MANAGE_NUT=${MANAGE_NUT})"
    return 0
  fi

  echo "[install] configuring NUT + udev for UPS ${NUT_UPS_NAME} (${NUT_VENDOR_ID}:${NUT_PRODUCT_ID})"
  install -d -m 0755 /etc/nut /etc/udev/rules.d

  # NOTE(review): reconstructed — standalone assumes upsd and upsmon both run
  # on this host (the installer restarts nut-server and nut-monitor locally).
  cat > /etc/nut/nut.conf <<EOF
MODE=standalone
EOF

  # NOTE(review): reconstructed — usbhid-ups is assumed to be the right driver
  # for the configured USB vendor/product id; confirm.
  cat > /etc/nut/ups.conf <<EOF
[${NUT_UPS_NAME}]
  driver = usbhid-ups
  port = auto
  vendorid = ${NUT_VENDOR_ID}
  productid = ${NUT_PRODUCT_ID}
EOF

  # NOTE(review): reconstructed — "master" is used rather than "primary"
  # because older NUT releases only accept the former; confirm against the
  # installed NUT version.
  (
    umask 027
    cat > /etc/nut/upsd.users <<EOF
[${NUT_MONITOR_USER}]
  password = ${NUT_MONITOR_PASSWORD}
  upsmon master
EOF
  )
  nut_restrict_file /etc/nut/upsd.users

  # NOTE(review): reconstructed — SHUTDOWNCMD in particular must be checked:
  # the intended interaction between upsmon and hecate is not visible here.
  (
    umask 027
    cat > /etc/nut/upsmon.conf <<EOF
MONITOR ${NUT_UPS_NAME}@localhost 1 ${NUT_MONITOR_USER} ${NUT_MONITOR_PASSWORD} master
MINSUPPLIES 1
SHUTDOWNCMD "/sbin/shutdown -h +0"
POLLFREQ 5
POLLFREQALERT 5
DEADTIME 15
EOF
  )
  nut_restrict_file /etc/nut/upsmon.conf

  # NOTE(review): reconstructed — gives the nut group access to the UPS USB
  # device node.
  cat > /etc/udev/rules.d/99-hecate-ups.rules <<EOF
SUBSYSTEM=="usb", ATTR{idVendor}=="${NUT_VENDOR_ID}", ATTR{idProduct}=="${NUT_PRODUCT_ID}", MODE="0660", GROUP="nut"
EOF

  # Best effort from here on: unit and tool names differ between NUT releases.
  udevadm control --reload-rules >/dev/null 2>&1 || true
  udevadm trigger >/dev/null 2>&1 || true
  systemctl restart nut-driver-enumerator.service >/dev/null 2>&1 || true
  systemctl restart "nut-driver@${NUT_UPS_NAME}.service" >/dev/null 2>&1 || true
  systemctl restart nut-server.service nut-monitor.service >/dev/null 2>&1 || true
}

ensure_dependencies

echo "[install] building hecate"
cd "${REPO_DIR}"
if ! command -v go >/dev/null 2>&1; then
  echo "[install] go toolchain not found; install golang-go or re-run without --skip-deps" >&2
  exit 1
fi
mkdir -p dist
go build -o dist/hecate ./cmd/hecate

echo "[install] installing binary"
install -d -m 0755 "${BIN_DIR}"
install -m 0755 dist/hecate "${BIN_DIR}/hecate"

echo "[install] installing config + state dirs"
install -d -m 0750 "${CONF_DIR}"
install -d -m 0750 "${STATE_DIR}"
install -d -m 0755 "${LIB_DIR}"

if [[ -n "${FORCE_CONFIG_TEMPLATE}" ]]; then
  case "${FORCE_CONFIG_TEMPLATE}" in
    coordinator)
      install -m 0640 configs/hecate.titan-db.yaml "${CONFIG_FILE}"
      echo "[install] forced config template: coordinator"
      ;;
    peer)
      install -m 0640 configs/hecate.tethys.yaml "${CONFIG_FILE}"
      echo "[install] forced config template: peer"
      ;;
    example)
      install -m 0640 configs/hecate.example.yaml "${CONFIG_FILE}"
      echo "[install] forced config template: example"
      ;;
    *)
      echo "[install] unknown HECATE_FORCE_CONFIG_TEMPLATE value: ${FORCE_CONFIG_TEMPLATE}" >&2
      exit 1
      ;;
  esac
elif [[ ! -f "${CONFIG_FILE}" ]]; then
  install -m 0640 configs/hecate.example.yaml "${CONFIG_FILE}"
  echo "[install] wrote default config to ${CONFIG_FILE}"
else
  echo "[install] keeping existing config at ${CONFIG_FILE}"
fi

migrate_hecate_config
ensure_hecate_kubeconfig

echo "[install] installing systemd units"
install -m 0644 deploy/systemd/hecate.service "${SYSTEMD_DIR}/hecate.service"
install -m 0644 deploy/systemd/hecate-bootstrap.service "${SYSTEMD_DIR}/hecate-bootstrap.service"
install -m 0644 deploy/systemd/hecate-update.service "${SYSTEMD_DIR}/hecate-update.service"
install -m 0644 deploy/systemd/hecate-update.timer "${SYSTEMD_DIR}/hecate-update.timer"
install -m 0755 scripts/hecate-self-update.sh "${LIB_DIR}/hecate-self-update.sh"

resolve_nut_ups_name
configure_nut

systemctl daemon-reload
systemctl enable hecate.service hecate-update.timer

if [[ "${ENABLE_BOOTSTRAP}" == "1" ]]; then
  systemctl enable hecate-bootstrap.service
elif [[ "${ENABLE_BOOTSTRAP}" == "0" ]]; then
  systemctl disable hecate-bootstrap.service >/dev/null 2>&1 || true
else
  # "auto" (and any other value): enabled for every role; the role is only
  # reported in the log line.
  role="$(read_hecate_role)"
  systemctl enable hecate-bootstrap.service
  echo "[install] auto-enabled hecate-bootstrap.service for role=${role}"
fi

if [[ "${START_NOW}" -eq 1 ]]; then
  systemctl restart hecate.service
  systemctl restart hecate-update.timer
  echo "[install] hecate.service restarted"
fi

echo "[install] done"
echo "Next steps:"
echo " 1. Edit /etc/hecate/hecate.yaml"
echo " 2. Run: hecate status --config /etc/hecate/hecate.yaml"
echo " 3. Test dry run: hecate startup --config /etc/hecate/hecate.yaml"
echo " 4. Trigger bootstrap now (db host): systemctl start hecate-bootstrap.service"
echo " 5. Trigger self-update now: systemctl start hecate-update.service"