# Repository path: ananke/configs/ananke.titan-db.yaml

# /etc/ananke/ananke.yaml for titan-db (coordinator)
# Path of the kubeconfig file.
kubeconfig: /etc/ananke/kubeconfig
# SSH connection settings: login user, port, client config file and
# identity file.
ssh_user: atlas
ssh_port: 2277
ssh_config_file: /home/atlas/.ssh/config
ssh_identity_file: /home/atlas/.ssh/id_ed25519
# Node name -> IPv4 address.
# Entries must be indented under ssh_node_hosts: at column 0 they parse as
# unrelated top-level keys and ssh_node_hosts itself becomes null.
ssh_node_hosts:
  titan-db: 192.168.22.10
  titan-0a: 192.168.22.11
  titan-0b: 192.168.22.12
  titan-0c: 192.168.22.13
  titan-04: 192.168.22.30
  titan-05: 192.168.22.31
  titan-06: 192.168.22.32
  titan-07: 192.168.22.33
  titan-08: 192.168.22.34
  titan-09: 192.168.22.35
  titan-10: 192.168.22.36
  titan-11: 192.168.22.37
  titan-12: 192.168.22.40
  titan-13: 192.168.22.41
  titan-14: 192.168.22.42
  titan-15: 192.168.22.43
  titan-17: 192.168.22.45
  titan-18: 192.168.22.46
  titan-19: 192.168.22.47
  titan-20: 192.168.22.20
  titan-21: 192.168.22.21
  titan-22: 192.168.22.22
  titan-24: 192.168.22.26
# Node name -> SSH user (presumably a per-node override of ssh_user; confirm
# against the config loader).
# The entry must be indented: at column 0 it is a top-level key named
# titan-24, not a member of this mapping.
ssh_node_users:
  titan-24: atlas
# Names of the nodes listed as SSH-managed; each name also has an entry in
# ssh_node_hosts.
ssh_managed_nodes:
  - titan-db
  - titan-0a
  - titan-0b
  - titan-0c
  - titan-04
  - titan-05
  - titan-06
  - titan-07
  - titan-08
  - titan-09
  - titan-10
  - titan-11
  - titan-12
  - titan-13
  - titan-14
  - titan-15
  - titan-17
  - titan-18
  - titan-19
  - titan-20
  - titan-21
  - titan-22
  - titan-24
# Jump host and jump user are both set to the empty string here
# (presumably meaning "no jump host"; confirm against the config loader).
ssh_jump_host: ""
ssh_jump_user: ""
# Path of the IaC repository, plus the Flux branch and source URL this
# configuration expects.
iac_repo_path: /opt/titan-iac
expected_flux_branch: main
expected_flux_source_url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
# Node names listed as control planes.
control_planes:
  - titan-0a
  - titan-0b
  - titan-0c
# Node names listed as workers.
workers:
  - titan-04
  - titan-05
  - titan-06
  - titan-07
  - titan-08
  - titan-09
  - titan-10
  - titan-11
  - titan-12
  - titan-13
  - titan-14
  - titan-15
  - titan-17
  - titan-18
  - titan-19
  - titan-20
  - titan-21
  - titan-22
  - titan-24
# Relative paths used for local bootstrap (presumably relative to
# iac_repo_path and applied in the listed order; confirm).
local_bootstrap_paths:
  - infrastructure/core
  - clusters/atlas/flux-system
  - infrastructure/sources/helm
  - infrastructure/metallb
  - infrastructure/traefik
  - infrastructure/cert-manager
  - infrastructure/vault-csi
  - infrastructure/vault-injector
  - services/vault
  - infrastructure/postgres
  - services/gitea
  - services/keycloak
  - services/oauth2-proxy
# Namespaces listed as excluded (what they are excluded from is decided by
# the consumer of this file; not visible here).
excluded_namespaces:
  - kube-system
  - kube-public
  - kube-node-lease
  - flux-system
  - traefik
  - metallb-system
  - cert-manager
  - longhorn-system
  - vault
  - postgres
  - maintenance
# Startup settings.
# NOTE(review): the nesting below is reconstructed from key names because the
# original indentation was missing; confirm the membership of each key against
# the ananke configuration schema.
startup:
  # API wait and poll intervals, in seconds.
  api_wait_seconds: 1200
  api_poll_seconds: 2
  shutdown_cooldown_seconds: 45
  minimum_battery_percent: 20

  # Node inventory reachability gate.
  require_node_inventory_reachability: true
  node_inventory_reachability_wait_seconds: 300
  node_inventory_reachability_poll_seconds: 5

  # Node name -> label key -> label value. The value is quoted so it stays
  # the string "true" instead of becoming a boolean.
  required_node_labels:
    titan-09:
      ananke.bstein.dev/harbor-bootstrap: "true"

  # Time synchronisation gate.
  require_time_sync: true
  time_sync_wait_seconds: 240
  time_sync_poll_seconds: 5
  time_sync_mode: quorum
  time_sync_quorum: 2

  reconcile_access_on_boot: true

  # etcd restore settings; the named node is one of control_planes.
  auto_etcd_restore_on_api_failure: true
  etcd_restore_control_plane: titan-0a

  # Storage readiness gate. PVC entries are written as namespace/name.
  require_storage_ready: true
  storage_ready_wait_seconds: 420
  storage_ready_poll_seconds: 5
  storage_min_ready_nodes: 2
  storage_critical_pvcs:
    - vault/data-vault-0
    - postgres/postgres-data-postgres-0
    - gitea/gitea-data
    - sso/keycloak-data

  # Post-start probe URLs (the same two URLs also appear in
  # service_checklist below).
  require_post_start_probes: true
  post_start_probe_wait_seconds: 240
  post_start_probe_poll_seconds: 5
  post_start_probes:
    - https://scm.bstein.dev/api/healthz
    - https://metrics.bstein.dev/api/health

  # Service checklist: timing, authentication settings and per-service
  # checks.
  require_service_checklist: true
  service_checklist_wait_seconds: 420
  service_checklist_poll_seconds: 5
  service_checklist_stability_seconds: 120
  service_checklist_auth:
    mode: keycloak_robotuser
    keycloak_base_url: https://sso.bstein.dev
    realm: atlas
    robot_username: robotuser
    admin_secret_namespace: sso
    admin_secret_name: keycloak-admin
    admin_secret_username_key: username
    admin_secret_password_key: password
  # Each item names a URL together with the HTTP statuses and body text
  # accepted for it. Every key of an item must be indented under its
  # "- name:" line, otherwise the item ends after the name.
  service_checklist:
    - name: gitea-api
      url: https://scm.bstein.dev/api/healthz
      accepted_statuses: [200]
      body_contains: pass
      timeout_seconds: 12
    - name: grafana-api
      url: https://metrics.bstein.dev/api/health
      accepted_statuses: [200]
      body_contains: '"database":"ok"'
      timeout_seconds: 12
    - name: keycloak-oidc
      url: https://sso.bstein.dev/realms/atlas/.well-known/openid-configuration
      accepted_statuses: [200]
      body_contains: '"issuer":"https://sso.bstein.dev/realms/atlas"'
      timeout_seconds: 12
    - name: harbor-registry
      url: https://registry.bstein.dev/v2/
      accepted_statuses: [401]
      body_contains: unauthorized
      timeout_seconds: 12
    - name: longhorn-api-user-session
      url: https://longhorn.bstein.dev/v1
      accepted_statuses: [200]
      require_robot_auth: true
      follow_redirects: true
      final_url_contains: /v1
      final_url_not_contains: /oauth2/sign_in
      body_contains: '"id":"v1"'
      timeout_seconds: 12

  # Critical service endpoints, written as namespace/name.
  require_critical_service_endpoints: true
  critical_service_endpoint_wait_seconds: 420
  critical_service_endpoint_poll_seconds: 5
  critical_service_endpoints:
    - monitoring/victoria-metrics-single-server

  # Ingress checklist.
  require_ingress_checklist: true
  ingress_checklist_wait_seconds: 420
  ingress_checklist_poll_seconds: 5
  ingress_checklist_accepted_statuses: [200, 301, 302, 307, 308, 401, 403, 404]
  ingress_checklist_ignore_hosts: []
  ingress_checklist_insecure_skip_tls: false

  # Node SSH authentication gate.
  require_node_ssh_auth: true
  node_ssh_auth_wait_seconds: 240
  node_ssh_auth_poll_seconds: 5

  # Flux health gate.
  require_flux_health: true
  flux_health_wait_seconds: 900
  flux_health_poll_seconds: 5
  ignore_flux_kustomizations: []

  # Workload convergence gate and its (currently empty) ignore lists.
  require_workload_convergence: true
  workload_convergence_wait_seconds: 900
  workload_convergence_poll_seconds: 5
  ignore_workload_namespaces: []
  ignore_workloads: []
  ignore_unavailable_nodes: []
  auto_recycle_stuck_pods: true
  stuck_pod_grace_seconds: 180

  # Vault unseal key file and break-glass command.
  # NOTE(review): the command below embeds a remote user, address and port and
  # uses StrictHostKeyChecking=accept-new while printing the unseal key file;
  # confirm that this is intended.
  vault_unseal_key_file: /var/lib/ananke/vault-unseal.key
  vault_unseal_breakglass_command: "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -i /home/atlas/.ssh/id_ed25519 -p 1122 brad@99.183.132.163 'cat ~/.ananke-breakglass/vault-unseal.key'"
  vault_unseal_breakglass_timeout_seconds: 15
# Shutdown settings.
# NOTE(review): nesting reconstructed from key names; confirm that the three
# *_parallelism keys belong to this section in the ananke configuration
# schema.
shutdown:
  # Time budgets (seconds) and minimum history sample counts.
  default_budget_seconds: 1380
  history_min_samples: 3
  emergency_budget_seconds: 420
  emergency_history_min_samples: 3
  # The emergency_* flags skip the etcd snapshot and drain; the plain flags
  # keep both.
  emergency_skip_etcd_snapshot: true
  emergency_skip_drain: true
  skip_etcd_snapshot: false
  skip_drain: false
  drain_parallelism: 6
  scale_parallelism: 8
  ssh_parallelism: 8
# UPS monitoring settings.
# NOTE(review): nesting reconstructed from key names; confirm against the
# ananke configuration schema.
ups:
  enabled: true
  provider: nut
  # Each target is a mapping with a display name and a target string; both
  # keys must sit inside the same list item.
  targets:
    - name: Pyrphoros
      target: pyrphoros@localhost
  poll_seconds: 5
  runtime_safety_factor: 1.25
  debounce_count: 3
  telemetry_timeout_seconds: 90
# Coordination settings for this host.
# NOTE(review): nesting reconstructed from key names; confirm that
# startup_guard_max_age_seconds, role and allow_startup_on_battery belong to
# this section in the ananke configuration schema.
coordination:
  # The forward host is set to the empty string here.
  forward_shutdown_host: ""
  forward_shutdown_user: atlas
  forward_shutdown_config: /etc/ananke/ananke.yaml
  peer_hosts:
    - titan-24
  fallback_local_shutdown: true
  command_timeout_seconds: 25
  startup_guard_max_age_seconds: 900
  # Matches the "(coordinator)" note in the file header comment.
  role: coordinator
  allow_startup_on_battery: false
# Metrics endpoint settings: on/off switch, bind address and path.
# The keys must be indented under metrics; at column 0, "enabled" and "path"
# become top-level keys.
metrics:
  enabled: true
  bind_addr: 0.0.0.0:9560
  path: /metrics
# State file and directory locations; every path below is under
# /var/lib/ananke.
state:
  dir: /var/lib/ananke
  reports_dir: /var/lib/ananke/reports
  run_history_path: /var/lib/ananke/runs.json
  lock_path: /var/lib/ananke/ananke.lock
  intent_path: /var/lib/ananke/intent.json