# ananke/configs/ananke.example.yaml
#
# 201 lines
# 5.9 KiB
# YAML

# /etc/ananke/ananke.yaml
# Top-level access settings: cluster kubeconfig, SSH defaults, and the IaC
# repository checkout with the Flux source it is expected to track.
# Path of the kubeconfig file.
kubeconfig: /etc/ananke/kubeconfig
# SSH defaults. Note the non-standard port.
ssh_user: atlas
ssh_port: 2277
# Empty string: no SSH client config file is named here.
ssh_config_file: ""
ssh_identity_file: /home/atlas/.ssh/id_ed25519
# Per-node overrides and the managed-node list are left empty in this example.
# NOTE(review): presumably maps of node name -> host / user; confirm against
# the config loader.
ssh_node_hosts: {}
ssh_node_users: {}
ssh_managed_nodes: []
# Empty strings: no jump host is configured in this example.
ssh_jump_host: ""
ssh_jump_user: ""
# Local checkout of the infrastructure-as-code repository.
iac_repo_path: /opt/titan-iac
# Branch and Git URL the Flux source is expected to use.
expected_flux_branch: main
expected_flux_source_url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
# Node inventory: three control-plane nodes are listed by name.
control_planes:
- titan-0a
- titan-0b
- titan-0c
# No worker nodes are listed explicitly in this example.
workers: []
# Repository-relative paths used for local bootstrap.
# NOTE(review): presumably resolved under iac_repo_path and applied in the
# listed order (core and Flux first, then networking, secrets, data, and
# identity services) - confirm ordering semantics with the consumer.
local_bootstrap_paths:
- infrastructure/core
- clusters/atlas/flux-system
- infrastructure/sources/helm
- infrastructure/metallb
- infrastructure/traefik
- infrastructure/cert-manager
- infrastructure/vault-csi
- infrastructure/vault-injector
- services/vault
- infrastructure/postgres
- services/gitea
- services/keycloak
- services/oauth2-proxy
# Kubernetes namespaces excluded from processing: cluster system namespaces
# plus the platform namespaces (Flux, ingress, load balancer, certificates,
# storage, Vault, Postgres, maintenance).
# NOTE(review): which operation these are excluded from is not visible in this
# file - confirm with the consumer.
excluded_namespaces:
- kube-system
- kube-public
- kube-node-lease
- flux-system
- traefik
- metallb-system
- cert-manager
- longhorn-system
- vault
- postgres
- maintenance
# Startup settings: a series of gates, most following the pattern
# require_<gate> / <gate>_wait_seconds / <gate>_poll_seconds.
# All keys are nested under `startup` so they do not collide with other
# sections and so each checklist entry keeps its own fields.
# NOTE(review): nesting follows key order and name prefixes; confirm against
# the config loader's schema.
startup:
  api_wait_seconds: 1200
  api_poll_seconds: 2
  shutdown_cooldown_seconds: 45
  minimum_battery_percent: 20
  # Gate: node inventory reachability. Empty required list in this example.
  require_node_inventory_reachability: true
  node_inventory_reachability_wait_seconds: 300
  node_inventory_reachability_poll_seconds: 5
  node_inventory_reachability_required_nodes: []
  # Node name -> label key -> label value. The value is quoted so it stays a
  # string rather than a boolean.
  required_node_labels:
    titan-09:
      ananke.bstein.dev/harbor-bootstrap: "true"
  # Gate: time synchronisation, in quorum mode with a quorum of 2.
  require_time_sync: true
  time_sync_wait_seconds: 240
  time_sync_poll_seconds: 5
  time_sync_mode: quorum
  time_sync_quorum: 2
  reconcile_access_on_boot: true
  # etcd restore on API failure, targeting one named control-plane node.
  auto_etcd_restore_on_api_failure: true
  etcd_restore_control_plane: titan-0a
  # Gate: storage readiness. PVC entries are written as namespace/name.
  require_storage_ready: true
  storage_ready_wait_seconds: 420
  storage_ready_poll_seconds: 5
  storage_min_ready_nodes: 2
  storage_critical_pvcs:
  - vault/data-vault-0
  - postgres/postgres-data-postgres-0
  - gitea/gitea-data
  - sso/keycloak-data
  # Gate: post-start HTTP probes.
  require_post_start_probes: true
  post_start_probe_wait_seconds: 240
  post_start_probe_poll_seconds: 5
  post_start_probes:
  - https://scm.bstein.dev/api/healthz
  - https://metrics.bstein.dev/api/health
  # Gate: service checklist.
  require_service_checklist: true
  service_checklist_wait_seconds: 420
  service_checklist_poll_seconds: 5
  service_checklist_stability_seconds: 120
  # Credentials source for checks that set require_robot_auth. Only the
  # secret's namespace, name, and key names are given here, not the secret.
  service_checklist_auth:
    mode: keycloak_robotuser
    keycloak_base_url: https://sso.bstein.dev
    realm: atlas
    robot_username: robotuser
    admin_secret_namespace: sso
    admin_secret_name: keycloak-admin
    admin_secret_username_key: username
    admin_secret_password_key: password
  service_checklist_explicit_only: false
  # Each entry is one HTTP check: URL, accepted status codes, a substring the
  # body must contain, and a per-request timeout.
  service_checklist:
  - name: gitea-api
    url: https://scm.bstein.dev/api/healthz
    accepted_statuses: [200]
    body_contains: pass
    timeout_seconds: 12
  - name: grafana-api
    url: https://metrics.bstein.dev/api/health
    accepted_statuses: [200]
    body_contains: '"database":"ok"'
    timeout_seconds: 12
  - name: keycloak-oidc
    url: https://sso.bstein.dev/realms/atlas/.well-known/openid-configuration
    accepted_statuses: [200]
    body_contains: '"issuer":"https://sso.bstein.dev/realms/atlas"'
    timeout_seconds: 12
  # The accepted status for this check is 401 (unauthenticated request).
  - name: harbor-registry
    url: https://registry.bstein.dev/v2/
    accepted_statuses: [401]
    body_contains: unauthorized
    timeout_seconds: 12
  # Authenticated check: follows redirects and requires that the final URL
  # contains /v1 and does not contain the oauth2 sign-in path.
  - name: longhorn-api-user-session
    url: https://longhorn.bstein.dev/v1
    accepted_statuses: [200]
    require_robot_auth: true
    follow_redirects: true
    final_url_contains: /v1
    final_url_not_contains: /oauth2/sign_in
    body_contains: '"id":"v1"'
    timeout_seconds: 12
  # Gate: critical service endpoints, written as namespace/name.
  require_critical_service_endpoints: true
  critical_service_endpoint_wait_seconds: 420
  critical_service_endpoint_poll_seconds: 5
  critical_service_endpoints:
  - monitoring/victoria-metrics-single-server
  # Gate: ingress checklist. The accepted list includes redirect and
  # client-error statuses, not only 200.
  require_ingress_checklist: true
  ingress_checklist_wait_seconds: 420
  ingress_checklist_poll_seconds: 5
  ingress_checklist_accepted_statuses: [200, 301, 302, 307, 308, 401, 403, 404]
  ingress_checklist_ignore_hosts: []
  ingress_checklist_insecure_skip_tls: false
  # Gate: node SSH authentication.
  require_node_ssh_auth: true
  node_ssh_auth_wait_seconds: 240
  node_ssh_auth_poll_seconds: 5
  node_ssh_auth_required_nodes: []
  # Gate: Flux health.
  require_flux_health: true
  flux_health_wait_seconds: 900
  flux_health_poll_seconds: 5
  flux_health_required_kustomizations: []
  ignore_flux_kustomizations: []
  # Gate: workload convergence, with empty ignore lists in this example.
  require_workload_convergence: true
  workload_convergence_wait_seconds: 900
  workload_convergence_poll_seconds: 5
  workload_convergence_required_namespaces: []
  ignore_workload_namespaces: []
  ignore_workloads: []
  ignore_unavailable_nodes: []
  auto_recycle_stuck_pods: true
  stuck_pod_grace_seconds: 180
  # Vault unseal inputs: a key file path and an optional break-glass command
  # (empty here) with its timeout.
  # NOTE(review): assumed to belong to `startup` because they precede the
  # `shutdown` section; confirm.
  vault_unseal_key_file: /var/lib/ananke/vault-unseal.key
  vault_unseal_breakglass_command: ""
  vault_unseal_breakglass_timeout_seconds: 15
# Shutdown settings: a default budget and an emergency budget (seconds), each
# with a minimum history sample count, plus skip flags for the etcd snapshot
# and drain steps. The emergency profile skips both; the normal one skips
# neither.
# Keys are nested under `shutdown` so the section is a mapping, not null.
shutdown:
  default_budget_seconds: 1380
  history_min_samples: 3
  emergency_budget_seconds: 420
  emergency_history_min_samples: 3
  emergency_skip_etcd_snapshot: true
  emergency_skip_drain: true
  skip_etcd_snapshot: false
  skip_drain: false
  # NOTE(review): the parallelism limits are assumed to belong to `shutdown`
  # because they sit between it and the next section; confirm against the
  # config loader's schema.
  drain_parallelism: 6
  scale_parallelism: 8
  ssh_parallelism: 8
# UPS monitoring settings (provider: nut).
# Keys are nested under `ups` so that `enabled` does not collide with the
# metrics section and the `targets` entry keeps its own `target` field.
ups:
  enabled: true
  provider: nut
  # A single target plus a named list of targets are both given.
  # NOTE(review): how the two are combined is not visible here; confirm.
  target: pyrphoros@localhost
  targets:
  - name: Pyrphoros
    target: pyrphoros@localhost
  # NOTE(review): the keys below are assumed to be `ups`-level settings rather
  # than fields of the target entry above; confirm against the schema.
  poll_seconds: 5
  runtime_safety_factor: 1.25
  on_battery_grace_seconds: 90
  debounce_count: 3
  telemetry_timeout_seconds: 90
# Coordination settings. Shutdown forwarding has no host configured in this
# example (empty string) and the peer list is empty; local fallback is on.
# Keys are nested under `coordination` so the section is a mapping, not null.
coordination:
  forward_shutdown_host: ""
  forward_shutdown_user: atlas
  forward_shutdown_config: /etc/ananke/ananke.yaml
  peer_hosts: []
  fallback_local_shutdown: true
  command_timeout_seconds: 25
  startup_guard_max_age_seconds: 900
  # NOTE(review): `role` and `allow_startup_on_battery` are assumed to belong
  # to `coordination` because they precede the next section; confirm against
  # the config loader's schema.
  role: coordinator
  allow_startup_on_battery: false
# Metrics endpoint settings. Keys are nested under `metrics` so that `enabled`
# does not collide with the `enabled` key of another section.
metrics:
  enabled: true
  # 0.0.0.0 binds on all interfaces.
  bind_addr: 0.0.0.0:9560
  path: /metrics
# On-disk state locations; every path here sits under /var/lib/ananke.
# Keys are nested under `state` so the section is a mapping, not null.
state:
  dir: /var/lib/ananke
  reports_dir: /var/lib/ananke/reports
  run_history_path: /var/lib/ananke/runs.json
  lock_path: /var/lib/ananke/ananke.lock
  intent_path: /var/lib/ananke/intent.json