ananke/internal/config/validate.go

393 lines
16 KiB
Go

package config
import (
"fmt"
neturl "net/url"
"strings"
)
// Validate checks the configuration and returns an error naming the first
// setting that is missing, out of range, or malformed.
// Signature: (c Config) Validate() error.
// Why: a single up-front pass surfaces bad settings as a descriptive error
// that names the offending config key.
//
// Checks run in the order written and stop at the first failure; nil is
// returned only when every check passes. RequiredNodeLabels is ranged over as
// a map, so when several of its entries are invalid the one reported may vary
// between calls.
func (c Config) Validate() error {
	// slashSegmentsOK reports whether entry (after trimming) splits on "/" into
	// one of the allowed segment counts AND has no blank segment. Counting
	// slashes alone would accept entries such as "/", "ns/" or "/name".
	slashSegmentsOK := func(entry string, allowed ...int) bool {
		parts := strings.Split(strings.TrimSpace(entry), "/")
		countOK := false
		for _, n := range allowed {
			if len(parts) == n {
				countOK = true
				break
			}
		}
		if !countOK {
			return false
		}
		for _, part := range parts {
			if strings.TrimSpace(part) == "" {
				return false
			}
		}
		return true
	}

	// Top-level required settings.
	if len(c.ControlPlanes) == 0 {
		return fmt.Errorf("config.control_planes must not be empty")
	}
	if c.ExpectedFluxBranch == "" {
		return fmt.Errorf("config.expected_flux_branch must not be empty")
	}
	if c.ExpectedFluxSource == "" {
		return fmt.Errorf("config.expected_flux_source_url must not be empty")
	}
	if c.IACRepoPath == "" {
		return fmt.Errorf("config.iac_repo_path must not be empty")
	}

	// Shutdown settings: all must be strictly positive.
	if c.Shutdown.DefaultBudgetSeconds <= 0 {
		return fmt.Errorf("config.shutdown.default_budget_seconds must be > 0")
	}
	if c.Shutdown.HistoryMinSamples <= 0 {
		return fmt.Errorf("config.shutdown.history_min_samples must be > 0")
	}
	if c.Shutdown.EmergencyBudgetSec <= 0 {
		return fmt.Errorf("config.shutdown.emergency_budget_seconds must be > 0")
	}
	if c.Shutdown.EmergencyMinSamples <= 0 {
		return fmt.Errorf("config.shutdown.emergency_history_min_samples must be > 0")
	}
	if c.Shutdown.DrainParallelism <= 0 {
		return fmt.Errorf("config.shutdown.drain_parallelism must be > 0")
	}
	if c.Shutdown.ScaleParallelism <= 0 {
		return fmt.Errorf("config.shutdown.scale_parallelism must be > 0")
	}
	if c.Shutdown.SSHParallelism <= 0 {
		return fmt.Errorf("config.shutdown.ssh_parallelism must be > 0")
	}

	// Startup: API wait, cooldown, battery, and node inventory reachability.
	if c.Startup.APIWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.api_wait_seconds must be > 0")
	}
	if c.Startup.APIPollSeconds <= 0 {
		return fmt.Errorf("config.startup.api_poll_seconds must be > 0")
	}
	if c.Startup.ShutdownCooldownSeconds <= 0 {
		return fmt.Errorf("config.startup.shutdown_cooldown_seconds must be > 0")
	}
	if c.Startup.MinimumBatteryPercent < 0 || c.Startup.MinimumBatteryPercent > 100 {
		return fmt.Errorf("config.startup.minimum_battery_percent must be between 0 and 100")
	}
	if c.Startup.NodeInventoryReachWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.node_inventory_reachability_wait_seconds must be > 0")
	}
	if c.Startup.NodeInventoryReachPollSeconds <= 0 {
		return fmt.Errorf("config.startup.node_inventory_reachability_poll_seconds must be > 0")
	}
	for _, node := range c.Startup.NodeInventoryReachRequiredNodes {
		if strings.TrimSpace(node) == "" {
			return fmt.Errorf("config.startup.node_inventory_reachability_required_nodes entries must not be empty")
		}
	}

	// Startup: required node labels need a non-blank node name, at least one
	// label, and non-blank label keys and values.
	for node, labels := range c.Startup.RequiredNodeLabels {
		if strings.TrimSpace(node) == "" {
			return fmt.Errorf("config.startup.required_node_labels keys must not be empty")
		}
		if len(labels) == 0 {
			return fmt.Errorf("config.startup.required_node_labels[%q] must include at least one label", node)
		}
		for key, value := range labels {
			if strings.TrimSpace(key) == "" {
				return fmt.Errorf("config.startup.required_node_labels[%q] contains empty label key", node)
			}
			if strings.TrimSpace(value) == "" {
				return fmt.Errorf("config.startup.required_node_labels[%q][%q] must not be empty", node, key)
			}
		}
	}

	// Startup: time sync. The quorum size is only checked in quorum mode.
	if c.Startup.TimeSyncWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.time_sync_wait_seconds must be > 0")
	}
	if c.Startup.TimeSyncPollSeconds <= 0 {
		return fmt.Errorf("config.startup.time_sync_poll_seconds must be > 0")
	}
	if c.Startup.TimeSyncMode != "strict" && c.Startup.TimeSyncMode != "quorum" {
		return fmt.Errorf("config.startup.time_sync_mode must be strict or quorum")
	}
	if c.Startup.TimeSyncMode == "quorum" && c.Startup.TimeSyncQuorum <= 0 {
		return fmt.Errorf("config.startup.time_sync_quorum must be > 0 when time_sync_mode=quorum")
	}

	// Startup: the etcd restore target is optional, but when set it must be
	// an exact match for one of the configured control planes.
	if c.Startup.EtcdRestoreControlPlane != "" {
		found := false
		for _, cp := range c.ControlPlanes {
			if cp == c.Startup.EtcdRestoreControlPlane {
				found = true
				break
			}
		}
		if !found {
			return fmt.Errorf("config.startup.etcd_restore_control_plane must be one of config.control_planes when set")
		}
	}

	// Startup: storage readiness.
	if c.Startup.StorageReadyWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.storage_ready_wait_seconds must be > 0")
	}
	if c.Startup.StorageReadyPollSeconds <= 0 {
		return fmt.Errorf("config.startup.storage_ready_poll_seconds must be > 0")
	}
	if c.Startup.StorageMinReadyNodes <= 0 {
		return fmt.Errorf("config.startup.storage_min_ready_nodes must be > 0")
	}
	for _, pvc := range c.Startup.StorageCriticalPVCs {
		if !slashSegmentsOK(pvc, 2) {
			return fmt.Errorf("config.startup.storage_critical_pvcs entries must be namespace/name, got %q", pvc)
		}
	}

	// Startup: post-start probe timing (entries are checked further below).
	if c.Startup.PostStartProbeWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.post_start_probe_wait_seconds must be > 0")
	}
	if c.Startup.PostStartProbePollSeconds <= 0 {
		return fmt.Errorf("config.startup.post_start_probe_poll_seconds must be > 0")
	}
	if c.Startup.RequirePostStartProbes && len(c.Startup.PostStartProbes) == 0 {
		return fmt.Errorf("config.startup.post_start_probes must not be empty when require_post_start_probes is true")
	}

	// Startup: service checklist timing and auth settings.
	if c.Startup.ServiceChecklistWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.service_checklist_wait_seconds must be > 0")
	}
	if c.Startup.ServiceChecklistPollSeconds <= 0 {
		return fmt.Errorf("config.startup.service_checklist_poll_seconds must be > 0")
	}
	if c.Startup.ServiceChecklistStabilitySec < 0 {
		return fmt.Errorf("config.startup.service_checklist_stability_seconds must be >= 0")
	}
	if c.Startup.RequireServiceChecklist && len(c.Startup.ServiceChecklist) == 0 {
		return fmt.Errorf("config.startup.service_checklist must not be empty when require_service_checklist is true")
	}
	authMode := strings.TrimSpace(c.Startup.ServiceChecklistAuth.Mode)
	if authMode != "none" && authMode != "keycloak_robotuser" {
		return fmt.Errorf("config.startup.service_checklist_auth.mode must be none or keycloak_robotuser")
	}
	if authMode == "keycloak_robotuser" {
		// The base URL must parse and carry both a scheme and a host.
		baseURL := strings.TrimSpace(c.Startup.ServiceChecklistAuth.KeycloakBaseURL)
		parsed, err := neturl.Parse(baseURL)
		if err != nil || parsed.Scheme == "" || parsed.Host == "" {
			return fmt.Errorf("config.startup.service_checklist_auth.keycloak_base_url is invalid: %q", baseURL)
		}
		if strings.TrimSpace(c.Startup.ServiceChecklistAuth.Realm) == "" {
			return fmt.Errorf("config.startup.service_checklist_auth.realm must not be empty")
		}
		if strings.TrimSpace(c.Startup.ServiceChecklistAuth.RobotUsername) == "" {
			return fmt.Errorf("config.startup.service_checklist_auth.robot_username must not be empty")
		}
		if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretNamespace) == "" {
			return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_namespace must not be empty")
		}
		if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretName) == "" {
			return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_name must not be empty")
		}
		if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretUsernameKey) == "" {
			return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_username_key must not be empty")
		}
		if strings.TrimSpace(c.Startup.ServiceChecklistAuth.AdminSecretPasswordKey) == "" {
			return fmt.Errorf("config.startup.service_checklist_auth.admin_secret_password_key must not be empty")
		}
	}

	// Startup: individual service checklist entries.
	for i, check := range c.Startup.ServiceChecklist {
		if strings.TrimSpace(check.Name) == "" {
			return fmt.Errorf("config.startup.service_checklist[%d].name must not be empty", i)
		}
		rawURL := strings.TrimSpace(check.URL)
		if rawURL == "" {
			return fmt.Errorf("config.startup.service_checklist[%d].url must not be empty", i)
		}
		parsed, err := neturl.Parse(rawURL)
		if err != nil || parsed.Scheme == "" || parsed.Host == "" {
			return fmt.Errorf("config.startup.service_checklist[%d].url is invalid: %q", i, rawURL)
		}
		if check.TimeoutSeconds <= 0 {
			return fmt.Errorf("config.startup.service_checklist[%d].timeout_seconds must be > 0", i)
		}
		if check.RequireRobotAuth && authMode == "none" {
			return fmt.Errorf("config.startup.service_checklist[%d] requires robot auth but service_checklist_auth.mode is none", i)
		}
		// final_url_* markers are only accepted when the check either follows
		// redirects or requires robot auth.
		if (strings.TrimSpace(check.FinalURLContains) != "" || strings.TrimSpace(check.FinalURLNotContains) != "") &&
			!(check.FollowRedirects || check.RequireRobotAuth) {
			return fmt.Errorf("config.startup.service_checklist[%d] uses final_url_* markers without redirects enabled", i)
		}
		for _, code := range check.AcceptedStatuses {
			if code < 100 || code > 599 {
				return fmt.Errorf("config.startup.service_checklist[%d].accepted_statuses contains invalid HTTP code %d", i, code)
			}
		}
	}

	// Startup: critical service endpoints.
	if c.Startup.CriticalServiceEndpointWaitSec <= 0 {
		return fmt.Errorf("config.startup.critical_service_endpoint_wait_seconds must be > 0")
	}
	if c.Startup.CriticalServiceEndpointPollSec <= 0 {
		return fmt.Errorf("config.startup.critical_service_endpoint_poll_seconds must be > 0")
	}
	if c.Startup.RequireCriticalServiceEndpoints && len(c.Startup.CriticalServiceEndpoints) == 0 {
		return fmt.Errorf("config.startup.critical_service_endpoints must not be empty when require_critical_service_endpoints is true")
	}
	for _, entry := range c.Startup.CriticalServiceEndpoints {
		entry = strings.TrimSpace(entry)
		if entry == "" {
			return fmt.Errorf("config.startup.critical_service_endpoints entries must not be empty")
		}
		if !slashSegmentsOK(entry, 2) {
			return fmt.Errorf("config.startup.critical_service_endpoints entries must be namespace/service, got %q", entry)
		}
	}

	// Startup: ingress checklist.
	if c.Startup.IngressChecklistWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.ingress_checklist_wait_seconds must be > 0")
	}
	if c.Startup.IngressChecklistPollSeconds <= 0 {
		return fmt.Errorf("config.startup.ingress_checklist_poll_seconds must be > 0")
	}
	for _, code := range c.Startup.IngressChecklistAccepted {
		if code < 100 || code > 599 {
			return fmt.Errorf("config.startup.ingress_checklist_accepted_statuses contains invalid HTTP code %d", code)
		}
	}
	for _, host := range c.Startup.IngressChecklistIgnoreHosts {
		if strings.TrimSpace(host) == "" {
			return fmt.Errorf("config.startup.ingress_checklist_ignore_hosts entries must not be empty")
		}
	}

	// Startup: node SSH auth.
	if c.Startup.NodeSSHAuthWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.node_ssh_auth_wait_seconds must be > 0")
	}
	if c.Startup.NodeSSHAuthPollSeconds <= 0 {
		return fmt.Errorf("config.startup.node_ssh_auth_poll_seconds must be > 0")
	}
	for _, node := range c.Startup.NodeSSHAuthRequiredNodes {
		if strings.TrimSpace(node) == "" {
			return fmt.Errorf("config.startup.node_ssh_auth_required_nodes entries must not be empty")
		}
	}

	// Startup: Flux health.
	if c.Startup.FluxHealthWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.flux_health_wait_seconds must be > 0")
	}
	if c.Startup.FluxHealthPollSeconds <= 0 {
		return fmt.Errorf("config.startup.flux_health_poll_seconds must be > 0")
	}
	for _, item := range c.Startup.FluxHealthRequiredKustomizations {
		item = strings.TrimSpace(item)
		if item == "" {
			return fmt.Errorf("config.startup.flux_health_required_kustomizations entries must not be empty")
		}
		if !slashSegmentsOK(item, 2) {
			return fmt.Errorf("config.startup.flux_health_required_kustomizations entries must be namespace/name, got %q", item)
		}
	}

	// Startup: workload convergence.
	if c.Startup.WorkloadConvergenceWaitSeconds <= 0 {
		return fmt.Errorf("config.startup.workload_convergence_wait_seconds must be > 0")
	}
	if c.Startup.WorkloadConvergencePollSeconds <= 0 {
		return fmt.Errorf("config.startup.workload_convergence_poll_seconds must be > 0")
	}
	for _, ns := range c.Startup.WorkloadConvergenceRequiredNamespaces {
		if strings.TrimSpace(ns) == "" {
			return fmt.Errorf("config.startup.workload_convergence_required_namespaces entries must not be empty")
		}
	}
	if c.Startup.StuckPodGraceSeconds <= 0 {
		return fmt.Errorf("config.startup.stuck_pod_grace_seconds must be > 0")
	}
	for _, probe := range c.Startup.PostStartProbes {
		if strings.TrimSpace(probe) == "" {
			return fmt.Errorf("config.startup.post_start_probes entries must not be empty")
		}
	}

	// Startup: ignore lists.
	for _, item := range c.Startup.IgnoreFluxKustomizations {
		item = strings.TrimSpace(item)
		if item == "" {
			return fmt.Errorf("config.startup.ignore_flux_kustomizations entries must not be empty")
		}
		if !slashSegmentsOK(item, 2) {
			return fmt.Errorf("config.startup.ignore_flux_kustomizations entries must be namespace/name, got %q", item)
		}
	}
	for _, item := range c.Startup.IgnoreWorkloads {
		item = strings.TrimSpace(item)
		if item == "" {
			return fmt.Errorf("config.startup.ignore_workloads entries must not be empty")
		}
		// Two segments (namespace/name) or three (namespace/kind/name).
		if !slashSegmentsOK(item, 2, 3) {
			return fmt.Errorf("config.startup.ignore_workloads entries must be namespace/name or namespace/kind/name, got %q", item)
		}
	}
	for _, ns := range c.Startup.IgnoreWorkloadNamespaces {
		if strings.TrimSpace(ns) == "" {
			return fmt.Errorf("config.startup.ignore_workload_namespaces entries must not be empty")
		}
	}

	// An item may not be both required and ignored; comparison is on
	// whitespace-trimmed values.
	for _, item := range c.Startup.FluxHealthRequiredKustomizations {
		if containsTrimmed(c.Startup.IgnoreFluxKustomizations, item) {
			return fmt.Errorf("config.startup.flux_health_required_kustomizations must not overlap ignore_flux_kustomizations (%q)", strings.TrimSpace(item))
		}
	}
	for _, ns := range c.Startup.WorkloadConvergenceRequiredNamespaces {
		if containsTrimmed(c.Startup.IgnoreWorkloadNamespaces, ns) {
			return fmt.Errorf("config.startup.workload_convergence_required_namespaces must not overlap ignore_workload_namespaces (%q)", strings.TrimSpace(ns))
		}
	}
	for _, node := range c.Startup.IgnoreUnavailableNodes {
		if strings.TrimSpace(node) == "" {
			return fmt.Errorf("config.startup.ignore_unavailable_nodes entries must not be empty")
		}
	}
	if strings.TrimSpace(c.Startup.VaultUnsealKeyFile) == "" {
		return fmt.Errorf("config.startup.vault_unseal_key_file must not be empty")
	}

	if c.SSHPort <= 0 || c.SSHPort > 65535 {
		return fmt.Errorf("config.ssh_port must be in range 1-65535")
	}

	// UPS settings are only checked when UPS support is enabled.
	if c.UPS.Enabled {
		if c.UPS.Provider == "" {
			return fmt.Errorf("config.ups.provider must not be empty when ups is enabled")
		}
		if c.UPS.OnBatteryGraceSeconds < 0 {
			return fmt.Errorf("config.ups.on_battery_grace_seconds must be >= 0")
		}
		if c.UPS.Target == "" && len(c.UPS.Targets) == 0 {
			return fmt.Errorf("config.ups.target or config.ups.targets must be set when ups is enabled")
		}
		for _, t := range c.UPS.Targets {
			if t.Target == "" {
				return fmt.Errorf("config.ups.targets[].target must not be empty")
			}
		}
	}

	// A forward-shutdown host requires an accompanying forward-shutdown config.
	if c.Coordination.ForwardShutdownHost != "" {
		if c.Coordination.ForwardShutdownConfig == "" {
			return fmt.Errorf("config.coordination.forward_shutdown_config must not be empty when forward_shutdown_host is set")
		}
	}

	// Scheduling-storm thresholds are only checked when auto-quarantine is on.
	if c.Startup.AutoQuarantineSchedulingStorms {
		if c.Startup.SchedulingStormEventThreshold <= 0 {
			return fmt.Errorf("config.startup.scheduling_storm_event_threshold must be > 0 when auto_quarantine_scheduling_storms is enabled")
		}
		if c.Startup.SchedulingStormWindowSeconds <= 0 {
			return fmt.Errorf("config.startup.scheduling_storm_window_seconds must be > 0 when auto_quarantine_scheduling_storms is enabled")
		}
	}

	// Coordination settings.
	for _, peer := range c.Coordination.PeerHosts {
		if strings.TrimSpace(peer) == "" {
			return fmt.Errorf("config.coordination.peer_hosts entries must not be empty")
		}
	}
	if c.Coordination.StartupGuardMaxAgeSec <= 0 {
		return fmt.Errorf("config.coordination.startup_guard_max_age_seconds must be > 0")
	}
	if c.Coordination.Role != "coordinator" && c.Coordination.Role != "peer" {
		return fmt.Errorf("config.coordination.role must be coordinator or peer")
	}

	// State paths.
	if c.State.ReportsDir == "" {
		return fmt.Errorf("config.state.reports_dir must not be empty")
	}
	if c.State.RunHistoryPath == "" || c.State.LockPath == "" {
		return fmt.Errorf("config.state.run_history_path and config.state.lock_path must not be empty")
	}
	if c.State.IntentPath == "" {
		return fmt.Errorf("config.state.intent_path must not be empty")
	}
	return nil
}
// containsTrimmed reports whether needle equals any element of entries once
// surrounding whitespace is trimmed from both sides of the comparison.
// Signature: containsTrimmed(entries []string, needle string) bool.
// Why: startup config supports both required and ignored recovery scopes, so
// validation needs one normalized overlap check shared by those lists.
// A needle that is blank after trimming never matches, even when entries
// itself contains blank strings.
func containsTrimmed(entries []string, needle string) bool {
	want := strings.TrimSpace(needle)
	if want == "" {
		return false
	}
	for i := 0; i < len(entries); i++ {
		if candidate := strings.TrimSpace(entries[i]); candidate == want {
			return true
		}
	}
	return false
}