using Microsoft.Extensions.Options;

namespace ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;

/// <summary>
/// CI-004: Validates <see cref="ClusterOptions"/> at startup. The values it
/// guards carry cluster-wide consequences — the design doc
/// (Component-ClusterInfrastructure.md) is emphatic that misconfiguring
/// them produces a total cluster shutdown or an indefinitely blocked singleton.
/// Registered with <c>ValidateOnStart()</c> so a bad <c>appsettings.json</c>
/// fails fast at boot rather than failing far from the cause.
/// </summary>
public sealed class ClusterOptionsValidator : IValidateOptions<ClusterOptions>
{
    /// <summary>
    /// Split-brain resolver strategies safe for ScadaBridge's two-node clusters.
    /// Lookup is case-insensitive (ordinal).
    /// </summary>
    private static readonly HashSet<string> AllowedStrategies =
        new(StringComparer.OrdinalIgnoreCase) { "keep-oldest" };

    /// <summary>
    /// Validates the cluster options, returning a failure result if any critical settings are misconfigured.
    /// Every rule is evaluated, so the result lists all problems found rather than only the first.
    /// </summary>
    /// <param name="name">Named options instance name (unused; all instances are validated identically).</param>
    /// <param name="options">The cluster options to validate.</param>
    /// <returns>
    /// <see cref="ValidateOptionsResult.Success"/> when every check passes; otherwise a failed
    /// result carrying one message per violated rule.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public ValidateOptionsResult Validate(string? name, ClusterOptions options)
    {
        // Surface a null instance as a clear argument error instead of a
        // NullReferenceException on the first property access below.
        ArgumentNullException.ThrowIfNull(options);

        var failures = new List<string>();

        if (options.SeedNodes is null || options.SeedNodes.Count < 2)
        {
            // CI-012: design doc states "both nodes are seed nodes — each node lists
            // both itself and its partner" so a properly-configured deployment lists
            // two. Accepting a single-seed configuration silently defeats the
            // "no startup ordering dependency" guarantee called out by
            // Component-ClusterInfrastructure.md (Node Configuration).
            failures.Add(
                "ClusterOptions.SeedNodes must contain at least 2 seed nodes " +
                "(Component-ClusterInfrastructure.md → Node Configuration: " +
                "both nodes are seed nodes); a single-seed configuration defeats " +
                "the no-startup-ordering-dependency guarantee.");
        }

        // Null, empty and whitespace-only values are rejected before the set lookup.
        if (string.IsNullOrWhiteSpace(options.SplitBrainResolverStrategy)
            || !AllowedStrategies.Contains(options.SplitBrainResolverStrategy))
        {
            failures.Add(
                $"ClusterOptions.SplitBrainResolverStrategy must be 'keep-oldest' for a two-node cluster; " +
                $"'{options.SplitBrainResolverStrategy}' would risk a total cluster shutdown on a partition.");
        }

        if (options.MinNrOfMembers != 1)
        {
            failures.Add(
                $"ClusterOptions.MinNrOfMembers must be 1 (was {options.MinNrOfMembers}); " +
                "any other value blocks the cluster singleton after failover and halts all data collection.");
        }

        if (options.StableAfter <= TimeSpan.Zero)
        {
            failures.Add("ClusterOptions.StableAfter must be a positive duration.");
        }

        if (options.HeartbeatInterval <= TimeSpan.Zero)
        {
            failures.Add("ClusterOptions.HeartbeatInterval must be a positive duration.");
        }

        if (options.FailureDetectionThreshold <= TimeSpan.Zero)
        {
            failures.Add("ClusterOptions.FailureDetectionThreshold must be a positive duration.");
        }

        // Only strict ordering is enforced here: an interval equal to or greater than
        // the threshold is rejected; "well below" in the message is advisory.
        if (options.HeartbeatInterval >= options.FailureDetectionThreshold)
        {
            failures.Add(
                $"ClusterOptions.HeartbeatInterval ({options.HeartbeatInterval}) must be well below " +
                $"FailureDetectionThreshold ({options.FailureDetectionThreshold}); otherwise nodes are " +
                "declared unreachable before a heartbeat can arrive.");
        }

        if (!options.DownIfAlone)
        {
            failures.Add(
                "ClusterOptions.DownIfAlone must be true for the keep-oldest resolver " +
                "(Component-ClusterInfrastructure.md → Split-Brain Resolution); with it false the " +
                "oldest node can run as an isolated single-node cluster during a partition while the " +
                "younger node forms its own, producing two live clusters.");
        }

        return failures.Count > 0
            ? ValidateOptionsResult.Fail(failures)
            : ValidateOptionsResult.Success;
    }
}