fix(cluster-infrastructure): resolve ClusterInfrastructure-002..006 — options validation, DI registration, down-if-alone
This commit is contained in:
@@ -0,0 +1,72 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace ScadaLink.ClusterInfrastructure;
|
||||
|
||||
/// <summary>
|
||||
/// CI-004: Validates <see cref="ClusterOptions"/> at startup. The values it
|
||||
/// guards carry cluster-wide consequences — the design doc
|
||||
/// (<c>Component-ClusterInfrastructure.md</c>) is emphatic that misconfiguring
|
||||
/// them produces a total cluster shutdown or an indefinitely blocked singleton.
|
||||
/// Registered with <c>ValidateOnStart()</c> so a bad <c>appsettings.json</c>
|
||||
/// fails fast at boot rather than failing far from the cause.
|
||||
/// </summary>
|
||||
public sealed class ClusterOptionsValidator : IValidateOptions<ClusterOptions>
|
||||
{
|
||||
/// <summary>Split-brain resolver strategies safe for ScadaLink's two-node clusters.</summary>
|
||||
private static readonly HashSet<string> AllowedStrategies = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
"keep-oldest"
|
||||
};
|
||||
|
||||
public ValidateOptionsResult Validate(string? name, ClusterOptions options)
|
||||
{
|
||||
var failures = new List<string>();
|
||||
|
||||
if (options.SeedNodes is null || options.SeedNodes.Count == 0)
|
||||
{
|
||||
failures.Add("ClusterOptions.SeedNodes must contain at least one seed node.");
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(options.SplitBrainResolverStrategy)
|
||||
|| !AllowedStrategies.Contains(options.SplitBrainResolverStrategy))
|
||||
{
|
||||
failures.Add(
|
||||
$"ClusterOptions.SplitBrainResolverStrategy must be 'keep-oldest' for a two-node cluster; " +
|
||||
$"'{options.SplitBrainResolverStrategy}' would risk a total cluster shutdown on a partition.");
|
||||
}
|
||||
|
||||
if (options.MinNrOfMembers != 1)
|
||||
{
|
||||
failures.Add(
|
||||
$"ClusterOptions.MinNrOfMembers must be 1 (was {options.MinNrOfMembers}); " +
|
||||
"any other value blocks the cluster singleton after failover and halts all data collection.");
|
||||
}
|
||||
|
||||
if (options.StableAfter <= TimeSpan.Zero)
|
||||
{
|
||||
failures.Add("ClusterOptions.StableAfter must be a positive duration.");
|
||||
}
|
||||
|
||||
if (options.HeartbeatInterval <= TimeSpan.Zero)
|
||||
{
|
||||
failures.Add("ClusterOptions.HeartbeatInterval must be a positive duration.");
|
||||
}
|
||||
|
||||
if (options.FailureDetectionThreshold <= TimeSpan.Zero)
|
||||
{
|
||||
failures.Add("ClusterOptions.FailureDetectionThreshold must be a positive duration.");
|
||||
}
|
||||
|
||||
if (options.HeartbeatInterval >= options.FailureDetectionThreshold)
|
||||
{
|
||||
failures.Add(
|
||||
$"ClusterOptions.HeartbeatInterval ({options.HeartbeatInterval}) must be well below " +
|
||||
$"FailureDetectionThreshold ({options.FailureDetectionThreshold}); otherwise nodes are " +
|
||||
"declared unreachable before a heartbeat can arrive.");
|
||||
}
|
||||
|
||||
return failures.Count > 0
|
||||
? ValidateOptionsResult.Fail(failures)
|
||||
: ValidateOptionsResult.Success;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user