feat(dcl): add failover state machine to DataConnectionActor with round-robin endpoint switching
This commit is contained in:
@@ -25,11 +25,15 @@ namespace ScadaLink.DataConnectionLayer.Actors;
|
||||
/// </summary>
|
||||
public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
{
|
||||
/// <summary>
/// Identifies which configured endpoint the actor is currently targeting.
/// The actor starts on <see cref="Primary"/>; the reconnect logic toggles
/// between the two values when it fails over.
/// </summary>
public enum ActiveEndpoint { Primary, Backup }
|
||||
|
||||
private readonly ILoggingAdapter _log = Context.GetLogger();
|
||||
private readonly string _connectionName;
|
||||
private readonly IDataConnection _adapter;
|
||||
private IDataConnection _adapter;
|
||||
private readonly DataConnectionOptions _options;
|
||||
private readonly ISiteHealthCollector _healthCollector;
|
||||
private readonly IDataConnectionFactory _factory;
|
||||
private readonly string _protocolType;
|
||||
|
||||
public IStash Stash { get; set; } = null!;
|
||||
public ITimerScheduler Timers { get; set; } = null!;
|
||||
@@ -60,10 +64,12 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
private int _totalSubscribed;
|
||||
private int _resolvedTags;
|
||||
|
||||
private readonly IDictionary<string, string> _connectionDetails;
|
||||
private IDictionary<string, string> _connectionDetails;
|
||||
private readonly IDictionary<string, string> _primaryConfig;
|
||||
private readonly IDictionary<string, string>? _backupConfig;
|
||||
private readonly int _failoverRetryCount;
|
||||
private ActiveEndpoint _activeEndpoint = ActiveEndpoint.Primary;
|
||||
private int _consecutiveFailures;
|
||||
|
||||
/// <summary>
|
||||
/// Captured Self reference for use from non-actor threads (event handlers, callbacks).
|
||||
@@ -76,6 +82,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
IDataConnection adapter,
|
||||
DataConnectionOptions options,
|
||||
ISiteHealthCollector healthCollector,
|
||||
IDataConnectionFactory factory,
|
||||
string protocolType,
|
||||
IDictionary<string, string>? primaryConfig = null,
|
||||
IDictionary<string, string>? backupConfig = null,
|
||||
int failoverRetryCount = 3)
|
||||
@@ -84,6 +92,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
_adapter = adapter;
|
||||
_options = options;
|
||||
_healthCollector = healthCollector;
|
||||
_factory = factory;
|
||||
_protocolType = protocolType;
|
||||
_primaryConfig = primaryConfig ?? new Dictionary<string, string>();
|
||||
_backupConfig = backupConfig;
|
||||
_failoverRetryCount = failoverRetryCount;
|
||||
@@ -288,7 +298,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
{
|
||||
if (result.Success)
|
||||
{
|
||||
_log.Info("[{0}] Reconnected successfully", _connectionName);
|
||||
_log.Info("[{0}] Reconnected successfully on {1} endpoint", _connectionName, _activeEndpoint);
|
||||
_consecutiveFailures = 0;
|
||||
|
||||
// WP-10: Transparent re-subscribe — re-establish all active subscriptions
|
||||
ReSubscribeAll();
|
||||
@@ -297,8 +308,43 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
}
|
||||
else
|
||||
{
|
||||
_log.Warning("[{0}] Reconnect failed: {1}. Retrying in {2}s",
|
||||
_connectionName, result.Error, _options.ReconnectInterval.TotalSeconds);
|
||||
_consecutiveFailures++;
|
||||
|
||||
// Failover: switch endpoint after exhausting retry count (only if backup is configured)
|
||||
if (_backupConfig != null && _consecutiveFailures >= _failoverRetryCount)
|
||||
{
|
||||
var previousEndpoint = _activeEndpoint;
|
||||
_activeEndpoint = _activeEndpoint == ActiveEndpoint.Primary
|
||||
? ActiveEndpoint.Backup
|
||||
: ActiveEndpoint.Primary;
|
||||
_consecutiveFailures = 0;
|
||||
|
||||
var newConfig = _activeEndpoint == ActiveEndpoint.Primary
|
||||
? _primaryConfig
|
||||
: _backupConfig;
|
||||
|
||||
// Dispose old adapter (fire-and-forget — don't await in actor context)
|
||||
_adapter.Disconnected -= OnAdapterDisconnected;
|
||||
_ = _adapter.DisposeAsync().AsTask();
|
||||
|
||||
// Create new adapter for the target endpoint
|
||||
_adapter = _factory.Create(_protocolType, newConfig);
|
||||
_connectionDetails = newConfig;
|
||||
|
||||
// Wire disconnect handler on new adapter
|
||||
_adapter.Disconnected += OnAdapterDisconnected;
|
||||
|
||||
_log.Warning("[{0}] Failing over from {1} to {2}",
|
||||
_connectionName, previousEndpoint, _activeEndpoint);
|
||||
}
|
||||
else
|
||||
{
|
||||
var retryLimit = _backupConfig != null ? _failoverRetryCount.ToString() : "∞";
|
||||
_log.Warning("[{0}] Reconnect failed: {1}. Retrying in {2}s (attempt {3}/{4})",
|
||||
_connectionName, result.Error, _options.ReconnectInterval.TotalSeconds,
|
||||
_consecutiveFailures, retryLimit);
|
||||
}
|
||||
|
||||
Timers.StartSingleTimer("reconnect", new AttemptConnect(), _options.ReconnectInterval);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user