feat(dcl): add active endpoint to health reports and log failover events
Add an ActiveEndpoint field to DataConnectionHealthReport showing which endpoint is active (Primary, Backup, or Primary with no backup configured). Log failover transitions and connection-restoration events to the site event log via ISiteEventLogger, which is passed as an optional parameter through the actor hierarchy for backwards compatibility.
This commit is contained in:
@@ -10,4 +10,5 @@ public record DataConnectionHealthReport(
|
||||
ConnectionHealth Status,
|
||||
int TotalSubscribedTags,
|
||||
int ResolvedTags,
|
||||
string ActiveEndpoint,
|
||||
DateTimeOffset Timestamp);
|
||||
|
||||
@@ -4,6 +4,7 @@ using ScadaLink.Commons.Interfaces.Protocol;
|
||||
using ScadaLink.Commons.Messages.DataConnection;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using ScadaLink.SiteEventLogging;
|
||||
|
||||
namespace ScadaLink.DataConnectionLayer.Actors;
|
||||
|
||||
@@ -34,6 +35,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
private readonly ISiteHealthCollector _healthCollector;
|
||||
private readonly IDataConnectionFactory _factory;
|
||||
private readonly string _protocolType;
|
||||
private readonly ISiteEventLogger? _siteEventLogger;
|
||||
|
||||
public IStash Stash { get; set; } = null!;
|
||||
public ITimerScheduler Timers { get; set; } = null!;
|
||||
@@ -86,7 +88,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
string protocolType,
|
||||
IDictionary<string, string>? primaryConfig = null,
|
||||
IDictionary<string, string>? backupConfig = null,
|
||||
int failoverRetryCount = 3)
|
||||
int failoverRetryCount = 3,
|
||||
ISiteEventLogger? siteEventLogger = null)
|
||||
{
|
||||
_connectionName = connectionName;
|
||||
_adapter = adapter;
|
||||
@@ -97,6 +100,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
_primaryConfig = primaryConfig ?? new Dictionary<string, string>();
|
||||
_backupConfig = backupConfig;
|
||||
_failoverRetryCount = failoverRetryCount;
|
||||
_siteEventLogger = siteEventLogger;
|
||||
_connectionDetails = _primaryConfig;
|
||||
}
|
||||
|
||||
@@ -301,6 +305,14 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
_log.Info("[{0}] Reconnected successfully on {1} endpoint", _connectionName, _activeEndpoint);
|
||||
_consecutiveFailures = 0;
|
||||
|
||||
// Log restoration event to site event log
|
||||
if (_siteEventLogger != null)
|
||||
{
|
||||
_ = _siteEventLogger.LogEventAsync(
|
||||
"connection", "Info", null, _connectionName,
|
||||
$"Connection restored on {_activeEndpoint} endpoint", null);
|
||||
}
|
||||
|
||||
// WP-10: Transparent re-subscribe — re-establish all active subscriptions
|
||||
ReSubscribeAll();
|
||||
|
||||
@@ -336,6 +348,15 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
|
||||
_log.Warning("[{0}] Failing over from {1} to {2}",
|
||||
_connectionName, previousEndpoint, _activeEndpoint);
|
||||
|
||||
// Log failover event to site event log
|
||||
if (_siteEventLogger != null)
|
||||
{
|
||||
_ = _siteEventLogger.LogEventAsync(
|
||||
"connection", "Warning", null, _connectionName,
|
||||
$"Failover from {previousEndpoint} to {_activeEndpoint}",
|
||||
$"After {_failoverRetryCount} consecutive failures");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -570,8 +591,11 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
|
||||
// Replies to the current Sender with a DataConnectionHealthReport built from this
// actor's connection name, the adapter's status, the subscribed/resolved tag counters,
// an endpoint label, and the current UTC time.
private void ReplyWithHealthReport()
|
||||
{
|
||||
var status = _adapter.Status;
|
||||
// When no backup configuration was supplied, report the fixed label
// "Primary (no backup)"; otherwise report the string form of _activeEndpoint.
var endpointLabel = _backupConfig == null
|
||||
? "Primary (no backup)"
|
||||
: _activeEndpoint.ToString();
|
||||
Sender.Tell(new DataConnectionHealthReport(
|
||||
// NOTE(review): the next two argument lines appear to be the pre-change and
// post-change versions of the same line from the diff hunk; only the second one
// passes endpointLabel — confirm against the actual file.
_connectionName, status, _totalSubscribed, _resolvedTags, DateTimeOffset.UtcNow));
|
||||
_connectionName, status, _totalSubscribed, _resolvedTags, endpointLabel, DateTimeOffset.UtcNow));
|
||||
}
|
||||
|
||||
// ── Internal message handlers for piped async results ──
|
||||
|
||||
@@ -3,6 +3,7 @@ using Akka.Event;
|
||||
using ScadaLink.Commons.Interfaces.Protocol;
|
||||
using ScadaLink.Commons.Messages.DataConnection;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using ScadaLink.SiteEventLogging;
|
||||
|
||||
namespace ScadaLink.DataConnectionLayer.Actors;
|
||||
|
||||
@@ -17,16 +18,19 @@ public class DataConnectionManagerActor : ReceiveActor
|
||||
private readonly IDataConnectionFactory _factory;
|
||||
private readonly DataConnectionOptions _options;
|
||||
private readonly ISiteHealthCollector _healthCollector;
|
||||
private readonly ISiteEventLogger? _siteEventLogger;
|
||||
private readonly Dictionary<string, IActorRef> _connectionActors = new();
|
||||
|
||||
public DataConnectionManagerActor(
|
||||
IDataConnectionFactory factory,
|
||||
DataConnectionOptions options,
|
||||
ISiteHealthCollector healthCollector)
|
||||
ISiteHealthCollector healthCollector,
|
||||
ISiteEventLogger? siteEventLogger = null)
|
||||
{
|
||||
_factory = factory;
|
||||
_options = options;
|
||||
_healthCollector = healthCollector;
|
||||
_siteEventLogger = siteEventLogger;
|
||||
|
||||
Receive<CreateConnectionCommand>(HandleCreateConnection);
|
||||
Receive<SubscribeTagsRequest>(HandleRoute);
|
||||
@@ -52,7 +56,8 @@ public class DataConnectionManagerActor : ReceiveActor
|
||||
_factory, command.ProtocolType,
|
||||
command.PrimaryConnectionDetails,
|
||||
command.BackupConnectionDetails,
|
||||
command.FailoverRetryCount));
|
||||
command.FailoverRetryCount,
|
||||
_siteEventLogger));
|
||||
|
||||
// Sanitize name for Akka actor path (replace spaces and invalid chars)
|
||||
var actorName = new string(command.ConnectionName
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.SiteEventLogging/ScadaLink.SiteEventLogging.csproj" />
|
||||
<ProjectReference Include="../../lmxproxy/src/ZB.MOM.WW.LmxProxy.Client/ZB.MOM.WW.LmxProxy.Client.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
@@ -231,9 +231,10 @@ akka {{
|
||||
if (dclFactory != null)
|
||||
{
|
||||
var healthCollector = _serviceProvider.GetRequiredService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
|
||||
var siteEventLogger = _serviceProvider.GetService<ScadaLink.SiteEventLogging.ISiteEventLogger>();
|
||||
dclManager = _actorSystem!.ActorOf(
|
||||
Props.Create(() => new ScadaLink.DataConnectionLayer.Actors.DataConnectionManagerActor(
|
||||
dclFactory, dclOptions, healthCollector)),
|
||||
dclFactory, dclOptions, healthCollector, siteEventLogger)),
|
||||
"dcl-manager");
|
||||
_logger.LogInformation("Data Connection Layer manager actor created");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user