feat(dcl): add active endpoint to health reports and log failover events

Add an ActiveEndpoint field to DataConnectionHealthReport showing which
endpoint is active: "Primary (no backup)" when no backup is configured,
otherwise the name of the currently active endpoint (Primary or Backup).
Log failover transitions and connection-restoration events to the site
event log via ISiteEventLogger, which is passed as an optional parameter
(default null) through the actor hierarchy for backward compatibility.
This commit is contained in:
Joseph Doherty
2026-03-22 08:34:05 -04:00
parent da290fa4f8
commit 801c0c1df2
5 changed files with 37 additions and 5 deletions

View File

@@ -10,4 +10,5 @@ public record DataConnectionHealthReport(
ConnectionHealth Status, ConnectionHealth Status,
int TotalSubscribedTags, int TotalSubscribedTags,
int ResolvedTags, int ResolvedTags,
string ActiveEndpoint,
DateTimeOffset Timestamp); DateTimeOffset Timestamp);

View File

@@ -4,6 +4,7 @@ using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection; using ScadaLink.Commons.Messages.DataConnection;
using ScadaLink.Commons.Types.Enums; using ScadaLink.Commons.Types.Enums;
using ScadaLink.HealthMonitoring; using ScadaLink.HealthMonitoring;
using ScadaLink.SiteEventLogging;
namespace ScadaLink.DataConnectionLayer.Actors; namespace ScadaLink.DataConnectionLayer.Actors;
@@ -34,6 +35,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
private readonly ISiteHealthCollector _healthCollector; private readonly ISiteHealthCollector _healthCollector;
private readonly IDataConnectionFactory _factory; private readonly IDataConnectionFactory _factory;
private readonly string _protocolType; private readonly string _protocolType;
private readonly ISiteEventLogger? _siteEventLogger;
public IStash Stash { get; set; } = null!; public IStash Stash { get; set; } = null!;
public ITimerScheduler Timers { get; set; } = null!; public ITimerScheduler Timers { get; set; } = null!;
@@ -86,7 +88,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
string protocolType, string protocolType,
IDictionary<string, string>? primaryConfig = null, IDictionary<string, string>? primaryConfig = null,
IDictionary<string, string>? backupConfig = null, IDictionary<string, string>? backupConfig = null,
int failoverRetryCount = 3) int failoverRetryCount = 3,
ISiteEventLogger? siteEventLogger = null)
{ {
_connectionName = connectionName; _connectionName = connectionName;
_adapter = adapter; _adapter = adapter;
@@ -97,6 +100,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_primaryConfig = primaryConfig ?? new Dictionary<string, string>(); _primaryConfig = primaryConfig ?? new Dictionary<string, string>();
_backupConfig = backupConfig; _backupConfig = backupConfig;
_failoverRetryCount = failoverRetryCount; _failoverRetryCount = failoverRetryCount;
_siteEventLogger = siteEventLogger;
_connectionDetails = _primaryConfig; _connectionDetails = _primaryConfig;
} }
@@ -301,6 +305,14 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Info("[{0}] Reconnected successfully on {1} endpoint", _connectionName, _activeEndpoint); _log.Info("[{0}] Reconnected successfully on {1} endpoint", _connectionName, _activeEndpoint);
_consecutiveFailures = 0; _consecutiveFailures = 0;
// Log restoration event to site event log
if (_siteEventLogger != null)
{
_ = _siteEventLogger.LogEventAsync(
"connection", "Info", null, _connectionName,
$"Connection restored on {_activeEndpoint} endpoint", null);
}
// WP-10: Transparent re-subscribe — re-establish all active subscriptions // WP-10: Transparent re-subscribe — re-establish all active subscriptions
ReSubscribeAll(); ReSubscribeAll();
@@ -336,6 +348,15 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Warning("[{0}] Failing over from {1} to {2}", _log.Warning("[{0}] Failing over from {1} to {2}",
_connectionName, previousEndpoint, _activeEndpoint); _connectionName, previousEndpoint, _activeEndpoint);
// Log failover event to site event log
if (_siteEventLogger != null)
{
_ = _siteEventLogger.LogEventAsync(
"connection", "Warning", null, _connectionName,
$"Failover from {previousEndpoint} to {_activeEndpoint}",
$"After {_failoverRetryCount} consecutive failures");
}
} }
else else
{ {
@@ -570,8 +591,11 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
private void ReplyWithHealthReport() private void ReplyWithHealthReport()
{ {
var status = _adapter.Status; var status = _adapter.Status;
var endpointLabel = _backupConfig == null
? "Primary (no backup)"
: _activeEndpoint.ToString();
Sender.Tell(new DataConnectionHealthReport( Sender.Tell(new DataConnectionHealthReport(
_connectionName, status, _totalSubscribed, _resolvedTags, DateTimeOffset.UtcNow)); _connectionName, status, _totalSubscribed, _resolvedTags, endpointLabel, DateTimeOffset.UtcNow));
} }
// ── Internal message handlers for piped async results ── // ── Internal message handlers for piped async results ──

View File

@@ -3,6 +3,7 @@ using Akka.Event;
using ScadaLink.Commons.Interfaces.Protocol; using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection; using ScadaLink.Commons.Messages.DataConnection;
using ScadaLink.HealthMonitoring; using ScadaLink.HealthMonitoring;
using ScadaLink.SiteEventLogging;
namespace ScadaLink.DataConnectionLayer.Actors; namespace ScadaLink.DataConnectionLayer.Actors;
@@ -17,16 +18,19 @@ public class DataConnectionManagerActor : ReceiveActor
private readonly IDataConnectionFactory _factory; private readonly IDataConnectionFactory _factory;
private readonly DataConnectionOptions _options; private readonly DataConnectionOptions _options;
private readonly ISiteHealthCollector _healthCollector; private readonly ISiteHealthCollector _healthCollector;
private readonly ISiteEventLogger? _siteEventLogger;
private readonly Dictionary<string, IActorRef> _connectionActors = new(); private readonly Dictionary<string, IActorRef> _connectionActors = new();
public DataConnectionManagerActor( public DataConnectionManagerActor(
IDataConnectionFactory factory, IDataConnectionFactory factory,
DataConnectionOptions options, DataConnectionOptions options,
ISiteHealthCollector healthCollector) ISiteHealthCollector healthCollector,
ISiteEventLogger? siteEventLogger = null)
{ {
_factory = factory; _factory = factory;
_options = options; _options = options;
_healthCollector = healthCollector; _healthCollector = healthCollector;
_siteEventLogger = siteEventLogger;
Receive<CreateConnectionCommand>(HandleCreateConnection); Receive<CreateConnectionCommand>(HandleCreateConnection);
Receive<SubscribeTagsRequest>(HandleRoute); Receive<SubscribeTagsRequest>(HandleRoute);
@@ -52,7 +56,8 @@ public class DataConnectionManagerActor : ReceiveActor
_factory, command.ProtocolType, _factory, command.ProtocolType,
command.PrimaryConnectionDetails, command.PrimaryConnectionDetails,
command.BackupConnectionDetails, command.BackupConnectionDetails,
command.FailoverRetryCount)); command.FailoverRetryCount,
_siteEventLogger));
// Sanitize name for Akka actor path (replace spaces and invalid chars) // Sanitize name for Akka actor path (replace spaces and invalid chars)
var actorName = new string(command.ConnectionName var actorName = new string(command.ConnectionName

View File

@@ -21,6 +21,7 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" /> <ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" /> <ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
<ProjectReference Include="../ScadaLink.SiteEventLogging/ScadaLink.SiteEventLogging.csproj" />
<ProjectReference Include="../../lmxproxy/src/ZB.MOM.WW.LmxProxy.Client/ZB.MOM.WW.LmxProxy.Client.csproj" /> <ProjectReference Include="../../lmxproxy/src/ZB.MOM.WW.LmxProxy.Client/ZB.MOM.WW.LmxProxy.Client.csproj" />
</ItemGroup> </ItemGroup>

View File

@@ -231,9 +231,10 @@ akka {{
if (dclFactory != null) if (dclFactory != null)
{ {
var healthCollector = _serviceProvider.GetRequiredService<ScadaLink.HealthMonitoring.ISiteHealthCollector>(); var healthCollector = _serviceProvider.GetRequiredService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
var siteEventLogger = _serviceProvider.GetService<ScadaLink.SiteEventLogging.ISiteEventLogger>();
dclManager = _actorSystem!.ActorOf( dclManager = _actorSystem!.ActorOf(
Props.Create(() => new ScadaLink.DataConnectionLayer.Actors.DataConnectionManagerActor( Props.Create(() => new ScadaLink.DataConnectionLayer.Actors.DataConnectionManagerActor(
dclFactory, dclOptions, healthCollector)), dclFactory, dclOptions, healthCollector, siteEventLogger)),
"dcl-manager"); "dcl-manager");
_logger.LogInformation("Data Connection Layer manager actor created"); _logger.LogInformation("Data Connection Layer manager actor created");
} }