feat(dcl): add active endpoint to health reports and log failover events

Add an ActiveEndpoint field to DataConnectionHealthReport showing which
endpoint is active: Primary or Backup, or "Primary (no backup)" when no
backup endpoint is configured.
Log failover transitions and connection-restoration events to the site
event log via ISiteEventLogger, which is passed as an optional parameter
through the actor hierarchy for backwards compatibility.
This commit is contained in:
Joseph Doherty
2026-03-22 08:34:05 -04:00
parent da290fa4f8
commit 801c0c1df2
5 changed files with 37 additions and 5 deletions

View File

@@ -10,4 +10,5 @@ public record DataConnectionHealthReport(
ConnectionHealth Status,
int TotalSubscribedTags,
int ResolvedTags,
string ActiveEndpoint,
DateTimeOffset Timestamp);

View File

@@ -4,6 +4,7 @@ using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.HealthMonitoring;
using ScadaLink.SiteEventLogging;
namespace ScadaLink.DataConnectionLayer.Actors;
@@ -34,6 +35,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
private readonly ISiteHealthCollector _healthCollector;
private readonly IDataConnectionFactory _factory;
private readonly string _protocolType;
private readonly ISiteEventLogger? _siteEventLogger;
public IStash Stash { get; set; } = null!;
public ITimerScheduler Timers { get; set; } = null!;
@@ -86,7 +88,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
string protocolType,
IDictionary<string, string>? primaryConfig = null,
IDictionary<string, string>? backupConfig = null,
int failoverRetryCount = 3)
int failoverRetryCount = 3,
ISiteEventLogger? siteEventLogger = null)
{
_connectionName = connectionName;
_adapter = adapter;
@@ -97,6 +100,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_primaryConfig = primaryConfig ?? new Dictionary<string, string>();
_backupConfig = backupConfig;
_failoverRetryCount = failoverRetryCount;
_siteEventLogger = siteEventLogger;
_connectionDetails = _primaryConfig;
}
@@ -301,6 +305,14 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Info("[{0}] Reconnected successfully on {1} endpoint", _connectionName, _activeEndpoint);
_consecutiveFailures = 0;
// Log restoration event to site event log
if (_siteEventLogger != null)
{
_ = _siteEventLogger.LogEventAsync(
"connection", "Info", null, _connectionName,
$"Connection restored on {_activeEndpoint} endpoint", null);
}
// WP-10: Transparent re-subscribe — re-establish all active subscriptions
ReSubscribeAll();
@@ -336,6 +348,15 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Warning("[{0}] Failing over from {1} to {2}",
_connectionName, previousEndpoint, _activeEndpoint);
// Log failover event to site event log
if (_siteEventLogger != null)
{
_ = _siteEventLogger.LogEventAsync(
"connection", "Warning", null, _connectionName,
$"Failover from {previousEndpoint} to {_activeEndpoint}",
$"After {_failoverRetryCount} consecutive failures");
}
}
else
{
@@ -570,8 +591,11 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
/// <summary>
/// Replies to the current <c>Sender</c> with a <c>DataConnectionHealthReport</c>
/// built from this connection's name, the adapter's status, the subscribed and
/// resolved tag counts, an active-endpoint label, and the current UTC time.
/// </summary>
private void ReplyWithHealthReport()
{
var status = _adapter.Status;
// When no backup config exists, report the fixed label "Primary (no backup)";
// otherwise report the string form of the currently active endpoint.
var endpointLabel = _backupConfig == null
? "Primary (no backup)"
: _activeEndpoint.ToString();
Sender.Tell(new DataConnectionHealthReport(
// NOTE(review): this is a diff view without +/- markers — the first argument
// line below appears to be the pre-change version (no endpoint label) and the
// second the post-change version; confirm against the actual file.
_connectionName, status, _totalSubscribed, _resolvedTags, DateTimeOffset.UtcNow));
_connectionName, status, _totalSubscribed, _resolvedTags, endpointLabel, DateTimeOffset.UtcNow));
}
// ── Internal message handlers for piped async results ──

View File

@@ -3,6 +3,7 @@ using Akka.Event;
using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection;
using ScadaLink.HealthMonitoring;
using ScadaLink.SiteEventLogging;
namespace ScadaLink.DataConnectionLayer.Actors;
@@ -17,16 +18,19 @@ public class DataConnectionManagerActor : ReceiveActor
private readonly IDataConnectionFactory _factory;
private readonly DataConnectionOptions _options;
private readonly ISiteHealthCollector _healthCollector;
private readonly ISiteEventLogger? _siteEventLogger;
private readonly Dictionary<string, IActorRef> _connectionActors = new();
public DataConnectionManagerActor(
IDataConnectionFactory factory,
DataConnectionOptions options,
ISiteHealthCollector healthCollector)
ISiteHealthCollector healthCollector,
ISiteEventLogger? siteEventLogger = null)
{
_factory = factory;
_options = options;
_healthCollector = healthCollector;
_siteEventLogger = siteEventLogger;
Receive<CreateConnectionCommand>(HandleCreateConnection);
Receive<SubscribeTagsRequest>(HandleRoute);
@@ -52,7 +56,8 @@ public class DataConnectionManagerActor : ReceiveActor
_factory, command.ProtocolType,
command.PrimaryConnectionDetails,
command.BackupConnectionDetails,
command.FailoverRetryCount));
command.FailoverRetryCount,
_siteEventLogger));
// Sanitize name for Akka actor path (replace spaces and invalid chars)
var actorName = new string(command.ConnectionName

View File

@@ -21,6 +21,7 @@
<ItemGroup>
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
<ProjectReference Include="../ScadaLink.SiteEventLogging/ScadaLink.SiteEventLogging.csproj" />
<ProjectReference Include="../../lmxproxy/src/ZB.MOM.WW.LmxProxy.Client/ZB.MOM.WW.LmxProxy.Client.csproj" />
</ItemGroup>

View File

@@ -231,9 +231,10 @@ akka {{
if (dclFactory != null)
{
var healthCollector = _serviceProvider.GetRequiredService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
var siteEventLogger = _serviceProvider.GetService<ScadaLink.SiteEventLogging.ISiteEventLogger>();
dclManager = _actorSystem!.ActorOf(
Props.Create(() => new ScadaLink.DataConnectionLayer.Actors.DataConnectionManagerActor(
dclFactory, dclOptions, healthCollector)),
dclFactory, dclOptions, healthCollector, siteEventLogger)),
"dcl-manager");
_logger.LogInformation("Data Connection Layer manager actor created");
}