From 801c0c1df2738c820e10a25d3456e43d5e0e05e1 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 22 Mar 2026 08:34:05 -0400 Subject: [PATCH] feat(dcl): add active endpoint to health reports and log failover events Add ActiveEndpoint field to DataConnectionHealthReport showing which endpoint is active (Primary, Backup, or Primary with no backup configured). Log failover transitions and connection restoration events to the site event log via ISiteEventLogger, passed as an optional parameter through the actor hierarchy for backwards compatibility. --- .../DataConnectionHealthReport.cs | 1 + .../Actors/DataConnectionActor.cs | 28 +++++++++++++++++-- .../Actors/DataConnectionManagerActor.cs | 9 ++++-- .../ScadaLink.DataConnectionLayer.csproj | 1 + .../Actors/AkkaHostedService.cs | 3 +- 5 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/ScadaLink.Commons/Messages/DataConnection/DataConnectionHealthReport.cs b/src/ScadaLink.Commons/Messages/DataConnection/DataConnectionHealthReport.cs index 4c4fc7d..414e0eb 100644 --- a/src/ScadaLink.Commons/Messages/DataConnection/DataConnectionHealthReport.cs +++ b/src/ScadaLink.Commons/Messages/DataConnection/DataConnectionHealthReport.cs @@ -10,4 +10,5 @@ public record DataConnectionHealthReport( ConnectionHealth Status, int TotalSubscribedTags, int ResolvedTags, + string ActiveEndpoint, DateTimeOffset Timestamp); diff --git a/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionActor.cs b/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionActor.cs index e15975c..9f1df75 100644 --- a/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionActor.cs +++ b/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionActor.cs @@ -4,6 +4,7 @@ using ScadaLink.Commons.Interfaces.Protocol; using ScadaLink.Commons.Messages.DataConnection; using ScadaLink.Commons.Types.Enums; using ScadaLink.HealthMonitoring; +using ScadaLink.SiteEventLogging; namespace ScadaLink.DataConnectionLayer.Actors; @@ -34,6 +35,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers private readonly ISiteHealthCollector _healthCollector; private readonly IDataConnectionFactory _factory; private readonly string _protocolType; + private readonly ISiteEventLogger? _siteEventLogger; public IStash Stash { get; set; } = null!; public ITimerScheduler Timers { get; set; } = null!; @@ -86,7 +88,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers string protocolType, IDictionary? primaryConfig = null, IDictionary? backupConfig = null, - int failoverRetryCount = 3) + int failoverRetryCount = 3, + ISiteEventLogger? siteEventLogger = null) { _connectionName = connectionName; _adapter = adapter; @@ -97,6 +100,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers _primaryConfig = primaryConfig ?? new Dictionary(); _backupConfig = backupConfig; _failoverRetryCount = failoverRetryCount; + _siteEventLogger = siteEventLogger; _connectionDetails = _primaryConfig; } @@ -301,6 +305,14 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers _log.Info("[{0}] Reconnected successfully on {1} endpoint", _connectionName, _activeEndpoint); _consecutiveFailures = 0; + // Log restoration event to site event log + if (_siteEventLogger != null) + { + _ = _siteEventLogger.LogEventAsync( + "connection", "Info", null, _connectionName, + $"Connection restored on {_activeEndpoint} endpoint", null); + } + // WP-10: Transparent re-subscribe — re-establish all active subscriptions ReSubscribeAll(); @@ -336,6 +348,15 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers _log.Warning("[{0}] Failing over from {1} to {2}", _connectionName, previousEndpoint, _activeEndpoint); + + // Log failover event to site event log + if (_siteEventLogger != null) + { + _ = _siteEventLogger.LogEventAsync( + "connection", "Warning", null, _connectionName, + $"Failover from {previousEndpoint} to {_activeEndpoint}", + $"After {_failoverRetryCount} consecutive failures"); + } } else { @@ -570,8 +591,11 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers private void ReplyWithHealthReport() { var status = _adapter.Status; + var endpointLabel = _backupConfig == null + ? "Primary (no backup)" + : _activeEndpoint.ToString(); Sender.Tell(new DataConnectionHealthReport( - _connectionName, status, _totalSubscribed, _resolvedTags, DateTimeOffset.UtcNow)); + _connectionName, status, _totalSubscribed, _resolvedTags, endpointLabel, DateTimeOffset.UtcNow)); } // ── Internal message handlers for piped async results ── diff --git a/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionManagerActor.cs b/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionManagerActor.cs index f5b8349..505f028 100644 --- a/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionManagerActor.cs +++ b/src/ScadaLink.DataConnectionLayer/Actors/DataConnectionManagerActor.cs @@ -3,6 +3,7 @@ using Akka.Event; using ScadaLink.Commons.Interfaces.Protocol; using ScadaLink.Commons.Messages.DataConnection; using ScadaLink.HealthMonitoring; +using ScadaLink.SiteEventLogging; namespace ScadaLink.DataConnectionLayer.Actors; @@ -17,16 +18,19 @@ public class DataConnectionManagerActor : ReceiveActor private readonly IDataConnectionFactory _factory; private readonly DataConnectionOptions _options; private readonly ISiteHealthCollector _healthCollector; + private readonly ISiteEventLogger? _siteEventLogger; private readonly Dictionary _connectionActors = new(); public DataConnectionManagerActor( IDataConnectionFactory factory, DataConnectionOptions options, - ISiteHealthCollector healthCollector) + ISiteHealthCollector healthCollector, + ISiteEventLogger? siteEventLogger = null) { _factory = factory; _options = options; _healthCollector = healthCollector; + _siteEventLogger = siteEventLogger; Receive(HandleCreateConnection); Receive(HandleRoute); @@ -52,7 +56,8 @@ public class DataConnectionManagerActor : ReceiveActor _factory, command.ProtocolType, command.PrimaryConnectionDetails, command.BackupConnectionDetails, - command.FailoverRetryCount)); + command.FailoverRetryCount, + _siteEventLogger)); // Sanitize name for Akka actor path (replace spaces and invalid chars) var actorName = new string(command.ConnectionName diff --git a/src/ScadaLink.DataConnectionLayer/ScadaLink.DataConnectionLayer.csproj b/src/ScadaLink.DataConnectionLayer/ScadaLink.DataConnectionLayer.csproj index a29649d..f5463c7 100644 --- a/src/ScadaLink.DataConnectionLayer/ScadaLink.DataConnectionLayer.csproj +++ b/src/ScadaLink.DataConnectionLayer/ScadaLink.DataConnectionLayer.csproj @@ -21,6 +21,7 @@ + diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index 04e2175..a1c805f 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -231,9 +231,10 @@ akka {{ if (dclFactory != null) { var healthCollector = _serviceProvider.GetRequiredService(); + var siteEventLogger = _serviceProvider.GetService(); dclManager = _actorSystem!.ActorOf( Props.Create(() => new ScadaLink.DataConnectionLayer.Actors.DataConnectionManagerActor( - dclFactory, dclOptions, healthCollector)), + dclFactory, dclOptions, healthCollector, siteEventLogger)), "dcl-manager"); _logger.LogInformation("Data Connection Layer manager actor created"); }