feat(dcl): add active endpoint to health reports and log failover events

Add ActiveEndpoint field to DataConnectionHealthReport showing which
endpoint is active (Primary, Backup, or Primary with no backup configured).
Log failover transitions and connection restoration events to the site
event log via ISiteEventLogger, which is passed as an optional constructor
parameter through the actor hierarchy for backwards compatibility.
This commit is contained in:
Joseph Doherty
2026-03-22 08:34:05 -04:00
parent da290fa4f8
commit 801c0c1df2
5 changed files with 37 additions and 5 deletions

View File

@@ -4,6 +4,7 @@ using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.HealthMonitoring;
using ScadaLink.SiteEventLogging;
namespace ScadaLink.DataConnectionLayer.Actors;
@@ -34,6 +35,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
private readonly ISiteHealthCollector _healthCollector;
private readonly IDataConnectionFactory _factory;
private readonly string _protocolType;
private readonly ISiteEventLogger? _siteEventLogger;
public IStash Stash { get; set; } = null!;
public ITimerScheduler Timers { get; set; } = null!;
@@ -86,7 +88,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
string protocolType,
IDictionary<string, string>? primaryConfig = null,
IDictionary<string, string>? backupConfig = null,
int failoverRetryCount = 3)
int failoverRetryCount = 3,
ISiteEventLogger? siteEventLogger = null)
{
_connectionName = connectionName;
_adapter = adapter;
@@ -97,6 +100,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_primaryConfig = primaryConfig ?? new Dictionary<string, string>();
_backupConfig = backupConfig;
_failoverRetryCount = failoverRetryCount;
_siteEventLogger = siteEventLogger;
_connectionDetails = _primaryConfig;
}
@@ -301,6 +305,14 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Info("[{0}] Reconnected successfully on {1} endpoint", _connectionName, _activeEndpoint);
_consecutiveFailures = 0;
// Log restoration event to site event log
if (_siteEventLogger != null)
{
_ = _siteEventLogger.LogEventAsync(
"connection", "Info", null, _connectionName,
$"Connection restored on {_activeEndpoint} endpoint", null);
}
// WP-10: Transparent re-subscribe — re-establish all active subscriptions
ReSubscribeAll();
@@ -336,6 +348,15 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Warning("[{0}] Failing over from {1} to {2}",
_connectionName, previousEndpoint, _activeEndpoint);
// Log failover event to site event log
if (_siteEventLogger != null)
{
_ = _siteEventLogger.LogEventAsync(
"connection", "Warning", null, _connectionName,
$"Failover from {previousEndpoint} to {_activeEndpoint}",
$"After {_failoverRetryCount} consecutive failures");
}
}
else
{
@@ -570,8 +591,11 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
/// <summary>
/// Replies to the current sender with a <c>DataConnectionHealthReport</c> built from
/// the connection name, the adapter's current status, the subscription counters,
/// a label for the active endpoint, and the current UTC timestamp.
/// </summary>
private void ReplyWithHealthReport()
{
// Read the adapter status once so the report uses a single value.
var status = _adapter.Status;
// With no backup configuration the label is the fixed text "Primary (no backup)";
// otherwise it is the string form of the currently active endpoint.
var endpointLabel = _backupConfig == null
? "Primary (no backup)"
: _activeEndpoint.ToString();
Sender.Tell(new DataConnectionHealthReport(
// NOTE(review): this listing looks like a diff view with the +/- markers stripped —
// the first argument line below appears to be the removed pre-change call and the
// second (which adds endpointLabel) its replacement. Confirm against the actual file.
_connectionName, status, _totalSubscribed, _resolvedTags, DateTimeOffset.UtcNow));
_connectionName, status, _totalSubscribed, _resolvedTags, endpointLabel, DateTimeOffset.UtcNow));
}
// ── Internal message handlers for piped async results ──