Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging

Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
This commit is contained in:
Joseph Doherty
2026-03-16 20:57:25 -04:00
parent a3bf0c43f3
commit 389f5a0378
97 changed files with 8308 additions and 127 deletions

View File

@@ -0,0 +1,476 @@
using Akka.Actor;
using Akka.Event;
using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.DataConnectionLayer.Actors;
/// <summary>
/// WP-6: Connection actor using Akka.NET Become/Stash pattern for lifecycle state machine.
///
/// States:
/// - Connecting: stash subscribe/unsubscribe/write requests; attempts connection
/// - Connected: unstash and process all requests
/// - Reconnecting: push bad quality for all subscribed tags, stash new requests,
///   fixed-interval reconnect
///
/// WP-9: Auto-reconnect with bad quality on disconnect.
/// WP-10: Transparent re-subscribe after reconnection.
/// WP-11: Write-back support (synchronous failure to caller, no S&amp;F).
/// WP-12: Tag path resolution with retry.
/// WP-13: Health reporting (connection status + tag resolution counts).
/// WP-14: Subscription lifecycle (register on create, cleanup on stop).
///
/// Threading: every piece of mutable state in this class is read and written only from
/// the message handlers. Asynchronous adapter calls never touch that state directly; they
/// report their outcome back to the actor as internal messages
/// (<see cref="TagResolutionSucceeded"/>, <see cref="TagResolutionFailed"/>,
/// <see cref="TagValueReceived"/>, <see cref="ConnectResult"/>).
/// </summary>
public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
{
    private const string ReconnectTimerKey = "reconnect";
    private const string TagResolutionRetryTimerKey = "tag-resolution-retry";

    private readonly ILoggingAdapter _log = Context.GetLogger();
    private readonly string _connectionName;
    private readonly IDataConnection _adapter;
    private readonly DataConnectionOptions _options;

    public IStash Stash { get; set; } = null!;
    public ITimerScheduler Timers { get; set; } = null!;

    /// <summary>
    /// Active subscriptions: instanceUniqueName → set of tag paths.
    /// </summary>
    private readonly Dictionary<string, HashSet<string>> _subscriptionsByInstance = new();

    /// <summary>
    /// Subscription IDs returned by the adapter: tagPath → subscriptionId.
    /// </summary>
    private readonly Dictionary<string, string> _subscriptionIds = new();

    /// <summary>
    /// Tags whose path resolution failed and are awaiting retry.
    /// </summary>
    private readonly HashSet<string> _unresolvedTags = new();

    /// <summary>
    /// Tags with an adapter subscribe call currently in flight. Prevents issuing a second
    /// adapter subscription for the same tag while the first is still outstanding
    /// (e.g. when several stashed subscribe requests are replayed back-to-back).
    /// </summary>
    private readonly HashSet<string> _pendingTags = new();

    /// <summary>
    /// Subscribers: instanceUniqueName → IActorRef (the Instance Actor).
    /// </summary>
    private readonly Dictionary<string, IActorRef> _subscribers = new();

    /// <summary>
    /// Creates the actor for one named connection.
    /// </summary>
    /// <param name="connectionName">Name used in log output and in messages sent to subscribers.</param>
    /// <param name="adapter">Protocol adapter this actor drives; disposed in <see cref="PostStop"/>.</param>
    /// <param name="options">Supplies the reconnect and tag-resolution retry intervals.</param>
    public DataConnectionActor(
        string connectionName,
        IDataConnection adapter,
        DataConnectionOptions options)
    {
        _connectionName = connectionName;
        _adapter = adapter;
        _options = options;
    }

    protected override void PreStart()
    {
        _log.Info("DataConnectionActor [{0}] starting in Connecting state", _connectionName);
        BecomeConnecting();
    }

    protected override void PostStop()
    {
        _log.Info("DataConnectionActor [{0}] stopping — disposing adapter", _connectionName);
        // Clean up the adapter asynchronously (fire-and-forget: PostStop cannot await).
        _ = _adapter.DisposeAsync().AsTask();
    }

    protected override void OnReceive(object message)
    {
        // Default handler — should not be reached due to Become
        Unhandled(message);
    }

    // ── Connecting State ──

    private void BecomeConnecting()
    {
        _log.Info("[{0}] Entering Connecting state", _connectionName);
        Become(Connecting);
        Self.Tell(new AttemptConnect());
    }

    private void Connecting(object message)
    {
        switch (message)
        {
            case AttemptConnect:
                HandleAttemptConnect();
                break;
            case ConnectResult result:
                HandleConnectResult(result);
                break;
            case SubscribeTagsRequest:
            case WriteTagRequest:
            case UnsubscribeTagsRequest:
                Stash.Stash();
                break;
            case GetHealthReport:
                ReplyWithHealthReport();
                break;
            default:
                Unhandled(message);
                break;
        }
    }

    // ── Connected State ──

    private void BecomeConnected()
    {
        _log.Info("[{0}] Entering Connected state", _connectionName);
        Become(Connected);
        Stash.UnstashAll();
    }

    private void Connected(object message)
    {
        switch (message)
        {
            case SubscribeTagsRequest req:
                HandleSubscribe(req);
                break;
            case UnsubscribeTagsRequest req:
                HandleUnsubscribe(req);
                break;
            case WriteTagRequest req:
                HandleWrite(req);
                break;
            case TagValueReceived value:
                HandleTagValueReceived(value);
                break;
            case TagResolutionSucceeded succeeded:
                HandleTagResolutionSucceeded(succeeded);
                break;
            case TagResolutionFailed failed:
                HandleTagResolutionFailed(failed);
                break;
            case AdapterDisconnected:
                HandleDisconnect();
                break;
            case RetryTagResolution:
                HandleRetryTagResolution();
                break;
            case GetHealthReport:
                ReplyWithHealthReport();
                break;
            default:
                Unhandled(message);
                break;
        }
    }

    // ── Reconnecting State ──

    private void BecomeReconnecting()
    {
        _log.Warning("[{0}] Entering Reconnecting state", _connectionName);
        Become(Reconnecting);

        // WP-9: Push bad quality for all subscribed tags on disconnect
        PushBadQualityForAllTags();

        // Schedule reconnect attempt
        Timers.StartSingleTimer(ReconnectTimerKey, new AttemptConnect(), _options.ReconnectInterval);
    }

    private void Reconnecting(object message)
    {
        switch (message)
        {
            case AttemptConnect:
                HandleAttemptConnect();
                break;
            case ConnectResult result:
                HandleReconnectResult(result);
                break;
            case SubscribeTagsRequest:
            case WriteTagRequest:
                Stash.Stash();
                break;
            case UnsubscribeTagsRequest req:
                // Allow unsubscribe even during reconnect (for cleanup on instance stop)
                HandleUnsubscribe(req);
                break;
            case TagResolutionSucceeded succeeded:
                // Outcome of a subscribe that was in flight when the link dropped; recording it
                // ensures the tag is part of the re-subscribe set once the connection returns.
                HandleTagResolutionSucceeded(succeeded);
                break;
            case TagResolutionFailed failed:
                HandleTagResolutionFailed(failed);
                break;
            case TagValueReceived:
                // Subscribers were already told the connection quality is bad; a value that was
                // queued before the disconnect is dropped rather than delivered after that notice.
                break;
            case RetryTagResolution:
                // Nothing can be resolved while disconnected; the periodic timer fires again
                // once the actor is back in the Connected state.
                break;
            case AdapterDisconnected:
                // Already reconnecting.
                break;
            case GetHealthReport:
                ReplyWithHealthReport();
                break;
            default:
                Unhandled(message);
                break;
        }
    }

    // ── Connection Management ──

    private void HandleAttemptConnect()
    {
        _log.Debug("[{0}] Attempting connection...", _connectionName);
        var self = Self;
        _adapter.ConnectAsync(new Dictionary<string, string>()).ContinueWith(t =>
        {
            if (t.IsCompletedSuccessfully)
                return new ConnectResult(true, null);
            return new ConnectResult(false, t.Exception?.GetBaseException().Message);
        }).PipeTo(self);
    }

    private void HandleConnectResult(ConnectResult result)
    {
        if (result.Success)
        {
            _log.Info("[{0}] Connection established", _connectionName);
            BecomeConnected();
        }
        else
        {
            _log.Warning("[{0}] Connection failed: {1}. Retrying in {2}s",
                _connectionName, result.Error, _options.ReconnectInterval.TotalSeconds);
            Timers.StartSingleTimer(ReconnectTimerKey, new AttemptConnect(), _options.ReconnectInterval);
        }
    }

    private void HandleReconnectResult(ConnectResult result)
    {
        if (result.Success)
        {
            _log.Info("[{0}] Reconnected successfully", _connectionName);
            // WP-10: Transparent re-subscribe — re-establish all active subscriptions
            ReSubscribeAll();
            BecomeConnected();
        }
        else
        {
            _log.Warning("[{0}] Reconnect failed: {1}. Retrying in {2}s",
                _connectionName, result.Error, _options.ReconnectInterval.TotalSeconds);
            Timers.StartSingleTimer(ReconnectTimerKey, new AttemptConnect(), _options.ReconnectInterval);
        }
    }

    private void HandleDisconnect()
    {
        _log.Warning("[{0}] Adapter reported disconnect", _connectionName);
        BecomeReconnecting();
    }

    // ── Subscription Management (WP-14) ──

    /// <summary>
    /// Registers the sender as the subscriber for the instance, records the requested tags,
    /// and starts adapter subscriptions for tags that have none yet. The reply is sent after
    /// every adapter call started by this request has completed, and always reports success;
    /// per-tag failures are tracked as unresolved tags and retried (WP-12).
    /// </summary>
    private void HandleSubscribe(SubscribeTagsRequest request)
    {
        _log.Debug("[{0}] Subscribing {1} tags for instance {2}",
            _connectionName, request.TagPaths.Count, request.InstanceUniqueName);

        _subscribers[request.InstanceUniqueName] = Sender;

        if (!_subscriptionsByInstance.TryGetValue(request.InstanceUniqueName, out var instanceTags))
        {
            instanceTags = new HashSet<string>();
            _subscriptionsByInstance[request.InstanceUniqueName] = instanceTags;
        }

        // Decide on the actor thread which tags need an adapter call. Tags that are already
        // subscribed, already in flight, or waiting for the retry timer are only tracked.
        var tagsToSubscribe = new List<string>();
        foreach (var tagPath in request.TagPaths)
        {
            instanceTags.Add(tagPath);

            if (_subscriptionIds.ContainsKey(tagPath) || _unresolvedTags.Contains(tagPath))
                continue;

            if (_pendingTags.Add(tagPath))
                tagsToSubscribe.Add(tagPath);
        }

        var self = Self;
        var sender = Sender;
        var adapter = _adapter;

        // The background work touches no actor state: each outcome is sent to Self as a
        // message and applied by HandleTagResolutionSucceeded / HandleTagResolutionFailed.
        Task.Run(async () =>
        {
            foreach (var tagPath in tagsToSubscribe)
            {
                try
                {
                    var subId = await adapter.SubscribeAsync(tagPath, (path, value) =>
                    {
                        self.Tell(new TagValueReceived(path, value));
                    });
                    self.Tell(new TagResolutionSucceeded(tagPath, subId));
                }
                catch (Exception ex)
                {
                    // WP-12: Tag path resolution failure — reported to the actor, retried later
                    self.Tell(new TagResolutionFailed(tagPath, ex.Message));
                }
            }

            return new SubscribeTagsResponse(
                request.CorrelationId, request.InstanceUniqueName, true, null, DateTimeOffset.UtcNow);
        }).PipeTo(sender);
    }

    /// <summary>
    /// Removes every tag tracked for the instance. A tag's adapter subscription is released
    /// only when no other instance still references the tag.
    /// </summary>
    private void HandleUnsubscribe(UnsubscribeTagsRequest request)
    {
        _log.Debug("[{0}] Unsubscribing all tags for instance {1}",
            _connectionName, request.InstanceUniqueName);

        if (!_subscriptionsByInstance.Remove(request.InstanceUniqueName, out var tags))
            return;

        _subscribers.Remove(request.InstanceUniqueName);

        // WP-14: Cleanup on Instance Actor stop
        foreach (var tagPath in tags)
        {
            // The instance is already removed, so this checks the remaining instances only.
            if (IsTagReferenced(tagPath))
                continue;

            if (_subscriptionIds.Remove(tagPath, out var subId))
                _ = _adapter.UnsubscribeAsync(subId);

            // A tag that never resolved has no subscription id but must still stop being retried.
            _unresolvedTags.Remove(tagPath);
        }

        CancelRetryTimerIfIdle();
    }

    // ── Write Support (WP-11) ──

    private void HandleWrite(WriteTagRequest request)
    {
        _log.Debug("[{0}] Writing to tag {1}", _connectionName, request.TagPath);
        var sender = Sender;

        // WP-11: Write through DCL to device, failure returned synchronously
        _adapter.WriteAsync(request.TagPath, request.Value).ContinueWith(t =>
        {
            if (t.IsCompletedSuccessfully)
            {
                var result = t.Result;
                return new WriteTagResponse(
                    request.CorrelationId, result.Success, result.ErrorMessage, DateTimeOffset.UtcNow);
            }

            return new WriteTagResponse(
                request.CorrelationId, false, t.Exception?.GetBaseException().Message, DateTimeOffset.UtcNow);
        }).PipeTo(sender);
    }

    // ── Tag Resolution (WP-12) ──

    private void HandleRetryTagResolution()
    {
        if (_unresolvedTags.Count == 0)
        {
            Timers.Cancel(TagResolutionRetryTimerKey);
            return;
        }

        _log.Debug("[{0}] Retrying resolution for {1} unresolved tags", _connectionName, _unresolvedTags.Count);

        foreach (var tagPath in _unresolvedTags)
        {
            // Skip tags whose previous attempt has not reported back yet.
            if (_pendingTags.Add(tagPath))
                SubscribeAndReport(tagPath);
        }
    }

    /// <summary>
    /// Records a successful adapter subscription, or releases it when it is no longer wanted.
    /// </summary>
    private void HandleTagResolutionSucceeded(TagResolutionSucceeded msg)
    {
        _pendingTags.Remove(msg.TagPath);

        if (!IsTagReferenced(msg.TagPath))
        {
            // Every interested instance unsubscribed while the call was in flight.
            _unresolvedTags.Remove(msg.TagPath);
            _ = _adapter.UnsubscribeAsync(msg.SubscriptionId);
            CancelRetryTimerIfIdle();
            return;
        }

        if (_subscriptionIds.TryGetValue(msg.TagPath, out var existingId))
        {
            // A subscription for this tag is already recorded; release the extra one.
            // NOTE(review): assumes the adapter hands out a distinct id per SubscribeAsync
            // call — confirm against the IDataConnection implementations.
            if (!string.Equals(existingId, msg.SubscriptionId, StringComparison.Ordinal))
                _ = _adapter.UnsubscribeAsync(msg.SubscriptionId);
            return;
        }

        _subscriptionIds[msg.TagPath] = msg.SubscriptionId;
        _unresolvedTags.Remove(msg.TagPath);
        CancelRetryTimerIfIdle();
    }

    /// <summary>
    /// Marks a tag as unresolved and makes sure the periodic retry timer is running.
    /// </summary>
    private void HandleTagResolutionFailed(TagResolutionFailed msg)
    {
        _pendingTags.Remove(msg.TagPath);

        // Another attempt already produced a live subscription; nothing to retry.
        if (_subscriptionIds.ContainsKey(msg.TagPath))
            return;

        if (!IsTagReferenced(msg.TagPath))
        {
            _unresolvedTags.Remove(msg.TagPath);
            CancelRetryTimerIfIdle();
            return;
        }

        if (_unresolvedTags.Add(msg.TagPath))
        {
            _log.Warning("[{0}] Tag resolution failed for {1}: {2}",
                _connectionName, msg.TagPath, msg.Error);
        }
        else
        {
            // Repeated failure of a tag that is already being retried: keep the log quiet.
            _log.Debug("[{0}] Tag {1} still unresolved: {2}",
                _connectionName, msg.TagPath, msg.Error);
        }

        // Starting a timer under an existing key replaces it, which would keep pushing the
        // next tick out on every failure — so only start it when it is not already running.
        if (!Timers.IsTimerActive(TagResolutionRetryTimerKey))
        {
            Timers.StartPeriodicTimer(
                TagResolutionRetryTimerKey,
                new RetryTagResolution(),
                _options.TagResolutionRetryInterval,
                _options.TagResolutionRetryInterval);
        }
    }

    /// <summary>
    /// Starts one adapter subscription and pipes the outcome back to this actor as a
    /// <see cref="TagResolutionSucceeded"/> or <see cref="TagResolutionFailed"/> message.
    /// The caller is responsible for adding the tag to the in-flight set first.
    /// </summary>
    private void SubscribeAndReport(string tagPath)
    {
        var self = Self;
        try
        {
            _adapter.SubscribeAsync(tagPath, (path, value) =>
            {
                self.Tell(new TagValueReceived(path, value));
            }).ContinueWith(t => t.IsCompletedSuccessfully
                ? (object)new TagResolutionSucceeded(tagPath, t.Result)
                : new TagResolutionFailed(tagPath, t.Exception?.GetBaseException().Message ?? "Unknown error"))
              .PipeTo(self);
        }
        catch (Exception ex)
        {
            // The adapter threw before returning a task; treat it like any other failure
            // instead of crashing the actor.
            self.Tell(new TagResolutionFailed(tagPath, ex.Message));
        }
    }

    private bool IsTagReferenced(string tagPath) =>
        _subscriptionsByInstance.Values.Any(tags => tags.Contains(tagPath));

    private void CancelRetryTimerIfIdle()
    {
        if (_unresolvedTags.Count == 0)
            Timers.Cancel(TagResolutionRetryTimerKey);
    }

    // ── Bad Quality Push (WP-9) ──

    /// <summary>
    /// Sends one bad-quality notification per subscribed instance (not one per tag).
    /// </summary>
    private void PushBadQualityForAllTags()
    {
        var now = DateTimeOffset.UtcNow;
        foreach (var instanceName in _subscriptionsByInstance.Keys)
        {
            if (!_subscribers.TryGetValue(instanceName, out var subscriber))
                continue;

            subscriber.Tell(new ConnectionQualityChanged(_connectionName, QualityCode.Bad, now));
        }
    }

    // ── Re-subscribe (WP-10) ──

    /// <summary>
    /// Drops the subscription ids of the previous connection and subscribes again. Tags whose
    /// subscribe call was still in flight when the connection was lost are included, since
    /// that call targeted the old connection. Unresolved tags are left to the retry timer.
    /// </summary>
    private void ReSubscribeAll()
    {
        var allTags = _subscriptionIds.Keys.Union(_pendingTags).ToList();
        _log.Info("[{0}] Re-subscribing {1} tags after reconnect", _connectionName, allTags.Count);

        _subscriptionIds.Clear();
        _pendingTags.Clear();

        foreach (var tagPath in allTags)
        {
            _pendingTags.Add(tagPath);
            SubscribeAndReport(tagPath);
        }
    }

    // ── Health Reporting (WP-13) ──

    /// <summary>
    /// Replies with the adapter status plus the number of distinct tags requested by any
    /// instance and the number of those that currently hold an adapter subscription id.
    /// Both counts are derived from the tracking maps so they cannot drift.
    /// </summary>
    private void ReplyWithHealthReport()
    {
        var status = _adapter.Status;

        var distinctTags = new HashSet<string>();
        foreach (var tags in _subscriptionsByInstance.Values)
            distinctTags.UnionWith(tags);

        Sender.Tell(new DataConnectionHealthReport(
            _connectionName, status, distinctTags.Count, _subscriptionIds.Count, DateTimeOffset.UtcNow));
    }

    // ── Internal message handlers for piped async results ──

    private void HandleTagValueReceived(TagValueReceived msg)
    {
        // Fan out to all subscribed instances
        foreach (var (instanceName, tags) in _subscriptionsByInstance)
        {
            if (!tags.Contains(msg.TagPath))
                continue;

            if (_subscribers.TryGetValue(instanceName, out var subscriber))
            {
                subscriber.Tell(new TagValueUpdate(
                    _connectionName, msg.TagPath, msg.Value.Value, msg.Value.Quality, msg.Value.Timestamp));
            }
        }
    }

    // ── Internal messages ──

    internal record AttemptConnect;
    internal record ConnectResult(bool Success, string? Error);
    internal record AdapterDisconnected;
    internal record TagValueReceived(string TagPath, TagValue Value);
    internal record TagResolutionFailed(string TagPath, string Error);
    internal record TagResolutionSucceeded(string TagPath, string SubscriptionId);
    internal record RetryTagResolution;
    public record GetHealthReport;
}

View File

@@ -0,0 +1,142 @@
using Akka.Actor;
using Akka.Event;
using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection;
namespace ScadaLink.DataConnectionLayer.Actors;
/// <summary>
/// WP-34: Protocol extensibility — parent actor that owns one <see cref="DataConnectionActor"/>
/// child per connection name and forwards subscribe, unsubscribe and write requests to the
/// child whose name matches the request's connection name.
/// Adding a new protocol = implement IDataConnection + register with IDataConnectionFactory.
/// </summary>
public class DataConnectionManagerActor : ReceiveActor
{
    private readonly ILoggingAdapter _log = Context.GetLogger();
    private readonly IDataConnectionFactory _factory;
    private readonly DataConnectionOptions _options;

    /// <summary>
    /// Routing table: connection name → child connection actor.
    /// </summary>
    private readonly Dictionary<string, IActorRef> _connectionActors = new();

    /// <summary>
    /// Stores the collaborators and registers one handler per supported message type.
    /// </summary>
    /// <param name="factory">Creates the protocol adapter for each new connection.</param>
    /// <param name="options">Options handed unchanged to every child connection actor.</param>
    public DataConnectionManagerActor(
        IDataConnectionFactory factory,
        DataConnectionOptions options)
    {
        _factory = factory;
        _options = options;

        Receive<CreateConnectionCommand>(HandleCreateConnection);
        Receive<SubscribeTagsRequest>(HandleRoute);
        Receive<UnsubscribeTagsRequest>(HandleRoute);
        Receive<WriteTagRequest>(HandleRouteWrite);
        Receive<RemoveConnectionCommand>(HandleRemoveConnection);
        Receive<GetAllHealthReports>(HandleGetAllHealthReports);
    }

    /// <summary>
    /// Creates a child actor named after the connection, unless one already exists
    /// (in which case a warning is logged and nothing changes).
    /// </summary>
    private void HandleCreateConnection(CreateConnectionCommand command)
    {
        var name = command.ConnectionName;

        if (_connectionActors.ContainsKey(name))
        {
            _log.Warning("Connection {0} already exists", name);
            return;
        }

        // WP-34: Factory creates the correct adapter based on protocol type
        // NOTE(review): the adapter instance is captured by the Props below, so a supervisor
        // restart presumably hands the child the same adapter its PostStop just disposed —
        // confirm how Props evaluates constructor arguments and whether adapters tolerate reuse.
        var adapter = _factory.Create(command.ProtocolType, command.ConnectionDetails);
        var childProps = Props.Create(() => new DataConnectionActor(name, adapter, _options));

        _connectionActors.Add(name, Context.ActorOf(childProps, name));

        _log.Info("Created DataConnectionActor for {0} (protocol={1})",
            name, command.ProtocolType);
    }

    /// <summary>
    /// Forwards a subscribe request to its connection actor, or answers the sender with a
    /// failed response when the connection name is unknown.
    /// </summary>
    private void HandleRoute(SubscribeTagsRequest request)
    {
        if (!_connectionActors.TryGetValue(request.ConnectionName, out var target))
        {
            _log.Warning("No connection actor for {0}", request.ConnectionName);
            var rejection = new SubscribeTagsResponse(
                request.CorrelationId, request.InstanceUniqueName, false,
                $"Unknown connection: {request.ConnectionName}", DateTimeOffset.UtcNow);
            Sender.Tell(rejection);
            return;
        }

        target.Forward(request);
    }

    /// <summary>
    /// Forwards an unsubscribe request to its connection actor; an unknown connection name is
    /// only logged, no reply is sent.
    /// </summary>
    private void HandleRoute(UnsubscribeTagsRequest request)
    {
        if (!_connectionActors.TryGetValue(request.ConnectionName, out var target))
        {
            _log.Warning("No connection actor for {0} during unsubscribe", request.ConnectionName);
            return;
        }

        target.Forward(request);
    }

    /// <summary>
    /// Forwards a write request to its connection actor, or answers the sender with a failed
    /// response when the connection name is unknown.
    /// </summary>
    private void HandleRouteWrite(WriteTagRequest request)
    {
        if (!_connectionActors.TryGetValue(request.ConnectionName, out var target))
        {
            _log.Warning("No connection actor for {0}", request.ConnectionName);
            var rejection = new WriteTagResponse(
                request.CorrelationId, false,
                $"Unknown connection: {request.ConnectionName}", DateTimeOffset.UtcNow);
            Sender.Tell(rejection);
            return;
        }

        target.Forward(request);
    }

    /// <summary>
    /// Stops the named child and drops it from the routing table; unknown names are ignored.
    /// </summary>
    private void HandleRemoveConnection(RemoveConnectionCommand command)
    {
        if (!_connectionActors.TryGetValue(command.ConnectionName, out var child))
            return;

        Context.Stop(child);
        _connectionActors.Remove(command.ConnectionName);
        _log.Info("Removed DataConnectionActor for {0}", command.ConnectionName);
    }

    /// <summary>
    /// Asks every connection actor for its health report. The request is forwarded, so each
    /// child replies to the original sender individually; nothing is aggregated here.
    /// </summary>
    private void HandleGetAllHealthReports(GetAllHealthReports _)
    {
        foreach (var child in _connectionActors.Values)
        {
            var query = new DataConnectionActor.GetHealthReport();
            child.Forward(query);
        }
    }

    /// <summary>
    /// OneForOneStrategy with Restart for connection actors — a failed connection
    /// should restart and attempt reconnection. Allows at most 10 restarts per minute.
    /// </summary>
    protected override SupervisorStrategy SupervisorStrategy()
    {
        Func<Exception, Directive> restartOnAnyFailure = ex =>
        {
            _log.Warning(ex, "DataConnectionActor threw exception, restarting");
            return Directive.Restart;
        };

        return new OneForOneStrategy(
            maxNrOfRetries: 10,
            withinTimeRange: TimeSpan.FromMinutes(1),
            decider: Decider.From(restartOnAnyFailure));
    }
}
/// <summary>
/// Command to create a new data connection actor for a specific protocol.
/// The manager ignores the command (logging a warning) when a connection with the same
/// name already exists.
/// </summary>
/// <param name="ConnectionName">Name of the connection; used as the child actor's name and as the routing key for later requests.</param>
/// <param name="ProtocolType">Protocol identifier passed to the data connection factory to select the adapter.</param>
/// <param name="ConnectionDetails">Connection settings passed to the data connection factory together with the protocol type.</param>
public record CreateConnectionCommand(
string ConnectionName,
string ProtocolType,
IDictionary<string, string> ConnectionDetails);
/// <summary>
/// Command to remove a data connection actor.
/// The manager stops the matching child actor and drops it from its routing table;
/// the command is silently ignored when no connection with that name exists.
/// </summary>
/// <param name="ConnectionName">Name of the connection to remove.</param>
public record RemoveConnectionCommand(string ConnectionName);
/// <summary>
/// Request for health reports from all active connections.
/// The manager forwards a health report request to every connection actor, so the
/// requester receives one reply per connection rather than a single aggregated reply.
/// </summary>
public record GetAllHealthReports;