Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging

Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
This commit is contained in:
Joseph Doherty
2026-03-16 20:57:25 -04:00
parent a3bf0c43f3
commit 389f5a0378
97 changed files with 8308 additions and 127 deletions

View File

@@ -0,0 +1,476 @@
using Akka.Actor;
using Akka.Event;
using ScadaLink.Commons.Interfaces.Protocol;
using ScadaLink.Commons.Messages.DataConnection;
using ScadaLink.Commons.Types.Enums;
namespace ScadaLink.DataConnectionLayer.Actors;
/// <summary>
/// WP-6: Connection actor using Akka.NET Become/Stash pattern for lifecycle state machine.
///
/// States:
/// - Connecting: stash subscribe/write/unsubscribe requests; attempts connection
/// - Connected: unstash and process all requests
/// - Reconnecting: push bad quality to all subscribers, stash new subscribe/write requests,
///   fixed-interval reconnect
///
/// WP-9: Auto-reconnect with bad quality on disconnect.
/// WP-10: Transparent re-subscribe after reconnection.
/// WP-11: Write-back support (synchronous failure to caller, no S&amp;F).
/// WP-12: Tag path resolution with retry.
/// WP-13: Health reporting (connection status + tag resolution counts).
/// WP-14: Subscription lifecycle (register on create, cleanup on stop).
///
/// Threading: every piece of mutable state in this class is touched only while the actor
/// is processing a message. Adapter calls run asynchronously and report back by piping an
/// immutable result message to <c>Self</c>; they never mutate actor state directly.
///
/// Tag bookkeeping invariant: every tracked tag is in exactly one of
/// <c>_subscriptionIds</c> (resolved) or <c>_unresolvedTags</c> (awaiting resolution).
/// Health counts are derived from those two collections rather than kept as counters.
/// </summary>
public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
{
    private const string ReconnectTimerKey = "reconnect";
    private const string TagResolutionRetryTimerKey = "tag-resolution-retry";

    private readonly ILoggingAdapter _log = Context.GetLogger();
    private readonly string _connectionName;
    private readonly IDataConnection _adapter;
    private readonly DataConnectionOptions _options;

    public IStash Stash { get; set; } = null!;
    public ITimerScheduler Timers { get; set; } = null!;

    /// <summary>
    /// Active subscriptions: instanceUniqueName → set of tag paths.
    /// </summary>
    private readonly Dictionary<string, HashSet<string>> _subscriptionsByInstance = new();

    /// <summary>
    /// Subscription IDs returned by the adapter: tagPath → subscriptionId (resolved tags).
    /// </summary>
    private readonly Dictionary<string, string> _subscriptionIds = new();

    /// <summary>
    /// Tracked tags that currently have no adapter subscription (new, failed, or awaiting
    /// re-subscribe after a reconnect).
    /// </summary>
    private readonly HashSet<string> _unresolvedTags = new();

    /// <summary>
    /// Unresolved tags that have an adapter subscribe call in flight; used only to avoid
    /// issuing a second call for the same tag while the first is outstanding.
    /// </summary>
    private readonly HashSet<string> _pendingTags = new();

    /// <summary>
    /// Subscribers: instanceUniqueName → IActorRef (the Instance Actor).
    /// </summary>
    private readonly Dictionary<string, IActorRef> _subscribers = new();

    /// <summary>
    /// Creates the actor for one named data connection.
    /// </summary>
    /// <param name="connectionName">Name used in log lines and outgoing messages.</param>
    /// <param name="adapter">Protocol adapter that performs the actual I/O.</param>
    /// <param name="options">Reconnect and tag-resolution retry intervals.</param>
    public DataConnectionActor(
        string connectionName,
        IDataConnection adapter,
        DataConnectionOptions options)
    {
        _connectionName = connectionName;
        _adapter = adapter;
        _options = options;
    }

    protected override void PreStart()
    {
        _log.Info("DataConnectionActor [{0}] starting in Connecting state", _connectionName);
        BecomeConnecting();
    }

    protected override void PostStop()
    {
        _log.Info("DataConnectionActor [{0}] stopping — disposing adapter", _connectionName);
        // Clean up the adapter asynchronously; PostStop cannot await.
        _ = _adapter.DisposeAsync().AsTask();
    }

    protected override void OnReceive(object message)
    {
        // Default handler — should not be reached due to Become
        Unhandled(message);
    }

    // ── Connecting State ──

    private void BecomeConnecting()
    {
        _log.Info("[{0}] Entering Connecting state", _connectionName);
        Become(Connecting);
        Self.Tell(new AttemptConnect());
    }

    private void Connecting(object message)
    {
        switch (message)
        {
            case AttemptConnect:
                HandleAttemptConnect();
                break;
            case ConnectResult result:
                HandleConnectResult(result);
                break;
            case SubscribeTagsRequest:
            case WriteTagRequest:
            case UnsubscribeTagsRequest:
                // Unsubscribe is stashed too so it stays ordered after any stashed subscribe.
                Stash.Stash();
                break;
            case GetHealthReport:
                ReplyWithHealthReport();
                break;
            default:
                Unhandled(message);
                break;
        }
    }

    // ── Connected State ──

    private void BecomeConnected()
    {
        _log.Info("[{0}] Entering Connected state", _connectionName);
        Become(Connected);
        Stash.UnstashAll();
    }

    private void Connected(object message)
    {
        switch (message)
        {
            case SubscribeTagsRequest req:
                HandleSubscribe(req);
                break;
            case UnsubscribeTagsRequest req:
                HandleUnsubscribe(req);
                break;
            case WriteTagRequest req:
                HandleWrite(req);
                break;
            case TagValueReceived received:
                HandleTagValueReceived(received);
                break;
            case SubscribeBatchCompleted batch:
                HandleSubscribeBatchCompleted(batch);
                break;
            case TagResolutionSucceeded succeeded:
                ApplyResolved(succeeded.TagPath, succeeded.SubscriptionId);
                break;
            case TagResolutionFailed failed:
                ApplyFailed(failed.TagPath, failed.Error);
                break;
            case AdapterDisconnected:
                HandleDisconnect();
                break;
            case RetryTagResolution:
                HandleRetryTagResolution();
                break;
            case GetHealthReport:
                ReplyWithHealthReport();
                break;
            default:
                Unhandled(message);
                break;
        }
    }

    // ── Reconnecting State ──

    private void BecomeReconnecting()
    {
        _log.Warning("[{0}] Entering Reconnecting state", _connectionName);
        Become(Reconnecting);

        // Calls issued on the lost connection are no longer trusted; forget them so the
        // tags are subscribed afresh after reconnect. Retries are pointless while offline.
        _pendingTags.Clear();
        Timers.Cancel(TagResolutionRetryTimerKey);

        // WP-9: Push bad quality for all subscribed tags on disconnect
        PushBadQualityForAllTags();

        ScheduleReconnect();
    }

    private void Reconnecting(object message)
    {
        switch (message)
        {
            case AttemptConnect:
                HandleAttemptConnect();
                break;
            case ConnectResult result:
                HandleReconnectResult(result);
                break;
            case SubscribeTagsRequest:
            case WriteTagRequest:
                Stash.Stash();
                break;
            case UnsubscribeTagsRequest req:
                // Allow unsubscribe even during reconnect (for cleanup on instance stop)
                HandleUnsubscribe(req);
                break;
            case SubscribeBatchCompleted batch:
                // The batch ran against the lost connection: its tags stay unresolved and
                // are re-subscribed after reconnect, but the caller still gets its reply.
                ReplyToSubscribe(batch);
                break;
            case TagValueReceived:
            case TagResolutionSucceeded:
            case TagResolutionFailed:
            case RetryTagResolution:
            case AdapterDisconnected:
                // Late results from the lost connection; everything is re-established
                // by ReSubscribeAll once the connection is back.
                break;
            case GetHealthReport:
                ReplyWithHealthReport();
                break;
            default:
                Unhandled(message);
                break;
        }
    }

    // ── Connection Management ──

    /// <summary>
    /// Starts an adapter connect and pipes the outcome back to this actor as a
    /// <see cref="ConnectResult"/>.
    /// </summary>
    private void HandleAttemptConnect()
    {
        _log.Debug("[{0}] Attempting connection...", _connectionName);
        var self = Self;
        _adapter.ConnectAsync(new Dictionary<string, string>()).ContinueWith(t =>
        {
            if (t.IsCompletedSuccessfully)
                return new ConnectResult(true, null);
            return new ConnectResult(false, t.Exception?.GetBaseException().Message);
        }).PipeTo(self);
    }

    private void HandleConnectResult(ConnectResult result)
    {
        if (result.Success)
        {
            _log.Info("[{0}] Connection established", _connectionName);
            BecomeConnected();
            return;
        }

        _log.Warning("[{0}] Connection failed: {1}. Retrying in {2}s",
            _connectionName, result.Error, _options.ReconnectInterval.TotalSeconds);
        ScheduleReconnect();
    }

    private void HandleReconnectResult(ConnectResult result)
    {
        if (result.Success)
        {
            _log.Info("[{0}] Reconnected successfully", _connectionName);
            // WP-10: Transparent re-subscribe — re-establish all active subscriptions
            ReSubscribeAll();
            BecomeConnected();
            return;
        }

        _log.Warning("[{0}] Reconnect failed: {1}. Retrying in {2}s",
            _connectionName, result.Error, _options.ReconnectInterval.TotalSeconds);
        ScheduleReconnect();
    }

    /// <summary>Schedules a single connect attempt after the configured reconnect interval.</summary>
    private void ScheduleReconnect()
    {
        Timers.StartSingleTimer(ReconnectTimerKey, new AttemptConnect(), _options.ReconnectInterval);
    }

    private void HandleDisconnect()
    {
        _log.Warning("[{0}] Adapter reported disconnect", _connectionName);
        BecomeReconnecting();
    }

    // ── Subscription Management (WP-14) ──

    /// <summary>
    /// Registers the sender as subscriber for the requested tags and subscribes every tag
    /// that is not tracked yet. The adapter calls run off the actor thread; their outcomes
    /// come back as a single <see cref="SubscribeBatchCompleted"/> message, at which point
    /// state is updated and the <c>SubscribeTagsResponse</c> is sent.
    /// </summary>
    private void HandleSubscribe(SubscribeTagsRequest request)
    {
        _log.Debug("[{0}] Subscribing {1} tags for instance {2}",
            _connectionName, request.TagPaths.Count, request.InstanceUniqueName);

        var replyTo = Sender;
        _subscribers[request.InstanceUniqueName] = replyTo;

        if (!_subscriptionsByInstance.TryGetValue(request.InstanceUniqueName, out var instanceTags))
        {
            instanceTags = new HashSet<string>();
            _subscriptionsByInstance[request.InstanceUniqueName] = instanceTags;
        }

        var toSubscribe = new List<string>();
        foreach (var tagPath in request.TagPaths)
        {
            instanceTags.Add(tagPath);

            // Already resolved, or already awaiting resolution for another instance:
            // just track it for this instance, do not subscribe or count it twice.
            if (_subscriptionIds.ContainsKey(tagPath) || _unresolvedTags.Contains(tagPath))
                continue;

            _unresolvedTags.Add(tagPath);
            _pendingTags.Add(tagPath);
            toSubscribe.Add(tagPath);
        }

        if (toSubscribe.Count == 0)
        {
            if (_unresolvedTags.Count > 0)
                EnsureRetryTimer();
            replyTo.Tell(CreateSubscribeResponse(request));
            return;
        }

        // Only immutable/captured locals are used inside the task — no actor state.
        var self = Self;
        var adapter = _adapter;
        Task.Run(async () =>
        {
            var outcomes = new List<TagSubscribeOutcome>(toSubscribe.Count);
            foreach (var tagPath in toSubscribe)
            {
                try
                {
                    var subId = await adapter.SubscribeAsync(tagPath, (path, value) =>
                    {
                        self.Tell(new TagValueReceived(path, value));
                    });
                    outcomes.Add(new TagSubscribeOutcome(tagPath, subId, null));
                }
                catch (Exception ex)
                {
                    // WP-12: Tag path resolution failure — reported back, retried later
                    outcomes.Add(new TagSubscribeOutcome(tagPath, null, ex.Message));
                }
            }
            return new SubscribeBatchCompleted(request, replyTo, outcomes);
        }).PipeTo(self);
    }

    /// <summary>
    /// Applies the per-tag outcomes of a subscribe batch on the actor thread, then replies
    /// to the original requester.
    /// </summary>
    private void HandleSubscribeBatchCompleted(SubscribeBatchCompleted batch)
    {
        foreach (var outcome in batch.Outcomes)
        {
            if (outcome.SubscriptionId is not null)
                ApplyResolved(outcome.TagPath, outcome.SubscriptionId);
            else
                ApplyFailed(outcome.TagPath, outcome.Error ?? "Unknown error");
        }

        ReplyToSubscribe(batch);
    }

    private static void ReplyToSubscribe(SubscribeBatchCompleted batch)
    {
        batch.ReplyTo.Tell(CreateSubscribeResponse(batch.Request));
    }

    private static SubscribeTagsResponse CreateSubscribeResponse(SubscribeTagsRequest request)
    {
        // Unresolved tags are retried in the background, so the request itself succeeds.
        return new SubscribeTagsResponse(
            request.CorrelationId, request.InstanceUniqueName, true, null, DateTimeOffset.UtcNow);
    }

    /// <summary>
    /// Drops all tags of one instance. Tags no other instance uses are released at the
    /// adapter (when resolved) and removed from resolution tracking.
    /// </summary>
    private void HandleUnsubscribe(UnsubscribeTagsRequest request)
    {
        _log.Debug("[{0}] Unsubscribing all tags for instance {1}",
            _connectionName, request.InstanceUniqueName);

        if (!_subscriptionsByInstance.Remove(request.InstanceUniqueName, out var tags))
            return;
        _subscribers.Remove(request.InstanceUniqueName);

        // WP-14: Cleanup on Instance Actor stop
        foreach (var tagPath in tags)
        {
            // The instance is already removed, so any remaining holder is another instance.
            if (_subscriptionsByInstance.Values.Any(other => other.Contains(tagPath)))
                continue;

            _unresolvedTags.Remove(tagPath);
            _pendingTags.Remove(tagPath);
            if (_subscriptionIds.Remove(tagPath, out var subId))
                _ = _adapter.UnsubscribeAsync(subId);
        }

        if (_unresolvedTags.Count == 0)
            Timers.Cancel(TagResolutionRetryTimerKey);
    }

    // ── Write Support (WP-11) ──

    private void HandleWrite(WriteTagRequest request)
    {
        _log.Debug("[{0}] Writing to tag {1}", _connectionName, request.TagPath);
        var sender = Sender;

        // WP-11: Write through DCL to device, failure returned synchronously
        _adapter.WriteAsync(request.TagPath, request.Value).ContinueWith(t =>
        {
            if (t.IsCompletedSuccessfully)
            {
                var result = t.Result;
                return new WriteTagResponse(
                    request.CorrelationId, result.Success, result.ErrorMessage, DateTimeOffset.UtcNow);
            }
            return new WriteTagResponse(
                request.CorrelationId, false, t.Exception?.GetBaseException().Message, DateTimeOffset.UtcNow);
        }).PipeTo(sender);
    }

    // ── Tag Resolution (WP-12) ──

    /// <summary>
    /// Timer tick: re-attempts every unresolved tag that has no call in flight, or stops
    /// the timer when nothing is left to resolve.
    /// </summary>
    private void HandleRetryTagResolution()
    {
        if (_unresolvedTags.Count == 0)
        {
            Timers.Cancel(TagResolutionRetryTimerKey);
            return;
        }

        var due = _unresolvedTags.Where(tag => !_pendingTags.Contains(tag)).ToList();
        if (due.Count == 0)
            return;

        _log.Debug("[{0}] Retrying resolution for {1} unresolved tags", _connectionName, due.Count);
        StartResolution(due);
    }

    /// <summary>
    /// Issues one adapter subscribe per tag and pipes each outcome back to this actor as
    /// <see cref="TagResolutionSucceeded"/> or <see cref="TagResolutionFailed"/>.
    /// </summary>
    private void StartResolution(IReadOnlyList<string> tagPaths)
    {
        var self = Self;
        foreach (var tagPath in tagPaths)
        {
            _pendingTags.Add(tagPath);
            _adapter.SubscribeAsync(tagPath, (path, value) =>
            {
                self.Tell(new TagValueReceived(path, value));
            }).ContinueWith(t =>
            {
                if (t.IsCompletedSuccessfully)
                    return new TagResolutionSucceeded(tagPath, t.Result) as object;
                return new TagResolutionFailed(tagPath, t.Exception?.GetBaseException().Message ?? "Unknown error");
            }).PipeTo(self);
        }
    }

    /// <summary>
    /// Records a successful adapter subscription. If the tag is no longer awaiting
    /// resolution (unsubscribed meanwhile, or resolved by an earlier attempt) the new
    /// subscription is surplus and is released again.
    /// </summary>
    private void ApplyResolved(string tagPath, string subscriptionId)
    {
        _pendingTags.Remove(tagPath);

        if (!_unresolvedTags.Remove(tagPath))
        {
            _ = _adapter.UnsubscribeAsync(subscriptionId);
            return;
        }

        _subscriptionIds[tagPath] = subscriptionId;

        if (_unresolvedTags.Count == 0)
            Timers.Cancel(TagResolutionRetryTimerKey);
    }

    /// <summary>
    /// Records a failed resolution attempt; the tag stays unresolved and the retry timer
    /// is started if it is not already running.
    /// </summary>
    private void ApplyFailed(string tagPath, string error)
    {
        _pendingTags.Remove(tagPath);

        // Tag was unsubscribed (or resolved) while the attempt was in flight.
        if (!_unresolvedTags.Contains(tagPath))
            return;

        _log.Debug("[{0}] Tag {1} could not be resolved: {2}", _connectionName, tagPath, error);
        EnsureRetryTimer();
    }

    /// <summary>
    /// Starts the periodic retry timer unless it is already active, so an existing
    /// schedule is not reset by every new failure.
    /// </summary>
    private void EnsureRetryTimer()
    {
        if (Timers.IsTimerActive(TagResolutionRetryTimerKey))
            return;

        Timers.StartPeriodicTimer(
            TagResolutionRetryTimerKey,
            new RetryTagResolution(),
            _options.TagResolutionRetryInterval,
            _options.TagResolutionRetryInterval);
    }

    // ── Bad Quality Push (WP-9) ──

    /// <summary>Sends one connection-level bad-quality notification to every subscriber.</summary>
    private void PushBadQualityForAllTags()
    {
        var now = DateTimeOffset.UtcNow;
        foreach (var instanceName in _subscriptionsByInstance.Keys)
        {
            if (_subscribers.TryGetValue(instanceName, out var subscriber))
                subscriber.Tell(new ConnectionQualityChanged(_connectionName, QualityCode.Bad, now));
        }
    }

    // ── Re-subscribe (WP-10) ──

    /// <summary>
    /// After a reconnect all previous subscription ids are discarded: every tracked tag
    /// becomes unresolved and is subscribed again. The retry timer is armed so tags whose
    /// re-subscribe fails are retried.
    /// </summary>
    private void ReSubscribeAll()
    {
        _unresolvedTags.UnionWith(_subscriptionIds.Keys);
        _subscriptionIds.Clear();
        _pendingTags.Clear();

        _log.Info("[{0}] Re-subscribing {1} tags after reconnect", _connectionName, _unresolvedTags.Count);
        if (_unresolvedTags.Count == 0)
            return;

        StartResolution(_unresolvedTags.ToList());
        EnsureRetryTimer();
    }

    // ── Health Reporting (WP-13) ──

    private void ReplyWithHealthReport()
    {
        // Derived from the tracking collections so the numbers cannot drift.
        var resolved = _subscriptionIds.Count;
        var total = resolved + _unresolvedTags.Count;
        Sender.Tell(new DataConnectionHealthReport(
            _connectionName, _adapter.Status, total, resolved, DateTimeOffset.UtcNow));
    }

    // ── Value fan-out ──

    /// <summary>Forwards an adapter value to every instance subscribed to that tag.</summary>
    private void HandleTagValueReceived(TagValueReceived msg)
    {
        foreach (var (instanceName, tags) in _subscriptionsByInstance)
        {
            if (!tags.Contains(msg.TagPath))
                continue;

            if (_subscribers.TryGetValue(instanceName, out var subscriber))
            {
                subscriber.Tell(new TagValueUpdate(
                    _connectionName, msg.TagPath, msg.Value.Value, msg.Value.Quality, msg.Value.Timestamp));
            }
        }
    }

    // ── Internal messages ──

    internal record AttemptConnect;
    internal record ConnectResult(bool Success, string? Error);
    internal record AdapterDisconnected;
    internal record TagValueReceived(string TagPath, TagValue Value);
    internal record TagResolutionFailed(string TagPath, string Error);
    internal record TagResolutionSucceeded(string TagPath, string SubscriptionId);
    internal record RetryTagResolution;

    /// <summary>Outcome of one adapter subscribe call; exactly one of id/error is set.</summary>
    internal record TagSubscribeOutcome(string TagPath, string? SubscriptionId, string? Error);

    /// <summary>Result of the adapter calls made for one <c>SubscribeTagsRequest</c>.</summary>
    internal record SubscribeBatchCompleted(
        SubscribeTagsRequest Request,
        IActorRef ReplyTo,
        IReadOnlyList<TagSubscribeOutcome> Outcomes);

    public record GetHealthReport;
}