Files
scadalink-design/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs
Joseph Doherty 389f5a0378 Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging
Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
2026-03-16 20:57:25 -04:00

173 lines
6.3 KiB
C#

using Akka.Actor;
using Akka.Event;
using ScadaLink.Commons.Messages.Communication;
using ScadaLink.Commons.Messages.Health;
namespace ScadaLink.Communication.Actors;
/// <summary>
/// Central-side actor that routes messages from central to site clusters via Akka remoting.
/// Maintains a registry of known site actor paths, populated by explicit
/// <see cref="RegisterSite"/> commands and learned from the sender of site heartbeats.
///
/// WP-4: All 8 message patterns routed through this actor.
/// WP-5: Ask timeout on connection drop (no central buffering). Debug streams killed on interruption.
/// </summary>
public class CentralCommunicationActor : ReceiveActor
{
    private readonly ILoggingAdapter _log = Context.GetLogger();

    /// <summary>
    /// Maps SiteId → remote SiteCommunicationActor selection.
    /// Entries are added by <see cref="RegisterSite"/> and by the first heartbeat from an
    /// unknown site, and removed when the site reports a disconnection.
    /// </summary>
    private readonly Dictionary<string, ActorSelection> _siteSelections = new();

    /// <summary>
    /// Tracks active debug view subscriptions: correlationId → (siteId, subscriber).
    /// Used to kill debug streams on site disconnection (WP-5).
    /// </summary>
    private readonly Dictionary<string, (string SiteId, IActorRef Subscriber)> _debugSubscriptions = new();

    /// <summary>
    /// Tracks in-progress deployments: deploymentId → siteId.
    /// On central failover, in-progress deployments are treated as failed (WP-5).
    /// </summary>
    /// <remarks>
    /// NOTE(review): entries are only removed on site disconnection or actor stop; nothing in
    /// this actor removes a deployment when it completes successfully — confirm whether a
    /// completion message should clear it.
    /// </remarks>
    private readonly Dictionary<string, string> _inProgressDeployments = new();

    /// <summary>
    /// Wires up the message handlers for site discovery, connection tracking and routing.
    /// </summary>
    public CentralCommunicationActor()
    {
        // Site registration via heartbeats
        Receive<HeartbeatMessage>(HandleHeartbeat);

        // Connection state changes
        Receive<ConnectionStateChanged>(HandleConnectionStateChanged);

        // Site registration command (manual or from discovery)
        Receive<RegisterSite>(HandleRegisterSite);

        // Route enveloped messages to sites
        Receive<SiteEnvelope>(HandleSiteEnvelope);
    }

    /// <summary>
    /// Learns the route to a previously unknown site from the heartbeat's sender and forwards
    /// the heartbeat to the parent actor.
    /// </summary>
    /// <param name="heartbeat">Heartbeat received from a site.</param>
    private void HandleHeartbeat(HeartbeatMessage heartbeat)
    {
        // Only learn sites we have no route for; an existing entry (e.g. from RegisterSite)
        // is never overwritten by a heartbeat.
        if (!_siteSelections.ContainsKey(heartbeat.SiteId))
        {
            if (Sender.IsNobody())
            {
                // Without a real sender there is no address to route to.
                _log.Debug("Heartbeat from site {0} has no sender; cannot learn its path", heartbeat.SiteId);
            }
            else
            {
                // The sender's path tells us the site's communication actor address.
                // Store it so the site is actually routable and this branch runs only once.
                var senderPath = Sender.Path.ToString();
                _siteSelections[heartbeat.SiteId] = Context.ActorSelection(Sender.Path);
                _log.Info("Learned site {0} from heartbeat at path {1}", heartbeat.SiteId, senderPath);
            }
        }

        // Forward heartbeat to parent/subscribers (central health monitoring)
        Context.Parent.Tell(heartbeat);
    }

    /// <summary>
    /// Reacts to a site's connection state change. On disconnection, terminates the site's
    /// debug streams, drops its in-progress deployments and forgets its route.
    /// </summary>
    /// <param name="msg">The connection state notification for a site.</param>
    private void HandleConnectionStateChanged(ConnectionStateChanged msg)
    {
        if (msg.IsConnected)
        {
            _log.Info("Site {0} connected at {1}", msg.SiteId, msg.Timestamp);
            return;
        }

        _log.Warning("Site {0} disconnected at {1}", msg.SiteId, msg.Timestamp);

        // WP-5: Kill active debug streams for the disconnected site.
        // Materialize first so the dictionary is not modified while being enumerated.
        var terminatedStreams = _debugSubscriptions
            .Where(kvp => kvp.Value.SiteId == msg.SiteId)
            .ToList();

        foreach (var stream in terminatedStreams)
        {
            _log.Info("Killing debug stream {0} for disconnected site {1}", stream.Key, msg.SiteId);
            stream.Value.Subscriber.Tell(new DebugStreamTerminated(msg.SiteId, stream.Key));
            _debugSubscriptions.Remove(stream.Key);
        }

        // WP-5: Mark in-progress deployments as failed
        var failedDeployments = _inProgressDeployments
            .Where(kvp => kvp.Value == msg.SiteId)
            .Select(kvp => kvp.Key)
            .ToList();

        foreach (var deploymentId in failedDeployments)
        {
            _log.Warning("Deployment {0} to site {1} treated as failed due to disconnection",
                deploymentId, msg.SiteId);
            _inProgressDeployments.Remove(deploymentId);
        }

        // Forget the route so later envelopes are not sent to the dropped connection.
        _siteSelections.Remove(msg.SiteId);
    }

    /// <summary>
    /// Registers (or replaces) the route for a site using the supplied remote actor path.
    /// </summary>
    /// <param name="msg">The registration command carrying the site id and actor path.</param>
    private void HandleRegisterSite(RegisterSite msg)
    {
        var selection = Context.ActorSelection(msg.RemoteActorPath);
        _siteSelections[msg.SiteId] = selection;
        _log.Info("Registered site {0} at path {1}", msg.SiteId, msg.RemoteActorPath);
    }

    /// <summary>
    /// Unwraps an envelope and sends its inner message to the target site, keeping the
    /// original sender. Messages for unknown sites are logged and dropped.
    /// </summary>
    /// <param name="envelope">The envelope naming the target site and carrying the message.</param>
    private void HandleSiteEnvelope(SiteEnvelope envelope)
    {
        if (!_siteSelections.TryGetValue(envelope.SiteId, out var siteSelection))
        {
            _log.Warning("No known path for site {0}, cannot route message {1}",
                envelope.SiteId, envelope.Message.GetType().Name);

            // The Ask will timeout on the caller side — no central buffering (WP-5)
            return;
        }

        // Track debug subscriptions for cleanup on disconnect
        TrackMessageForCleanup(envelope);

        // Forward the inner message to the site, preserving the original sender
        // so the site can reply directly to the caller (completing the Ask pattern)
        siteSelection.Tell(envelope.Message, Sender);
    }

    /// <summary>
    /// Records bookkeeping for routed messages that need cleanup on site disconnection:
    /// debug view subscriptions (added/removed) and started deployments.
    /// </summary>
    /// <param name="envelope">The envelope currently being routed.</param>
    private void TrackMessageForCleanup(SiteEnvelope envelope)
    {
        switch (envelope.Message)
        {
            case Commons.Messages.DebugView.SubscribeDebugViewRequest sub:
                // The current sender is remembered as the subscriber to notify on termination.
                _debugSubscriptions[sub.CorrelationId] = (envelope.SiteId, Sender);
                break;

            case Commons.Messages.DebugView.UnsubscribeDebugViewRequest unsub:
                _debugSubscriptions.Remove(unsub.CorrelationId);
                break;

            case Commons.Messages.Deployment.DeployInstanceCommand deploy:
                _inProgressDeployments[deploy.DeploymentId] = envelope.SiteId;
                break;
        }
    }

    /// <summary>
    /// Logs actor start-up.
    /// </summary>
    protected override void PreStart()
    {
        _log.Info("CentralCommunicationActor started");
    }

    /// <summary>
    /// Logs actor shutdown and discards all tracked deployments and debug subscriptions.
    /// </summary>
    protected override void PostStop()
    {
        _log.Info("CentralCommunicationActor stopped. In-progress deployments treated as failed (WP-5).");

        // On central failover, all in-progress deployments are failed
        _inProgressDeployments.Clear();
        _debugSubscriptions.Clear();
    }
}
/// <summary>
/// Command to register a site's remote communication actor path.
/// When handled by <see cref="CentralCommunicationActor"/>, an actor selection is created
/// from the path and stored under the site id, replacing any existing entry for that site.
/// </summary>
/// <param name="SiteId">Identifier of the site being registered; used as the routing key.</param>
/// <param name="RemoteActorPath">Actor path string from which the site's actor selection is resolved.</param>
public record RegisterSite(string SiteId, string RemoteActorPath);
/// <summary>
/// Notification sent to debug view subscribers when the stream is terminated
/// due to site disconnection (WP-5).
/// </summary>
/// <param name="SiteId">Identifier of the site that disconnected.</param>
/// <param name="CorrelationId">Correlation id of the debug view subscription that was terminated.</param>
public record DebugStreamTerminated(string SiteId, string CorrelationId);