Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging
Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
This commit is contained in:
172
src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs
Normal file
172
src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs
Normal file
@@ -0,0 +1,172 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Event;
|
||||
using ScadaLink.Commons.Messages.Communication;
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
|
||||
namespace ScadaLink.Communication.Actors;
|
||||
|
||||
/// <summary>
/// Central-side actor that routes messages from central to site clusters via Akka remoting.
/// Maintains a registry of known site actor paths, populated by <see cref="RegisterSite"/>
/// commands and learned from the sender of site heartbeats.
///
/// WP-4: All 8 message patterns routed through this actor.
/// WP-5: Ask timeout on connection drop (no central buffering). Debug streams killed on interruption.
/// </summary>
public class CentralCommunicationActor : ReceiveActor
{
    private readonly ILoggingAdapter _log = Context.GetLogger();

    /// <summary>
    /// Maps SiteId → remote SiteCommunicationActor selection.
    /// Added on <see cref="RegisterSite"/> or on the first heartbeat from an unknown site;
    /// removed when the site is reported as disconnected.
    /// </summary>
    private readonly Dictionary<string, ActorSelection> _siteSelections = new();

    /// <summary>
    /// Tracks active debug view subscriptions: correlationId → (siteId, subscriber).
    /// Used to kill debug streams on site disconnection (WP-5).
    /// </summary>
    private readonly Dictionary<string, (string SiteId, IActorRef Subscriber)> _debugSubscriptions = new();

    /// <summary>
    /// Tracks in-progress deployments: deploymentId → siteId.
    /// On central failover, in-progress deployments are treated as failed (WP-5).
    /// </summary>
    private readonly Dictionary<string, string> _inProgressDeployments = new();

    /// <summary>
    /// Wires up the message handlers for heartbeats, connection state changes,
    /// explicit site registration and site-bound envelopes.
    /// </summary>
    public CentralCommunicationActor()
    {
        // Site registration via heartbeats
        Receive<HeartbeatMessage>(HandleHeartbeat);

        // Connection state changes
        Receive<ConnectionStateChanged>(HandleConnectionStateChanged);

        // Site registration command (manual or from discovery)
        Receive<RegisterSite>(HandleRegisterSite);

        // Route enveloped messages to sites
        Receive<SiteEnvelope>(HandleSiteEnvelope);
    }

    /// <summary>
    /// Handles a heartbeat from a site: learns the site's actor path from the sender
    /// if the site is not yet registered, then forwards the heartbeat to the parent.
    /// </summary>
    /// <param name="heartbeat">The heartbeat; its <c>SiteId</c> is the registry key.</param>
    private void HandleHeartbeat(HeartbeatMessage heartbeat)
    {
        // The sender's path tells us the site's communication actor address.
        // Only learn it for sites we do not know yet, so an explicit RegisterSite
        // (or an earlier heartbeat) is not overwritten on every beat.
        if (!_siteSelections.ContainsKey(heartbeat.SiteId))
        {
            if (Sender.IsNobody())
            {
                // A heartbeat told without a sender carries no usable return path;
                // registering dead letters would silently swallow routed messages.
                _log.Debug("Heartbeat for site {0} has no sender; path not learned", heartbeat.SiteId);
            }
            else
            {
                // Store a path-based selection (same shape as HandleRegisterSite) so the
                // site becomes routable; previously the path was logged but never stored.
                _siteSelections[heartbeat.SiteId] = Context.ActorSelection(Sender.Path);
                _log.Info("Learned site {0} from heartbeat at path {1}", heartbeat.SiteId, Sender.Path.ToString());
            }
        }

        // Forward heartbeat to parent/subscribers (central health monitoring)
        Context.Parent.Tell(heartbeat);
    }

    /// <summary>
    /// Handles a site connection state change. On disconnect, notifies every debug view
    /// subscriber of that site with <see cref="DebugStreamTerminated"/>, drops the site's
    /// tracked deployments and forgets the site's actor selection. On connect, only logs.
    /// </summary>
    /// <param name="msg">The state change, identifying the site and whether it is connected.</param>
    private void HandleConnectionStateChanged(ConnectionStateChanged msg)
    {
        if (msg.IsConnected)
        {
            _log.Info("Site {0} connected at {1}", msg.SiteId, msg.Timestamp);
            return;
        }

        _log.Warning("Site {0} disconnected at {1}", msg.SiteId, msg.Timestamp);

        // WP-5: Kill active debug streams for the disconnected site.
        // Materialize first: the dictionary is mutated inside the loop.
        var toRemove = _debugSubscriptions
            .Where(kvp => kvp.Value.SiteId == msg.SiteId)
            .ToList();

        foreach (var kvp in toRemove)
        {
            _log.Info("Killing debug stream {0} for disconnected site {1}", kvp.Key, msg.SiteId);
            kvp.Value.Subscriber.Tell(new DebugStreamTerminated(msg.SiteId, kvp.Key));
            _debugSubscriptions.Remove(kvp.Key);
        }

        // WP-5: Mark in-progress deployments as failed.
        // NOTE(review): this only logs and drops the tracking entry; no failure message
        // is sent from here — presumably the caller's Ask timeout reports it. Confirm.
        var failedDeployments = _inProgressDeployments
            .Where(kvp => kvp.Value == msg.SiteId)
            .Select(kvp => kvp.Key)
            .ToList();

        foreach (var deploymentId in failedDeployments)
        {
            _log.Warning("Deployment {0} to site {1} treated as failed due to disconnection",
                deploymentId, msg.SiteId);
            _inProgressDeployments.Remove(deploymentId);
        }

        _siteSelections.Remove(msg.SiteId);
    }

    /// <summary>
    /// Registers (or replaces) the actor selection used to reach a site.
    /// </summary>
    /// <param name="msg">The site id and the remote actor path to resolve.</param>
    private void HandleRegisterSite(RegisterSite msg)
    {
        var selection = Context.ActorSelection(msg.RemoteActorPath);
        _siteSelections[msg.SiteId] = selection;
        _log.Info("Registered site {0} at path {1}", msg.SiteId, msg.RemoteActorPath);
    }

    /// <summary>
    /// Unwraps a <see cref="SiteEnvelope"/> and forwards its inner message to the target
    /// site. If the site is unknown the message is dropped with a warning and no reply is sent.
    /// </summary>
    /// <param name="envelope">The envelope carrying the target site id and the message.</param>
    private void HandleSiteEnvelope(SiteEnvelope envelope)
    {
        if (!_siteSelections.TryGetValue(envelope.SiteId, out var siteSelection))
        {
            _log.Warning("No known path for site {0}, cannot route message {1}",
                envelope.SiteId, envelope.Message.GetType().Name);

            // The Ask will timeout on the caller side — no central buffering (WP-5)
            return;
        }

        // Track debug subscriptions for cleanup on disconnect
        TrackMessageForCleanup(envelope);

        // Forward the inner message to the site, preserving the original sender
        // so the site can reply directly to the caller (completing the Ask pattern)
        siteSelection.Tell(envelope.Message, Sender);
    }

    /// <summary>
    /// Records bookkeeping for messages whose lifetime must be cleaned up when a site
    /// disconnects: debug view subscriptions (added/removed) and deployments (added).
    /// Other message types are not tracked.
    /// </summary>
    /// <param name="envelope">The envelope being routed; the current sender is stored as subscriber.</param>
    private void TrackMessageForCleanup(SiteEnvelope envelope)
    {
        switch (envelope.Message)
        {
            case Commons.Messages.DebugView.SubscribeDebugViewRequest sub:
                _debugSubscriptions[sub.CorrelationId] = (envelope.SiteId, Sender);
                break;

            case Commons.Messages.DebugView.UnsubscribeDebugViewRequest unsub:
                _debugSubscriptions.Remove(unsub.CorrelationId);
                break;

            case Commons.Messages.Deployment.DeployInstanceCommand deploy:
                // NOTE(review): entries are only removed on site disconnect or actor stop;
                // nothing in this class removes one when a deployment completes, because
                // the site replies directly to the original sender. Confirm this is intended.
                _inProgressDeployments[deploy.DeploymentId] = envelope.SiteId;
                break;
        }
    }

    /// <summary>Logs actor start-up.</summary>
    protected override void PreStart()
    {
        _log.Info("CentralCommunicationActor started");
    }

    /// <summary>
    /// Logs actor shutdown and discards all deployment and debug subscription tracking.
    /// </summary>
    protected override void PostStop()
    {
        _log.Info("CentralCommunicationActor stopped. In-progress deployments treated as failed (WP-5).");
        // On central failover, all in-progress deployments are failed
        _inProgressDeployments.Clear();
        _debugSubscriptions.Clear();
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Command to register a site's remote communication actor path.
|
||||
/// </summary>
|
||||
/// <param name="SiteId">Identifier of the site; used as the key under which the selection is stored.</param>
/// <param name="RemoteActorPath">Actor path string that the central actor resolves into an actor selection for the site.</param>
public record RegisterSite(string SiteId, string RemoteActorPath);
|
||||
|
||||
/// <summary>
|
||||
/// Notification sent to debug view subscribers when the stream is terminated
|
||||
/// due to site disconnection (WP-5).
|
||||
/// </summary>
|
||||
/// <param name="SiteId">Identifier of the site whose disconnection ended the stream.</param>
/// <param name="CorrelationId">Correlation id under which the debug view subscription was tracked.</param>
public record DebugStreamTerminated(string SiteId, string CorrelationId);
|
||||
Reference in New Issue
Block a user