Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
173 lines
6.3 KiB
C#
173 lines
6.3 KiB
C#
using Akka.Actor;
|
|
using Akka.Event;
|
|
using ScadaLink.Commons.Messages.Communication;
|
|
using ScadaLink.Commons.Messages.Health;
|
|
|
|
namespace ScadaLink.Communication.Actors;
|
|
|
|
/// <summary>
/// Central-side actor that routes messages from central to site clusters via Akka remoting.
/// Maintains a registry of site actor selections, populated by <see cref="RegisterSite"/>
/// commands and by the first heartbeat received from a site that is not yet registered.
///
/// WP-4: All 8 message patterns routed through this actor.
/// WP-5: Ask timeout on connection drop (no central buffering). Debug streams killed on interruption.
/// </summary>
public class CentralCommunicationActor : ReceiveActor
{
    private readonly ILoggingAdapter _log = Context.GetLogger();

    /// <summary>
    /// Maps SiteId → remote site actor selection.
    /// Entries are added by <see cref="RegisterSite"/> or learned from heartbeats,
    /// and removed when the site is reported as disconnected.
    /// </summary>
    private readonly Dictionary<string, ActorSelection> _siteSelections = new();

    /// <summary>
    /// Tracks active debug view subscriptions: correlationId → (siteId, subscriber).
    /// Used to kill debug streams on site disconnection (WP-5).
    /// </summary>
    private readonly Dictionary<string, (string SiteId, IActorRef Subscriber)> _debugSubscriptions = new();

    /// <summary>
    /// Tracks in-progress deployments: deploymentId → siteId.
    /// On central failover, in-progress deployments are treated as failed (WP-5).
    /// </summary>
    private readonly Dictionary<string, string> _inProgressDeployments = new();

    /// <summary>
    /// Wires up the message handlers for heartbeats, connection state changes,
    /// site registration and enveloped messages destined for a site.
    /// </summary>
    public CentralCommunicationActor()
    {
        // Site registration via heartbeats
        Receive<HeartbeatMessage>(HandleHeartbeat);

        // Connection state changes
        Receive<ConnectionStateChanged>(HandleConnectionStateChanged);

        // Site registration command (manual or from discovery)
        Receive<RegisterSite>(HandleRegisterSite);

        // Route enveloped messages to sites
        Receive<SiteEnvelope>(HandleSiteEnvelope);
    }

    /// <summary>
    /// Learns the site's actor path from the heartbeat sender when the site is not yet
    /// registered, then forwards the heartbeat to the parent actor.
    /// </summary>
    /// <param name="heartbeat">Heartbeat received from a site.</param>
    private void HandleHeartbeat(HeartbeatMessage heartbeat)
    {
        // Only learn when the site is unknown, so an explicit RegisterSite entry is never
        // overwritten by a heartbeat. Skip senders that are not real actors (no sender /
        // dead letters): a selection built from such a path could never be routed to.
        if (!_siteSelections.ContainsKey(heartbeat.SiteId) && !Sender.IsNobody())
        {
            // The sender's path tells us the site's communication actor address.
            var senderPath = Sender.Path;

            // Actually record the selection; logging alone would leave the site
            // unroutable and repeat the "learned" message on every heartbeat.
            _siteSelections[heartbeat.SiteId] = Context.ActorSelection(senderPath);
            _log.Info("Learned site {0} from heartbeat at path {1}", heartbeat.SiteId, senderPath.ToString());
        }

        // Forward heartbeat to parent/subscribers (central health monitoring)
        Context.Parent.Tell(heartbeat);
    }

    /// <summary>
    /// Handles a site connection state change. On disconnect, terminates the site's debug
    /// streams, drops its in-progress deployments and forgets its actor selection.
    /// On connect, only logs the event.
    /// </summary>
    /// <param name="msg">Connection state notification for a site.</param>
    private void HandleConnectionStateChanged(ConnectionStateChanged msg)
    {
        if (msg.IsConnected)
        {
            _log.Info("Site {0} connected at {1}", msg.SiteId, msg.Timestamp);
            return;
        }

        _log.Warning("Site {0} disconnected at {1}", msg.SiteId, msg.Timestamp);

        // WP-5: Kill active debug streams for the disconnected site.
        // Materialize first so the dictionary is not modified while being enumerated.
        var toRemove = _debugSubscriptions
            .Where(kvp => kvp.Value.SiteId == msg.SiteId)
            .ToList();

        foreach (var kvp in toRemove)
        {
            _log.Info("Killing debug stream {0} for disconnected site {1}", kvp.Key, msg.SiteId);
            kvp.Value.Subscriber.Tell(new DebugStreamTerminated(msg.SiteId, kvp.Key));
            _debugSubscriptions.Remove(kvp.Key);
        }

        // WP-5: Mark in-progress deployments as failed
        var failedDeployments = _inProgressDeployments
            .Where(kvp => kvp.Value == msg.SiteId)
            .Select(kvp => kvp.Key)
            .ToList();

        foreach (var deploymentId in failedDeployments)
        {
            _log.Warning("Deployment {0} to site {1} treated as failed due to disconnection",
                deploymentId, msg.SiteId);
            _inProgressDeployments.Remove(deploymentId);
        }

        _siteSelections.Remove(msg.SiteId);
    }

    /// <summary>
    /// Registers (or replaces) the actor selection used to reach a site.
    /// </summary>
    /// <param name="msg">Registration command carrying the site id and remote actor path.</param>
    private void HandleRegisterSite(RegisterSite msg)
    {
        var selection = Context.ActorSelection(msg.RemoteActorPath);
        _siteSelections[msg.SiteId] = selection;
        _log.Info("Registered site {0} at path {1}", msg.SiteId, msg.RemoteActorPath);
    }

    /// <summary>
    /// Routes the envelope's inner message to the target site. Messages for unknown
    /// sites are logged and dropped.
    /// </summary>
    /// <param name="envelope">Envelope naming the target site and carrying the message.</param>
    private void HandleSiteEnvelope(SiteEnvelope envelope)
    {
        if (!_siteSelections.TryGetValue(envelope.SiteId, out var siteSelection))
        {
            _log.Warning("No known path for site {0}, cannot route message {1}",
                envelope.SiteId, envelope.Message.GetType().Name);

            // The Ask will timeout on the caller side — no central buffering (WP-5)
            return;
        }

        // Track debug subscriptions for cleanup on disconnect
        TrackMessageForCleanup(envelope);

        // Forward the inner message to the site, preserving the original sender
        // so the site can reply directly to the caller (completing the Ask pattern)
        siteSelection.Tell(envelope.Message, Sender);
    }

    /// <summary>
    /// Updates the debug-subscription and deployment tracking tables based on the
    /// type of the message being routed. Other message types are not tracked.
    /// </summary>
    /// <param name="envelope">Envelope about to be forwarded to a site.</param>
    private void TrackMessageForCleanup(SiteEnvelope envelope)
    {
        switch (envelope.Message)
        {
            case Commons.Messages.DebugView.SubscribeDebugViewRequest sub:
                // Remember the current sender so it can be notified if the site drops.
                _debugSubscriptions[sub.CorrelationId] = (envelope.SiteId, Sender);
                break;

            case Commons.Messages.DebugView.UnsubscribeDebugViewRequest unsub:
                _debugSubscriptions.Remove(unsub.CorrelationId);
                break;

            case Commons.Messages.Deployment.DeployInstanceCommand deploy:
                _inProgressDeployments[deploy.DeploymentId] = envelope.SiteId;
                break;
        }
    }

    /// <summary>
    /// Logs actor startup.
    /// </summary>
    protected override void PreStart()
    {
        _log.Info("CentralCommunicationActor started");
    }

    /// <summary>
    /// Logs actor shutdown and clears the deployment and debug-subscription tracking tables.
    /// </summary>
    protected override void PostStop()
    {
        _log.Info("CentralCommunicationActor stopped. In-progress deployments treated as failed (WP-5).");

        // On central failover, all in-progress deployments are failed
        _inProgressDeployments.Clear();
        _debugSubscriptions.Clear();
    }
}
|
|
|
|
/// <summary>
/// Command that registers the remote communication actor path used to reach a site.
/// </summary>
/// <param name="SiteId">Identifier of the site being registered.</param>
/// <param name="RemoteActorPath">Actor path of the site's remote communication actor.</param>
public record RegisterSite(
    string SiteId,
    string RemoteActorPath);
|
|
|
|
/// <summary>
/// Notification delivered to a debug view subscriber when its stream has been
/// terminated because the site disconnected (WP-5).
/// </summary>
/// <param name="SiteId">Identifier of the site that disconnected.</param>
/// <param name="CorrelationId">Correlation id of the terminated debug subscription.</param>
public record DebugStreamTerminated(
    string SiteId,
    string CorrelationId);
|