fix(comm): route Search/Verify/WriteTag commands through SiteCommunicationActor + DeploymentManager (M7 end-to-end)

This commit is contained in:
Joseph Doherty
2026-06-18 03:59:35 -04:00
parent 384204b71a
commit 39afa2743e
3 changed files with 87 additions and 0 deletions
@@ -161,6 +161,22 @@ public class SiteCommunicationActor : ReceiveActor, IWithTimers
// children holding the live OPC UA sessions.
Receive<ReadTagValuesCommand>(read => _deploymentManagerProxy.Forward(read));

// T15 (OPC UA tag-picker address-space search) and T14 (secured-write
// execute). The routing rationale matches BrowseNodeCommand above: the
// DataConnectionActor children that own the live OPC UA sessions exist only
// on the singleton's (active) node, so both commands must hop through the
// Deployment Manager proxy as well. If they are not forwarded here they
// dead-letter and the central Ask times out. Forward keeps the central Ask
// sender attached, so the result routes straight back to the waiting Ask.
Receive<SearchAddressSpaceCommand>(search => _deploymentManagerProxy.Forward(search));
Receive<Commons.Messages.DataConnection.WriteTagRequest>(write => _deploymentManagerProxy.Forward(write));

// T17 (OPC UA endpoint Verify): probes a (possibly unsaved) endpoint config
// WITHOUT persisting it. The Deployment Manager singleton's dcl-manager runs
// the probe directly and needs no existing connection, so Verify takes the
// same route through the singleton's active node as the commands above.
Receive<VerifyEndpointCommand>(verify => _deploymentManagerProxy.Forward(verify));
// OPC UA server-certificate trust management (T17 / D6) — forward to the
// Deployment Manager singleton, which owns the cross-node trust broadcast.
// The trusted-peer PKI store is node-wide per site node, so a trust/remove
@@ -168,6 +168,20 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
Receive<ReadTagValuesCommand>(read =>
{
    var dclManager = Context.ActorSelection("/user/dcl-manager");
    dclManager.Tell(read, Sender);
});

// T15 (tag-picker address-space search), T14 (secured-write execute) and
// T17 (endpoint Verify) use the same singleton-only re-forward as the browse
// handler above. SiteCommunicationActor routes these commands to this
// singleton on the active node, so the local dcl-manager is the one holding
// the live DataConnectionActor children: Search and Write are routed there by
// connection name, while Verify runs a temporary probe in the manager itself.
// ActorSelection has no Forward(), so each handler Tells with the current
// Sender passed explicitly, which is semantically identical to a Forward.
Receive<SearchAddressSpaceCommand>(search =>
{
    var dclManager = Context.ActorSelection("/user/dcl-manager");
    dclManager.Tell(search, Sender);
});

Receive<Commons.Messages.DataConnection.WriteTagRequest>(write =>
{
    var dclManager = Context.ActorSelection("/user/dcl-manager");
    dclManager.Tell(write, Sender);
});

Receive<VerifyEndpointCommand>(verify =>
{
    var dclManager = Context.ActorSelection("/user/dcl-manager");
    dclManager.Tell(verify, Sender);
});
// T17 / D6 — OPC UA server-certificate trust. Trust is site-local: the
// trusted-peer PKI store is per-node, so a trust/remove MUST reach BOTH
// site nodes (node-a + node-b) or they diverge across failover. This
@@ -1,10 +1,12 @@
using Akka.Actor;
using Akka.Cluster.Tools.Client;
using Akka.TestKit.Xunit2;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DataConnection;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Deployment;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Health;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Lifecycle;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Integration;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Management;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.RemoteQuery;
using ZB.MOM.WW.ScadaBridge.Communication.Actors;
@@ -284,6 +286,61 @@ public class SiteCommunicationActorTests : TestKit
Assert.NotNull(ack.ErrorMessage);
}
// ── M7 OPC UA cross-cluster routing: Search (T15), WriteTag (T14), Verify (T17) ──
//
// Regression guard for the M7 dead-letter defect. These three interactive
// commands have downstream handlers in DataConnectionManagerActor but were NOT
// forwarded through SiteCommunicationActor → Deployment Manager, so they
// dead-lettered and the central Ask timed out in the real cluster. They must
// forward to the Deployment Manager proxy exactly like BrowseNodeCommand, with
// the original Ask sender preserved so the result routes straight back.
// T15: a SearchAddressSpaceCommand told to the site actor must reach the
// Deployment Manager proxy with the original sender still attached.
[Fact]
public void SearchAddressSpaceCommand_ForwardedToDeploymentManager_SenderPreserved()
{
    // Arrange: one probe stands in for the Deployment Manager proxy, the
    // other for the central Ask sender.
    var deploymentManager = CreateTestProbe();
    var originalSender = CreateTestProbe();
    var actorProps = Props.Create(() =>
        new SiteCommunicationActor("site1", _options, deploymentManager.Ref));
    var actorUnderTest = Sys.ActorOf(actorProps);

    // Act
    actorUnderTest.Tell(
        new SearchAddressSpaceCommand("conn1", "Temp", 5, 100),
        originalSender.Ref);

    // Assert: the command arrived intact and the sender was not rewritten.
    var forwarded = deploymentManager.ExpectMsg<SearchAddressSpaceCommand>();
    Assert.Equal("conn1", forwarded.ConnectionName);
    Assert.Equal(originalSender.Ref, deploymentManager.LastSender);
}
// T14: a WriteTagRequest told to the site actor must reach the Deployment
// Manager proxy with the original sender still attached.
[Fact]
public void WriteTagRequest_ForwardedToDeploymentManager_SenderPreserved()
{
    // Arrange: one probe stands in for the Deployment Manager proxy, the
    // other for the central Ask sender.
    var dmProbe = CreateTestProbe();
    var senderProbe = CreateTestProbe();
    var siteActor = Sys.ActorOf(Props.Create(() =>
        new SiteCommunicationActor("site1", _options, dmProbe.Ref)));

    // A fixed UTC instant keeps the test input identical on every run; the
    // routing under test does not depend on the wall clock.
    var fixedTimestamp = new DateTimeOffset(2026, 1, 1, 0, 0, 0, TimeSpan.Zero);
    var request = new WriteTagRequest(
        "corr-w", "conn1", "Channel1.Device1.Tag1", 42, fixedTimestamp);

    // Act
    siteActor.Tell(request, senderProbe.Ref);

    // Assert: the request arrived and the sender was not rewritten.
    dmProbe.ExpectMsg<WriteTagRequest>(msg => msg.CorrelationId == "corr-w");
    Assert.Equal(senderProbe.Ref, dmProbe.LastSender);
}
// T17: a VerifyEndpointCommand told to the site actor must reach the
// Deployment Manager proxy with the original sender still attached.
[Fact]
public void VerifyEndpointCommand_ForwardedToDeploymentManager_SenderPreserved()
{
    // Arrange: one probe stands in for the Deployment Manager proxy, the
    // other for the central Ask sender.
    var deploymentManager = CreateTestProbe();
    var originalSender = CreateTestProbe();
    var actorProps = Props.Create(() =>
        new SiteCommunicationActor("site1", _options, deploymentManager.Ref));
    var actorUnderTest = Sys.ActorOf(actorProps);

    // Act
    actorUnderTest.Tell(
        new VerifyEndpointCommand("conn1", "OpcUa", "{}"),
        originalSender.Ref);

    // Assert: the command arrived intact and the sender was not rewritten.
    var forwarded = deploymentManager.ExpectMsg<VerifyEndpointCommand>();
    Assert.Equal("conn1", forwarded.ConnectionName);
    Assert.Equal(originalSender.Ref, deploymentManager.LastSender);
}
// ── Communication-018: heartbeat IsActive reflects this node's cluster role ──
[Theory]