From 4cda275b8d163e02adb0add005acd447b2df0e67 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 14 Jun 2026 00:16:47 -0400 Subject: [PATCH] fix(runtime): fast-fail RouteNodeWrite while Stale + micro-opts + raw-blob routing test --- .../Drivers/DriverHostActor.cs | 19 ++- .../DriverHostActorWriteRoutingTests.cs | 130 ++++++++++++++++++ 2 files changed, 142 insertions(+), 7 deletions(-) diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs b/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs index de4fbd2b..cf4d7372 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverHostActor.cs @@ -92,10 +92,11 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers /// NodeId(s). Rebuilt every apply by from the /// composition's EquipmentTags (mirroring VirtualTagHostActor._nodeIdByVtag), and /// resolved in so a driver value published by wire-ref FullName lands - /// on the variable's actual folder-scoped NodeId. A list because the same driver ref can back - /// several equipment variables (e.g. identical machines sharing a register). + /// on the variable's actual folder-scoped NodeId. A set because the same driver ref can back + /// several equipment variables (e.g. identical machines sharing a register), and the per-apply + /// rebuild dedups by NodeId. /// - private readonly Dictionary<(string DriverInstanceId, string FullName), List> _nodeIdByDriverRef = new(); + private readonly Dictionary<(string DriverInstanceId, string FullName), HashSet> _nodeIdByDriverRef = new(); /// /// Inverse of : folder-scoped equipment NodeId → @@ -516,7 +517,7 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers ? new NodeWriteResult(t.Result.Success, t.Result.Reason) : new NodeWriteResult(false, "write timeout"), CancellationToken.None, - TaskContinuationOptions.ExecuteSynchronously, + TaskContinuationOptions.None, TaskScheduler.Default) .PipeTo(replyTo); } @@ -545,6 +546,10 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers Receive(HandleRestartDriver); Receive(HandleReconnectDriver); Receive(OnRedundancyStateChanged); + // An inbound operator write can't be serviced while the config DB is unreachable — fast-fail so the + // node-manager's bounded Ask gets an immediate clear status instead of dead-lettering into a timeout. + Receive(_ => + Sender.Tell(new NodeWriteResult(false, "driver host stale (config DB unreachable)"))); Receive(_ => { /* PubSub ack */ }); Timers.StartPeriodicTimer("retry-db", RetryConfigDbConnection.Instance, ReconnectInterval); } @@ -751,9 +756,9 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers { var key = (t.DriverInstanceId, t.FullName); var nodeId = EquipmentNodeIds.Variable(t.EquipmentId, t.FolderPath, t.Name); - if (!_nodeIdByDriverRef.TryGetValue(key, out var list)) - _nodeIdByDriverRef[key] = list = new List(); - if (!list.Contains(nodeId)) list.Add(nodeId); + if (!_nodeIdByDriverRef.TryGetValue(key, out var set)) + _nodeIdByDriverRef[key] = set = new HashSet(StringComparer.Ordinal); + set.Add(nodeId); _driverRefByNodeId[nodeId] = key; } diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorWriteRoutingTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorWriteRoutingTests.cs index 91f751bd..87178416 100644 --- a/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorWriteRoutingTests.cs +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/Drivers/DriverHostActorWriteRoutingTests.cs @@ -125,6 +125,61 @@ public sealed class DriverHostActorWriteRoutingTests : RuntimeActorTestBase recorder.Writes.ShouldBeEmpty(); } + /// The router keys purely on NodeId — the tag's TagConfig blob shape is irrelevant. A tag + /// seeded with a RAW protocol-driver config blob (Modbus-shaped, no FullName key) routes the + /// write to its owning child exactly like the Galaxy-style {FullName} blob does, because the + /// reverse map is built from the resolved FullName the composer projects, not the raw blob. + [Fact] + public void Primary_routes_write_for_raw_protocol_blob_tag() + { + var db = NewInMemoryDbFactory(); + var recorder = new RecordingDriverFactory("Modbus"); + // Seed the tag with a RAW protocol blob ({region/address/dataType}) instead of {FullName}; the + // composer still resolves a FullName, so the reverse map keys on that and the blob never matters. + var deploymentId = SeedDeploymentWithRawBlobTag(db, RevA, + equip: "eq-2", driver: "drv-2", fullName: "40002", name: "torque"); + + var actor = SpawnHostAndApply(db, deploymentId, recorder); + + // Local role unknown ⇒ treated as Primary ⇒ write allowed. + var asker = CreateTestProbe(); + actor.Tell(new DriverHostActor.RouteNodeWrite("eq-2/torque", 456.0), asker.Ref); + + var result = asker.ExpectMsg(Timeout); + result.Success.ShouldBeTrue(); + + // The write was forwarded to the owning child keyed by the resolved FullName, not the blob. + AwaitAssert(() => + { + recorder.Writes.Count.ShouldBe(1); + recorder.Writes[0].FullReference.ShouldBe("40002"); + recorder.Writes[0].Value.ShouldBe(456.0); + }, duration: Timeout); + } + + /// A RouteNodeWrite arriving while the host is Stale (config DB unreachable) must fast-fail + /// with an immediate negative NodeWriteResult (reason mentions "stale") instead of dead-lettering into + /// the node-manager's bounded-Ask timeout. Drives the host into Stale via a DB factory whose + /// CreateDbContext throws on bootstrap (the same fall-through to Become(Stale) production uses). + [Fact] + public void Stale_host_fast_fails_route_node_write() + { + // A factory that always throws on CreateDbContext ⇒ Bootstrap's try fails ⇒ Become(Stale). + var db = new ThrowingDbFactory(); + var coordinator = CreateTestProbe(); + var actor = Sys.ActorOf(DriverHostActor.Props( + db, TestNode, coordinator.Ref, + localRoles: new HashSet { "driver" })); + + var asker = CreateTestProbe(); + actor.Tell(new DriverHostActor.RouteNodeWrite("eq-1/speed", 123.0), asker.Ref); + + var result = asker.ExpectMsg(TimeSpan.FromSeconds(2)); + result.Success.ShouldBeFalse(); + result.Reason.ShouldNotBeNull(); + result.Reason!.ShouldContain("stale"); + } + /// Spawns the host with the recording driver factory, dispatches the deployment, and waits /// for the Applied ACK so the apply (and thus the reverse-map build in PushDesiredSubscriptions) has /// completed before the test routes a write. No OPC UA / mux probes are wired — this test exercises @@ -200,6 +255,81 @@ public sealed class DriverHostActorWriteRoutingTests : RuntimeActorTestBase return id; } + /// + /// Seeds a single-tag Sealed deployment exactly like , + /// except the tag's TagConfig is a RAW protocol-driver blob (Modbus-shaped: + /// {FullName, region, address, dataType}) instead of the bare Galaxy-style + /// {FullName} blob. The composer keys the reverse map purely on the blob's FullName + /// (ExtractTagFullName reads only that field), so the extra raw protocol keys alongside it + /// are irrelevant — proving routing is independent of the blob's broader shape. + /// + private static DeploymentId SeedDeploymentWithRawBlobTag( + IDbContextFactory db, RevisionHash rev, + string equip, string driver, string fullName, string name) + { + var artifact = JsonSerializer.SerializeToUtf8Bytes(new + { + Namespaces = new[] + { + new { NamespaceId = "ns-eq", Kind = 0 }, // NamespaceKind.Equipment = 0 + }, + DriverInstances = new[] + { + new + { + DriverInstanceRowId = Guid.NewGuid(), + DriverInstanceId = driver, + Name = driver, + DriverType = "Modbus", // not Windows-only ⇒ a real child is spawned (not stubbed) + Enabled = true, + DriverConfig = "{}", + NamespaceId = "ns-eq", + }, + }, + Tags = new[] + { + new + { + TagId = "tag-raw", + EquipmentId = equip, + DriverInstanceId = driver, + Name = name, + FolderPath = (string?)null, + DataType = "Double", + // RAW protocol-driver TagConfig: FullName alongside the actual Modbus wire fields + // (region/address/dataType), NOT the bare Galaxy {FullName} blob. The composer extracts + // only FullName, proving the extra protocol keys don't change routing. + TagConfig = JsonSerializer.Serialize( + new { FullName = fullName, region = "HoldingRegister", address = 200, dataType = "UInt16" }), + }, + }, + }); + + var id = DeploymentId.NewId(); + using var ctx = db.CreateDbContext(); + ctx.Deployments.Add(new Deployment + { + DeploymentId = id.Value, + RevisionHash = rev.Value, + Status = DeploymentStatus.Sealed, + CreatedBy = "test", + SealedAtUtc = DateTime.UtcNow, + ArtifactBlob = artifact, + }); + ctx.SaveChanges(); + return id; + } + + /// An whose CreateDbContext always throws, + /// driving 's bootstrap into the catchBecome(Stale) path + /// so a write can be routed at a Stale host. + private sealed class ThrowingDbFactory : IDbContextFactory + { + /// + public OtOpcUaConfigDbContext CreateDbContext() => + throw new InvalidOperationException("config DB unreachable (test stub)"); + } + /// Factory producing a single for the supported type, whose /// recorded write list is exposed for assertions. private sealed class RecordingDriverFactory : IDriverFactory