From 4de94fab0deb4913c6d78cc6c57774ac08a55a54 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 19 Apr 2026 12:31:24 -0400 Subject: [PATCH] =?UTF-8?q?Phase=206.1=20Stream=20A=20remaining=20?= =?UTF-8?q?=E2=80=94=20IPerCallHostResolver=20+=20DriverNodeManager=20per-?= =?UTF-8?q?call=20host=20dispatch=20(decision=20#144)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the per-device isolation gap flagged at the Phase 6.1 Stream A wire-up (PR #78 used driver.DriverInstanceId as the pipeline host for every call, so multi-host drivers like Modbus with N PLCs shared one pipeline — one dead PLC poisoned sibling breakers). Decision #144 requires per-device isolation; this PR wires it without breaking single-host drivers. Core.Abstractions: - IPerCallHostResolver interface. Optional driver capability. Drivers with multi-host topology (Modbus across N PLCs, AB CIP across a rack, etc.) implement this; single-host drivers (Galaxy, S7 against one PLC, OpcUaClient against one remote server) leave it alone. Must be fast + allocation-free — called once per tag on the hot path. Unknown refs return empty so dispatch falls back to single-host without throwing. Server/OpcUa/DriverNodeManager: - Captures `driver as IPerCallHostResolver` at construction alongside the existing capability casts. - New `ResolveHostFor(fullReference)` helper returns either the resolver's answer or the driver's DriverInstanceId (single-host fallback). Empty / whitespace resolver output also falls back to DriverInstanceId. - Every dispatch site now passes `ResolveHostFor(fullRef)` to the invoker instead of `_driver.DriverInstanceId` — OnReadValue, OnWriteValue, all four HistoryRead paths. The HistoryRead Events path tolerates fullRef=null and falls back to DriverInstanceId for those cluster-wide event queries. - Drivers without IPerCallHostResolver observe zero behavioural change: every call still keys on DriverInstanceId, same as before. Tests (4 new PerCallHostResolverDispatchTests, all pass): - DeadPlc_DoesNotOpenBreaker_For_HealthyPlc_With_Resolver — 2 PLCs behind one driver; hammer the dead PLC past its breaker threshold; assert the healthy PLC's first call succeeds on its first attempt (decision #144). - EmptyString / unknown-ref fallback behaviour documented via test. - WithoutResolver_SameHost_Shares_One_Pipeline — regression guard for the single-host pre-existing behaviour. - WithResolver_TwoHosts_Get_Two_Pipelines — builds the CachedPipelineCount assertion to confirm the shared-builder cache keys correctly. Full solution dotnet test: 1219 passing (was 1215, +4). Pre-existing Client.CLI Subscribe flake unchanged. Adoption: Modbus driver (#120 follow-up), AB CIP / AB Legacy / TwinCAT drivers (also #120) implement the interface and return the per-tag PLC host string. Single-host drivers stay silent and pay zero cost. Remaining sub-items of #160 still deferred: - IAlarmSource.SubscribeAlarmsAsync + AcknowledgeAsync invoker wrapping. Non-trivial because alarm subscription is push-based from driver through IAlarmConditionSink — the wrap has to happen at the driver-to-server glue rather than a synchronous dispatch site. - Roslyn analyzer asserting every capability-interface call routes through CapabilityInvoker. Substantial (separate analyzer project + test harness); noise-value ratio favors shipping this post-v2-GA once the coverage is known-stable. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../IPerCallHostResolver.cs | 34 ++++++ .../OpcUa/DriverNodeManager.cs | 29 ++++- .../PerCallHostResolverDispatchTests.cs | 110 ++++++++++++++++++ 3 files changed, 167 insertions(+), 6 deletions(-) create mode 100644 src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IPerCallHostResolver.cs create mode 100644 tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Resilience/PerCallHostResolverDispatchTests.cs diff --git a/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IPerCallHostResolver.cs b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IPerCallHostResolver.cs new file mode 100644 index 0000000..d250ce9 --- /dev/null +++ b/src/ZB.MOM.WW.OtOpcUa.Core.Abstractions/IPerCallHostResolver.cs @@ -0,0 +1,34 @@ +namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +/// +/// Optional driver capability that maps a per-tag full reference to the underlying host +/// name responsible for serving it. Drivers with a one-host topology (Galaxy on one +/// MXAccess endpoint, OpcUaClient against one remote server, S7 against one PLC) do NOT +/// need to implement this — the dispatch layer falls back to +/// as a single-host key. +/// +/// +/// Multi-host drivers (Modbus with N PLCs, hypothetical AB CIP across a rack, etc.) +/// implement this so the Phase 6.1 resilience pipeline can be keyed on +/// (DriverInstanceId, ResolvedHostName, DriverCapability) per decision #144. One +/// dead PLC behind a multi-device Modbus driver then trips only its own breaker; healthy +/// siblings keep serving. +/// +/// Implementations must be fast + allocation-free on the hot path — ReadAsync +/// / WriteAsync call this once per tag. A simple Dictionary<string, string> +/// lookup is typical. +/// +/// When the fullRef doesn't map to a known host (caller passes an unregistered +/// reference, or the tag was removed mid-flight), implementations should return the +/// driver's default-host string rather than throwing — the invoker falls back to a +/// single-host pipeline for that call, which is safer than tearing down the request. +/// +public interface IPerCallHostResolver +{ + /// + /// Resolve the host name for the given driver-side full reference. Returned value is + /// used as the hostName argument to the Phase 6.1 CapabilityInvoker so + /// per-host breaker isolation + per-host bulkhead accounting both kick in. + /// + string ResolveHost(string fullReference); +} diff --git a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs index 7dd64ec..27cefe5 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Server/OpcUa/DriverNodeManager.cs @@ -35,6 +35,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder private readonly IDriver _driver; private readonly IReadable? _readable; private readonly IWritable? _writable; + private readonly IPerCallHostResolver? _hostResolver; private readonly CapabilityInvoker _invoker; private readonly ILogger _logger; @@ -75,6 +76,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder _driver = driver; _readable = driver as IReadable; _writable = driver as IWritable; + _hostResolver = driver as IPerCallHostResolver; _invoker = invoker; _authzGate = authzGate; _scopeResolver = scopeResolver; @@ -83,6 +85,21 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder protected override NodeStateCollection LoadPredefinedNodes(ISystemContext context) => new(); + /// + /// Resolve the host name fed to the Phase 6.1 CapabilityInvoker for a per-tag call. + /// Multi-host drivers that implement get their + /// per-PLC isolation (decision #144); single-host drivers + drivers that don't + /// implement the resolver fall back to the DriverInstanceId — preserves existing + /// Phase 6.1 pipeline-key semantics for those drivers. + /// + private string ResolveHostFor(string fullReference) + { + if (_hostResolver is null) return _driver.DriverInstanceId; + + var resolved = _hostResolver.ResolveHost(fullReference); + return string.IsNullOrWhiteSpace(resolved) ? _driver.DriverInstanceId : resolved; + } + public override void CreateAddressSpace(IDictionary> externalReferences) { lock (Lock) @@ -224,7 +241,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder var result = _invoker.ExecuteAsync( DriverCapability.Read, - _driver.DriverInstanceId, + ResolveHostFor(fullRef), async ct => (IReadOnlyList)await _readable.ReadAsync([fullRef], ct).ConfigureAwait(false), CancellationToken.None).AsTask().GetAwaiter().GetResult(); if (result.Count == 0) @@ -439,7 +456,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder var isIdempotent = _writeIdempotentByFullRef.GetValueOrDefault(fullRef!, false); var capturedValue = value; var results = _invoker.ExecuteWriteAsync( - _driver.DriverInstanceId, + ResolveHostFor(fullRef!), isIdempotent, async ct => (IReadOnlyList)await _writable.WriteAsync( [new DriverWriteRequest(fullRef!, capturedValue)], @@ -538,7 +555,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder { var driverResult = _invoker.ExecuteAsync( DriverCapability.HistoryRead, - _driver.DriverInstanceId, + ResolveHostFor(fullRef), async ct => await History.ReadRawAsync( fullRef, details.StartTime, @@ -612,7 +629,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder { var driverResult = _invoker.ExecuteAsync( DriverCapability.HistoryRead, - _driver.DriverInstanceId, + ResolveHostFor(fullRef), async ct => await History.ReadProcessedAsync( fullRef, details.StartTime, @@ -679,7 +696,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder { var driverResult = _invoker.ExecuteAsync( DriverCapability.HistoryRead, - _driver.DriverInstanceId, + ResolveHostFor(fullRef), async ct => await History.ReadAtTimeAsync(fullRef, requestedTimes, ct).ConfigureAwait(false), CancellationToken.None).AsTask().GetAwaiter().GetResult(); @@ -749,7 +766,7 @@ public sealed class DriverNodeManager : CustomNodeManager2, IAddressSpaceBuilder { var driverResult = _invoker.ExecuteAsync( DriverCapability.HistoryRead, - _driver.DriverInstanceId, + fullRef is null ? _driver.DriverInstanceId : ResolveHostFor(fullRef), async ct => await History.ReadEventsAsync( sourceName: fullRef, startUtc: details.StartTime, diff --git a/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Resilience/PerCallHostResolverDispatchTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Resilience/PerCallHostResolverDispatchTests.cs new file mode 100644 index 0000000..f48a074 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Core.Tests/Resilience/PerCallHostResolverDispatchTests.cs @@ -0,0 +1,110 @@ +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; +using ZB.MOM.WW.OtOpcUa.Core.Resilience; + +namespace ZB.MOM.WW.OtOpcUa.Core.Tests.Resilience; + +/// +/// Exercises the per-call host resolver contract against the shared +/// + — one +/// dead PLC behind a multi-device driver must NOT open the breaker for healthy sibling +/// PLCs (decision #144). +/// +[Trait("Category", "Unit")] +public sealed class PerCallHostResolverDispatchTests +{ + private sealed class StaticResolver : IPerCallHostResolver + { + private readonly Dictionary _map; + public StaticResolver(Dictionary map) => _map = map; + public string ResolveHost(string fullReference) => + _map.TryGetValue(fullReference, out var host) ? host : string.Empty; + } + + [Fact] + public async Task DeadPlc_DoesNotOpenBreaker_For_HealthyPlc_With_Resolver() + { + // Two PLCs behind one driver. Dead PLC keeps failing; healthy PLC must keep serving. + var builder = new DriverResiliencePipelineBuilder(); + var options = new DriverResilienceOptions { Tier = DriverTier.B }; + var invoker = new CapabilityInvoker(builder, "drv-modbus", () => options); + + var resolver = new StaticResolver(new Dictionary + { + ["tag-on-dead"] = "plc-dead", + ["tag-on-alive"] = "plc-alive", + }); + + var threshold = options.Resolve(DriverCapability.Read).BreakerFailureThreshold; + for (var i = 0; i < threshold + 3; i++) + { + await Should.ThrowAsync(async () => + await invoker.ExecuteAsync( + DriverCapability.Read, + hostName: resolver.ResolveHost("tag-on-dead"), + _ => throw new InvalidOperationException("plc-dead unreachable"), + CancellationToken.None)); + } + + // Healthy PLC's pipeline is in a different bucket; the first call should succeed + // without hitting the dead-PLC breaker. + var aliveAttempts = 0; + await invoker.ExecuteAsync( + DriverCapability.Read, + hostName: resolver.ResolveHost("tag-on-alive"), + _ => { aliveAttempts++; return ValueTask.FromResult("ok"); }, + CancellationToken.None); + + aliveAttempts.ShouldBe(1, "decision #144 — per-PLC isolation keeps healthy PLCs serving"); + } + + [Fact] + public void Resolver_EmptyString_Treated_As_Single_Host_Fallback() + { + var resolver = new StaticResolver(new Dictionary + { + ["tag-unknown"] = "", + }); + + resolver.ResolveHost("tag-unknown").ShouldBe(""); + resolver.ResolveHost("not-in-map").ShouldBe("", "unknown refs return empty so dispatch falls back to single-host"); + } + + [Fact] + public async Task WithoutResolver_SameHost_Shares_One_Pipeline() + { + // Without a resolver all calls share the DriverInstanceId pipeline — that's the + // pre-decision-#144 behavior single-host drivers should keep. + var builder = new DriverResiliencePipelineBuilder(); + var options = new DriverResilienceOptions { Tier = DriverTier.A }; + var invoker = new CapabilityInvoker(builder, "drv-single", () => options); + + await invoker.ExecuteAsync(DriverCapability.Read, "drv-single", + _ => ValueTask.FromResult("a"), CancellationToken.None); + await invoker.ExecuteAsync(DriverCapability.Read, "drv-single", + _ => ValueTask.FromResult("b"), CancellationToken.None); + + builder.CachedPipelineCount.ShouldBe(1, "single-host drivers share one pipeline"); + } + + [Fact] + public async Task WithResolver_TwoHosts_Get_Two_Pipelines() + { + var builder = new DriverResiliencePipelineBuilder(); + var options = new DriverResilienceOptions { Tier = DriverTier.B }; + var invoker = new CapabilityInvoker(builder, "drv-modbus", () => options); + var resolver = new StaticResolver(new Dictionary + { + ["tag-a"] = "plc-a", + ["tag-b"] = "plc-b", + }); + + await invoker.ExecuteAsync(DriverCapability.Read, resolver.ResolveHost("tag-a"), + _ => ValueTask.FromResult(1), CancellationToken.None); + await invoker.ExecuteAsync(DriverCapability.Read, resolver.ResolveHost("tag-b"), + _ => ValueTask.FromResult(2), CancellationToken.None); + + builder.CachedPipelineCount.ShouldBe(2, "each host keyed on its own pipeline"); + } +} -- 2.49.1