diff --git a/code-reviews/Driver.OpcUaClient/findings.md b/code-reviews/Driver.OpcUaClient/findings.md index 9bd42d03..da89ae68 100644 --- a/code-reviews/Driver.OpcUaClient/findings.md +++ b/code-reviews/Driver.OpcUaClient/findings.md @@ -4,8 +4,8 @@ |---|---| | Module | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient` | | Reviewer | Claude Code | -| Review date | 2026-05-22 | -| Commit reviewed | `76d35d1` | +| Review date | 2026-06-19 | +| Commit reviewed | `7286d320` | | Status | Reviewed | | Open findings | 0 | @@ -250,3 +250,43 @@ **Recommendation:** Add tests exercising the reconnect callbacks with a stub session (success and give-up cases), a browse test with a paged/continuation-point server stub, and a read-batch test asserting upstream Bad StatusCodes pass through verbatim while a transport throw fans out the local fault code. **Resolution:** Resolved 2026-05-22 — Added `OpcUaClientMediumFindingsRegressionTests.cs` covering: (1) BadTimeout vs BadCommunicationError status-code distinction for the write-timeout path (Driver.OpcUaClient-009); (2) Byte→UInt16 mapping regression (Driver.OpcUaClient-010); (3) AutoAcceptCertificates warning log assertion (Driver.OpcUaClient-012); (4) GetMemoryFootprint/FlushOptionalCachesAsync contract (Driver.OpcUaClient-013); (5) MapSeverity thresholds, pre-init health, Session null pre-init, GetHostStatuses contract. Wire-level reconnect callback tests remain fixture-gated pending the in-process OPC UA server fixture. + +--- + +## Re-review 2026-06-19 (commit 7286d320) + +#### Checklist coverage + +| # | Category | Result | +|---|---|---| +| 1 | Correctness & logic bugs | No new issues found | +| 2 | OtOpcUa conventions | No issues found | +| 3 | Concurrency & thread safety | No new issues found | +| 4 | Error handling & resilience | Driver.OpcUaClient-016 (continuation-point leak on cancellation) | +| 5 | Security | No new issues found | +| 6 | Performance & resource management | Driver.OpcUaClient-016 (server-side cursor held open) | +| 7 | Design-document adherence | No new issues found | +| 8 | Code organization & conventions | No issues found | +| 9 | Testing coverage | Driver.OpcUaClient-016 fixed with two new regression tests | +| 10 | Documentation & comments | No new issues found | + +#### Browser-002 verdict + +The Browser subagent (Driver.OpcUaClient.Browser-002) noted "the identical pattern exists in the runtime driver (`OpcUaClientDriver.cs`)" and deferred cross-cutting resolution here. Evaluated and confirmed: `BrowseRecursiveAsync` lines 934-948 at this commit contain the exact same continuation-point leak on `OperationCanceledException`. Self-contained fix is recorded as Driver.OpcUaClient-016 below and applied with TDD. + +### Driver.OpcUaClient-016 + +| Field | Value | +|---|---| +| Severity | Low | +| Category | Performance & resource management / Error handling & resilience | +| Location | `src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs:934-948` | +| Status | Resolved | + +**Description:** When the caller's `CancellationToken` fires during the `BrowseNextAsync` pagination loop inside `BrowseRecursiveAsync`, the `OperationCanceledException` propagates out of the while-loop and is caught by the outer `catch` (line 950) which silently swallows it. No call to `BrowseNextAsync(releaseContinuationPoints: true)` is made, so the server holds the browse cursor open until the session closes or a server-side timeout fires. For long-lived discovery sessions against large remote servers this represents a server-side resource leak that can accumulate across multiple cancelled discovery passes. Additionally, cancellation was silently indistinguishable from a transient browse failure: callers could not observe that their `CancellationToken` was respected. + +This is the same pattern the Browser subagent recorded as Driver.OpcUaClient.Browser-002, which noted it also exists in this runtime pagination loop. + +**Recommendation:** Catch `OperationCanceledException` specifically inside the while-loop, issue a fire-and-forget `BrowseNextAsync(releaseContinuationPoints: true, ct: CancellationToken.None)` to release the cursor before rethrowing. Change the outer catch to propagate `OperationCanceledException` rather than swallowing it, so callers can observe cancelled discovery. + +**Resolution:** Resolved 2026-06-19 (SHA pending commit) — Added an inner `catch (OperationCanceledException)` in the pagination while-loop that releases the server-side continuation point via `BrowseNextAsync(releaseContinuationPoints: true, ct: CancellationToken.None)` then rethrows; changed the outer catch to a `catch (OperationCanceledException) { throw; }` / `catch { return; }` pair so cancellation propagates out of `DiscoverAsync` while transient browse sub-tree failures continue to be silently skipped. Two regression tests added to `OpcUaClientContinuationPointReleaseTests.cs`: `DiscoverAsync_releases_continuation_point_when_cancelled_mid_pagination` (verifies `BrowseNextAsync(release=true)` is called once on cancellation) and `DiscoverAsync_does_not_release_continuation_point_on_non_cancel_browse_failure` (verifies no spurious release on transport error). diff --git a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs index 78b06030..ee2efaa7 100644 --- a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs +++ b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs @@ -934,11 +934,34 @@ public sealed class OpcUaClientDriver : IDriver, ITagDiscovery, IReadable, IWrit var continuationPoint = result.ContinuationPoint; while (continuationPoint is { Length: > 0 }) { - var next = await session.BrowseNextAsync( - requestHeader: null, - releaseContinuationPoints: false, - continuationPoints: [continuationPoint], - ct: ct).ConfigureAwait(false); + BrowseNextResponse next; + try + { + next = await session.BrowseNextAsync( + requestHeader: null, + releaseContinuationPoints: false, + continuationPoints: [continuationPoint], + ct: ct).ConfigureAwait(false); + } + catch (OperationCanceledException) + { + // Caller cancelled mid-pagination. Release the server-side cursor so it + // doesn't hold resources until session close (Driver.OpcUaClient-016; + // same pattern as Driver.OpcUaClient.Browser-002). Use CancellationToken.None + // for the release — the original token is already cancelled so we must not + // gate the release on it. Fire-and-forget: if the server is also unreachable, + // the release fails silently and the cursor times out server-side anyway. + try + { + await session.BrowseNextAsync( + requestHeader: null, + releaseContinuationPoints: true, + continuationPoints: [continuationPoint], + ct: CancellationToken.None).ConfigureAwait(false); + } + catch { /* best-effort — server may have already cleaned up */ } + throw; + } if (next.Results.Count == 0) break; var nextResult = next.Results[0]; @@ -947,6 +970,12 @@ public sealed class OpcUaClientDriver : IDriver, ITagDiscovery, IReadable, IWrit continuationPoint = nextResult.ContinuationPoint; } } + catch (OperationCanceledException) + { + // Propagate cancellation — don't silently swallow it like a transient browse + // failure. The caller's CancellationToken was honoured; let it observe that. + throw; + } catch { // Transient browse failure on a sub-tree — don't kill the whole discovery, just diff --git a/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientContinuationPointReleaseTests.cs b/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientContinuationPointReleaseTests.cs new file mode 100644 index 00000000..3abc109d --- /dev/null +++ b/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientContinuationPointReleaseTests.cs @@ -0,0 +1,226 @@ +using Moq; +using Opc.Ua; +using Opc.Ua.Client; +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Core.Abstractions; + +namespace ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests; + +/// +/// Regression coverage for Driver.OpcUaClient-016: when the caller cancels during +/// the BrowseNextAsync pagination loop inside BrowseRecursiveAsync, the +/// server-side continuation point must be released via +/// BrowseNextAsync(releaseContinuationPoints: true) before the +/// propagates. Without the release the server +/// retains the cursor open until the session is closed (a resource leak). This is the +/// same pattern the Browser subagent recorded as Driver.OpcUaClient.Browser-002 and +/// noted also exists in the runtime pagination loop. +/// +[Trait("Category", "Unit")] +public sealed class OpcUaClientContinuationPointReleaseTests +{ + // A fake non-empty continuation point so the pagination loop can be triggered. + private static readonly byte[] FakeContinuationPoint = [0x01, 0x02, 0x03]; + + /// + /// Builds a loose mock where: + /// + /// BrowseAsync returns a result with one Object-class child and a + /// non-empty continuation point so the pagination loop is entered. + /// BrowseNextAsync(releaseContinuationPoints: false) throws + /// — simulating cancellation mid-page. + /// BrowseNextAsync(releaseContinuationPoints: true) returns an empty + /// response — the release call the fixed driver should issue before rethrowing. + /// + /// + private static Mock BuildCancellingSessionMock() + { + var mock = new Mock(MockBehavior.Loose); + mock.SetupGet(s => s.MessageContext).Returns(new ServiceMessageContext()); + mock.SetupGet(s => s.NamespaceUris).Returns(new NamespaceTable()); + + // BrowseAsync — return one Object child plus a non-empty continuation point so + // the pagination loop is entered. The child node is irrelevant; the loop is + // what matters. + var firstRef = new ReferenceDescription + { + NodeId = new ExpandedNodeId(new NodeId(1000, 2), "opc.tcp://test"), + BrowseName = new QualifiedName("TestObject", 2), + DisplayName = new LocalizedText("TestObject"), + NodeClass = NodeClass.Object, + }; + var browseResult = new BrowseResult + { + ContinuationPoint = FakeContinuationPoint, + References = new ReferenceDescriptionCollection { firstRef }, + }; + var browseResp = new BrowseResponse + { + ResponseHeader = new ResponseHeader(), + Results = new BrowseResultCollection { browseResult }, + DiagnosticInfos = new DiagnosticInfoCollection(), + }; + mock.Setup(s => s.BrowseAsync( + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny())) + .ReturnsAsync(browseResp); + + // BrowseNextAsync(release=false) — pagination call: throw cancellation. + mock.Setup(s => s.BrowseNextAsync( + It.IsAny(), + It.Is(release => !release), + It.IsAny(), + It.IsAny())) + .ThrowsAsync(new OperationCanceledException("ct cancelled during BrowseNext")); + + // BrowseNextAsync(release=true) — the release call the fixed driver must make. + // BrowseNextResponse.Results shares BrowseResult (same as BrowseResponse.Results); + // there is no separate BrowseNextResult type in the OPC UA SDK. + var emptyReleaseResult = new BrowseResult + { + ContinuationPoint = Array.Empty(), + References = new ReferenceDescriptionCollection(), + }; + var releaseResp = new BrowseNextResponse + { + ResponseHeader = new ResponseHeader(), + Results = new BrowseResultCollection { emptyReleaseResult }, + DiagnosticInfos = new DiagnosticInfoCollection(), + }; + mock.Setup(s => s.BrowseNextAsync( + It.IsAny(), + It.Is(release => release), + It.IsAny(), + It.IsAny())) + .ReturnsAsync(releaseResp); + + return mock; + } + + /// + /// When the caller's fires mid-pagination, + /// DiscoverAsync must call BrowseNextAsync(releaseContinuationPoints: true) + /// before the exception propagates, so the server-side cursor is freed rather than + /// held open until session close. + /// + [Fact] + public async Task DiscoverAsync_releases_continuation_point_when_cancelled_mid_pagination() + { + var ct = TestContext.Current.CancellationToken; + using var drv = new OpcUaClientDriver(new OpcUaClientDriverOptions(), "opcua-cp-release"); + + var sessionMock = BuildCancellingSessionMock(); + drv.SetSessionForTest(sessionMock.Object); + + var builder = new NullAddressSpaceBuilder(); + + // DiscoverAsync should throw (OperationCanceledException propagates after release), + // but the release BrowseNextAsync(release=true) must be called first. + await Should.ThrowAsync(async () => + await drv.DiscoverAsync(builder, ct)); + + // Verify the release call was made exactly once with releaseContinuationPoints=true. + sessionMock.Verify(s => s.BrowseNextAsync( + It.IsAny(), + It.Is(release => release), + It.IsAny(), + It.IsAny()), + Times.Once, + "DiscoverAsync must release the server-side continuation point on cancellation " + + "(Driver.OpcUaClient-016 / Browser-002 cross-module finding)"); + } + + /// + /// Transient browse failure (non-cancellation exception) must NOT attempt a release — + /// the server already cleaned up (the request never reached the page-fetch state), and + /// calling BrowseNext on a bad continuation point would add noise to the server log. + /// BrowseRecursiveAsync's existing catch swallows transient failures silently; + /// we should not change that behaviour. + /// + [Fact] + public async Task DiscoverAsync_does_not_release_continuation_point_on_non_cancel_browse_failure() + { + var ct = TestContext.Current.CancellationToken; + using var drv = new OpcUaClientDriver(new OpcUaClientDriverOptions(), "opcua-cp-norelease"); + + var mock = new Mock(MockBehavior.Loose); + mock.SetupGet(s => s.MessageContext).Returns(new ServiceMessageContext()); + mock.SetupGet(s => s.NamespaceUris).Returns(new NamespaceTable()); + + // BrowseAsync — return a result with a continuation point so the loop is entered. + var firstRef = new ReferenceDescription + { + NodeId = new ExpandedNodeId(new NodeId(1001, 2), "opc.tcp://test"), + BrowseName = new QualifiedName("TestObj2", 2), + DisplayName = new LocalizedText("TestObj2"), + NodeClass = NodeClass.Object, + }; + var browseResult = new BrowseResult + { + ContinuationPoint = FakeContinuationPoint, + References = new ReferenceDescriptionCollection { firstRef }, + }; + mock.Setup(s => s.BrowseAsync( + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny())) + .ReturnsAsync(new BrowseResponse + { + ResponseHeader = new ResponseHeader(), + Results = new BrowseResultCollection { browseResult }, + DiagnosticInfos = new DiagnosticInfoCollection(), + }); + + // BrowseNextAsync — throw a non-cancellation transport error. + mock.Setup(s => s.BrowseNextAsync( + It.IsAny(), + It.Is(release => !release), + It.IsAny(), + It.IsAny())) + .ThrowsAsync(new ServiceResultException(StatusCodes.BadCommunicationError)); + + drv.SetSessionForTest(mock.Object); + + // DiscoverAsync catches the transient failure and continues (returns normally since + // the root sub-tree failure just skips that branch). + await drv.DiscoverAsync(new NullAddressSpaceBuilder(), ct); + + // The release call must NOT be made for a transport error — only for cancellation. + mock.Verify(s => s.BrowseNextAsync( + It.IsAny(), + It.Is(release => release), + It.IsAny(), + It.IsAny()), + Times.Never, + "A transient transport failure must not trigger a release BrowseNext call"); + } + + /// Minimal no-op address space builder for discovery tests. + private sealed class NullAddressSpaceBuilder : IAddressSpaceBuilder + { + /// + public IAddressSpaceBuilder Folder(string browseName, string displayName) => this; + + /// + public IVariableHandle Variable(string browseName, string displayName, DriverAttributeInfo attributeInfo) + => new StubHandle(); + + /// + public void AddProperty(string browseName, DriverDataType dataType, object? value) { } + + /// + public void AttachAlarmCondition(IVariableHandle sourceVariable, string alarmName, DriverAttributeInfo alarmInfo) { } + + private sealed class StubHandle : IVariableHandle + { + public string FullReference => "stub"; + public IAlarmConditionSink MarkAsAlarmCondition(AlarmConditionInfo info) => throw new NotSupportedException(); + } + } +}