From 94eec70fb0fca03ac8cdea0aa614c6d1e98a5a03 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sat, 20 Jun 2026 22:53:29 -0400 Subject: [PATCH] fix(code-review): resolve Batch 3 wave A (OpcUaServer history/guard, ControlPlane topology gate) - OpcUaServer-002: HistoryRead-Events NumValuesPerNode==0 now maps to unbounded (int.MaxValue) instead of the backend default-cap sentinel; no Core.Abstractions contract change (+EventMaxEvents helper tests) - OpcUaServer-004: EnsureAddressSpaceCreated guard on public mutators -> clear InvalidOperationException instead of bare NRE if called pre-start (+tests) - OpcUaServer-003: Deferred (endUtc inclusive/exclusive needs live Wonderware boundary confirmation) - Configuration-013: wire DraftValidator.ValidateClusterTopology into AdminOperationsActor deploy gate (read-only, no migration) (+2 tests) --- code-reviews/Configuration/findings.md | 6 +- code-reviews/OpcUaServer/findings.md | 47 +++++- .../AdminOperations/AdminOperationsActor.cs | 30 +++- .../OtOpcUaNodeManager.cs | 43 +++++- .../AdminOperationsActorTests.cs | 117 +++++++++++++++ .../NodeManagerEventMaxEventsTests.cs | 45 ++++++ .../NodeManagerHistoryReadEventsTests.cs | 38 +++++ .../NodeManagerPreStartGuardTests.cs | 142 ++++++++++++++++++ 8 files changed, 455 insertions(+), 13 deletions(-) create mode 100644 tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerEventMaxEventsTests.cs create mode 100644 tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerPreStartGuardTests.cs diff --git a/code-reviews/Configuration/findings.md b/code-reviews/Configuration/findings.md index 24aac669..bc77cb65 100644 --- a/code-reviews/Configuration/findings.md +++ b/code-reviews/Configuration/findings.md @@ -7,7 +7,7 @@ | Review date | 2026-06-19 (re-review; first reviewed 2026-05-22) | | Commit reviewed | `7286d320` (re-review; was `76d35d1`) | | Status | Reviewed | -| Open findings | 1 | +| Open findings | 0 | ## Checklist coverage @@ -232,13 +232,13 @@ Prior findings Configuration-001…011 remain Resolved. Notable since the first | Severity | Medium | | Category | Design-document adherence | | Location | `src/Core/ZB.MOM.WW.OtOpcUa.Configuration/Validation/DraftValidator.cs:243` (`ValidateClusterTopology`) | -| Status | Open | +| Status | Resolved | **Description:** `DraftValidator.ValidateClusterTopology` is documented as the managed pre-publish guard that catches cluster-topology drift the SQL `CK_ServerCluster_RedundancyMode_NodeCount` check cannot see — specifically an operator disabling a `ClusterNode` (effective enabled-count = 1) while `RedundancyMode` stays `Hot`/`Warm`, which would boot the runtime into an invalid-topology band. It is fully unit-tested (`DraftValidatorTests` §"ValidateClusterTopology") but **no production code calls it.** The deploy gate in `AdminOperationsActor.StartDeployment` runs `DraftValidator.Validate(...)` (the snapshot rules) but never `ValidateClusterTopology(...)`, so the documented enabled-node-count guard is inert at deploy time — the only thing standing is the row-level SQL CHECK, which the doc explicitly says is insufficient. **Recommendation:** Wire `ValidateClusterTopology` into the deploy/publish path — load the `ServerCluster` row(s) + their `ClusterNode`s and run it alongside `Validate`, folding its errors into the same reject summary. The fix belongs in `src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs` (a different module), so it is **deferred from this module's edit scope** and recorded here against the now-dead Configuration-layer method. Cross-module: ControlPlane. -**Resolution:** _(open — fix is in the ControlPlane module's `AdminOperationsActor`, outside Configuration's edit scope)_ +**Resolution:** Resolved 2026-06-20 — wired `DraftValidator.ValidateClusterTopology` into the deploy gate in the ControlPlane module's `AdminOperationsActor.HandleStartDeploymentAsync` (`src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs`). Immediately after the existing `DraftValidator.Validate(draft)` call, the handler now loads the `ServerCluster` rows (ClusterId-ordered for a deterministic summary) and their `ClusterNode`s from the **same** `db` context already open in the handler — read-only via `AsNoTracking()`, no second DbContext lifetime, no schema/migration or entity change — groups the nodes by `ClusterId`, and runs `ValidateClusterTopology(cluster, nodes)` per cluster. Its errors are appended to the SAME error list (`Validate(...)` now collected into a `List`), so a deploy failing either the snapshot rules or the topology guard is rejected with both sets of messages folded into the single reject summary string; ordering stays deterministic (snapshot rules first, then per-cluster topology errors in ClusterId order). The previously-inert enabled-node-count guard (e.g. `RedundancyMode = Hot` with one `ClusterNode` toggled off, effective enabled-count = 1) now rejects at deploy time rather than relying solely on the row-level SQL CHECK the doc says is insufficient. New ControlPlane tests `AdminOperationsActorTests.StartDeployment_rejects_on_invalid_cluster_topology_disabled_node` (Hot + one disabled node → `Rejected` with `ClusterEnabledNodeCountMismatch`, no coordinator dispatch, no Deployment row) and `StartDeployment_accepts_when_cluster_topology_is_valid` (Hot + two enabled nodes → `Accepted`, no topology error, row inserted) pin the wiring; the rejecting test was confirmed red against the unwired handler before the fix. ControlPlane.Tests 62/62 green; the existing `DraftValidatorTests` §"ValidateClusterTopology" (Configuration.Tests 103/103) unchanged and still green. ### Configuration-014 diff --git a/code-reviews/OpcUaServer/findings.md b/code-reviews/OpcUaServer/findings.md index 1caf9fc0..857d4a35 100644 --- a/code-reviews/OpcUaServer/findings.md +++ b/code-reviews/OpcUaServer/findings.md @@ -7,7 +7,7 @@ | Review date | 2026-06-19 | | Commit reviewed | `7286d320` | | Status | Reviewed | -| Open findings | 4 | +| Open findings | 1 | ## Checklist coverage @@ -72,7 +72,7 @@ which is outside this module's edit boundary. | Severity | Medium | | Category | Correctness & logic bugs | | Location | `OtOpcUaNodeManager.cs:1748` (`HistoryReadEvents`), `OtOpcUaNodeManager.cs:1814` (`ClampToInt`) | -| Status | Open | +| Status | Resolved | **Description:** For HistoryRead-Events, `HistoryReadEvents` passes `ClampToInt(details.NumValuesPerNode)` to `IHistorianDataSource.ReadEventsAsync(maxEvents)` and @@ -93,7 +93,21 @@ backend truncation and surface a continuation point / `GoodMoreData` for events. event backends (cross-module, Core.Abstractions contract); option (b) requires the backend to report truncation. Both cross this module's boundary. -**Resolution:** _(Open — deferred: rooted in the cross-module `IHistoryProvider.ReadEventsAsync` `maxEvents <= 0` sentinel contract (Core.Abstractions-006) and the Wonderware/OpcUaClient event backends; cannot be fixed safely inside OpcUaServer alone.)_ +**Resolution:** Resolved — 2026-06-20 (SHA pending): fixed locally inside OpcUaServer without touching +the cross-module `IHistoryProvider.ReadEventsAsync` `maxEvents <= 0` sentinel. Added a small testable +`internal static int EventMaxEvents(uint numValuesPerNode)` helper next to `ClampToInt` that translates +the OPC UA Part 4/11 "no limit" request (`NumValuesPerNode == 0`) to UNBOUNDED (`int.MaxValue`, a very +large positive cap) rather than the backend's `<= 0` "use the default cap" sentinel; a positive value +still passes through `ClampToInt` unchanged. `HistoryReadEvents` now calls `EventMaxEvents(details.NumValuesPerNode)` +instead of `ClampToInt(details.NumValuesPerNode)`, so a "give me the whole window" events read is no +longer silently truncated at the backend default. The sentinel contract + the Wonderware/OpcUaClient +backends are untouched (a positive `int.MaxValue` is never the `<= 0` sentinel). Tests: +`NodeManagerEventMaxEventsTests` (helper purity — `0u→int.MaxValue`, normal passthrough, +`>int.MaxValue→int.MaxValue` clamp, exact-`int.MaxValue` boundary) plus +`NodeManagerHistoryReadEventsTests.Events_unbounded_request_passes_int_max_to_backend` (the recording fake +`IHistorianDataSource` receives `int.MaxValue` when `NumValuesPerNode == 0`). Note: option (b) — surfacing +a continuation point / `GoodMoreData` on backend truncation — remains a cross-module/backend change and is +out of scope; option (a) here removes the silent-truncation defect for the common "all events" request. ### OpcUaServer-003 @@ -102,7 +116,7 @@ truncation. Both cross this module's boundary. | Severity | Low | | Category | Correctness & logic bugs | | Location | `OtOpcUaNodeManager.cs:1978` (`ServeRawPaged`), `HistoryPaging.cs` (whole), `HistoryPaging.cs:213` (`SliceTieCluster` `next <= endUtc`) | -| Status | Open | +| Status | Deferred | **Description:** The Raw paging chain treats `endUtc` as an **inclusive** upper bound throughout — the `HistoryContinuationState`/`HistoryPaging` XML docs all say "the original (inclusive) end of @@ -126,7 +140,14 @@ the inclusive/exclusive question requires confirming the Wonderware backend's ac semantics (cross-module / infra), and changing a comparison without that confirmation risks the opposite off-by-one. -**Resolution:** _(Open — deferred: needs the backend's authoritative endUtc boundary semantics confirmed before the comparison/doc is changed; flipping it blindly risks an off-by-one in the other direction.)_ +**Resolution:** Deferred — 2026-06-20: infra-gated. Resolving the `endUtc` inclusive-vs-exclusive +disagreement requires confirming the actual Wonderware historian backend's boundary semantics, which is +hardware/infra-gated and not reachable from this macOS dev host. The impact is benign and bounded — because +the backend is the authority on which samples exist (a sample at exactly `endUtc` never appears in an +exclusive-end read), the disagreement only ever yields ONE extra empty resume page (`[endUtc, endUtc)` → +GoodNoData, no continuation point) rather than any duplicated or dropped data. Changing the +`SliceTieCluster` comparison / paging XML docs without confirming the live backend boundary risks +introducing the opposite off-by-one, so no code is changed here pending that live confirmation. ### OpcUaServer-004 @@ -135,7 +156,7 @@ opposite off-by-one. | Severity | Low | | Category | Error handling & resilience | | Location | `OtOpcUaNodeManager.cs:1597` (`ResolveParentFolder`), and every public sink mutator that calls it (`EnsureFolder` 1278, `EnsureVariable` 1335, `MaterialiseAlarmCondition` 597, plus `WriteValue`/`WriteAlarmCondition` `CreateVariable`) | -| Status | Open | +| Status | Resolved | **Description:** `ResolveParentFolder` dereferences `_root!` with the null-forgiving operator, and `CreateVariable` uses `_root` (`AddChild`). `_root` is only assigned in `CreateAddressSpace`, which @@ -153,7 +174,19 @@ mutators, so a too-early call fails legibly instead of with a bare NRE. Low prio hardening, not a live defect. Left Open to avoid an unscoped change to the mutator entry points on this critical class without a regression scenario that reproduces the early-call ordering. -**Resolution:** _(Open — defensive-only; latent given current boot ordering. Deferred to avoid an unscoped guard-add across five mutators without a reproducing pre-start ordering scenario.)_ +**Resolution:** Resolved — 2026-06-20 (SHA pending): added a private `EnsureAddressSpaceCreated()` helper +that throws `InvalidOperationException("OPC UA address space has not been created yet (server not started.)")` +when `_root` is null, and call it at the top of `ResolveParentFolder` and at every public address-space +mutator entry point (`WriteValue`, `WriteAlarmCondition`, `EnsureFolder`, `EnsureVariable`, +`MaterialiseAlarmCondition`) — right after argument validation, before any `_root` dereference. A too-early +call (a sink wired or a publish replayed before `StartAsync` drives `CreateAddressSpace`) now fails legibly +instead of with a bare NRE out of `ResolveParentFolder` / `CreateVariable`. Happy-path behaviour is +unchanged. The guard was test-feasible after all: `NodeManagerPreStartGuardTests` boots a real host, +borrows the live node manager's real `IServerInternal`, constructs a SECOND, never-started +`OtOpcUaNodeManager` from it (so `_root` is null), and asserts each of the four lock-taking mutators +(`EnsureFolder`/`EnsureVariable`/`WriteValue`/`MaterialiseAlarmCondition`) throws `InvalidOperationException` +(not NRE), with the folder case asserting the message text. (`WriteAlarmCondition`'s guard is identical and +sits on the same path; it is build-verified.) Full `OpcUaServer.Tests` suite green (284/284). ### OpcUaServer-005 diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs index fe5443d1..7367e42d 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.ControlPlane/AdminOperations/AdminOperationsActor.cs @@ -173,7 +173,35 @@ public sealed class AdminOperationsActor : ReceiveActor // committed/visible when the snapshot is read — operators seeing a spurious one should // check ExternalIdReservation state before re-submitting. var draft = await DraftSnapshotFactory.FromConfigDbAsync(db); - var errors = DraftValidator.Validate(draft); + var errors = DraftValidator.Validate(draft).ToList(); + + // Cluster-topology guard (decision #91 / task #148 part 2). The SQL + // CK_ServerCluster_RedundancyMode_NodeCount CHECK enforces the (NodeCount, RedundancyMode) + // pair on the row itself, but it cannot see the per-node ClusterNode.Enabled flag — an + // operator can disable a node (effective enabled-count = 1) while leaving RedundancyMode at + // Hot/Warm and the constraint stays green, which would boot the runtime into an + // InvalidTopology band. ValidateClusterTopology catches that drift, but it isn't carried on + // the generation-versioned DraftSnapshot (the cluster/node rows aren't versioned), so it must + // be run separately here against the live rows. Read-only (AsNoTracking); errors fold into the + // same reject summary alongside the snapshot rules so a deploy failing either check is + // rejected with both sets of messages. ClusterId-ordered for a deterministic summary. + var clusters = await db.ServerClusters + .AsNoTracking() + .OrderBy(c => c.ClusterId) + .ToListAsync(); + var nodesByCluster = (await db.ClusterNodes + .AsNoTracking() + .ToListAsync()) + .GroupBy(n => n.ClusterId, StringComparer.Ordinal) + .ToDictionary(g => g.Key, g => g.ToList(), StringComparer.Ordinal); + foreach (var cluster in clusters) + { + var nodes = nodesByCluster.TryGetValue(cluster.ClusterId, out var ns) + ? (IReadOnlyList)ns + : []; + errors.AddRange(DraftValidator.ValidateClusterTopology(cluster, nodes)); + } + if (errors.Count > 0) { var summary = string.Join("; ", errors.Select(e => $"[{e.Code}] {e.Message}")); diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs b/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs index 394a6ba5..e6f7c8b4 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer/OtOpcUaNodeManager.cs @@ -261,6 +261,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 public void WriteValue(string nodeId, object? value, OpcUaQuality quality, DateTime sourceTimestampUtc) { ArgumentException.ThrowIfNullOrEmpty(nodeId); + EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started. lock (Lock) { @@ -296,6 +297,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 { ArgumentException.ThrowIfNullOrEmpty(alarmNodeId); ArgumentNullException.ThrowIfNull(state); + EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started. // Look up + project under a SINGLE Lock so a concurrent RebuildAddressSpace can't clear // _alarmConditions / detach the condition node between the lookup and the Set* calls. @@ -584,6 +586,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 { ArgumentException.ThrowIfNullOrEmpty(alarmNodeId); ArgumentException.ThrowIfNullOrEmpty(displayName); + EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started. lock (Lock) { @@ -1280,6 +1283,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 { ArgumentException.ThrowIfNullOrEmpty(folderNodeId); ArgumentException.ThrowIfNullOrEmpty(displayName); + EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started. if (_folders.ContainsKey(folderNodeId)) return; @@ -1336,6 +1340,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 { ArgumentException.ThrowIfNullOrEmpty(variableNodeId); ArgumentException.ThrowIfNullOrEmpty(displayName); + EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started. // If already present, leave it alone (idempotent re-applies). if (_variables.ContainsKey(variableNodeId)) return; @@ -1608,10 +1613,29 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 private FolderState ResolveParentFolder(string? parentNodeId) { + EnsureAddressSpaceCreated(); if (string.IsNullOrEmpty(parentNodeId)) return _root!; return _folders.TryGetValue(parentNodeId, out var existing) ? existing : _root!; } + /// OpcUaServer-004: guard the address-space mutators against a too-early call. _root + /// is only assigned in , which the SDK invokes during + /// StandardServer start; every public mutator (, + /// , , , + /// ) and assume it has run. + /// If one is invoked before the server has started (a sink wired or a publish replayed before + /// StartAsync completes), _root is null and the dereference would NRE; throw a legible + /// instead. Happy-path behaviour is unchanged. + /// When the address space has not been created yet. + private void EnsureAddressSpaceCreated() + { + if (_root is null) + { + throw new InvalidOperationException( + "OPC UA address space has not been created yet (server not started)."); + } + } + // --------------------------------------------------------------------------------------------- // Phase C — OPC UA HistoryRead over historized variable nodes. // @@ -1790,8 +1814,11 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 sourceName, details.StartTime, details.EndTime, - // NumValuesPerNode is uint; ReadEventsAsync takes int (<=0 ⇒ backend default cap). - ClampToInt(details.NumValuesPerNode), + // OpcUaServer-002: NumValuesPerNode==0 means "no limit — return ALL values" per OPC UA + // Part 4/11, so translate it to UNBOUNDED (int.MaxValue) here. Passing the int<=0 + // backend-default-cap sentinel instead would silently truncate a "give me everything" + // events read at the backend default. A positive cap passes through (clamped). + EventMaxEvents(details.NumValuesPerNode), CancellationToken.None).GetAwaiter().GetResult(); var historyEvent = ProjectEvents(sourceResult.Events, selectClauses); @@ -1825,6 +1852,18 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2 /// The clamped non-negative int. private static int ClampToInt(uint value) => value > int.MaxValue ? int.MaxValue : (int)value; + /// OpcUaServer-002: map a HistoryRead-Events NumValuesPerNode request cap onto the + /// maxEvents argument, honouring the OPC UA + /// Part 4/11 semantics that NumValuesPerNode == 0 means "no limit — return ALL values". + /// We translate 0 to UNBOUNDED () — a very large positive cap — rather than + /// the backend's maxEvents <= 0 "use the default cap" sentinel, so a client asking for the + /// whole window is not silently truncated at the backend default. A positive value passes through + /// clamped to (mirroring ). + /// The request's NumValuesPerNode cap (0 ⇒ no limit). + /// when 0 (unbounded); otherwise the clamped non-negative int. + internal static int EventMaxEvents(uint numValuesPerNode) => + numValuesPerNode == 0 ? int.MaxValue : ClampToInt(numValuesPerNode); + /// /// Project a sequence of s into an SDK — /// one per event, each carrying the requested diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/AdminOperationsActorTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/AdminOperationsActorTests.cs index 67ce7565..d5190b72 100644 --- a/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/AdminOperationsActorTests.cs +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests/AdminOperationsActorTests.cs @@ -435,6 +435,123 @@ public sealed class AdminOperationsActorTests : ControlPlaneActorTestBase reply.Message.ShouldContain("1 script(s) will compile"); } + /// Verifies the cluster-topology guard is wired into the deploy gate (Configuration-013): + /// a cluster with only ONE enabled + /// (the second toggled off) is with the + /// ClusterEnabledNodeCountMismatch topology error in the message — no coordinator dispatch, + /// no Deployment row. The row-level SQL CHECK cannot see the disabled-node flag, so this proves the + /// managed guard runs + /// at deploy time rather than sitting inert. + [Fact] + public void StartDeployment_rejects_on_invalid_cluster_topology_disabled_node() + { + var dbFactory = NewInMemoryDbFactory(); + using (var db = dbFactory.CreateDbContext()) + { + db.ServerClusters.Add(new Configuration.Entities.ServerCluster + { + ClusterId = "LINE3-OPCUA", + Name = "Line 3", + Enterprise = "zb", + Site = "dev", + NodeCount = 2, + RedundancyMode = RedundancyMode.Hot, // declared 2 + Hot, but only 1 enabled below + CreatedBy = "seed", + }); + db.ClusterNodes.Add(new Configuration.Entities.ClusterNode + { + NodeId = "LINE3-OPCUA-A", + ClusterId = "LINE3-OPCUA", + Host = "host-a", + ApplicationUri = "urn:line3:a", + Enabled = true, + CreatedBy = "seed", + }); + db.ClusterNodes.Add(new Configuration.Entities.ClusterNode + { + NodeId = "LINE3-OPCUA-B", + ClusterId = "LINE3-OPCUA", + Host = "host-b", + ApplicationUri = "urn:line3:b", + Enabled = false, // toggled off → effective enabled-count = 1 while mode stays Hot + CreatedBy = "seed", + }); + db.SaveChanges(); + } + + var coordinator = CreateTestProbe("coord"); + var actor = Sys.ActorOf(AdminOperationsActor.Props(dbFactory, coordinator.Ref, Enumerable.Empty())); + + actor.Tell(new StartDeployment("joe", CorrelationId.NewId())); + + coordinator.ExpectNoMsg(TimeSpan.FromMilliseconds(500)); + var reply = ExpectMsg(TimeSpan.FromSeconds(3)); + reply.Outcome.ShouldBe(StartDeploymentOutcome.Rejected); + reply.Message.ShouldNotBeNull(); + reply.Message.ShouldContain("ClusterEnabledNodeCountMismatch"); + + using var verify = dbFactory.CreateDbContext(); + verify.Deployments.Count().ShouldBe(0); + } + + /// Verifies the topology guard does NOT spuriously reject a well-formed cluster: a + /// cluster whose two s are both enabled + /// passes the topology check, so a deploy of an otherwise-valid config is + /// with no topology error in the message and a row + /// inserted. Pairs with the rejecting test to prove the guard is discriminating, not blanket. + [Fact] + public void StartDeployment_accepts_when_cluster_topology_is_valid() + { + var dbFactory = NewInMemoryDbFactory(); + using (var db = dbFactory.CreateDbContext()) + { + db.ServerClusters.Add(new Configuration.Entities.ServerCluster + { + ClusterId = "LINE3-OPCUA", + Name = "Line 3", + Enterprise = "zb", + Site = "dev", + NodeCount = 2, + RedundancyMode = RedundancyMode.Hot, + CreatedBy = "seed", + }); + db.ClusterNodes.Add(new Configuration.Entities.ClusterNode + { + NodeId = "LINE3-OPCUA-A", + ClusterId = "LINE3-OPCUA", + Host = "host-a", + ApplicationUri = "urn:line3:a", + Enabled = true, + CreatedBy = "seed", + }); + db.ClusterNodes.Add(new Configuration.Entities.ClusterNode + { + NodeId = "LINE3-OPCUA-B", + ClusterId = "LINE3-OPCUA", + Host = "host-b", + ApplicationUri = "urn:line3:b", + Enabled = true, // both enabled → matches declared NodeCount=2 + Hot + CreatedBy = "seed", + }); + db.SaveChanges(); + } + + var coordinator = CreateTestProbe("coord"); + var actor = Sys.ActorOf(AdminOperationsActor.Props(dbFactory, coordinator.Ref, Enumerable.Empty())); + + actor.Tell(new StartDeployment("joe", CorrelationId.NewId())); + + coordinator.ExpectMsg(TimeSpan.FromSeconds(3)); + + var reply = ExpectMsg(TimeSpan.FromSeconds(3)); + reply.Outcome.ShouldBe(StartDeploymentOutcome.Accepted); + (reply.Message is null || !reply.Message.Contains("ClusterEnabledNodeCountMismatch")).ShouldBeTrue(); + (reply.Message is null || !reply.Message.Contains("ClusterRedundancyModeInvalid")).ShouldBeTrue(); + + using var verify = dbFactory.CreateDbContext(); + verify.Deployments.Count().ShouldBe(1); + } + /// Verifies that starting a deployment is refused when another is in flight. [Fact] public void StartDeployment_refuses_when_another_is_in_flight() diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerEventMaxEventsTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerEventMaxEventsTests.cs new file mode 100644 index 00000000..f3e2c148 --- /dev/null +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerEventMaxEventsTests.cs @@ -0,0 +1,45 @@ +using Shouldly; +using Xunit; + +namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests; + +/// +/// OpcUaServer-002 — unit coverage for , the pure +/// helper that maps a HistoryRead-Events NumValuesPerNode request cap onto the +/// IHistorianDataSource.ReadEventsAsync maxEvents argument. Per OPC UA Part 4/11, +/// NumValuesPerNode == 0 means "no limit — return ALL values", so the helper translates 0 to +/// UNBOUNDED () rather than the backend's maxEvents <= 0 +/// "use the default cap" sentinel; a positive value passes through clamped to . +/// +public sealed class NodeManagerEventMaxEventsTests +{ + /// 0 ("no limit" per the spec) ⇒ int.MaxValue (unbounded), NOT the 0/default-cap sentinel. + [Fact] + public void Zero_maps_to_int_max() + { + OtOpcUaNodeManager.EventMaxEvents(0u).ShouldBe(int.MaxValue); + } + + /// A normal positive cap passes through unchanged. + [Fact] + public void Normal_value_passes_through() + { + OtOpcUaNodeManager.EventMaxEvents(50u).ShouldBe(50); + OtOpcUaNodeManager.EventMaxEvents(1u).ShouldBe(1); + } + + /// A value above int.MaxValue clamps to int.MaxValue (mirrors ClampToInt's saturation). + [Fact] + public void Value_above_int_max_clamps() + { + OtOpcUaNodeManager.EventMaxEvents((uint)int.MaxValue + 1u).ShouldBe(int.MaxValue); + OtOpcUaNodeManager.EventMaxEvents(uint.MaxValue).ShouldBe(int.MaxValue); + } + + /// int.MaxValue exactly passes through (boundary — not clamped down). + [Fact] + public void Int_max_exactly_passes_through() + { + OtOpcUaNodeManager.EventMaxEvents((uint)int.MaxValue).ShouldBe(int.MaxValue); + } +} diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerHistoryReadEventsTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerHistoryReadEventsTests.cs index 17c92570..20a14ccf 100644 --- a/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerHistoryReadEventsTests.cs +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerHistoryReadEventsTests.cs @@ -94,6 +94,44 @@ public sealed class NodeManagerHistoryReadEventsTests : IDisposable await host.DisposeAsync(); } + /// OpcUaServer-002: a HistoryReadEvents with NumValuesPerNode == 0 means "no limit — + /// return ALL values" per OPC UA Part 4/11, so the backend must receive an UNBOUNDED cap + /// (), NOT the maxEvents <= 0 "use the default cap" sentinel that + /// would silently truncate a whole-window read. + [Fact] + public async Task Events_unbounded_request_passes_int_max_to_backend() + { + var (host, server) = await BootAsync(); + var nm = server.NodeManager!; + var fake = new RecordingHistorianDataSource(); + nm.HistorianDataSource = fake; + + const string equipmentId = "eq-unbounded"; + nm.EnsureFolder(equipmentId, parentNodeId: null, displayName: "Equipment"); + nm.MaterialiseAlarmCondition("alarm-0", equipmentId, "Cond", "OffNormalAlarm", severity: 600); + var notifierNodeId = nm.TryGetFolder(equipmentId)!.NodeId; + + fake.EventsResult = new HistoricalEventsResult( + new[] { new HistoricalEvent("evt-x", "Src", DateTime.UtcNow, DateTime.UtcNow, "msg", 600) }, null); + + var details = new ReadEventDetails + { + StartTime = DateTime.UtcNow.AddHours(-1), + EndTime = DateTime.UtcNow, + // 0 ⇒ "no limit" — the override must translate this to int.MaxValue for the backend. + NumValuesPerNode = 0, + Filter = SelectFilter("EventId"), + }; + + var (_, errors) = InvokeHistoryRead(server, nm, details, notifierNodeId); + + errors[0].StatusCode.Code.ShouldBe(StatusCodes.Good); + // The backend saw the unbounded cap, NOT the 0/default-cap sentinel. + fake.LastMaxEvents.ShouldBe(int.MaxValue); + + await host.DisposeAsync(); + } + /// An unsupported select operand (BrowsePath ["EventType"]) projects to Variant.Null — a field /// the server can't supply is null (spec-conformant) — while supported siblings still project. [Fact] diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerPreStartGuardTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerPreStartGuardTests.cs new file mode 100644 index 00000000..1f2490fd --- /dev/null +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests/NodeManagerPreStartGuardTests.cs @@ -0,0 +1,142 @@ +using Microsoft.Extensions.Logging.Abstractions; +using Opc.Ua; +using Shouldly; +using Xunit; +using ZB.MOM.WW.OtOpcUa.Commons.OpcUa; + +namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests; + +/// +/// OpcUaServer-004 — a fresh whose CreateAddressSpace has NOT +/// yet run (i.e. the server has not started) has a null _root. Every public address-space mutator +/// (, , +/// , , +/// ) must now fail with a legible +/// instead of a bare NRE out of ResolveParentFolder / +/// CreateVariable. +/// +/// The node manager's ctor needs a real + +/// , which only the SDK boot produces — so we boot a real +/// , borrow those two from the LIVE (already-started) node manager +/// (its public Server + Server.Configuration), then construct a SECOND, fresh node +/// manager from them. That second manager never had CreateAddressSpace driven, so it +/// reproduces the pre-start ordering hazard exactly. +/// +/// +public sealed class NodeManagerPreStartGuardTests : IDisposable +{ + private static CancellationToken Ct => TestContext.Current.CancellationToken; + + private readonly string _pkiRoot = Path.Combine( + Path.GetTempPath(), + $"otopcua-prestartguard-{Guid.NewGuid():N}"); + + [Fact] + public async Task EnsureFolder_before_CreateAddressSpace_throws_InvalidOperationException() + { + var (host, nm) = await BuildPreStartNodeManagerAsync(); + try + { + var ex = Should.Throw(() => + nm.EnsureFolder("eq-1", parentNodeId: null, displayName: "Equipment")); + ex.Message.ShouldContain("address space has not been created"); + } + finally + { + await host.DisposeAsync(); + } + } + + [Fact] + public async Task EnsureVariable_before_CreateAddressSpace_throws_InvalidOperationException() + { + var (host, nm) = await BuildPreStartNodeManagerAsync(); + try + { + Should.Throw(() => + nm.EnsureVariable("eq-1/temp", parentFolderNodeId: null, displayName: "Temp", + dataType: "Float", writable: false)); + } + finally + { + await host.DisposeAsync(); + } + } + + [Fact] + public async Task WriteValue_before_CreateAddressSpace_throws_InvalidOperationException() + { + var (host, nm) = await BuildPreStartNodeManagerAsync(); + try + { + Should.Throw(() => + nm.WriteValue("eq-1/temp", 1.0, OpcUaQuality.Good, DateTime.UtcNow)); + } + finally + { + await host.DisposeAsync(); + } + } + + [Fact] + public async Task MaterialiseAlarmCondition_before_CreateAddressSpace_throws_InvalidOperationException() + { + var (host, nm) = await BuildPreStartNodeManagerAsync(); + try + { + Should.Throw(() => + nm.MaterialiseAlarmCondition("alarm-1", "eq-1", "Cond", "OffNormalAlarm", severity: 500)); + } + finally + { + await host.DisposeAsync(); + } + } + + /// Boot a real host, borrow the live node manager's real + /// , then construct a SECOND node manager from it (with a + /// fresh — the ctor only records it + sets namespaces) that has + /// NEVER had CreateAddressSpace driven (so _root is null). The host is returned so the + /// caller disposes it after exercising the guard. + private async Task<(OpcUaApplicationHost Host, OtOpcUaNodeManager NodeManager)> BuildPreStartNodeManagerAsync() + { + var host = new OpcUaApplicationHost( + new OpcUaApplicationHostOptions + { + ApplicationName = "OtOpcUa.PreStartGuardTest", + ApplicationUri = $"urn:OtOpcUa.PreStartGuardTest:{Guid.NewGuid():N}", + OpcUaPort = AllocateFreePort(), + PublicHostname = "localhost", + PkiStoreRoot = _pkiRoot, + }, + NullLogger.Instance); + + var server = new OtOpcUaSdkServer(); + await host.StartAsync(server, Ct); + + var live = server.NodeManager!; + // Borrow the SDK's real IServerInternal from the live manager and build a brand-new node manager — + // CreateAddressSpace has not been driven on THIS instance, so _root is null and every mutator must + // hit the EnsureAddressSpaceCreated guard. + var fresh = new OtOpcUaNodeManager(live.Server, new ApplicationConfiguration()); + return (host, fresh); + } + + private static int AllocateFreePort() + { + using var listener = new System.Net.Sockets.TcpListener(System.Net.IPAddress.Loopback, 0); + listener.Start(); + var port = ((System.Net.IPEndPoint)listener.LocalEndpoint).Port; + listener.Stop(); + return port; + } + + public void Dispose() + { + if (Directory.Exists(_pkiRoot)) + { + try { Directory.Delete(_pkiRoot, recursive: true); } + catch { /* best-effort cleanup */ } + } + } +}