fix(code-review): resolve Batch 3 wave A (OpcUaServer history/guard, ControlPlane topology gate)

- OpcUaServer-002: HistoryRead-Events NumValuesPerNode==0 now maps to unbounded (int.MaxValue) instead of the backend default-cap sentinel; no Core.Abstractions contract change (+EventMaxEvents helper tests)
- OpcUaServer-004: EnsureAddressSpaceCreated guard on public mutators -> clear InvalidOperationException instead of bare NRE if called pre-start (+tests)
- OpcUaServer-003: Deferred (endUtc inclusive/exclusive needs live Wonderware boundary confirmation)
- Configuration-013: wire DraftValidator.ValidateClusterTopology into AdminOperationsActor deploy gate (read-only, no migration) (+2 tests)
This commit is contained in:
Joseph Doherty
2026-06-20 22:53:29 -04:00
parent c817d7720e
commit 94eec70fb0
8 changed files with 455 additions and 13 deletions
@@ -435,6 +435,123 @@ public sealed class AdminOperationsActorTests : ControlPlaneActorTestBase
reply.Message.ShouldContain("1 script(s) will compile");
}
/// <summary>
/// Configuration-013, rejecting case: proves the managed
/// <see cref="Configuration.Validation.DraftValidator.ValidateClusterTopology"/> guard actually runs
/// inside the deploy gate instead of sitting inert. The seeded cluster declares
/// <see cref="RedundancyMode.Hot"/> with <c>NodeCount = 2</c>, yet only one of its two
/// <see cref="ClusterNode"/> rows is enabled (the row-level SQL CHECK cannot see that flag).
/// Expected: nothing is dispatched to the coordinator, the reply is
/// <see cref="StartDeploymentOutcome.Rejected"/> with <c>ClusterEnabledNodeCountMismatch</c> in its
/// message, and no Deployment row is written.
/// </summary>
[Fact]
public void StartDeployment_rejects_on_invalid_cluster_topology_disabled_node()
{
    const string clusterId = "LINE3-OPCUA";

    // Arrange — a Hot cluster that claims two nodes but has only one of them enabled.
    var contextFactory = NewInMemoryDbFactory();
    using (var seed = contextFactory.CreateDbContext())
    {
        var cluster = new Configuration.Entities.ServerCluster
        {
            ClusterId = clusterId,
            Name = "Line 3",
            Enterprise = "zb",
            Site = "dev",
            NodeCount = 2,
            RedundancyMode = RedundancyMode.Hot, // declared 2 + Hot, but only 1 node is enabled
            CreatedBy = "seed",
        };
        var enabledNode = new Configuration.Entities.ClusterNode
        {
            NodeId = "LINE3-OPCUA-A",
            ClusterId = clusterId,
            Host = "host-a",
            ApplicationUri = "urn:line3:a",
            Enabled = true,
            CreatedBy = "seed",
        };
        var disabledNode = new Configuration.Entities.ClusterNode
        {
            NodeId = "LINE3-OPCUA-B",
            ClusterId = clusterId,
            Host = "host-b",
            ApplicationUri = "urn:line3:b",
            Enabled = false, // toggled off → effective enabled-count = 1 while mode stays Hot
            CreatedBy = "seed",
        };

        seed.ServerClusters.Add(cluster);
        seed.ClusterNodes.Add(enabledNode);
        seed.ClusterNodes.Add(disabledNode);
        seed.SaveChanges();
    }

    var coordinatorProbe = CreateTestProbe("coord");
    var props = AdminOperationsActor.Props(
        contextFactory,
        coordinatorProbe.Ref,
        Enumerable.Empty<IDriverProbe>());
    var adminOps = Sys.ActorOf(props);

    // Act
    adminOps.Tell(new StartDeployment("joe", CorrelationId.NewId()));

    // Assert — the coordinator stays silent and the caller is told why the deploy was refused.
    coordinatorProbe.ExpectNoMsg(TimeSpan.FromMilliseconds(500));
    var result = ExpectMsg<StartDeploymentResult>(TimeSpan.FromSeconds(3));
    result.Outcome.ShouldBe(StartDeploymentOutcome.Rejected);
    result.Message.ShouldNotBeNull();
    result.Message.ShouldContain("ClusterEnabledNodeCountMismatch");

    // No Deployment row may have been persisted for a rejected request.
    using var check = contextFactory.CreateDbContext();
    check.Deployments.Count().ShouldBe(0);
}
/// <summary>
/// Configuration-013, accepting case: the counterpart of the rejecting topology test, showing the
/// guard discriminates rather than blanket-rejecting. The seeded cluster declares
/// <see cref="RedundancyMode.Hot"/> with <c>NodeCount = 2</c> and both of its
/// <see cref="ClusterNode"/> rows are enabled. Expected: a <c>DispatchDeployment</c> reaches the
/// coordinator, the reply is <see cref="StartDeploymentOutcome.Accepted"/>, the reply message (when
/// present) names neither topology error, and exactly one Deployment row is written.
/// </summary>
[Fact]
public void StartDeployment_accepts_when_cluster_topology_is_valid()
{
    const string clusterId = "LINE3-OPCUA";

    // Arrange — a Hot cluster whose two declared nodes are both enabled.
    var contextFactory = NewInMemoryDbFactory();
    using (var seed = contextFactory.CreateDbContext())
    {
        var cluster = new Configuration.Entities.ServerCluster
        {
            ClusterId = clusterId,
            Name = "Line 3",
            Enterprise = "zb",
            Site = "dev",
            NodeCount = 2,
            RedundancyMode = RedundancyMode.Hot,
            CreatedBy = "seed",
        };
        var firstNode = new Configuration.Entities.ClusterNode
        {
            NodeId = "LINE3-OPCUA-A",
            ClusterId = clusterId,
            Host = "host-a",
            ApplicationUri = "urn:line3:a",
            Enabled = true,
            CreatedBy = "seed",
        };
        var secondNode = new Configuration.Entities.ClusterNode
        {
            NodeId = "LINE3-OPCUA-B",
            ClusterId = clusterId,
            Host = "host-b",
            ApplicationUri = "urn:line3:b",
            Enabled = true, // both enabled → matches declared NodeCount=2 + Hot
            CreatedBy = "seed",
        };

        seed.ServerClusters.Add(cluster);
        seed.ClusterNodes.Add(firstNode);
        seed.ClusterNodes.Add(secondNode);
        seed.SaveChanges();
    }

    var coordinatorProbe = CreateTestProbe("coord");
    var props = AdminOperationsActor.Props(
        contextFactory,
        coordinatorProbe.Ref,
        Enumerable.Empty<IDriverProbe>());
    var adminOps = Sys.ActorOf(props);

    // Act
    adminOps.Tell(new StartDeployment("joe", CorrelationId.NewId()));

    // Assert — the deploy is dispatched and acknowledged as accepted.
    coordinatorProbe.ExpectMsg<DispatchDeployment>(TimeSpan.FromSeconds(3));
    var result = ExpectMsg<StartDeploymentResult>(TimeSpan.FromSeconds(3));
    result.Outcome.ShouldBe(StartDeploymentOutcome.Accepted);

    // A null message is fine; a non-null one must not mention either topology error.
    var message = result.Message;
    foreach (var topologyError in new[] { "ClusterEnabledNodeCountMismatch", "ClusterRedundancyModeInvalid" })
    {
        (message is not null && message.Contains(topologyError)).ShouldBeFalse();
    }

    // Exactly one Deployment row is persisted for the accepted request.
    using var check = contextFactory.CreateDbContext();
    check.Deployments.Count().ShouldBe(1);
}
/// <summary>Verifies that starting a deployment is refused when another is in flight.</summary>
[Fact]
public void StartDeployment_refuses_when_another_is_in_flight()
@@ -0,0 +1,45 @@
using Shouldly;
using Xunit;
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests;
/// <summary>
/// OpcUaServer-002 — unit coverage for <see cref="OtOpcUaNodeManager.EventMaxEvents"/>, the pure
/// helper that maps a HistoryRead-Events <c>NumValuesPerNode</c> request cap onto the
/// <c>IHistorianDataSource.ReadEventsAsync</c> <c>maxEvents</c> argument. Per OPC UA Part 4/11,
/// <c>NumValuesPerNode == 0</c> means "no limit — return ALL values", so the helper translates 0 to
/// UNBOUNDED (<see cref="int.MaxValue"/>) rather than the backend's <c>maxEvents &lt;= 0</c>
/// "use the default cap" sentinel; a positive value passes through clamped to <see cref="int.MaxValue"/>.
/// <para>
/// Multi-input cases are expressed as <c>[Theory]</c> rows so that each input is reported as its own
/// test case and a failure on one input cannot mask the result for the next.
/// </para>
/// </summary>
public sealed class NodeManagerEventMaxEventsTests
{
    /// <summary>0 ("no limit" per the spec) ⇒ int.MaxValue (unbounded), NOT the 0/default-cap sentinel.</summary>
    [Fact]
    public void Zero_maps_to_int_max()
    {
        OtOpcUaNodeManager.EventMaxEvents(0u).ShouldBe(int.MaxValue);
    }

    /// <summary>A normal positive cap passes through unchanged.</summary>
    /// <param name="numValuesPerNode">The requested per-node cap.</param>
    /// <param name="expected">The <c>maxEvents</c> value the helper is expected to return.</param>
    [Theory]
    [InlineData(50u, 50)]
    [InlineData(1u, 1)]
    public void Normal_value_passes_through(uint numValuesPerNode, int expected)
    {
        OtOpcUaNodeManager.EventMaxEvents(numValuesPerNode).ShouldBe(expected);
    }

    /// <summary>A value above int.MaxValue clamps to int.MaxValue (mirrors ClampToInt's saturation).</summary>
    /// <param name="numValuesPerNode">A requested cap that does not fit in a signed 32-bit integer.</param>
    [Theory]
    [InlineData((uint)int.MaxValue + 1u)] // first value past the signed range
    [InlineData(uint.MaxValue)]           // largest representable request
    public void Value_above_int_max_clamps(uint numValuesPerNode)
    {
        OtOpcUaNodeManager.EventMaxEvents(numValuesPerNode).ShouldBe(int.MaxValue);
    }

    /// <summary>int.MaxValue exactly passes through (boundary — not clamped down).</summary>
    [Fact]
    public void Int_max_exactly_passes_through()
    {
        OtOpcUaNodeManager.EventMaxEvents((uint)int.MaxValue).ShouldBe(int.MaxValue);
    }
}
@@ -94,6 +94,44 @@ public sealed class NodeManagerHistoryReadEventsTests : IDisposable
await host.DisposeAsync();
}
/// <summary>OpcUaServer-002: a HistoryReadEvents with <c>NumValuesPerNode == 0</c> means "no limit —
/// return ALL values" per OPC UA Part 4/11, so the backend must receive an UNBOUNDED cap
/// (<see cref="int.MaxValue"/>), NOT the <c>maxEvents &lt;= 0</c> "use the default cap" sentinel that
/// would silently truncate a whole-window read.
/// <para>The booted host is disposed in a <c>finally</c> block so that a failing assertion does not
/// leave the host running for the remainder of the test run.</para></summary>
[Fact]
public async Task Events_unbounded_request_passes_int_max_to_backend()
{
    var (host, server) = await BootAsync();
    try
    {
        // Arrange — swap in a recording historian so the maxEvents argument can be inspected.
        var nm = server.NodeManager!;
        var fake = new RecordingHistorianDataSource();
        nm.HistorianDataSource = fake;

        // An equipment folder with one alarm condition; the folder's NodeId is the read target.
        const string equipmentId = "eq-unbounded";
        nm.EnsureFolder(equipmentId, parentNodeId: null, displayName: "Equipment");
        nm.MaterialiseAlarmCondition("alarm-0", equipmentId, "Cond", "OffNormalAlarm", severity: 600);
        var notifierNodeId = nm.TryGetFolder(equipmentId)!.NodeId;

        fake.EventsResult = new HistoricalEventsResult(
            new[] { new HistoricalEvent("evt-x", "Src", DateTime.UtcNow, DateTime.UtcNow, "msg", 600) }, null);

        var details = new ReadEventDetails
        {
            StartTime = DateTime.UtcNow.AddHours(-1),
            EndTime = DateTime.UtcNow,
            // 0 ⇒ "no limit" — the override must translate this to int.MaxValue for the backend.
            NumValuesPerNode = 0,
            Filter = SelectFilter("EventId"),
        };

        // Act
        var (_, errors) = InvokeHistoryRead(server, nm, details, notifierNodeId);

        // Assert
        errors[0].StatusCode.Code.ShouldBe(StatusCodes.Good);
        // The backend saw the unbounded cap, NOT the 0/default-cap sentinel.
        fake.LastMaxEvents.ShouldBe(int.MaxValue);
    }
    finally
    {
        await host.DisposeAsync();
    }
}
/// <summary>An unsupported select operand (BrowsePath ["EventType"]) projects to Variant.Null — a field
/// the server can't supply is null (spec-conformant) — while supported siblings still project.</summary>
[Fact]
@@ -0,0 +1,142 @@
using Microsoft.Extensions.Logging.Abstractions;
using Opc.Ua;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests;
/// <summary>
/// OpcUaServer-004 — a fresh <see cref="OtOpcUaNodeManager"/> whose <c>CreateAddressSpace</c> has NOT
/// yet run (i.e. the server has not started) has a null <c>_root</c>. Every public address-space mutator
/// (<see cref="OtOpcUaNodeManager.WriteValue"/>, <see cref="OtOpcUaNodeManager.WriteAlarmCondition"/>,
/// <see cref="OtOpcUaNodeManager.EnsureFolder"/>, <see cref="OtOpcUaNodeManager.EnsureVariable"/>,
/// <see cref="OtOpcUaNodeManager.MaterialiseAlarmCondition"/>) must now fail with a legible
/// <see cref="InvalidOperationException"/> instead of a bare NRE out of <c>ResolveParentFolder</c> /
/// <c>CreateVariable</c>.
/// <para>
/// The tests in this class exercise <c>EnsureFolder</c>, <c>EnsureVariable</c>, <c>WriteValue</c> and
/// <c>MaterialiseAlarmCondition</c>; <c>WriteAlarmCondition</c> is not exercised here.
/// </para>
/// <para>
/// The node manager's ctor needs a real <see cref="Opc.Ua.Server.IServerInternal"/>, which only the
/// SDK boot produces — so we boot a real <see cref="OpcUaApplicationHost"/>, borrow the
/// <c>Server</c> of the LIVE (already-started) node manager, and construct a SECOND, fresh node
/// manager from it together with a new, empty <see cref="ApplicationConfiguration"/>. That second
/// manager never had <c>CreateAddressSpace</c> driven, so it reproduces the pre-start ordering hazard.
/// </para>
/// </summary>
public sealed class NodeManagerPreStartGuardTests : IDisposable
{
    /// <summary>Cancellation token of the currently running xUnit test.</summary>
    private static CancellationToken Ct => TestContext.Current.CancellationToken;

    /// <summary>Per-instance PKI store directory under the temp path; removed in <see cref="Dispose"/>.</summary>
    private readonly string _pkiRoot = Path.Combine(
        Path.GetTempPath(),
        $"otopcua-prestartguard-{Guid.NewGuid():N}");

    /// <summary><c>EnsureFolder</c> on a pre-start manager throws, and the message says why.</summary>
    [Fact]
    public async Task EnsureFolder_before_CreateAddressSpace_throws_InvalidOperationException()
    {
        var ex = await AssertPreStartGuardAsync(nm =>
            nm.EnsureFolder("eq-1", parentNodeId: null, displayName: "Equipment"));

        ex.Message.ShouldContain("address space has not been created");
    }

    /// <summary><c>EnsureVariable</c> on a pre-start manager throws <see cref="InvalidOperationException"/>.</summary>
    [Fact]
    public async Task EnsureVariable_before_CreateAddressSpace_throws_InvalidOperationException()
    {
        await AssertPreStartGuardAsync(nm =>
            nm.EnsureVariable("eq-1/temp", parentFolderNodeId: null, displayName: "Temp",
                dataType: "Float", writable: false));
    }

    /// <summary><c>WriteValue</c> on a pre-start manager throws <see cref="InvalidOperationException"/>.</summary>
    [Fact]
    public async Task WriteValue_before_CreateAddressSpace_throws_InvalidOperationException()
    {
        await AssertPreStartGuardAsync(nm =>
            nm.WriteValue("eq-1/temp", 1.0, OpcUaQuality.Good, DateTime.UtcNow));
    }

    /// <summary><c>MaterialiseAlarmCondition</c> on a pre-start manager throws <see cref="InvalidOperationException"/>.</summary>
    [Fact]
    public async Task MaterialiseAlarmCondition_before_CreateAddressSpace_throws_InvalidOperationException()
    {
        await AssertPreStartGuardAsync(nm =>
            nm.MaterialiseAlarmCondition("alarm-1", "eq-1", "Cond", "OffNormalAlarm", severity: 500));
    }

    /// <summary>Builds a pre-start node manager, runs <paramref name="mutate"/> against it, asserts that
    /// it throws <see cref="InvalidOperationException"/>, and always disposes the backing host.</summary>
    /// <param name="mutate">The address-space mutator call under test.</param>
    /// <returns>The caught exception, so callers can make further assertions on it.</returns>
    private async Task<InvalidOperationException> AssertPreStartGuardAsync(Action<OtOpcUaNodeManager> mutate)
    {
        var (host, nm) = await BuildPreStartNodeManagerAsync();
        try
        {
            return Should.Throw<InvalidOperationException>(() => mutate(nm));
        }
        finally
        {
            await host.DisposeAsync();
        }
    }

    /// <summary>Boot a real host, borrow the live node manager's real
    /// <see cref="Opc.Ua.Server.IServerInternal"/>, then construct a SECOND node manager from it (with a
    /// fresh <see cref="ApplicationConfiguration"/> — the ctor only records it + sets namespaces) that has
    /// NEVER had <c>CreateAddressSpace</c> driven (so <c>_root</c> is null). The host is returned so the
    /// caller disposes it after exercising the guard. If start-up or construction fails, the host is
    /// disposed here before the exception propagates, so a failed setup does not leak it.</summary>
    /// <returns>The started host (caller-owned) and the fresh, never-started node manager.</returns>
    private async Task<(OpcUaApplicationHost Host, OtOpcUaNodeManager NodeManager)> BuildPreStartNodeManagerAsync()
    {
        var host = new OpcUaApplicationHost(
            new OpcUaApplicationHostOptions
            {
                ApplicationName = "OtOpcUa.PreStartGuardTest",
                ApplicationUri = $"urn:OtOpcUa.PreStartGuardTest:{Guid.NewGuid():N}",
                OpcUaPort = AllocateFreePort(),
                PublicHostname = "localhost",
                PkiStoreRoot = _pkiRoot,
            },
            NullLogger<OpcUaApplicationHost>.Instance);

        try
        {
            var server = new OtOpcUaSdkServer();
            await host.StartAsync(server, Ct);
            var live = server.NodeManager!;

            // Borrow the SDK's real IServerInternal from the live manager and build a brand-new node manager —
            // CreateAddressSpace has not been driven on THIS instance, so _root is null and every mutator must
            // hit the EnsureAddressSpaceCreated guard.
            var fresh = new OtOpcUaNodeManager(live.Server, new ApplicationConfiguration());
            return (host, fresh);
        }
        catch
        {
            // Ownership has not yet passed to the caller, so clean up here and rethrow unchanged.
            await host.DisposeAsync();
            throw;
        }
    }

    /// <summary>Asks the OS for an ephemeral loopback port by binding to port 0, then releases it.
    /// The port is free at the moment of return only; another process could claim it before the host
    /// binds, which is an accepted risk for this test helper.</summary>
    /// <returns>The port number the OS assigned.</returns>
    private static int AllocateFreePort()
    {
        using var listener = new System.Net.Sockets.TcpListener(System.Net.IPAddress.Loopback, 0);
        listener.Start();
        var port = ((System.Net.IPEndPoint)listener.LocalEndpoint).Port;
        listener.Stop();
        return port;
    }

    /// <summary>Removes this instance's temporary PKI directory, if it was created.</summary>
    public void Dispose()
    {
        if (Directory.Exists(_pkiRoot))
        {
            // Deliberately swallowed: a leftover temp directory must never fail a test run.
            try { Directory.Delete(_pkiRoot, recursive: true); }
            catch { /* best-effort cleanup */ }
        }
    }
}