fix(code-review): resolve Batch 3 wave A (OpcUaServer history/guard, ControlPlane topology gate)

- OpcUaServer-002: HistoryRead-Events NumValuesPerNode==0 now maps to unbounded (int.MaxValue) instead of the backend default-cap sentinel; no Core.Abstractions contract change (+EventMaxEvents helper tests)
- OpcUaServer-004: EnsureAddressSpaceCreated guard on public mutators -> clear InvalidOperationException instead of bare NRE if called pre-start (+tests)
- OpcUaServer-003: Deferred (endUtc inclusive/exclusive needs live Wonderware boundary confirmation)
- Configuration-013: wire DraftValidator.ValidateClusterTopology into AdminOperationsActor deploy gate (read-only, no migration) (+2 tests)
This commit is contained in:
Joseph Doherty
2026-06-20 22:53:29 -04:00
parent c817d7720e
commit 94eec70fb0
8 changed files with 455 additions and 13 deletions
@@ -173,7 +173,35 @@ public sealed class AdminOperationsActor : ReceiveActor
// committed/visible when the snapshot is read — operators seeing a spurious one should
// check ExternalIdReservation state before re-submitting.
var draft = await DraftSnapshotFactory.FromConfigDbAsync(db);
var errors = DraftValidator.Validate(draft);
var errors = DraftValidator.Validate(draft).ToList();
// Cluster-topology guard (decision #91 / task #148 part 2). The SQL
// CK_ServerCluster_RedundancyMode_NodeCount CHECK enforces the (NodeCount, RedundancyMode)
// pair on the row itself, but it cannot see the per-node ClusterNode.Enabled flag — an
// operator can disable a node (effective enabled-count = 1) while leaving RedundancyMode at
// Hot/Warm and the constraint stays green, which would boot the runtime into an
// InvalidTopology band. ValidateClusterTopology catches that drift, but it isn't carried on
// the generation-versioned DraftSnapshot (the cluster/node rows aren't versioned), so it must
// be run separately here against the live rows. Read-only (AsNoTracking); errors fold into the
// same reject summary alongside the snapshot rules so a deploy failing either check is
// rejected with both sets of messages. ClusterId-ordered for a deterministic summary.
var clusters = await db.ServerClusters
.AsNoTracking()
.OrderBy(c => c.ClusterId)
.ToListAsync();
var nodesByCluster = (await db.ClusterNodes
.AsNoTracking()
.ToListAsync())
.GroupBy(n => n.ClusterId, StringComparer.Ordinal)
.ToDictionary(g => g.Key, g => g.ToList(), StringComparer.Ordinal);
foreach (var cluster in clusters)
{
var nodes = nodesByCluster.TryGetValue(cluster.ClusterId, out var ns)
? (IReadOnlyList<ClusterNode>)ns
: [];
errors.AddRange(DraftValidator.ValidateClusterTopology(cluster, nodes));
}
if (errors.Count > 0)
{
var summary = string.Join("; ", errors.Select(e => $"[{e.Code}] {e.Message}"));
@@ -261,6 +261,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
public void WriteValue(string nodeId, object? value, OpcUaQuality quality, DateTime sourceTimestampUtc)
{
ArgumentException.ThrowIfNullOrEmpty(nodeId);
EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started.
lock (Lock)
{
@@ -296,6 +297,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
{
ArgumentException.ThrowIfNullOrEmpty(alarmNodeId);
ArgumentNullException.ThrowIfNull(state);
EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started.
// Look up + project under a SINGLE Lock so a concurrent RebuildAddressSpace can't clear
// _alarmConditions / detach the condition node between the lookup and the Set* calls.
@@ -584,6 +586,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
{
ArgumentException.ThrowIfNullOrEmpty(alarmNodeId);
ArgumentException.ThrowIfNullOrEmpty(displayName);
EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started.
lock (Lock)
{
@@ -1280,6 +1283,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
{
ArgumentException.ThrowIfNullOrEmpty(folderNodeId);
ArgumentException.ThrowIfNullOrEmpty(displayName);
EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started.
if (_folders.ContainsKey(folderNodeId)) return;
@@ -1336,6 +1340,7 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
{
ArgumentException.ThrowIfNullOrEmpty(variableNodeId);
ArgumentException.ThrowIfNullOrEmpty(displayName);
EnsureAddressSpaceCreated(); // OpcUaServer-004: fail legibly if called before the server started.
// If already present, leave it alone (idempotent re-applies).
if (_variables.ContainsKey(variableNodeId)) return;
@@ -1608,10 +1613,29 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
private FolderState ResolveParentFolder(string? parentNodeId)
{
    // Fail with a legible InvalidOperationException (rather than dereferencing a null _root)
    // when this is reached before the address space exists.
    EnsureAddressSpaceCreated();

    // A usable parent id that is registered in _folders resolves to that folder.
    if (!string.IsNullOrEmpty(parentNodeId) && _folders.TryGetValue(parentNodeId, out var parentFolder))
    {
        return parentFolder;
    }

    // Null/empty parent id, or an id that is not registered: fall back to the root folder.
    return _root!;
}
/// <summary>OpcUaServer-004: guard the address-space mutators against a too-early call. <c>_root</c>
/// is only assigned in <see cref="CreateAddressSpace"/>, which the SDK invokes during
/// <c>StandardServer</c> start; every public mutator (<see cref="WriteValue"/>,
/// <see cref="WriteAlarmCondition"/>, <see cref="EnsureFolder"/>, <see cref="EnsureVariable"/>,
/// <see cref="MaterialiseAlarmCondition"/>) and <see cref="ResolveParentFolder"/> assume it has run.
/// If one is invoked before the server has started (a sink wired or a publish replayed before
/// <c>StartAsync</c> completes), <c>_root</c> is null and the dereference would NRE; throw a legible
/// <see cref="InvalidOperationException"/> instead. Happy-path behaviour is unchanged.</summary>
/// <remarks>Annotated with <see cref="System.Diagnostics.CodeAnalysis.MemberNotNullAttribute"/> so
/// nullable flow analysis treats <c>_root</c> as non-null after this method returns normally;
/// the annotation is compile-time only and does not alter runtime behaviour.</remarks>
/// <exception cref="InvalidOperationException">When the address space has not been created yet.</exception>
[System.Diagnostics.CodeAnalysis.MemberNotNull(nameof(_root))]
private void EnsureAddressSpaceCreated()
{
    if (_root is null)
    {
        throw new InvalidOperationException(
            "OPC UA address space has not been created yet (server not started).");
    }
}
// ---------------------------------------------------------------------------------------------
// Phase C — OPC UA HistoryRead over historized variable nodes.
//
@@ -1790,8 +1814,11 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
sourceName,
details.StartTime,
details.EndTime,
// NumValuesPerNode is uint; ReadEventsAsync takes int (<=0 ⇒ backend default cap).
ClampToInt(details.NumValuesPerNode),
// OpcUaServer-002: NumValuesPerNode==0 means "no limit — return ALL values" per OPC UA
// Part 4/11, so translate it to UNBOUNDED (int.MaxValue) here. Passing the int<=0
// backend-default-cap sentinel instead would silently truncate a "give me everything"
// events read at the backend default. A positive cap passes through (clamped).
EventMaxEvents(details.NumValuesPerNode),
CancellationToken.None).GetAwaiter().GetResult();
var historyEvent = ProjectEvents(sourceResult.Events, selectClauses);
@@ -1825,6 +1852,18 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
/// <returns>The clamped non-negative int.</returns>
private static int ClampToInt(uint value)
{
    // Saturate: anything above int.MaxValue cannot be represented as a non-negative int.
    if (value > int.MaxValue)
    {
        return int.MaxValue;
    }

    // In range, so the narrowing cast is lossless.
    return (int)value;
}
/// <summary>
/// OpcUaServer-002: translate the HistoryRead-Events <c>NumValuesPerNode</c> request cap into the
/// <c>maxEvents</c> argument of <see cref="IHistorianDataSource.ReadEventsAsync"/>.
/// Per OPC UA Part 4/11, <c>NumValuesPerNode == 0</c> means "no limit — return ALL values", so 0 is
/// mapped to <see cref="int.MaxValue"/> (effectively unbounded) and NOT to the backend's
/// <c>maxEvents &lt;= 0</c> "use the default cap" sentinel, which would silently truncate a
/// whole-window read at the backend default. Any positive cap is forwarded, saturated at
/// <see cref="int.MaxValue"/> by <see cref="ClampToInt"/>.
/// </summary>
/// <param name="numValuesPerNode">The request's <c>NumValuesPerNode</c> cap (0 ⇒ no limit).</param>
/// <returns><see cref="int.MaxValue"/> when 0 (unbounded); otherwise the clamped non-negative int.</returns>
internal static int EventMaxEvents(uint numValuesPerNode)
{
    // 0 is the "no limit" request: hand the backend the largest positive cap instead of a
    // non-positive value, which it would interpret as "apply the default cap".
    if (numValuesPerNode == 0)
    {
        return int.MaxValue;
    }

    // Positive cap: pass through, saturating values that do not fit in an int.
    return ClampToInt(numValuesPerNode);
}
/// <summary>
/// Project a sequence of <see cref="HistoricalEvent"/>s into an SDK <see cref="HistoryEvent"/> —
/// one <see cref="HistoryEventFieldList"/> per event, each carrying the requested