feat(server): inbound operator-write pipeline — OnWriteValue authz gate + node-write router

This commit is contained in:
Joseph Doherty
2026-06-13 12:02:34 -04:00
parent a23fb2b82e
commit bb5832e900
5 changed files with 266 additions and 3 deletions
@@ -1,11 +1,14 @@
using Akka.Actor;
using Akka.Cluster.Tools.PublishSubscribe;
using Akka.Hosting;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
using ZB.MOM.WW.OtOpcUa.OpcUaServer.Security;
using ZB.MOM.WW.OtOpcUa.Runtime;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using ZB.MOM.WW.OtOpcUa.Runtime.ScriptedAlarms;
namespace ZB.MOM.WW.OtOpcUa.Host.OpcUa;
@@ -28,6 +31,7 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
private readonly DeferredServiceLevelPublisher _deferredServiceLevel;
private readonly IOpcUaUserAuthenticator _userAuthenticator;
private readonly Func<ActorSystem> _actorSystemAccessor;
private readonly ActorRegistry _actorRegistry;
private readonly ILoggerFactory _loggerFactory;
private readonly ILogger<OtOpcUaServerHostedService> _logger;
@@ -44,6 +48,9 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
/// <param name="actorSystemAccessor">Lazy accessor for the running <see cref="ActorSystem"/>, used to
/// resolve the DistributedPubSub mediator the inbound alarm-command router publishes through. Resolved
/// lazily (mirroring <c>DpsScriptLogPublisher</c>) so construction never races Akka startup.</param>
/// <param name="actorRegistry">The Akka.Hosting actor registry, used to resolve the local
/// <c>DriverHostActor</c> ref (<c>DriverHostActorKey</c>) the inbound node-write router Asks. Resolved
/// lazily on each inbound write (not once in <see cref="StartAsync"/>), because <see cref="StartAsync"/>
/// runs before the runtime actors have been registered.</param>
/// <param name="loggerFactory">The logger factory for creating loggers.</param>
public OtOpcUaServerHostedService(
IOptions<OpcUaApplicationHostOptions> options,
@@ -51,6 +58,7 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
DeferredServiceLevelPublisher deferredServiceLevel,
IOpcUaUserAuthenticator userAuthenticator,
Func<ActorSystem> actorSystemAccessor,
ActorRegistry actorRegistry,
ILoggerFactory loggerFactory)
{
_options = options.Value;
@@ -58,6 +66,7 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
_deferredServiceLevel = deferredServiceLevel;
_userAuthenticator = userAuthenticator;
_actorSystemAccessor = actorSystemAccessor;
_actorRegistry = actorRegistry;
_loggerFactory = loggerFactory;
_logger = loggerFactory.CreateLogger<OtOpcUaServerHostedService>();
}
@@ -121,6 +130,36 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
}
});
// Wire the reverse-path inbound operator-write router: a client write to a writable equipment-tag
// node that passes the node manager's WriteOperate gate routes the write to the owning driver child
// (RouteNodeWrite → NodeWriteResult) via the local DriverHostActor. This dispatch is FIRE-AND-FORGET
// (just like the alarm router): the SDK's CustomNodeManager2.Write holds the node-manager Lock while
// invoking OnWriteValue, so a blocking Ask here would freeze ALL address-space operations (reads,
// subscription notifications, the publish path) for up to the Ask timeout. We kick off the Ask and
// log failures from a continuation; the write reaches the device asynchronously and the value
// self-corrects on the next driver poll (standard OPC UA optimistic-write semantics). The
// DriverHostActor ref is resolved LAZILY per write — this hosted service's StartAsync runs before
// the Akka DriverHostActor registers, so a one-shot resolve here would always miss and leave every
// write unavailable. By write time (long after startup) the registry has it; a node that genuinely
// has no driver-host (admin-only, no writable driver nodes materialised) logs + drops the write.
var nodeWriteAskTimeout = TimeSpan.FromSeconds(10); // upper bound on one driver round-trip
var nodeWriteRouterWired = _server.SetNodeWriteRouter((nodeId, value) =>
{
    if (!_actorRegistry.TryGet<DriverHostActorKey>(out var driverHost))
    {
        _logger.LogWarning("Inbound write to {NodeId} dropped: no DriverHostActor registered", nodeId);
        return;
    }

    // Deliberately unawaited: the continuation only logs, and runs on the thread pool so it never
    // executes on the SDK thread that holds the node-manager Lock.
    _ = driverHost.Ask<DriverHostActor.NodeWriteResult>(
            new DriverHostActor.RouteNodeWrite(nodeId, value), nodeWriteAskTimeout)
        .ContinueWith(t =>
        {
            if (t.IsCompletedSuccessfully)
            {
                if (!t.Result.Success)
                {
                    _logger.LogWarning("Operator write to {NodeId} rejected: {Reason}", nodeId, t.Result.Reason);
                }

                return;
            }

            // Reading t.Exception marks a faulted task as observed and puts the cause (e.g. the Ask
            // timeout) in the log entry; it is null when the task was cancelled rather than faulted.
            _logger.LogWarning(t.Exception, "Operator write to {NodeId} failed or timed out", nodeId);
        }, TaskScheduler.Default);
});
if (!nodeWriteRouterWired)
{
    // SetNodeWriteRouter returns false when the node manager does not exist yet; without this log
    // every operator write would resolve to "writes unavailable" with no hint as to why.
    _logger.LogWarning("Inbound operator-write router was not wired: the OPC UA node manager has not been created yet");
}
// ServiceLevel publisher needs IServerInternal — only available after Start.
if (_server.CurrentInstance is { } serverInternal)
{
@@ -142,6 +181,8 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
// half-disposed NodeManager.
_deferredSink.SetSink(null);
_deferredServiceLevel.SetInner(null);
// Drop the inbound-write router too so a late client write doesn't Ask a stopping DriverHostActor.
_server?.SetNodeWriteRouter(null);
return Task.CompletedTask;
}
@@ -70,6 +70,39 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
/// </summary>
public Action<AlarmCommand>? AlarmCommandRouter { get; set; }
// Backing store for NodeWriteRouter. Declared volatile (an auto-property cannot be) so an assignment
// made on the host's thread is visible to the SDK request threads that read it.
private volatile Action<string, object?>? _nodeWriteRouter;

/// <summary>
/// Reverse-path sink for inbound OPC UA operator writes to a writable equipment-tag variable node.
/// <para>
/// <see cref="OnEquipmentTagWrite"/> — the <see cref="BaseDataVariableState.OnWriteValue"/> handler
/// that <see cref="EnsureVariable"/> attaches to writable variables — first checks the caller for the
/// <see cref="OpcUaDataPlaneRoles.WriteOperate"/> role and, when the check passes, calls this delegate
/// with the node's string id and the written value so the write can be routed to the backing driver.
/// </para>
/// <para>
/// Like <see cref="AlarmCommandRouter"/>, it is a plain <see cref="Action{T1,T2}"/> (no Akka /
/// <c>IActorRef</c> / DI handle) so this assembly stays Akka-free. The delegate is invoked while the
/// OPC UA SDK's <c>CustomNodeManager2.Write</c> holds the node-manager <c>Lock</c>, so an implementation
/// MUST return immediately and do the driver round-trip asynchronously (fire-and-forget); the value
/// applied optimistically by the SDK is corrected by the next driver poll.
/// </para>
/// <para>
/// The host installs the delegate after the server starts and clears it (sets <c>null</c>) on stop.
/// While it is <c>null</c> (the default), a write that passes the role gate resolves to
/// <c>BadNotWritable</c> ("writes unavailable").
/// </para>
/// </summary>
public Action<string, object?>? NodeWriteRouter
{
    get { return _nodeWriteRouter; }
    set { _nodeWriteRouter = value; }
}
/// <summary>Look up a materialised Part 9 alarm-condition node by its alarm node id (the
/// ScriptedAlarmId), or null if not yet materialised. Exposed for tests + diagnostics.</summary>
/// <param name="alarmNodeId">The alarm node identifier (== ScriptedAlarmId).</param>
@@ -551,6 +584,78 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
return ServiceResult.Good;
}
/// <summary>
/// <see cref="NodeValueEventHandler"/> for writable equipment-tag variables; <see cref="EnsureVariable"/>
/// assigns it to <c>OnWriteValue</c> (Task 11), and the OPC UA SDK calls it when a client writes the
/// node's Value.
/// <para>
/// It reads the caller's <see cref="RoleCarryingUserIdentity"/> off the SDK <paramref name="context"/>
/// (the same extraction <see cref="HandleAlarmCommand"/> uses), snapshots the written value and the
/// node's string id, and hands both to <see cref="EvaluateEquipmentWrite"/>, which applies the
/// <see cref="OpcUaDataPlaneRoles.WriteOperate"/> gate (<b>fails closed</b>) and starts the route
/// through <see cref="NodeWriteRouter"/>.
/// </para>
/// <para>
/// The route is FIRE-AND-FORGET: the SDK's <c>CustomNodeManager2.Write</c> holds the node-manager
/// <c>Lock</c> while this handler runs, so waiting on a driver round-trip here would stall every
/// address-space operation. A <see cref="ServiceResult.Good"/> return lets the SDK apply the client's
/// value optimistically; the next driver poll overwrites it with the confirmed value through the
/// normal <see cref="WriteValue"/> path.
/// </para>
/// </summary>
private ServiceResult OnEquipmentTagWrite(
    ISystemContext context, NodeState node, NumericRange indexRange, QualifiedName dataEncoding,
    ref object value, ref StatusCode statusCode, ref DateTime timestamp)
{
    // NOTE(review): indexRange, dataEncoding, statusCode and timestamp are not consulted, so a ranged
    // (partial-array) write is routed with the value exactly as received — confirm that is intended.
    RoleCarryingUserIdentity? caller = null;
    if (context is ISessionOperationContext sessionContext)
    {
        caller = sessionContext.UserIdentity as RoleCarryingUserIdentity;
    }

    // A ref parameter cannot be captured by a lambda, so snapshot it first. The ref itself is left
    // untouched: on Good the SDK applies the client's value unchanged.
    var payload = value;
    var targetNodeId = node.NodeId.Identifier?.ToString() ?? string.Empty;

    // Read the volatile field once so the null check and the later invocation see the same delegate.
    var sink = _nodeWriteRouter;
    if (sink is null)
    {
        return EvaluateEquipmentWrite(caller, route: null);
    }

    return EvaluateEquipmentWrite(caller, () => sink(targetNodeId, payload));
}
/// <summary>
/// Decision logic for an inbound equipment-tag write, split out of <see cref="OnEquipmentTagWrite"/>
/// so it can be unit-tested without booting an SDK server. Outcomes, in evaluation order:
/// <list type="number">
///   <item><description>No identity, or an identity lacking the
///   <see cref="OpcUaDataPlaneRoles.WriteOperate"/> role ⇒ <c>BadUserAccessDenied</c>; the gate fails
///   closed and <paramref name="route"/> is not invoked.</description></item>
///   <item><description>Role present but <paramref name="route"/> is null (no router wired) ⇒
///   <c>BadNotWritable</c> with the text "writes unavailable".</description></item>
///   <item><description>Role present and <paramref name="route"/> supplied ⇒ <paramref name="route"/>
///   is invoked exactly once and <see cref="ServiceResult.Good"/> is returned, so the SDK applies the
///   client value optimistically.</description></item>
/// </list>
/// The role name is matched with <see cref="StringComparer.OrdinalIgnoreCase"/>, matching the alarm gate.
/// </summary>
/// <param name="identity">The role-carrying identity taken from the SDK context, or null when the
/// session is anonymous / carries no role-carrying identity.</param>
/// <param name="route">Thunk that starts the asynchronous route of the write to the driver; called only
/// when the gate passes. Null when no router is wired (e.g. admin-only nodes).</param>
/// <returns><see cref="ServiceResult.Good"/> when the write is allowed and dispatch was started;
/// <c>BadUserAccessDenied</c> when the gate vetoes; <c>BadNotWritable</c> ("writes unavailable") when
/// no router is wired.</returns>
internal static ServiceResult EvaluateEquipmentWrite(
    RoleCarryingUserIdentity? identity, Action? route)
{
    var mayWrite = identity is not null
        && identity.Roles.Contains(OpcUaDataPlaneRoles.WriteOperate, StringComparer.OrdinalIgnoreCase);
    if (!mayWrite)
    {
        // Veto: a bad ServiceResult aborts the SDK's write and is surfaced to the client as-is.
        return new ServiceResult(StatusCodes.BadUserAccessDenied);
    }

    if (route is not null)
    {
        // The caller holds the node-manager Lock, so the thunk must only *start* the driver
        // round-trip; we return Good straight after it.
        route();
        return ServiceResult.Good;
    }

    // Authorised, but nothing is wired to carry the write (admin-only nodes / pre-boot).
    return new ServiceResult(StatusCodes.BadNotWritable, "writes unavailable");
}
/// <summary>Map our domain <c>AlarmType</c> string to the matching SDK condition subtype. Script
/// alarms have no OPC limit/setpoint values, so limit-style types fall back to the base
/// <see cref="AlarmConditionState"/> (see <see cref="MaterialiseAlarmCondition"/> remarks).</summary>
@@ -634,9 +739,9 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
/// <param name="displayName">The display name of the variable.</param>
/// <param name="dataType">The OPC UA data type name (e.g., "Boolean", "Int32", "String").</param>
/// <param name="writable">When true the node is created <c>CurrentReadWrite</c> (an authored
/// ReadWrite equipment tag); when false it stays <c>CurrentRead</c> (read-only). This task only sets
/// the access level — no OnWriteValue handler is attached here (the inbound-write handler is owned
/// by a later task).</param>
/// ReadWrite equipment tag) and the inbound-write handler <see cref="OnEquipmentTagWrite"/> is attached
/// to its <c>OnWriteValue</c> (Task 11) so a client write gates on the <c>WriteOperate</c> role + routes
/// to the backing driver; when false it stays <c>CurrentRead</c> (read-only) with no write handler.</param>
public void EnsureVariable(string variableNodeId, string? parentFolderNodeId, string displayName, string dataType, bool writable)
{
ArgumentException.ThrowIfNullOrEmpty(variableNodeId);
@@ -668,6 +773,14 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
StatusCode = StatusCodes.BadWaitingForInitialData,
Timestamp = DateTime.MinValue,
};
// Task 11: a writable equipment tag owns an inbound-write handler. The SDK invokes
// OnWriteValue on a client write; it gates on the WriteOperate role and routes to the backing
// driver via NodeWriteRouter. Read-only nodes leave it null (the default) so a write is
// rejected by the SDK's own AccessLevel check before it ever reaches a handler.
if (writable)
{
variable.OnWriteValue = OnEquipmentTagWrite;
}
parent.AddChild(variable);
AddPredefinedNode(SystemContext, variable);
_variables[variableNodeId] = variable;
@@ -36,6 +36,26 @@ public sealed class OtOpcUaSdkServer : StandardServer
return true;
}
/// <summary>
/// Installs (or clears) the reverse-path sink for inbound operator writes on the
/// <see cref="OtOpcUaNodeManager"/> this server created, by assigning its
/// <c>NodeWriteRouter</c> property.
/// <para>
/// The host calls this after start with a fire-and-forget lambda that starts a bounded <c>Ask</c> of
/// the local <c>DriverHostActor</c> (<c>RouteNodeWrite</c>) and logs failures from a continuation. The
/// lambda must NOT block: the write handler invokes it under the node-manager <c>Lock</c> (the same
/// constraint as <see cref="SetAlarmCommandRouter"/>).
/// </para>
/// </summary>
/// <param name="router">Delegate the writable node's <c>OnWriteValue</c> handler invokes once the
/// <c>WriteOperate</c> gate passes; pass <c>null</c> to clear it.</param>
/// <returns><c>true</c> when the router was assigned to a live node manager; <c>false</c> (nothing
/// assigned) when no node manager exists yet, which lets the caller detect a too-early call.</returns>
public bool SetNodeWriteRouter(Action<string, object?>? router)
{
    if (_otOpcUaNodeManager is not { } nodeManager)
    {
        return false;
    }

    nodeManager.NodeWriteRouter = router;
    return true;
}
/// <inheritdoc />
protected override MasterNodeManager CreateMasterNodeManager(
IServerInternal server, ApplicationConfiguration configuration)
@@ -22,4 +22,10 @@ public static class OpcUaDataPlaneRoles
/// route the command to the engine; absent it, the call is denied with
/// <c>BadUserAccessDenied</c>.</summary>
public const string AlarmAck = "AlarmAck";
/// <summary>The role that grants authority to write a writable equipment-tag variable node
/// (FreeAccess / Operate attributes). A session must carry this role for the inbound
/// <c>OnWriteValue</c> handler (Task 11) to route the value to the backing driver; absent it the
/// write is denied with <c>BadUserAccessDenied</c> before any driver call. The write gate matches
/// the role name case-insensitively (<c>OrdinalIgnoreCase</c>).</summary>
public const string WriteOperate = "WriteOperate";
}
@@ -0,0 +1,83 @@
using Opc.Ua;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.OpcUaServer.Security;
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests;
/// <summary>
/// Task 11 — the inbound operator-write authz gate + fire-and-forget dispatch. The OnWriteValue handler
/// on a writable equipment-tag node extracts the caller's <see cref="RoleCarryingUserIdentity"/>, gates
/// on the <see cref="OpcUaDataPlaneRoles.WriteOperate"/> role (deny otherwise), and on pass kicks off the
/// fire-and-forget route through the <see cref="OtOpcUaNodeManager.NodeWriteRouter"/> and returns
/// <c>Good</c> (optimistic write). The pure decision is extracted into
/// <see cref="OtOpcUaNodeManager.EvaluateEquipmentWrite"/> so the gate + dispatch are unit-testable
/// without booting an SDK server: the handler just supplies the extracted identity and a thunk that
/// starts the router (or null when no router is wired).
/// <para>
/// Each test counts route invocations instead of latching a flag, so both "never routed" and "routed
/// exactly once" are pinned, and every outcome is asserted on the status code.
/// </para>
/// </summary>
public sealed class EquipmentWriteGateTests
{
    /// <summary>(a) A null identity (anonymous / no role-carrying identity on the context) is denied with
    /// <c>BadUserAccessDenied</c> and the route thunk is NEVER invoked — the gate fails closed.</summary>
    [Fact]
    public void Null_identity_is_denied_and_does_not_route()
    {
        var routeCalls = 0;

        var result = OtOpcUaNodeManager.EvaluateEquipmentWrite(
            identity: null,
            route: () => routeCalls++);

        result.StatusCode.Code.ShouldBe(StatusCodes.BadUserAccessDenied);
        routeCalls.ShouldBe(0);
    }

    /// <summary>(b) An identity WITHOUT the <c>WriteOperate</c> role is denied with
    /// <c>BadUserAccessDenied</c> and the route thunk is NEVER invoked.</summary>
    [Fact]
    public void Identity_without_WriteOperate_is_denied_and_does_not_route()
    {
        var routeCalls = 0;
        var identity = IdentityWith("ReadOnly", OpcUaDataPlaneRoles.AlarmAck); // no WriteOperate

        var result = OtOpcUaNodeManager.EvaluateEquipmentWrite(
            identity,
            route: () => routeCalls++);

        result.StatusCode.Code.ShouldBe(StatusCodes.BadUserAccessDenied);
        routeCalls.ShouldBe(0);
    }

    /// <summary>(c) An identity WITH the <c>WriteOperate</c> role and a non-null route invokes the route
    /// thunk exactly once (fire-and-forget) and returns a <c>Good</c> result so the SDK applies the value
    /// optimistically. The role match is case-insensitive (the role set + gate both use
    /// <c>OrdinalIgnoreCase</c>).</summary>
    [Fact]
    public void Identity_with_WriteOperate_routes_and_returns_good()
    {
        var routeCalls = 0;
        var identity = IdentityWith("readonly", "writeoperate"); // lower-cased: case-insensitive match

        var result = OtOpcUaNodeManager.EvaluateEquipmentWrite(
            identity,
            route: () => routeCalls++);

        routeCalls.ShouldBe(1); // exactly once — a double dispatch would write the device twice
        // Assert on the status code, like the sibling tests, rather than on ServiceResult equality.
        result.StatusCode.Code.ShouldBe(StatusCodes.Good);
    }

    /// <summary>(d) An identity WITH the <c>WriteOperate</c> role but a null route (no router wired — e.g.
    /// admin-only nodes) maps to <c>BadNotWritable</c> ("writes unavailable") — the gate passes but there is
    /// nowhere to route the write.</summary>
    [Fact]
    public void Identity_with_WriteOperate_and_null_route_maps_to_bad_not_writable()
    {
        var identity = IdentityWith(OpcUaDataPlaneRoles.WriteOperate);

        var result = OtOpcUaNodeManager.EvaluateEquipmentWrite(
            identity,
            route: null);

        result.StatusCode.Code.ShouldBe(StatusCodes.BadNotWritable);
        result.LocalizedText.Text.ShouldContain("writes unavailable");
    }

    /// <summary>Builds a user-name identity ("op") carrying exactly the given roles.</summary>
    private static RoleCarryingUserIdentity IdentityWith(params string[] roles) =>
        new(new UserNameIdentityToken { UserName = "op" }, roles);
}