feat(alarms): route inbound Part 9 alarm methods through AlarmAck gate (T18)

Wire the materialised AlarmConditionState method handlers so a client calling
Acknowledge/Confirm/Shelve/AddComment is gated on the AlarmAck data-plane role
and, when allowed, routed back to the scripted-alarm engine via a new
`alarm-commands` DistributedPubSub topic.

- Commons: new AlarmCommand DTO (AlarmId/Operation/User/Comment/UnshelveAtUtc).
- ScriptedAlarmHostActor: add AlarmCommandsTopic const.
- OtOpcUaNodeManager: settable AlarmCommandRouter + wire OnAcknowledge/OnConfirm/
  OnAddComment/OnShelve/OnTimedUnshelve. Each resolves the principal off
  ISessionOperationContext.UserIdentity as RoleCarryingUserIdentity, fails closed
  (BadUserAccessDenied) when the AlarmAck role is absent or no identity, else maps
  + routes an AlarmCommand and returns Good. OnShelve discriminates OneShotShelve/
  TimedShelve/Unshelve from the SDK flags; TimedShelve expiry = UtcNow + ms.
  No Akka/IActorRef handle — only the Action<AlarmCommand> delegate. T20 de-dup
  note left; WriteAlarmCondition untouched.
- OpcUaServer.Security: OpcUaDataPlaneRoles.AlarmAck shared const (the role was a
  bare string everywhere; introduced one symbol for the gate + tests).
- OtOpcUaSdkServer: SetAlarmCommandRouter pass-through.
- Host: boot wiring publishes each command via mediator.Tell(Publish(...)) using a
  lazy ActorSystem accessor (mirrors DpsScriptLogPublisher).
- Tests: 11 new gate + mapping tests (OpcUaServer.Tests 88->99, all green).
This commit is contained in:
Joseph Doherty
2026-06-11 06:05:39 -04:00
parent ac5db0a9f8
commit 63289d377c
8 changed files with 584 additions and 0 deletions
@@ -1,9 +1,12 @@
using Akka.Actor;
using Akka.Cluster.Tools.PublishSubscribe;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.OpcUaServer;
using ZB.MOM.WW.OtOpcUa.OpcUaServer.Security;
using ZB.MOM.WW.OtOpcUa.Runtime.ScriptedAlarms;
namespace ZB.MOM.WW.OtOpcUa.Host.OpcUa;
@@ -24,6 +27,7 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
private readonly DeferredAddressSpaceSink _deferredSink;
private readonly DeferredServiceLevelPublisher _deferredServiceLevel;
private readonly IOpcUaUserAuthenticator _userAuthenticator;
private readonly Func<ActorSystem> _actorSystemAccessor;
private readonly ILoggerFactory _loggerFactory;
private readonly ILogger<OtOpcUaServerHostedService> _logger;
@@ -37,18 +41,23 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
/// <param name="deferredSink">The deferred address space sink that receives the real sink once the server is ready.</param>
/// <param name="deferredServiceLevel">The deferred service level publisher that receives the real publisher once the server is ready.</param>
/// <param name="userAuthenticator">The OPC UA user authenticator.</param>
/// <param name="actorSystemAccessor">Lazy accessor for the running <see cref="ActorSystem"/>, used to
/// resolve the DistributedPubSub mediator the inbound alarm-command router publishes through. Resolved
/// lazily (mirroring <c>DpsScriptLogPublisher</c>) so construction never races Akka startup.</param>
/// <param name="loggerFactory">The logger factory for creating loggers.</param>
/// <exception cref="ArgumentNullException">Any constructor argument is <c>null</c>.</exception>
public OtOpcUaServerHostedService(
    IOptions<OpcUaApplicationHostOptions> options,
    DeferredAddressSpaceSink deferredSink,
    DeferredServiceLevelPublisher deferredServiceLevel,
    IOpcUaUserAuthenticator userAuthenticator,
    Func<ActorSystem> actorSystemAccessor,
    ILoggerFactory loggerFactory)
{
    // Fail fast on a broken registration. In particular, a null ActorSystem accessor would
    // otherwise only surface much later, inside the alarm-command router's catch-all, as a
    // generic "failed to route" warning on every inbound alarm command.
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(deferredSink);
    ArgumentNullException.ThrowIfNull(deferredServiceLevel);
    ArgumentNullException.ThrowIfNull(userAuthenticator);
    ArgumentNullException.ThrowIfNull(actorSystemAccessor);
    ArgumentNullException.ThrowIfNull(loggerFactory);

    _options = options.Value;
    _deferredSink = deferredSink;
    _deferredServiceLevel = deferredServiceLevel;
    _userAuthenticator = userAuthenticator;
    _actorSystemAccessor = actorSystemAccessor;
    _loggerFactory = loggerFactory;
    _logger = loggerFactory.CreateLogger<OtOpcUaServerHostedService>();
}
@@ -88,6 +97,30 @@ public sealed class OtOpcUaServerHostedService : IHostedService, IAsyncDisposabl
_deferredSink.SetSink(new SdkAddressSpaceSink(_server.NodeManager));
// Wire the reverse-path inbound-alarm-command router: a client Acknowledge/Confirm/Shelve that
// passes the node manager's AlarmAck gate publishes the mapped AlarmCommand onto the cluster
// `alarm-commands` topic (same DistributedPubSub mediator the `alerts`/`script-logs` topics use).
// The Tell is fire-and-forget so the handler — which runs under the SDK's Lock — never blocks.
// The mediator is resolved per-publish via the lazy ActorSystem accessor so a transient cluster
// condition is tolerated and construction never races Akka startup.
var alarmRouterWired = _server.SetAlarmCommandRouter(cmd =>
{
    try
    {
        var mediator = DistributedPubSub.Get(_actorSystemAccessor()).Mediator;
        mediator.Tell(new Publish(ScriptedAlarmHostActor.AlarmCommandsTopic, cmd));
    }
    catch (Exception ex)
    {
        // The router runs under the SDK Lock on a server thread; a cluster hiccup must not
        // escape into the SDK's Call path. Log + drop — the client still gets Good for the
        // node-state change; the missed command surfaces as a non-applied engine transition.
        _logger.LogWarning(ex,
            "OtOpcUaServerHostedService: failed to route inbound alarm command {Operation} for {AlarmId}",
            cmd.Operation, cmd.AlarmId);
    }
});

// SetAlarmCommandRouter returns false when the node manager does not exist yet. Without this
// check the router would be dropped silently and every permitted alarm method call would be
// applied to the node state but never reach the engine.
if (!alarmRouterWired)
{
    _logger.LogWarning(
        "OtOpcUaServerHostedService: node manager not available; inbound alarm commands will not be routed");
}
// ServiceLevel publisher needs IServerInternal — only available after Start.
if (_server.CurrentInstance is { } serverInternal)
{
@@ -147,6 +147,10 @@ if (hasDriver)
builder.Services.AddValidatedOptions<OpcUaApplicationHostOptions, OpcUaApplicationHostOptionsValidator>(
builder.Configuration, "OpcUa");
// Lazy ActorSystem accessor so OtOpcUaServerHostedService can resolve the DistributedPubSub
// mediator (for the inbound alarm-command router) without racing Akka startup — same pattern the
// DpsScriptLogPublisher above uses. TryAdd so a fused admin+driver node registers it exactly once.
builder.Services.TryAddSingleton<Func<ActorSystem>>(sp => () => sp.GetRequiredService<ActorSystem>());
builder.Services.AddHostedService<OtOpcUaServerHostedService>();
}
@@ -2,6 +2,7 @@ using System.Collections.Concurrent;
using Opc.Ua;
using Opc.Ua.Server;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.OpcUaServer.Security;
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer;
@@ -52,6 +53,23 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
/// <summary>Gets the count of real Part 9 <see cref="AlarmConditionState"/> nodes currently managed.</summary>
public int AlarmConditionCount => _alarmConditions.Count;
/// <summary>
/// Reverse-path sink for inbound OPC UA Part 9 alarm method calls. When a client invokes a
/// materialised condition's Acknowledge / Confirm / Shelve / AddComment method, the condition's
/// handler (wired in <see cref="MaterialiseAlarmCondition"/>) gates on the caller's
/// <c>AlarmAck</c> role and, when allowed, builds an <see cref="AlarmCommand"/> and invokes this
/// delegate. The host sets it at boot to a non-blocking <c>mediator.Tell</c> onto the
/// <c>alarm-commands</c> DistributedPubSub topic; T19's engine-side subscriber consumes it.
/// <para>
/// This is the ONLY reverse coupling out of the node manager — by design it is a plain
/// <see cref="Action{AlarmCommand}"/> (no Akka / <c>IActorRef</c> / DI handle). The handler
/// delegates run under the manager's <c>Lock</c>; the invoked action MUST be non-blocking
/// (a fire-and-forget <c>Tell</c>) so there is no deadlock.
/// </para>
/// <para>
/// <c>null</c> (the default) disables routing only: each handler still applies the
/// <c>AlarmAck</c> gate and still returns its normal result (<c>Good</c> for an allowed caller),
/// but no <see cref="AlarmCommand"/> is delivered anywhere.
/// </para>
/// </summary>
public Action<AlarmCommand>? AlarmCommandRouter { get; set; }
/// <summary>Look up a materialised Part 9 alarm-condition node by its alarm node id (the
/// ScriptedAlarmId), or null if not yet materialised. Exposed for tests + diagnostics.</summary>
/// <param name="alarmNodeId">The alarm node identifier (== ScriptedAlarmId).</param>
@@ -316,6 +334,38 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
alarm.Message.Value = new LocalizedText(displayName);
if (alarm.ConditionName is not null) alarm.ConditionName.Value = displayName;
// T18 — inbound Part 9 method handlers. Create() materialised the Acknowledge/Confirm/
// AddComment/Shelve/Unshelve method nodes and the condition types wired their built-in OnCall
// routing; these delegates are the veto/permission seam the SDK invokes BEFORE applying the
// state change. Each gates on the caller's AlarmAck role (fails closed) and, when allowed,
// routes a mapped AlarmCommand to the engine via AlarmCommandRouter, then returns Good so the
// SDK applies its node state + auto-fires its own event.
// T20: the engine re-projects that same logical transition through WriteAlarmCondition, which
// also fires — the resulting double-emit is de-duped in a later task (T20), NOT here.
alarm.OnAcknowledge = (context, condition, _, comment) =>
    HandleAlarmCommand(context, condition, "Acknowledge", comment, unshelveAt: null);
alarm.OnConfirm = (context, condition, _, comment) =>
    HandleAlarmCommand(context, condition, "Confirm", comment, unshelveAt: null);
alarm.OnAddComment = (context, condition, _, comment) =>
    HandleAlarmCommand(context, condition, "AddComment", comment, unshelveAt: null);
alarm.OnShelve = (context, condition, shelving, oneShot, shelvingTime) =>
{
    // SDK invocation shapes (verified against the decompiled AlarmConditionState):
    //   OneShotShelve → (shelving:true,  oneShot:true,  0.0) ⇒ OneShotShelve, no expiry
    //   TimedShelve   → (shelving:true,  oneShot:false, ms)  ⇒ TimedShelve, expiry = UtcNow + ms
    //   Unshelve      → (shelving:false, oneShot:false, 0.0) ⇒ Unshelve, no expiry
    // shelvingTime is an OPC UA Duration (milliseconds).
    if (!shelving)
    {
        return HandleAlarmCommand(context, condition, "Unshelve", comment: null, unshelveAt: null);
    }

    if (oneShot)
    {
        return HandleAlarmCommand(context, condition, "OneShotShelve", comment: null, unshelveAt: null);
    }

    // TimedShelve: the duration is client-supplied. NaN makes TimeSpan.FromMilliseconds throw
    // ArgumentException, an infinite/oversized value throws OverflowException, and a sum past
    // DateTime.MaxValue throws ArgumentOutOfRangeException (an ArgumentException). Map all of
    // them to the Part 9 status for an invalid shelving time instead of letting the exception
    // escape the handler.
    DateTime expiresAtUtc;
    try
    {
        expiresAtUtc = DateTime.UtcNow + TimeSpan.FromMilliseconds(shelvingTime);
    }
    catch (Exception ex) when (ex is ArgumentException or OverflowException)
    {
        return new ServiceResult(StatusCodes.BadShelvingTimeOutOfRange);
    }

    return HandleAlarmCommand(context, condition, "TimedShelve", comment: null, unshelveAt: expiresAtUtc);
};
// The auto-unshelve timer firing is an unshelve transition driven by the SDK (no client user);
// route it as Unshelve so the engine clears its shelve state. Same AlarmAck gate applies.
// NOTE(review): HandleAlarmCommand fails closed when the context carries no role-carrying
// identity. If the SDK really invokes this callback without a client identity, the gate will
// deny and the Unshelve is never routed to the engine — confirm which context the SDK timer
// passes here.
alarm.OnTimedUnshelve = (context, condition) =>
    HandleAlarmCommand(context, condition, "Unshelve", comment: null, unshelveAt: null);
parent.AddChild(alarm);
// Promote the equipment folder to an event notifier + register it as a root notifier so
@@ -328,6 +378,49 @@ public sealed class OtOpcUaNodeManager : CustomNodeManager2
}
}
/// <summary>
/// Common implementation behind every inbound Part 9 alarm method delegate (T18). It authorises
/// the caller against the <c>AlarmAck</c> role and, if authorised, forwards an
/// <see cref="AlarmCommand"/> describing the call to <see cref="AlarmCommandRouter"/>. The gate
/// <b>fails closed</b>: a context without a <see cref="RoleCarryingUserIdentity"/>, or an identity
/// without the role, is rejected.
/// </summary>
/// <param name="context">The SDK context passed to the handler delegate. The caller's identity is
/// read from it via <see cref="ISessionOperationContext"/>.</param>
/// <param name="condition">The targeted condition; the string form of its <c>NodeId</c> identifier
/// is used as <see cref="AlarmCommand.AlarmId"/> (empty string when the identifier is null).</param>
/// <param name="operation">The Part 9 operation name (e.g. <c>Acknowledge</c>, <c>TimedShelve</c>).</param>
/// <param name="comment">The comment supplied with the call, or <c>null</c> when there is none.</param>
/// <param name="unshelveAt">The UTC expiry for a <c>TimedShelve</c>; <c>null</c> for every other
/// operation.</param>
/// <returns><c>ServiceResult.Good</c> when the caller is authorised; otherwise a
/// <c>BadUserAccessDenied</c> result, in which case nothing is routed.</returns>
private ServiceResult HandleAlarmCommand(
    ISystemContext context, ConditionState condition, string operation, LocalizedText? comment, DateTime? unshelveAt)
{
    // Step 1 — identity: only a session context carrying a RoleCarryingUserIdentity qualifies.
    // Anything else (non-session context, anonymous, plain identity) is vetoed.
    if ((context as ISessionOperationContext)?.UserIdentity is not RoleCarryingUserIdentity caller)
    {
        return new ServiceResult(StatusCodes.BadUserAccessDenied);
    }

    // Step 2 — role: the identity must hold AlarmAck (case-insensitive match).
    if (!caller.Roles.Contains(OpcUaDataPlaneRoles.AlarmAck, StringComparer.OrdinalIgnoreCase))
    {
        return new ServiceResult(StatusCodes.BadUserAccessDenied);
    }

    // Step 3 — map the call onto the engine-facing command.
    var alarmId = condition.NodeId.Identifier?.ToString() ?? string.Empty;
    var userName = caller.DisplayName ?? string.Empty;
    var command = new AlarmCommand(
        AlarmId: alarmId,
        Operation: operation,
        User: userName,
        Comment: comment?.Text,
        UnshelveAtUtc: unshelveAt);

    // Step 4 — hand off. The router is non-blocking by contract, so invoking it under the
    // manager's Lock is safe. With no router configured the command is simply not sent.
    var router = AlarmCommandRouter;
    if (router is not null)
    {
        router(command);
    }

    // Good lets the SDK apply the node-state change and raise its own condition event.
    return ServiceResult.Good;
}
/// <summary>Map our domain <c>AlarmType</c> string to the matching SDK condition subtype. Script
/// alarms have no OPC limit/setpoint values, so limit-style types fall back to the base
/// <see cref="AlarmConditionState"/> (see <see cref="MaterialiseAlarmCondition"/> remarks).</summary>
@@ -1,5 +1,6 @@
using Opc.Ua;
using Opc.Ua.Server;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer;
@@ -17,6 +18,24 @@ public sealed class OtOpcUaSdkServer : StandardServer
/// <see cref="CreateMasterNodeManager"/>. Null until the SDK has bootstrapped.</summary>
public OtOpcUaNodeManager? NodeManager => _otOpcUaNodeManager;
/// <summary>
/// Installs (or clears) the inbound Part 9 alarm-command router on the
/// <see cref="OtOpcUaNodeManager"/> this server created. The host calls this after start with a
/// non-blocking <c>mediator.Tell</c> that publishes each <see cref="AlarmCommand"/> onto the
/// <c>alarm-commands</c> DistributedPubSub topic. If the node manager has not been created yet,
/// nothing is changed and <c>false</c> is returned so the caller can detect a too-early call.
/// </summary>
/// <param name="router">The delegate the condition handlers invoke once the <c>AlarmAck</c> gate
/// passes; pass <c>null</c> to clear it.</param>
/// <returns><c>true</c> if the router was assigned to a live node manager; <c>false</c> if there
/// is no node manager yet.</returns>
public bool SetAlarmCommandRouter(Action<AlarmCommand>? router)
{
    if (_otOpcUaNodeManager is not { } nodeManager)
    {
        return false;
    }

    nodeManager.AlarmCommandRouter = router;
    return true;
}
/// <inheritdoc />
protected override MasterNodeManager CreateMasterNodeManager(
IServerInternal server, ApplicationConfiguration configuration)
@@ -0,0 +1,25 @@
namespace ZB.MOM.WW.OtOpcUa.OpcUaServer.Security;
/// <summary>
/// Canonical string constants for the OPC UA <b>data-plane</b> roles the LDAP group→role map
/// produces and <see cref="RoleCarryingUserIdentity.Roles"/> carries onto the session identity.
/// These are distinct from the control-plane <c>AdminRole</c> enum (Admin UI capabilities) — the
/// two planes share zero runtime code path by design.
/// <para>
/// Across the codebase these data-plane roles (<c>ReadOnly</c>, <c>WriteOperate</c>,
/// <c>WriteTune</c>, <c>WriteConfigure</c>, <c>AlarmAck</c>, …) are used as bare strings
/// (they originate as LDAP group names mapped through <c>RoleMapper</c>). T18 introduced this
/// single shared const for the one role the inbound alarm-method gate reads, so the gate and
/// its tests reference one symbol instead of a re-typed literal.
/// </para>
/// <para>
/// Comparison is case-insensitive: the role set is built with
/// <see cref="System.StringComparer.OrdinalIgnoreCase"/>, and the gate matches with that same
/// comparer.
/// </para>
/// </summary>
public static class OpcUaDataPlaneRoles
{
/// <summary>The role that grants OPC UA Part 9 alarm acknowledge / confirm / shelve / comment
/// authority. A session must carry this role for the inbound alarm-condition method handlers to
/// route the command to the engine; absent it, the call is denied with
/// <c>BadUserAccessDenied</c>. The literal value is <c>"AlarmAck"</c>.</summary>
public const string AlarmAck = "AlarmAck";
}
@@ -58,6 +58,12 @@ public sealed class ScriptedAlarmHostActor : ReceiveActor
/// the constant the (retired) <c>ScriptedAlarmActor</c> used so subscribers stay wired.</summary>
public const string AlertsTopic = "alerts";
/// <summary>The cluster DistributedPubSub topic inbound OPC UA Part 9 alarm method calls
/// (Acknowledge / Confirm / Shelve / AddComment) are routed onto as <see cref="AlarmCommand"/>s.
/// The OPC UA node manager's condition handlers build the command (after the <c>AlarmAck</c> role
/// gate); the host's boot wiring publishes it here; T19's engine-side subscriber consumes it.</summary>
public const string AlarmCommandsTopic = "alarm-commands";
/// <summary>Reconcile the loaded alarm set to exactly the enabled subset of <paramref name="Plans"/>:
/// builds <see cref="ScriptedAlarmDefinition"/>s (skipping disabled plans), reloads the engine, and
/// re-registers mux interest for the union of dependency refs.</summary>