feat(alarms): consume alarm-commands topic in ScriptedAlarmHostActor (T19)

Subscribe the host to the cluster alarm-commands DPS topic in PreStart and
drive the matching ScriptedAlarmEngine op per inbound AlarmCommand. An
ownership filter (engine.LoadedAlarmIds) ignores commands for alarms this
node does not own; TimedShelve without UnshelveAtUtc and unknown operations
are logged + rejected (never thrown); op failures are caught + logged so a
faulting op can't fault the actor. Re-projection is left to the engine's
existing OnEvent -> OnEngineEmission path.

Handler is a Task-returning ReceiveAsync (the project's AK2003 analyzer
forbids an async-void Receive delegate), giving ordered awaited async on the
actor thread. Adds 3 TestKit tests: ack drives the engine with mapped args,
unowned command ignored, missing-UnshelveAtUtc TimedShelve rejected not
thrown.
This commit is contained in:
Joseph Doherty
2026-06-11 06:23:08 -04:00
parent 1784eedd3f
commit 4f7999eac2
2 changed files with 193 additions and 0 deletions
@@ -153,12 +153,24 @@ public sealed class ScriptedAlarmHostActor : ReceiveActor
Receive<AlarmsLoaded>(OnAlarmsLoaded);
Receive<VirtualTagActor.DependencyValueChanged>(OnDependencyChanged);
Receive<EngineEmission>(OnEngineEmission);
// Inbound OPC UA Part 9 alarm method calls arrive as AlarmCommands on the cluster
// `alarm-commands` DPS topic (T18 publishes them after the AlarmAck role gate). The topic is a
// cluster-wide broadcast — every host node receives every command — so OnAlarmCommand filters to
// the alarms THIS host's engine owns before driving the matching engine op. The engine ops are
// async, and this project's Akka analyzer (AK2003) forbids an async-void Receive delegate, so
// the handler is a Task-returning ReceiveAsync: Akka suspends the mailbox until the op completes
// (ordered, awaited on the actor thread) and routes any escaped fault through supervision.
ReceiveAsync<AlarmCommand>(OnAlarmCommand);
// A faulted LoadAsync pipes back a Status.Failure (see OnApply) — log it and stay inert so the
// failure doesn't hit the dead-letter log.
Receive<Status.Failure>(OnLoadFailed);
// A LoadAsync cancelled by PostStop's _cts pipes back this marker. The actor is stopping, so
// there's nothing to do — swallow it quietly (no Warning, no dead letter).
Receive<AlarmsLoadCanceled>(_ => { });
// DPS Subscribe (PreStart) acks back here once the mediator has registered Self on the topic.
// No-op — the subscription is live the moment the ack arrives; we only need to keep it off the
// dead-letter log. Matches OpcUaPublishActor / DriverHostActor's SubscribeAck convention.
Receive<SubscribeAck>(_ => { /* PubSub ack */ });
}
private void OnApply(ApplyScriptedAlarms msg)
@@ -264,11 +276,110 @@ public sealed class ScriptedAlarmHostActor : ReceiveActor
_mediator.Tell(new Publish(AlertsTopic, evt));
}
/// <summary>
/// Drives an inbound OPC UA Part 9 alarm method call (delivered as an <see cref="AlarmCommand"/>
/// on the cluster <c>alarm-commands</c> topic) onto the matching <see cref="ScriptedAlarmEngine"/>
/// operation.
///
/// <para>
/// <b>Ownership filter.</b> The topic is a cluster-wide broadcast; every host node receives
/// every command, but each owns a disjoint subset of alarms (its engine's loaded set). A
/// command for an alarm this engine does NOT own is a no-op — the owning node will act on it.
/// </para>
///
/// <para>
/// <b>No re-projection.</b> The engine op raises <see cref="ScriptedAlarmEngine.OnEvent"/> on
/// success, which already marshals back to <see cref="OnEngineEmission"/> and re-projects the
/// condition to the OPC UA node + the alerts topic. So this handler just calls the op and
/// awaits; it never touches the publish actor directly.
/// </para>
///
/// <para>
/// <b>Async on the actor thread.</b> The handler is a <c>Task</c>-returning
/// <c>ReceiveAsync</c> (this project's AK2003 analyzer forbids an async-void Receive
/// delegate). Akka suspends the actor's mailbox until the returned task completes, so the op
/// runs ordered + awaited — never overlapping the next message. The engine also serialises
/// every operation behind its own <c>_evalGate</c> and marshals every emission back via
/// <c>Self.Tell</c> (never touching <see cref="Context"/> off-thread).
/// </para>
///
/// <para>
/// <b>Never faults the actor.</b> The entire body — the ownership filter included — runs
/// inside a single try/catch, so neither a failing engine op nor a failing ownership lookup
/// can escape the handler. Failures are logged like <see cref="OnLoadFailed"/> and swallowed.
/// Malformed commands (a <c>TimedShelve</c> without <c>UnshelveAtUtc</c>, or an unknown
/// operation) are logged and rejected without throwing.
/// </para>
/// </summary>
/// <param name="cmd">The inbound alarm command.</param>
/// <returns>A task that completes once the command has been applied, ignored or rejected.</returns>
private async Task OnAlarmCommand(AlarmCommand cmd)
{
    try
    {
        // Ownership filter FIRST: ignore commands for alarms this engine doesn't own. The topic is a
        // cluster-wide broadcast, so the same command lands on every host — only the owner acts.
        // The lookup lives inside the try so that a throwing engine accessor is logged rather than
        // escaping the ReceiveAsync handler.
        if (!_engine.LoadedAlarmIds.Contains(cmd.AlarmId))
        {
            _log.Debug("ScriptedAlarmHost: ignoring AlarmCommand {Op} for unowned alarm {AlarmId}",
                cmd.Operation, cmd.AlarmId);
            return;
        }

        switch (cmd.Operation)
        {
            case "Acknowledge":
                await _engine.AcknowledgeAsync(cmd.AlarmId, cmd.User, cmd.Comment, CancellationToken.None);
                break;

            case "Confirm":
                await _engine.ConfirmAsync(cmd.AlarmId, cmd.User, cmd.Comment, CancellationToken.None);
                break;

            case "OneShotShelve":
                await _engine.OneShotShelveAsync(cmd.AlarmId, cmd.User, CancellationToken.None);
                break;

            case "TimedShelve":
                // A timed shelve needs the absolute unshelve instant. T18 derives it from the OPC UA
                // Duration (UtcNow + shelvingTime); a command missing it is malformed — log + reject
                // explicitly so the operator sees a targeted message instead of a generic op failure.
                if (cmd.UnshelveAtUtc is not { } unshelveAt)
                {
                    _log.Warning("ScriptedAlarmHost: rejecting TimedShelve for {AlarmId} — missing UnshelveAtUtc",
                        cmd.AlarmId);
                    return;
                }

                await _engine.TimedShelveAsync(cmd.AlarmId, cmd.User, unshelveAt, CancellationToken.None);
                break;

            case "Unshelve":
                await _engine.UnshelveAsync(cmd.AlarmId, cmd.User, CancellationToken.None);
                break;

            case "Enable":
                await _engine.EnableAsync(cmd.AlarmId, cmd.User, CancellationToken.None);
                break;

            case "Disable":
                await _engine.DisableAsync(cmd.AlarmId, cmd.User, CancellationToken.None);
                break;

            case "AddComment":
                // AddComment's text is required by the engine (ApplyAddComment takes a non-null text);
                // coalesce a null comment to empty so a comment-less AddComment is still a valid no-op
                // rather than an NRE.
                await _engine.AddCommentAsync(cmd.AlarmId, cmd.User, cmd.Comment ?? string.Empty, CancellationToken.None);
                break;

            default:
                _log.Warning("ScriptedAlarmHost: ignoring AlarmCommand with unknown operation {Op} for {AlarmId}",
                    cmd.Operation, cmd.AlarmId);
                break;
        }
    }
    catch (Exception ex)
    {
        // A failing engine op must not crash the actor — mirror OnLoadFailed's log-and-stay-inert style.
        _log.Warning(ex, "ScriptedAlarmHost: engine op {Op} failed for alarm {AlarmId}",
            cmd.Operation, cmd.AlarmId);
    }
}
/// <inheritdoc />
protected override void PreStart()
{
    // Look the cluster DPS extension up once and cache its mediator in the field, so later
    // emissions only ever Tell the cached reference.
    var pubSub = DistributedPubSub.Get(Context.System);
    _mediator = pubSub.Mediator;

    // Register this actor on the `alarm-commands` topic so inbound OPC UA Part 9 method calls
    // (published by the node manager's condition handlers, T18) are delivered here as
    // AlarmCommands. The resulting SubscribeAck comes back to this actor, where the constructor
    // wiring handles it as a no-op.
    var subscribeToAlarmCommands = new Subscribe(AlarmCommandsTopic, Self);
    _mediator.Tell(subscribeToAlarmCommands);

    base.PreStart();
}