feat(alerts): AdminUI alarm ack/shelve via AdminOperationsActor singleton

T21: add an AdminUI path for acknowledging/shelving alarms that routes
through the admin-pinned AdminOperationsActor cluster singleton, which
republishes onto the same 'alarm-commands' DPS topic the OPC UA method
path (T18) and the engine subscriber (T19) use. The broadcast + the
ScriptedAlarmHostActor ownership filter handle cross-node routing, so
the singleton needs no knowledge of which node owns the alarm.

- Commons: AcknowledgeAlarmCommand/ShelveAlarmCommand (+ result records)
  and a shared AlarmCommandsTopic const; ScriptedAlarmHostActor now
  re-exports that const (mirrors the DriverControlTopic pattern).
- AdminOperationsActor: two handlers map the control-plane messages to
  AlarmCommand (Acknowledge / OneShotShelve / TimedShelve / Unshelve,
  threading User/Comment/UnshelveAtUtc) and publish via the DPS mediator.
- IAdminOperationsClient + AdminOperationsClient: typed Acknowledge/Shelve
  ask wrappers mirroring StartDeploymentAsync.
- Alerts.razor: per-row DriverOperator-gated Ack/Shelve/Unshelve controls;
  operator name from AuthenticationState. Timed-shelve datetime UI deferred.
- 5 TestKit tests (mediator-probe subscribed to alarm-commands) verifying
  each kind's mapping + reply; 56/56 ControlPlane tests green.
This commit is contained in:
Joseph Doherty
2026-06-11 06:44:27 -04:00
parent f742050ebd
commit 370a2b7b48
9 changed files with 503 additions and 3 deletions
@@ -37,6 +37,38 @@ public sealed class AdminOperationsClient : IAdminOperationsClient
return await _proxy.Ask<StartDeploymentResult>(msg, AskTimeout, linked.Token);
}
/// <summary>
/// Sends an acknowledge request for a single alarm to the admin singleton and awaits its reply.
/// A fresh <see cref="Guid"/> is attached to every command.
/// </summary>
/// <param name="alarmId">ScriptedAlarmId of the alarm to acknowledge.</param>
/// <param name="user">Name of the operator performing the acknowledge.</param>
/// <param name="comment">Free-text comment to attach, or null when there is none.</param>
/// <param name="ct">Caller's cancellation token; linked with the <c>AskTimeout</c> bound.</param>
/// <returns>The <see cref="AcknowledgeAlarmResult"/> replied to the ask.</returns>
public async Task<AcknowledgeAlarmResult> AcknowledgeAlarmAsync(
    string alarmId, string user, string? comment, CancellationToken ct)
{
    var command = new AcknowledgeAlarmCommand(alarmId, user, comment, Guid.NewGuid());

    // Cancel on whichever comes first: the caller's token or the AskTimeout deadline.
    using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    timeoutCts.CancelAfter(AskTimeout);

    var reply = await _proxy.Ask<AcknowledgeAlarmResult>(command, AskTimeout, timeoutCts.Token);
    return reply;
}
/// <summary>
/// Sends a shelve or unshelve request for a single alarm to the admin singleton and awaits its
/// reply. A fresh <see cref="Guid"/> is attached to every command.
/// </summary>
/// <param name="alarmId">ScriptedAlarmId of the alarm to shelve or unshelve.</param>
/// <param name="user">Name of the operator performing the action.</param>
/// <param name="kind">The shelve action being requested.</param>
/// <param name="unshelveAtUtc">Expiry instant for a timed shelve; null for the other kinds.</param>
/// <param name="comment">Free-text comment to attach, or null when there is none.</param>
/// <param name="ct">Caller's cancellation token; linked with the <c>AskTimeout</c> bound.</param>
/// <returns>The <see cref="ShelveAlarmResult"/> replied to the ask.</returns>
public async Task<ShelveAlarmResult> ShelveAlarmAsync(
    string alarmId, string user, ShelveKind kind, DateTime? unshelveAtUtc, string? comment, CancellationToken ct)
{
    var command = new ShelveAlarmCommand(alarmId, user, kind, unshelveAtUtc, comment, Guid.NewGuid());

    // Cancel on whichever comes first: the caller's token or the AskTimeout deadline.
    using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    timeoutCts.CancelAfter(AskTimeout);

    var reply = await _proxy.Ask<ShelveAlarmResult>(command, AskTimeout, timeoutCts.Token);
    return reply;
}
/// <summary>
/// Generic Ask — forwards any message to the AdminOperationsActor singleton proxy.
/// Uses the caller-supplied <paramref name="ct"/> for cancellation; does not impose an
@@ -4,9 +4,15 @@
and the AB CIP ALMD bridge. *@
@attribute [Microsoft.AspNetCore.Authorization.Authorize]
@rendermode RenderMode.InteractiveServer
@using Microsoft.AspNetCore.Authorization
@using ZB.MOM.WW.OtOpcUa.AdminUI.Hubs
@using ZB.MOM.WW.OtOpcUa.Commons.Interfaces
@using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin
@using ZB.MOM.WW.OtOpcUa.Commons.Messages.Alerts
@inject IInProcessBroadcaster<AlarmTransitionEvent> Alarms
@inject AuthenticationStateProvider AuthState
@inject IAuthorizationService AuthorizationService
@inject IAdminOperationsClient AdminOps
@implements IDisposable
<div class="d-flex justify-content-between align-items-center mb-3">
@@ -47,6 +53,10 @@ else
<th class="num">Severity</th>
<th>User</th>
<th>Message</th>
@if (_canOperate)
{
<th>Actions</th>
}
</tr>
</thead>
<tbody>
@@ -60,6 +70,35 @@ else
<td class="num">@e.Severity</td>
<td>@e.User</td>
<td>@e.Message</td>
@if (_canOperate)
{
@* DriverOperator-gated Acknowledge / Shelve / Unshelve. Each routes through
the AdminOperationsActor singleton, which republishes onto the cluster
'alarm-commands' topic; the owning node applies it (ownership filter). *@
<td>
<div class="d-flex gap-1 align-items-center">
<button type="button"
class="btn btn-sm btn-outline-secondary"
disabled="@_busyAlarmId.Equals(e.AlarmId)"
@onclick="() => AcknowledgeAsync(e.AlarmId)"
title="Acknowledge this alarm">Ack</button>
<button type="button"
class="btn btn-sm btn-outline-secondary"
disabled="@_busyAlarmId.Equals(e.AlarmId)"
@onclick="() => ShelveAsync(e.AlarmId, ShelveKind.OneShot)"
title="Shelve this alarm until it next clears">Shelve</button>
<button type="button"
class="btn btn-sm btn-outline-secondary"
disabled="@_busyAlarmId.Equals(e.AlarmId)"
@onclick="() => ShelveAsync(e.AlarmId, ShelveKind.Unshelve)"
title="Remove an existing shelve">Unshelve</button>
</div>
@if (_opResultAlarmId.Equals(e.AlarmId) && _opResultMessage is not null)
{
<span class="chip @(_opResultOk ? "chip-ok" : "chip-bad")" style="font-size:0.8rem">@_opResultMessage</span>
}
</td>
}
</tr>
}
</tbody>
@@ -74,13 +113,102 @@ else
// Alarm transition events held by this component.
// NOTE(review): presumably appended to by OnAlarm and rendered as the table rows — the
// handler body and the row loop are outside this view; confirm.
private readonly List<AlarmTransitionEvent> _rows = new();
// Set to true during initialization, right after the component subscribes to Alarms.Received.
private bool _connected;
protected override void OnInitialized()
// Result of the "DriverOperator" policy check made during initialization. The Actions column
// header and the per-row Ack/Shelve/Unshelve buttons render only when this is true.
private bool _canOperate;

// Per-row action state, keyed by AlarmId so the markup can match it against each row:
//   _busyAlarmId     - alarm whose action is awaiting a reply; that row's buttons are disabled.
//   _opResultAlarmId - alarm the most recent result chip belongs to.
//   _opResultMessage - chip text; null hides the chip.
//   _opResultOk      - selects the chip-ok / chip-bad styling.
private string _busyAlarmId = string.Empty;
private string _opResultAlarmId = string.Empty;
private string? _opResultMessage;
private bool _opResultOk;
/// <summary>
/// Subscribes to the in-process alarm broadcaster and resolves whether the current user may
/// operate alarms (the "DriverOperator" policy).
/// </summary>
protected override async Task OnInitializedAsync()
{
    // The live alarm tail comes from the in-process broadcaster (fed by AlertSignalRBridge off the
    // 'alerts' DPS topic). A Blazor Server component can't self-connect a SignalR HubConnection
    // behind a reverse proxy — see IInProcessBroadcaster — hence the in-process subscription.
    Alarms.Received += OnAlarm;
    _connected = true;

    // Evaluate the DriverOperator policy once so the per-row action controls render only for
    // permitted users. The operator name is deliberately NOT cached here: it is re-read at click
    // time (GetCurrentUserNameAsync) so a mid-session token refresh is reflected in the published
    // command + audit.
    var principal = (await AuthState.GetAuthenticationStateAsync()).User;
    var decision = await AuthorizationService.AuthorizeAsync(principal, null, "DriverOperator");
    _canOperate = decision.Succeeded;
}
/// <summary>
/// Click handler for a row's Ack button. Marks the row busy, sends the acknowledge through
/// <c>AdminOps</c> under the current operator's name, then records a success/failure message
/// for the row's result chip. Exceptions are caught and shown as a (truncated) failure message
/// rather than propagated.
/// </summary>
/// <param name="alarmId">AlarmId of the row that was clicked.</param>
private async Task AcknowledgeAsync(string alarmId)
{
    _busyAlarmId = alarmId;
    _opResultMessage = null;
    StateHasChanged();
    try
    {
        var user = await GetCurrentUserNameAsync();

        // A CancellationTokenSource built with a delay owns a timer. Dispose it as soon as the
        // call settles instead of leaving the timer alive for the full 15 seconds per click.
        using var timeout = new System.Threading.CancellationTokenSource(TimeSpan.FromSeconds(15));
        var result = await AdminOps.AcknowledgeAlarmAsync(
            alarmId, user, comment: null,
            timeout.Token);
        ShowOpResult(alarmId, result.Ok, result.Ok ? "Ack sent" : (result.Message ?? "Failed"));
    }
    catch (Exception ex)
    {
        // Keep the chip short: show at most 60 characters of the exception message.
        ShowOpResult(alarmId, false, ex.Message.Length > 60 ? ex.Message[..60] + "…" : ex.Message);
    }
    finally
    {
        // Release the busy marker only if it still belongs to this call. If another row's action
        // started while this one was awaiting, clearing unconditionally would re-enable that
        // row's buttons while its own request is still in flight.
        if (string.Equals(_busyAlarmId, alarmId, StringComparison.Ordinal))
        {
            _busyAlarmId = "";
        }
        StateHasChanged();
    }
}
/// <summary>
/// Click handler for a row's Shelve / Unshelve buttons. Marks the row busy, sends the shelve
/// action through <c>AdminOps</c> under the current operator's name, then records a
/// success/failure message for the row's result chip. Exceptions are caught and shown as a
/// (truncated) failure message rather than propagated.
/// </summary>
/// <param name="alarmId">AlarmId of the row that was clicked.</param>
/// <param name="kind">The shelve action to request.</param>
private async Task ShelveAsync(string alarmId, ShelveKind kind)
{
    _busyAlarmId = alarmId;
    _opResultMessage = null;
    StateHasChanged();
    try
    {
        var user = await GetCurrentUserNameAsync();

        // A CancellationTokenSource built with a delay owns a timer. Dispose it as soon as the
        // call settles instead of leaving the timer alive for the full 15 seconds per click.
        using var timeout = new System.Threading.CancellationTokenSource(TimeSpan.FromSeconds(15));

        // Timed shelve (with an unshelve-at datetime picker) is deferred — only OneShot + Unshelve
        // are surfaced here, so unshelveAtUtc is always null. TimedShelve is fully wired through the
        // singleton + AlarmCommand if a UI is added later.
        var result = await AdminOps.ShelveAlarmAsync(
            alarmId, user, kind, unshelveAtUtc: null, comment: null,
            timeout.Token);
        var verb = kind == ShelveKind.Unshelve ? "Unshelve" : "Shelve";
        ShowOpResult(alarmId, result.Ok, result.Ok ? $"{verb} sent" : (result.Message ?? "Failed"));
    }
    catch (Exception ex)
    {
        // Keep the chip short: show at most 60 characters of the exception message.
        ShowOpResult(alarmId, false, ex.Message.Length > 60 ? ex.Message[..60] + "…" : ex.Message);
    }
    finally
    {
        // Release the busy marker only if it still belongs to this call. If another row's action
        // started while this one was awaiting, clearing unconditionally would re-enable that
        // row's buttons while its own request is still in flight.
        if (string.Equals(_busyAlarmId, alarmId, StringComparison.Ordinal))
        {
            _busyAlarmId = "";
        }
        StateHasChanged();
    }
}
/// <summary>
/// Resolves the operator name from the AuthenticationState at call time (not cached), so the
/// name forwarded to the command + audit follows the current claims-principal across a token
/// refresh on a long-lived circuit.
/// </summary>
/// <returns>
/// The identity's Name when it is non-null; otherwise the NameIdentifier claim value;
/// otherwise the literal "unknown".
/// </returns>
private async Task<string> GetCurrentUserNameAsync()
{
    var principal = (await AuthState.GetAuthenticationStateAsync()).User;

    if (principal.Identity?.Name is { } displayName)
    {
        return displayName;
    }

    var subjectClaim = principal.FindFirst(System.Security.Claims.ClaimTypes.NameIdentifier);
    return subjectClaim?.Value ?? "unknown";
}
/// <summary>
/// Records the outcome of the latest alarm action so the markup can show a result chip on the
/// matching row. Does not trigger a re-render itself.
/// </summary>
/// <param name="alarmId">Alarm the result belongs to.</param>
/// <param name="ok">Whether the action succeeded; selects the chip styling.</param>
/// <param name="message">Text displayed in the chip.</param>
private void ShowOpResult(string alarmId, bool ok, string message)
{
    (_opResultAlarmId, _opResultOk, _opResultMessage) = (alarmId, ok, message);
}
private void OnAlarm(AlarmTransitionEvent evt) =>
@@ -4,6 +4,7 @@ using Akka.Event;
using Microsoft.EntityFrameworkCore;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Deploy;
using ZB.MOM.WW.OtOpcUa.Commons.OpcUa;
using ZB.MOM.WW.OtOpcUa.Commons.Types;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
@@ -54,6 +55,87 @@ public sealed class AdminOperationsActor : ReceiveActor
ReceiveAsync<TestDriverConnect>(HandleTestDriverConnectAsync);
ReceiveAsync<RestartDriver>(HandleRestartDriverAsync);
ReceiveAsync<ReconnectDriver>(HandleReconnectDriverAsync);
Receive<AcknowledgeAlarmCommand>(HandleAcknowledgeAlarm);
Receive<ShelveAlarmCommand>(HandleShelveAlarm);
}
/// <summary>
/// Handles the AdminUI acknowledge request. Builds an <see cref="AlarmCommand"/> with
/// <c>Operation = "Acknowledge"</c> from the incoming message, publishes it to the cluster
/// <c>alarm-commands</c> topic through the DistributedPubSub mediator, and replies to the sender
/// with an <see cref="AcknowledgeAlarmResult"/> carrying the request's correlation id.
/// The publish is a fire-and-forget <c>Tell</c>, so the handler is synchronous; a success reply
/// means "published", not "applied by the owning node". Any exception is logged and turned into
/// a failure reply.
/// </summary>
private void HandleAcknowledgeAlarm(AcknowledgeAlarmCommand msg)
{
    // Capture the sender first so both the success and the failure path reply to the same ref.
    var requester = Sender;
    try
    {
        var mediator = DistributedPubSub.Get(Context.System).Mediator;
        var command = new AlarmCommand(
            AlarmId: msg.AlarmId,
            Operation: "Acknowledge",
            User: msg.User,
            Comment: msg.Comment,
            UnshelveAtUtc: null);

        mediator.Tell(new Publish(AlarmCommandsTopic.Name, command));
        _log.Info("AdminOps: Acknowledge published for alarm {AlarmId} by {User}", msg.AlarmId, msg.User);

        var success = new AcknowledgeAlarmResult(true, null, msg.CorrelationId);
        requester.Tell(success);
    }
    catch (Exception ex)
    {
        _log.Error(ex, "AdminOps: Acknowledge failed for alarm {AlarmId}", msg.AlarmId);

        var failure = new AcknowledgeAlarmResult(false, ex.Message, msg.CorrelationId);
        requester.Tell(failure);
    }
}
/// <summary>
/// Handles the AdminUI shelve / unshelve request. Translates <see cref="ShelveAlarmCommand.Kind"/>
/// into the <see cref="AlarmCommand"/> operation name (<c>OneShotShelve</c>, <c>TimedShelve</c>
/// or <c>Unshelve</c>), carries <see cref="ShelveAlarmCommand.UnshelveAtUtc"/> through for the
/// timed kind only, publishes the command to the cluster <c>alarm-commands</c> topic, and replies
/// to the sender with a <see cref="ShelveAlarmResult"/>. An unknown kind or any other exception
/// is logged and turned into a failure reply; a timed shelve without an expiry is refused
/// without publishing.
/// </summary>
private void HandleShelveAlarm(ShelveAlarmCommand msg)
{
    // Capture the sender first so every reply path targets the same ref.
    var requester = Sender;
    try
    {
        var operation = msg.Kind switch
        {
            ShelveKind.OneShot => "OneShotShelve",
            ShelveKind.Timed => "TimedShelve",
            ShelveKind.Unshelve => "Unshelve",
            _ => throw new ArgumentOutOfRangeException(nameof(msg), msg.Kind, "Unknown shelve kind."),
        };

        // Only the timed kind carries an expiry; the other kinds always publish null.
        DateTime? unshelveAt = msg.Kind == ShelveKind.Timed ? msg.UnshelveAtUtc : null;

        // A timed shelve with no expiry is refused up front so the AdminUI gets an immediate,
        // attributable failure instead of a command that is silently dropped downstream.
        if (msg.Kind == ShelveKind.Timed && !unshelveAt.HasValue)
        {
            requester.Tell(new ShelveAlarmResult(false, "TimedShelve requires UnshelveAtUtc.", msg.CorrelationId));
            return;
        }

        var mediator = DistributedPubSub.Get(Context.System).Mediator;
        var command = new AlarmCommand(
            AlarmId: msg.AlarmId,
            Operation: operation,
            User: msg.User,
            Comment: msg.Comment,
            UnshelveAtUtc: unshelveAt);

        mediator.Tell(new Publish(AlarmCommandsTopic.Name, command));
        _log.Info("AdminOps: {Operation} published for alarm {AlarmId} by {User}", operation, msg.AlarmId, msg.User);

        var success = new ShelveAlarmResult(true, null, msg.CorrelationId);
        requester.Tell(success);
    }
    catch (Exception ex)
    {
        _log.Error(ex, "AdminOps: Shelve ({Kind}) failed for alarm {AlarmId}", msg.Kind, msg.AlarmId);

        var failure = new ShelveAlarmResult(false, ex.Message, msg.CorrelationId);
        requester.Tell(failure);
    }
}
private async Task HandleStartDeploymentAsync(StartDeployment msg)
@@ -61,8 +61,10 @@ public sealed class ScriptedAlarmHostActor : ReceiveActor
/// <summary>The cluster DistributedPubSub topic inbound OPC UA Part 9 alarm method calls
/// (Acknowledge / Confirm / Shelve / AddComment) are routed onto as <see cref="AlarmCommand"/>s.
/// The OPC UA node manager's condition handlers build the command (after the <c>AlarmAck</c> role
/// gate); the host's boot wiring publishes it here; T19's engine-side subscriber consumes it.</summary>
public const string AlarmCommandsTopic = "alarm-commands";
/// gate, T18) or the AdminUI path republishes via <c>AdminOperationsActor</c> (T21); this host's
/// boot wiring subscribes; T19's engine-side handler consumes it. Re-exports the single Commons
/// const so every publisher/subscriber shares one literal.</summary>
public const string AlarmCommandsTopic = ZB.MOM.WW.OtOpcUa.Commons.OpcUa.AlarmCommandsTopic.Name;
/// <summary>Reconcile the loaded alarm set to exactly the enabled subset of <paramref name="Plans"/>:
/// builds <see cref="ScriptedAlarmDefinition"/>s (skipping disabled plans), reloads the engine, and