fix(adminui): wire Test Connect probes + live panels on admin-only nodes
v2-ci / build (push) Failing after 36s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests) (push) Has been skipped

Both bugs surfaced only on split-role deployments (the MAIN cluster's
admin-only nodes), where the AdminUI runs without the driver role.

- Test Connect returned "No probe registered" for every driver: the
  IDriverProbe set was registered only under the driver role, but the
  admin-operations singleton that consumes it is pinned to admin. Extract
  AddOtOpcUaDriverProbes() (idempotent via TryAddEnumerable) and call it
  in the hasAdmin path too.

- Live driver-status/alerts/script-log panels showed "SignalR error:
  Connection refused": these Blazor Server components opened a HubConnection
  to their own hub via the browser's public URL, which server-side code
  can't reach behind Traefik (host :9200 -> container :9000). Read the
  in-process source directly instead -- DriverStatus via
  IDriverStatusSnapshotStore.SnapshotChanged, Alerts/ScriptLog via a new
  IInProcessBroadcaster<T>. Fleet status was unaffected (reads DB/ActorSystem).

Adds unit tests for probe registration, the snapshot-store event, and the
broadcaster.
This commit is contained in:
Joseph Doherty
2026-05-29 16:38:32 -04:00
parent e3a27422a1
commit 61193629b6
14 changed files with 388 additions and 106 deletions
@@ -4,11 +4,10 @@
and the AB CIP ALMD bridge. *@
@attribute [Microsoft.AspNetCore.Authorization.Authorize]
@rendermode RenderMode.InteractiveServer
@using Microsoft.AspNetCore.SignalR.Client
@using ZB.MOM.WW.OtOpcUa.AdminUI.Hubs
@using ZB.MOM.WW.OtOpcUa.Commons.Messages.Alerts
@inject NavigationManager Nav
@implements IAsyncDisposable
@inject IInProcessBroadcaster<AlarmTransitionEvent> Alarms
@implements IDisposable
<div class="d-flex justify-content-between align-items-center mb-3">
<h4 class="mb-0">Alerts</h4>
@@ -73,36 +72,26 @@ else
private const int Capacity = 200;
private readonly List<AlarmTransitionEvent> _rows = new();
private HubConnection? _hub;
private bool _connected;
protected override async Task OnInitializedAsync()
protected override void OnInitialized()
{
_hub = new HubConnectionBuilder()
.WithUrl(Nav.ToAbsoluteUri(AlertHub.Endpoint))
.WithAutomaticReconnect()
.Build();
// Live alarm tail straight from the in-process broadcaster (fed by AlertSignalRBridge off the
// 'alerts' DPS topic). A Blazor Server component can't self-connect a SignalR HubConnection
// behind a reverse proxy — see IInProcessBroadcaster — so we subscribe in-process instead.
Alarms.Received += OnAlarm;
_connected = true;
}
_hub.On<AlarmTransitionEvent>(AlertHub.MethodName, evt =>
private void OnAlarm(AlarmTransitionEvent evt) =>
// Marshal both the mutation and the re-render onto the circuit sync context so this can't
// race ClearAsync (which runs there) over the shared _rows list.
InvokeAsync(() =>
{
_rows.Insert(0, evt);
if (_rows.Count > Capacity) _rows.RemoveAt(_rows.Count - 1);
InvokeAsync(StateHasChanged);
StateHasChanged();
});
_hub.Closed += _ => { _connected = false; return InvokeAsync(StateHasChanged); };
_hub.Reconnected += _ => { _connected = true; return InvokeAsync(StateHasChanged); };
try
{
await _hub.StartAsync();
_connected = true;
}
catch
{
// Connection failures (admin-only deployment, hub not mapped, etc.) leave the page
// showing "disconnected" — operator action: reload or talk to the host operator.
}
}
private async Task ClearAsync()
{
@@ -119,8 +108,5 @@ else
_ => "chip-idle",
};
public async ValueTask DisposeAsync()
{
if (_hub is not null) await _hub.DisposeAsync();
}
// Detach from the injected broadcaster when the page goes away. The subscription was taken in
// OnInitialized; leaving it attached would keep delivering alarm events to this component.
public void Dispose()
{
    Alarms.Received -= OnAlarm;
}
}
@@ -3,11 +3,10 @@
VirtualTagActor / ScriptedAlarmActor script execution. Engine emit lands with F8 + F9. *@
@attribute [Microsoft.AspNetCore.Authorization.Authorize]
@rendermode RenderMode.InteractiveServer
@using Microsoft.AspNetCore.SignalR.Client
@using ZB.MOM.WW.OtOpcUa.AdminUI.Hubs
@using ZB.MOM.WW.OtOpcUa.Commons.Messages.Logging
@inject NavigationManager Nav
@implements IAsyncDisposable
@inject IInProcessBroadcaster<ScriptLogEntry> ScriptLogs
@implements IDisposable
<div class="d-flex justify-content-between align-items-center mb-3">
<h4 class="mb-0">Script log</h4>
@@ -87,7 +86,6 @@ else
private const int Capacity = 500;
private readonly List<ScriptLogEntry> _rows = new();
private HubConnection? _hub;
private bool _connected;
private string _levelFilter = "";
private string _scriptFilter = "";
@@ -115,32 +113,24 @@ else
}
}
protected override async Task OnInitializedAsync()
protected override void OnInitialized()
{
_hub = new HubConnectionBuilder()
.WithUrl(Nav.ToAbsoluteUri(ScriptLogHub.Endpoint))
.WithAutomaticReconnect()
.Build();
// Live tail straight from the in-process broadcaster (fed by ScriptLogSignalRBridge off the
// 'script-logs' DPS topic). Blazor Server can't self-connect a SignalR HubConnection behind
// a reverse proxy — see IInProcessBroadcaster — so we subscribe in-process instead.
ScriptLogs.Received += OnEntry;
_connected = true;
}
_hub.On<ScriptLogEntry>(ScriptLogHub.MethodName, entry =>
private void OnEntry(ScriptLogEntry entry) =>
// Marshal both the mutation and the re-render onto the circuit sync context so this can't
// race ClearAsync (which runs there) over the shared _rows list.
InvokeAsync(() =>
{
_rows.Insert(0, entry);
if (_rows.Count > Capacity) _rows.RemoveAt(_rows.Count - 1);
InvokeAsync(StateHasChanged);
StateHasChanged();
});
_hub.Closed += _ => { _connected = false; return InvokeAsync(StateHasChanged); };
_hub.Reconnected += _ => { _connected = true; return InvokeAsync(StateHasChanged); };
try
{
await _hub.StartAsync();
_connected = true;
}
catch
{
// Connection error — page shows "disconnected".
}
}
private async Task ClearAsync()
{
@@ -156,8 +146,5 @@ else
_ => "chip-idle",
};
public async ValueTask DisposeAsync()
{
if (_hub is not null) await _hub.DisposeAsync();
}
// Detach from the injected broadcaster when the page goes away. The subscription was taken in
// OnInitialized; leaving it attached would keep delivering log entries to this component.
public void Dispose()
{
    ScriptLogs.Received -= OnEntry;
}
}
@@ -4,14 +4,14 @@
DriverOperator-gated Reconnect/Restart buttons appear for authorised users. *@
@implements IAsyncDisposable
@using Microsoft.AspNetCore.Authorization
@using Microsoft.AspNetCore.SignalR.Client
@using ZB.MOM.WW.OtOpcUa.AdminUI.Hubs
@using ZB.MOM.WW.OtOpcUa.Commons.Interfaces
@using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin
@using ZB.MOM.WW.OtOpcUa.Commons.Messages.Drivers
@inject NavigationManager Nav
@inject AuthenticationStateProvider AuthState
@inject IAuthorizationService AuthorizationService
@inject IAdminOperationsClient AdminOps
@inject IDriverStatusSnapshotStore StatusStore
<section class="panel rise mt-3" style="animation-delay:.04s; @(_stale ? "opacity:0.5;" : "")">
<div class="panel-head d-flex align-items-center gap-2">
@@ -139,7 +139,6 @@
[Parameter] public string ClusterId { get; set; } = "";
[Parameter] public bool Enabled { get; set; } = true;
private HubConnection? _hub;
private DriverHealthChanged? _snapshot;
private DateTime _lastUpdateUtc = DateTime.MinValue;
private bool _stale;
@@ -180,30 +179,44 @@
InvokeAsync(StateHasChanged);
}, null, TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(5));
_hub = new HubConnectionBuilder()
.WithUrl(Nav.ToAbsoluteUri("/hubs/driverstatus"))
.WithAutomaticReconnect()
.Build();
_hub.On<DriverHealthChanged>("status", snap =>
{
_snapshot = snap;
_lastUpdateUtc = DateTime.UtcNow;
_stale = false;
InvokeAsync(StateHasChanged);
});
// Read live status straight from the in-process snapshot store rather than opening a
// self-targeted SignalR connection. This component runs server-side (Blazor
// InteractiveServer), so a HubConnection to the browser's public URL (e.g.
// http://localhost:9200 behind Traefik) would dial that port from *inside* the container —
// where only Kestrel's :9000 listens — and fail with "Connection refused". The store is fed
// on every admin node by DriverStatusSignalRBridge (a per-node DistributedPubSub
// subscriber), so the local singleton is always current regardless of which replica serves
// this circuit.
try
{
await _hub.StartAsync();
_connecting = false;
await _hub.InvokeAsync("JoinDriver", DriverInstanceId);
StatusStore.SnapshotChanged += OnSnapshotChanged;
if (StatusStore.TryGet(DriverInstanceId, out var snap))
{
_snapshot = snap;
_lastUpdateUtc = DateTime.UtcNow;
}
}
catch (Exception ex)
{
_connecting = false;
_error = ex.Message;
}
finally
{
_connecting = false;
}
}
// Invoked by the snapshot store for every driver instance, on whichever thread called Upsert
// (per the store's contract, the bridge actor) -- never on this circuit's render sync context.
private void OnSnapshotChanged(DriverHealthChanged snap)
{
    // Filter on the publishing thread so snapshots for other instances queue no renderer work.
    if (!string.Equals(snap.DriverInstanceId, DriverInstanceId, StringComparison.Ordinal))
        return;

    // Marshal BOTH the state mutation and the re-render onto the render sync context. Writing
    // the fields here and dispatching only StateHasChanged would let them change underneath a
    // render that is already in progress; the Alerts and Script log pages follow the same rule.
    _ = InvokeAsync(() =>
    {
        _snapshot = snap;
        _lastUpdateUtc = DateTime.UtcNow;
        _stale = false;
        StateHasChanged();
    });
}
private async Task ReconnectAsync()
@@ -285,12 +298,13 @@
public async ValueTask DisposeAsync()
{
// Drain BOTH timers first so an in-flight callback can't invoke StateHasChanged on
// a component whose hub has already been released. System.Threading.Timer's async
// dispose awaits any in-flight callback (.NET 6+).
// Unsubscribe first so the singleton store can't invoke a handler on a disposed component.
StatusStore.SnapshotChanged -= OnSnapshotChanged;
// Drain BOTH timers so an in-flight callback can't invoke StateHasChanged on a component
// that's already gone. System.Threading.Timer's async dispose awaits any in-flight
// callback (.NET 6+).
if (_timer is not null) await _timer.DisposeAsync();
if (_opResultClearTimer is not null) await _opResultClearTimer.DisposeAsync();
if (_hub is not null) await _hub.DisposeAsync();
}
// Map DriverState string → chip CSS class using the 4 defined theme variants.
@@ -17,22 +17,26 @@ public sealed class AlertSignalRBridge : ReceiveActor
public const string TopicName = "alerts";
private readonly IHubContext<AlertHub> _hub;
private readonly IInProcessBroadcaster<AlarmTransitionEvent> _broadcaster;
private readonly ILoggingAdapter _log = Context.GetLogger();
/// <summary>
/// Creates actor props for the AlertSignalRBridge.
/// </summary>
/// <param name="hub">The SignalR hub context to send alerts to.</param>
public static Props Props(IHubContext<AlertHub> hub) =>
Akka.Actor.Props.Create(() => new AlertSignalRBridge(hub));
/// <param name="broadcaster">In-process fan-out read directly by the Blazor Server Alerts page.</param>
public static Props Props(IHubContext<AlertHub> hub, IInProcessBroadcaster<AlarmTransitionEvent> broadcaster)
{
    // Fully qualified because this factory method is itself named Props.
    return Akka.Actor.Props.Create(() => new AlertSignalRBridge(hub, broadcaster));
}
/// <summary>
/// Initializes a new instance of the AlertSignalRBridge actor.
/// </summary>
/// <param name="hub">The SignalR hub context to send alerts to.</param>
public AlertSignalRBridge(IHubContext<AlertHub> hub)
/// <param name="broadcaster">In-process fan-out read directly by the Blazor Server Alerts page.</param>
public AlertSignalRBridge(IHubContext<AlertHub> hub, IInProcessBroadcaster<AlarmTransitionEvent> broadcaster)
{
    (_hub, _broadcaster) = (hub, broadcaster);

    // Alarm transitions are forwarded by ForwardAsync.
    ReceiveAsync<AlarmTransitionEvent>(ForwardAsync);

    // DPS confirmation: the subscribe acknowledgement needs no action.
    Receive<SubscribeAck>(_ => { });
}
@@ -43,6 +47,9 @@ public sealed class AlertSignalRBridge : ReceiveActor
private async Task ForwardAsync(AlarmTransitionEvent msg)
{
// In-process fan-out first — this is what the Blazor Server Alerts page reads. The hub push
// is kept for any out-of-process (e.g. WASM) SignalR client.
_broadcaster.Publish(msg);
try
{
await _hub.Clients.All.SendAsync(AlertHub.MethodName, msg);
@@ -13,14 +13,21 @@ public static class HubServiceCollectionExtensions
public const string DriverStatusSignalRBridgeName = "driver-status-signalr-bridge";
/// <summary>
/// Registers services required by the driver-status hub pipeline:
/// <see cref="IDriverStatusSnapshotStore"/> as a singleton backed by
/// <see cref="InMemoryDriverStatusSnapshotStore"/>.
/// Registers the in-process live-push services the AdminUI's Blazor Server panels read
/// directly (instead of self-connecting a SignalR <c>HubConnection</c>, which fails behind a
/// reverse proxy — see <see cref="IInProcessBroadcaster{T}"/>):
/// <list type="bullet">
/// <item><see cref="IDriverStatusSnapshotStore"/> — last-value snapshot per driver.</item>
/// <item><see cref="IInProcessBroadcaster{T}"/> — append-stream fan-out (alarm
/// transitions, script-log lines). Registered as an open generic so each closed type
/// resolves to its own singleton shared by the bridge actor and the consuming component.</item>
/// </list>
/// </summary>
/// <param name="services">The service collection.</param>
public static IServiceCollection AddOtOpcUaDriverStatusServices(this IServiceCollection services)
{
    // Two singleton registrations, chained: the last-value snapshot store, then the open-generic
    // broadcaster (each closed IInProcessBroadcaster<T> resolves to its own singleton).
    return services
        .AddSingleton<IDriverStatusSnapshotStore, InMemoryDriverStatusSnapshotStore>()
        .AddSingleton(typeof(IInProcessBroadcaster<>), typeof(InProcessBroadcaster<>));
}
@@ -48,11 +55,13 @@ public static class HubServiceCollectionExtensions
registry.Register<FleetStatusSignalRBridgeKey>(fleetBridge);
var alertHub = resolver.GetService<IHubContext<AlertHub>>();
var alertBridge = system.ActorOf(AlertSignalRBridge.Props(alertHub), AlertSignalRBridgeName);
var alertBroadcaster = resolver.GetService<IInProcessBroadcaster<Commons.Messages.Alerts.AlarmTransitionEvent>>();
var alertBridge = system.ActorOf(AlertSignalRBridge.Props(alertHub, alertBroadcaster), AlertSignalRBridgeName);
registry.Register<AlertSignalRBridgeKey>(alertBridge);
var scriptLogHub = resolver.GetService<IHubContext<ScriptLogHub>>();
var scriptLogBridge = system.ActorOf(ScriptLogSignalRBridge.Props(scriptLogHub), ScriptLogSignalRBridgeName);
var scriptLogBroadcaster = resolver.GetService<IInProcessBroadcaster<Commons.Messages.Logging.ScriptLogEntry>>();
var scriptLogBridge = system.ActorOf(ScriptLogSignalRBridge.Props(scriptLogHub, scriptLogBroadcaster), ScriptLogSignalRBridgeName);
registry.Register<ScriptLogSignalRBridgeKey>(scriptLogBridge);
var driverStatusHub = resolver.GetService<IHubContext<DriverStatusHub>>();
@@ -6,10 +6,21 @@ namespace ZB.MOM.WW.OtOpcUa.AdminUI.Hubs;
/// Singleton last-snapshot-per-instance cache. Populated by
/// <c>DriverStatusSignalRBridge</c> as it forwards DPS messages; read by
/// <see cref="DriverStatusHub.JoinDriver"/> so newly-joined clients see current state
/// without waiting for the next change event.
/// without waiting for the next change event, and subscribed to directly by the Blazor
/// Server <c>DriverStatusPanel</c> via <see cref="SnapshotChanged"/>.
/// </summary>
public interface IDriverStatusSnapshotStore
{
/// <summary>
/// Stores <paramref name="snapshot"/> as the latest snapshot for its driver instance (the
/// in-memory implementation keys on <c>snapshot.DriverInstanceId</c>), replacing any earlier
/// one. <see cref="SnapshotChanged"/> is raised afterwards with the same snapshot.
/// </summary>
/// <param name="snapshot">The snapshot to store.</param>
void Upsert(DriverHealthChanged snapshot);
/// <summary>
/// Looks up the most recently stored snapshot for <paramref name="driverInstanceId"/>.
/// </summary>
/// <param name="driverInstanceId">The driver instance to look up.</param>
/// <param name="snapshot">The stored snapshot; callers read it only when the call returns <c>true</c>.</param>
/// <returns><c>true</c> when a snapshot is available for the instance.</returns>
// NOTE(review): the return contract is inferred from the Try-pattern and from how the status
// panel consumes it; the implementation body is not visible here -- confirm.
bool TryGet(string driverInstanceId, out DriverHealthChanged snapshot);
/// <summary>
/// Raised after every <see cref="Upsert"/> with the just-stored snapshot. Lets in-process
/// consumers (the Blazor Server <c>DriverStatusPanel</c>) receive live updates by reading
/// this singleton directly instead of opening a self-targeted SignalR connection — which a
/// server-side Blazor component cannot reach when the public URL (e.g. a reverse-proxy port)
/// differs from the local Kestrel bind. Handlers run on the caller's thread (the bridge
/// actor), so subscribers must marshal to their own sync context.
/// </summary>
event Action<DriverHealthChanged>? SnapshotChanged;
}
@@ -0,0 +1,41 @@
namespace ZB.MOM.WW.OtOpcUa.AdminUI.Hubs;
/// <summary>
/// A singleton, in-process fan-out for live event streams (alarm transitions, script-log
/// lines). A per-node SignalR bridge actor subscribes to the cluster's DistributedPubSub topic
/// and calls <see cref="Publish"/>; Blazor Server components subscribe to <see cref="Received"/>
/// to render the live tail.
/// <para>
/// This exists because the AdminUI runs as Blazor <em>Server</em>: a component opening a
/// SignalR <c>HubConnection</c> to its own hub would dial the browser's public URL from
/// server-side code, which is unreachable behind a reverse proxy (e.g. Traefik mapping host
/// :9200 → container :9000) and so fails with "Connection refused". Reading this in-process
/// broadcaster instead avoids the network hop entirely. Mirrors the
/// <c>IDriverStatusSnapshotStore.SnapshotChanged</c> pattern for stream (vs. last-value) feeds.
/// </para>
/// </summary>
/// <typeparam name="T">The event payload type (e.g. AlarmTransitionEvent, ScriptLogEntry).</typeparam>
public interface IInProcessBroadcaster<T>
{
/// <summary>
/// Raised once per <see cref="Publish"/> with the published item. Handlers run on the
/// caller's thread (the bridge actor), so subscribers must marshal to their own sync
/// context (Blazor's <c>InvokeAsync</c>).
/// <para>
/// Because the broadcaster is a singleton, an attached handler keeps its target reachable
/// until it is removed; subscribers should unsubscribe when they are disposed.
/// </para>
/// </summary>
event Action<T>? Received;
/// <summary>Fan the item out to all current <see cref="Received"/> subscribers.</summary>
/// <param name="item">The item handed to each subscriber.</param>
void Publish(T item);
}
/// <summary>
/// Default <see cref="IInProcessBroadcaster{T}"/>: a thin wrapper over a field-like .NET event,
/// intended to be registered as a singleton. Safe to subscribe, unsubscribe and publish from
/// different threads.
/// </summary>
/// <typeparam name="T">The event payload type.</typeparam>
public sealed class InProcessBroadcaster<T> : IInProcessBroadcaster<T>
{
    /// <inheritdoc />
    public event Action<T>? Received;

    /// <inheritdoc />
    public void Publish(T item)
    {
        // Read the delegate once into a local: a concurrent unsubscribe can then no longer turn
        // it into null between the check and the call.
        var subscribers = Received;
        if (subscribers is null)
        {
            return;
        }

        // Handlers run synchronously, in subscription order, on the publisher's thread.
        subscribers(item);
    }
}
@@ -11,9 +11,16 @@ public sealed class InMemoryDriverStatusSnapshotStore : IDriverStatusSnapshotSto
{
private readonly ConcurrentDictionary<string, DriverHealthChanged> _byInstance = new();
/// <inheritdoc />
public event Action<DriverHealthChanged>? SnapshotChanged;
/// <inheritdoc />
public void Upsert(DriverHealthChanged snapshot)
=> _byInstance[snapshot.DriverInstanceId] = snapshot;
{
_byInstance[snapshot.DriverInstanceId] = snapshot;
// Capture-then-invoke so a concurrent unsubscribe can't null the delegate mid-raise.
SnapshotChanged?.Invoke(snapshot);
}
/// <inheritdoc />
public bool TryGet(string driverInstanceId, out DriverHealthChanged snapshot)
@@ -15,18 +15,22 @@ public sealed class ScriptLogSignalRBridge : ReceiveActor
public const string TopicName = "script-logs";
private readonly IHubContext<ScriptLogHub> _hub;
private readonly IInProcessBroadcaster<ScriptLogEntry> _broadcaster;
private readonly ILoggingAdapter _log = Context.GetLogger();
/// <summary>Creates a Props instance for the ScriptLogSignalRBridge.</summary>
/// <param name="hub">The SignalR hub context for sending messages to clients.</param>
public static Props Props(IHubContext<ScriptLogHub> hub) =>
Akka.Actor.Props.Create(() => new ScriptLogSignalRBridge(hub));
/// <param name="broadcaster">In-process fan-out read directly by the Blazor Server Script log page.</param>
public static Props Props(IHubContext<ScriptLogHub> hub, IInProcessBroadcaster<ScriptLogEntry> broadcaster)
{
    // Fully qualified because this factory method is itself named Props.
    return Akka.Actor.Props.Create(() => new ScriptLogSignalRBridge(hub, broadcaster));
}
/// <summary>Initializes a new instance of the <see cref="ScriptLogSignalRBridge"/> class.</summary>
/// <param name="hub">The SignalR hub context for sending messages to clients.</param>
public ScriptLogSignalRBridge(IHubContext<ScriptLogHub> hub)
/// <param name="broadcaster">In-process fan-out read directly by the Blazor Server Script log page.</param>
public ScriptLogSignalRBridge(IHubContext<ScriptLogHub> hub, IInProcessBroadcaster<ScriptLogEntry> broadcaster)
{
    (_hub, _broadcaster) = (hub, broadcaster);

    // Script-log entries are forwarded by ForwardAsync.
    ReceiveAsync<ScriptLogEntry>(ForwardAsync);

    // DPS confirmation: the subscribe acknowledgement needs no action.
    Receive<SubscribeAck>(_ => { });
}
@@ -37,6 +41,9 @@ public sealed class ScriptLogSignalRBridge : ReceiveActor
private async Task ForwardAsync(ScriptLogEntry msg)
{
// In-process fan-out first — this is what the Blazor Server Script log page reads. The hub
// push is kept for any out-of-process (e.g. WASM) SignalR client.
_broadcaster.Publish(msg);
try
{
await _hub.Clients.All.SendAsync(ScriptLogHub.MethodName, msg);