feat(dashboard): mirror events via SessionEventDistributor subscriber (fixes dark feed without gRPC client)

This commit is contained in:
Joseph Doherty
2026-06-15 14:42:32 -04:00
parent 4f43733b96
commit 1ea08c3b10
9 changed files with 600 additions and 148 deletions
@@ -1,6 +1,8 @@
using System.Runtime.CompilerServices;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.MxGateway.Contracts.Proto;
using ZB.MOM.WW.MxGateway.Server.Configuration;
using ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs;
using ZB.MOM.WW.MxGateway.Server.Grpc;
using ZB.MOM.WW.MxGateway.Server.Metrics;
using ZB.MOM.WW.MxGateway.Server.Workers;
@@ -21,6 +23,10 @@ public sealed class GatewaySession
private int _activeEventSubscriberCount;
private SessionEventDistributor? _eventDistributor;
private bool _eventDistributorStarted;
private bool _dashboardMirrorStarted;
private IEventSubscriberLease? _dashboardMirrorLease;
private Task? _dashboardMirrorTask;
private CancellationTokenSource? _dashboardMirrorCts;
private readonly Dictionary<(int ServerHandle, int ItemHandle), SessionItemRegistration> _items = [];
/// <summary>
@@ -350,9 +356,22 @@ public sealed class GatewaySession
/// <summary>
/// Moves the session into <see cref="SessionState.Ready"/> and then starts the internal
/// dashboard event mirror.
/// </summary>
/// <remarks>
/// <para>
/// The mirror is only started when a dashboard broadcaster was supplied; without one the
/// second step is a no-op. When it does start, it registers a gateway-owned internal
/// subscriber on the distributor and starts the pump <em>before</em> any gRPC client
/// attaches. The dashboard EventsHub therefore receives session events even when no gRPC
/// subscriber is streaming, which fixes the "dark feed" where the dashboard only saw
/// events while a gRPC client was actively streaming.
/// </para>
/// <para>
/// Because the internal subscriber is registered ahead of
/// <see cref="SessionEventDistributor.StartAsync"/>, the pump never starts with an empty
/// subscriber set. That removes the Task 4 hazard in which a pump started at Ready with
/// zero subscribers drained a fast-completing worker stream into nothing and left a later
/// subscriber hanging.
/// </para>
/// </remarks>
public void MarkReady()
{
    // Order matters: the mirror start inspects the session state, so transition first.
    TransitionTo(SessionState.Ready);
    StartDashboardMirror();
}
// Constructs and starts the distributor exactly once, registering the subscriber under
@@ -369,8 +388,24 @@ public sealed class GatewaySession
// the start so the very first subscriber sees the stream from its beginning.
private IEventSubscriberLease StartDistributorAndRegister()
{
    // Single declaration of both locals: EnsureDistributorCreated hands back the shared
    // distributor and tells us (via startNow) whether this caller claimed the pump start.
    // The separate pre-refactor declarations of `distributor` and `startNow` are removed;
    // keeping them beside this line declared each local twice.
    SessionEventDistributor distributor = EnsureDistributorCreated(out bool startNow);

    // Register BEFORE starting the pump so a subscriber is present when the pump begins
    // draining — no event is fanned to an empty subscriber set and then missed by this
    // first subscriber. StartAsync only schedules the pump task; it never blocks.
    IEventSubscriberLease lease = distributor.Register();

    // Starts the pump only when this caller claimed the start; otherwise does nothing.
    StartPumpIfRequested(distributor, startNow);
    return lease;
}
// Constructs the distributor exactly once and reports whether THIS caller is the one
// that should start the pump (i.e. it observed the unstarted state and claimed the
// start). Both the construction and the started-flag flip happen under _syncRoot so two
// concurrent callers (e.g. MarkReady's dashboard mirror and a racing first
// AttachEventSubscriber) agree on a single distributor and a single start.
private SessionEventDistributor EnsureDistributorCreated(out bool startNow)
{
lock (_syncRoot)
{
if (_eventDistributor is null)
@@ -387,28 +422,133 @@ public sealed class GatewaySession
CreateOverflowHandler(eventOptions.BackpressurePolicy));
}
distributor = _eventDistributor;
startNow = false;
if (!_eventDistributorStarted)
{
_eventDistributorStarted = true;
startNow = true;
}
}
// Register BEFORE starting the pump so a subscriber is present when the pump begins
// draining — no event is fanned to an empty subscriber set and then missed by this
// first subscriber. StartAsync only schedules the pump task; it never blocks.
IEventSubscriberLease lease = distributor.Register();
if (startNow)
return _eventDistributor;
}
}
// Starts the distributor's pump when, and only when, the caller claimed the start
// (startNow is the value reported by EnsureDistributorCreated). Callers register their
// subscriber first and then call this, so the pump never begins with the caller's
// subscriber missing.
private static void StartPumpIfRequested(SessionEventDistributor distributor, bool startNow)
{
    if (!startNow)
    {
        // Some other caller claimed the start; starting here would start the pump twice.
        return;
    }

    // StartAsync only schedules the pump via Task.Run and returns a completed task;
    // it does not perform any async I/O itself. The sync-over-async call here is
    // therefore safe and will not deadlock. Do not make StartAsync truly async
    // (i.e., await real I/O before returning) without also changing this call site.
    distributor.StartAsync(CancellationToken.None).GetAwaiter().GetResult();
}
// Registers the gateway-owned internal dashboard subscriber on the distributor and starts
// a background loop that mirrors every fanned event to the dashboard broadcaster. Called
// when the session becomes Ready; repeated calls are ignored via _dashboardMirrorStarted.
// The internal subscriber is registered BEFORE the pump starts, so a subscriber is always
// present at pump start — the dashboard receives events with no gRPC subscriber attached,
// and the Task 4 "zero-subscriber drain into the void" hang cannot occur.
// No-op when no dashboard broadcaster was supplied (unit tests) or when the session is
// already Closing, Closed or Faulted.
//
// Teardown snapshots and clears the three mirror fields under _syncRoot. To stay coherent
// with that, the lease and the loop task are published together under the same lock, and
// if teardown has already taken the fields the freshly registered lease is disposed here
// instead of being stored where nothing would ever release it.
private void StartDashboardMirror()
{
    IDashboardEventBroadcaster? broadcaster = _eventStreaming.DashboardBroadcaster;
    if (broadcaster is null)
    {
        return;
    }

    CancellationTokenSource mirrorCts;
    CancellationToken loopToken;
    lock (_syncRoot)
    {
        if (_dashboardMirrorStarted || _state is SessionState.Closing or SessionState.Closed or SessionState.Faulted)
        {
            return;
        }

        _dashboardMirrorStarted = true;
        mirrorCts = new CancellationTokenSource();
        _dashboardMirrorCts = mirrorCts;

        // Capture the token while still holding the lock: once the field is visible,
        // teardown may dispose the source, after which reading Token would throw.
        loopToken = mirrorCts.Token;
    }

    // Create the distributor (claiming the start if we are first) and register the
    // internal subscriber BEFORE starting the pump. isInternal: true keeps the dashboard
    // subscriber out of the single-subscriber overflow accounting, so a slow/broken
    // dashboard mirror only disconnects itself and never faults the session.
    SessionEventDistributor distributor = EnsureDistributorCreated(out bool startNow);
    IEventSubscriberLease lease = distributor.Register(isInternal: true);
    StartPumpIfRequested(distributor, startNow);

    lock (_syncRoot)
    {
        // Teardown nulls _dashboardMirrorCts when it takes ownership of the mirror state.
        // If the field still points at our source, teardown has not run yet, so publish
        // the lease and the loop task together; teardown will then see both or neither.
        if (ReferenceEquals(_dashboardMirrorCts, mirrorCts))
        {
            _dashboardMirrorLease = lease;

            // Task.Run only queues the loop on the thread pool, so nothing from the loop
            // executes while _syncRoot is held.
            _dashboardMirrorTask = Task.Run(
                () => RunDashboardMirrorAsync(broadcaster, lease, loopToken),
                CancellationToken.None);
            return;
        }
    }

    // Teardown ran between the two locked sections and found no lease to dispose.
    // Release the subscriber here and do not start a loop that nobody would await.
    lease.Dispose();
}
// Mirror loop for the internal dashboard subscriber: drains the lease's channel and hands
// each RAW fanned event to the dashboard broadcaster, ending when the channel completes,
// the loop token is cancelled, or the read faults.
//
// The dashboard is a first-class distributor subscriber (Task 6), so it is shown the
// session's full raw event activity rather than the per-gRPC-subscriber
// AfterWorkerSequence filtering that EventStreamService applies at its own boundary. That
// is deliberate: the dashboard is a separate LDAP-authenticated monitoring view (the
// per-session dashboard ACL is the separate Task 18).
//
// Publishing is best-effort / never-throw, so a slow or broken dashboard cannot fault the
// session or stall the pump. The bounded internal subscriber channel (Task 5
// per-subscriber isolation) only disconnects THIS mirror on overflow; the session and
// other subscribers are left untouched.
private async Task RunDashboardMirrorAsync(
    IDashboardEventBroadcaster broadcaster,
    IEventSubscriberLease lease,
    CancellationToken cancellationToken)
{
    // Forwards a single event. Publish is documented never-throw, but the guard is kept
    // so that a future implementation cannot fault the mirror loop. Logs identifiers only.
    void PublishGuarded(MxEvent eventToPublish)
    {
        try
        {
            broadcaster.Publish(SessionId, eventToPublish);
        }
        catch (Exception publishFailure)
        {
            _eventStreaming.DistributorLogger.LogDebug(
                publishFailure,
                "Dashboard event mirror threw for session {SessionId}; continuing.",
                SessionId);
        }
    }

    try
    {
        var mirroredEvents = lease.Reader
            .ReadAllAsync(cancellationToken)
            .ConfigureAwait(false);

        await foreach (MxEvent incoming in mirroredEvents)
        {
            PublishGuarded(incoming);
        }
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Expected on teardown: the session cancelled the mirror's token.
    }
    catch (SessionManagerException)
    {
        // The distributor disconnected the internal subscriber with a terminal overflow
        // fault after its channel overflowed. Only the dashboard mirror is cut off; the
        // session, the pump and any gRPC subscriber carry on. Mirroring simply stops.
    }
    catch (Exception loopFailure)
    {
        // A source-fault completion (the worker event stream ended abnormally) arrives
        // here. Session-level fault handling runs via the gRPC path / lifecycle; this
        // loop just ends. Logs identifiers only.
        _eventStreaming.DistributorLogger.LogDebug(
            loopFailure,
            "Dashboard event mirror loop ended for session {SessionId}.",
            SessionId);
    }
}
// Builds the per-subscriber backpressure handler the distributor invokes when a
@@ -1108,6 +1248,46 @@ public sealed class GatewaySession
{
}
// Stop the internal dashboard mirror first: cancel its loop, dispose its lease (which
// unregisters its internal distributor subscriber and completes its channel), and
// await the loop task. Done BEFORE disposing the distributor and worker client — like
// the distributor itself — so the mirror is no longer reading the pump when the pump
// and its source (the worker client) tear down.
IEventSubscriberLease? dashboardLease;
Task? dashboardTask;
CancellationTokenSource? dashboardCts;
lock (_syncRoot)
{
dashboardLease = _dashboardMirrorLease;
dashboardTask = _dashboardMirrorTask;
dashboardCts = _dashboardMirrorCts;
_dashboardMirrorLease = null;
_dashboardMirrorTask = null;
_dashboardMirrorCts = null;
}
if (dashboardCts is not null)
{
await dashboardCts.CancelAsync().ConfigureAwait(false);
}
dashboardLease?.Dispose();
if (dashboardTask is not null)
{
try
{
await dashboardTask.ConfigureAwait(false);
}
catch (Exception)
{
// The mirror loop swallows its own faults; any escape here must not block
// disposal. The loop has stopped, which is all teardown requires.
}
}
dashboardCts?.Dispose();
// Stop the event pump and complete every subscriber channel before tearing down the
// worker client (the pump's source). DisposeAsync is the single session teardown
// point (SessionManager.RemoveSessionAsync awaits it after close), so awaiting it