feat(dashboard): mirror events via SessionEventDistributor subscriber (fixes dark feed without gRPC client)
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||
using ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs;
|
||||
using ZB.MOM.WW.MxGateway.Server.Grpc;
|
||||
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||
using ZB.MOM.WW.MxGateway.Server.Workers;
|
||||
@@ -21,6 +23,10 @@ public sealed class GatewaySession
|
||||
private int _activeEventSubscriberCount;
|
||||
private SessionEventDistributor? _eventDistributor;
|
||||
private bool _eventDistributorStarted;
|
||||
private bool _dashboardMirrorStarted;
|
||||
private IEventSubscriberLease? _dashboardMirrorLease;
|
||||
private Task? _dashboardMirrorTask;
|
||||
private CancellationTokenSource? _dashboardMirrorCts;
|
||||
private readonly Dictionary<(int ServerHandle, int ItemHandle), SessionItemRegistration> _items = [];
|
||||
|
||||
/// <summary>
|
||||
@@ -350,9 +356,22 @@ public sealed class GatewaySession
|
||||
/// <summary>
/// Moves the session into <see cref="SessionState.Ready"/> and then brings up the
/// session's internal dashboard mirror.
/// </summary>
/// <remarks>
/// The dashboard mirror (Task 6) is started as part of becoming Ready, provided a
/// dashboard broadcaster was supplied. It registers an internal subscriber on the
/// distributor and starts the pump <em>before</em> any gRPC client attaches, which is
/// what lets the dashboard EventsHub see session events with no gRPC subscriber
/// streaming (the former "dark feed"). Because that internal subscriber is registered
/// BEFORE <see cref="SessionEventDistributor.StartAsync"/>, the Task 4 hazard — a pump
/// started with zero subscribers draining a fast-completing worker stream into nothing
/// and leaving a later subscriber hanging — no longer applies: the dashboard subscriber
/// is always present when the pump starts.
/// </remarks>
public void MarkReady()
{
    // State first: the mirror start is skipped for closing/closed/faulted sessions.
    TransitionTo(SessionState.Ready);

    // No-op when no dashboard broadcaster is configured, or when already started.
    StartDashboardMirror();
}
|
||||
|
||||
// Constructs and starts the distributor exactly once, registering the subscriber under
|
||||
@@ -369,8 +388,24 @@ public sealed class GatewaySession
|
||||
// the start so the very first subscriber sees the stream from its beginning.
|
||||
private IEventSubscriberLease StartDistributorAndRegister()
{
    // Obtain the single shared distributor; startNow is true only for the one caller
    // that claimed the pump start. (The earlier separate declarations of `distributor`
    // and `startNow` are gone — they redeclared these locals.)
    SessionEventDistributor distributor = EnsureDistributorCreated(out bool startNow);

    // Register BEFORE starting the pump so a subscriber is present when the pump begins
    // draining — no event is fanned to an empty subscriber set and then missed by this
    // first subscriber. StartAsync only schedules the pump task; it never blocks.
    IEventSubscriberLease lease = distributor.Register();
    StartPumpIfRequested(distributor, startNow);

    return lease;
}
|
||||
|
||||
// Constructs the distributor exactly once and reports whether THIS caller is the one
|
||||
// that should start the pump (i.e. it observed the unstarted state and claimed the
|
||||
// start). Both the construction and the started-flag flip happen under _syncRoot so two
|
||||
// concurrent callers (e.g. MarkReady's dashboard mirror and a racing first
|
||||
// AttachEventSubscriber) agree on a single distributor and a single start.
|
||||
private SessionEventDistributor EnsureDistributorCreated(out bool startNow)
|
||||
{
|
||||
lock (_syncRoot)
|
||||
{
|
||||
if (_eventDistributor is null)
|
||||
@@ -387,28 +422,133 @@ public sealed class GatewaySession
|
||||
CreateOverflowHandler(eventOptions.BackpressurePolicy));
|
||||
}
|
||||
|
||||
distributor = _eventDistributor;
|
||||
startNow = false;
|
||||
if (!_eventDistributorStarted)
|
||||
{
|
||||
_eventDistributorStarted = true;
|
||||
startNow = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Register BEFORE starting the pump so a subscriber is present when the pump begins
|
||||
// draining — no event is fanned to an empty subscriber set and then missed by this
|
||||
// first subscriber. StartAsync only schedules the pump task; it never blocks.
|
||||
IEventSubscriberLease lease = distributor.Register();
|
||||
if (startNow)
|
||||
return _eventDistributor;
|
||||
}
|
||||
}
|
||||
|
||||
// Starts the distributor's pump, but only for the caller that claimed the start
// (startNow == true). Every other caller returns immediately so the pump is started
// exactly once per distributor.
private static void StartPumpIfRequested(SessionEventDistributor distributor, bool startNow)
{
    if (!startNow)
    {
        // Someone else already claimed (or performed) the start; nothing to do.
        return;
    }

    // StartAsync only schedules the pump via Task.Run and returns a completed task;
    // it does not perform any async I/O itself. The sync-over-async call here is
    // therefore safe and will not deadlock. Do not make StartAsync truly async
    // (i.e., await real I/O before returning) without also changing this call site.
    distributor.StartAsync(CancellationToken.None).GetAwaiter().GetResult();
}
|
||||
|
||||
// Registers the gateway-owned internal dashboard subscriber on the distributor and starts
// a background loop that mirrors every fanned event to the dashboard broadcaster. Called
// once when the session becomes Ready (idempotent). The internal subscriber is registered
// BEFORE the pump starts (see StartDistributorAndRegister / EnsureDistributorCreated), so
// a subscriber is always present at pump start — the dashboard receives events with no
// gRPC subscriber attached, and the Task 4 "zero-subscriber drain into the void" hang
// cannot occur. No-op when no dashboard broadcaster was supplied (unit tests).
//
// The lease and the loop task are published together under _syncRoot, and only if the
// cancellation source created here is still the current one. Teardown snapshots and
// clears all three mirror fields under the same lock, so it either sees the complete
// mirror (and stops it) or this method notices teardown won the race and releases the
// lease itself instead of starting a loop nobody would ever stop or await.
private void StartDashboardMirror()
{
    IDashboardEventBroadcaster? broadcaster = _eventStreaming.DashboardBroadcaster;
    if (broadcaster is null)
    {
        return;
    }

    CancellationTokenSource mirrorCts;
    CancellationToken loopToken;
    lock (_syncRoot)
    {
        if (_dashboardMirrorStarted || _state is SessionState.Closing or SessionState.Closed or SessionState.Faulted)
        {
            return;
        }

        _dashboardMirrorStarted = true;
        mirrorCts = new CancellationTokenSource();
        _dashboardMirrorCts = mirrorCts;

        // Capture the token while the source is guaranteed to be undisposed.
        loopToken = mirrorCts.Token;
    }

    // Create the distributor (claiming the start if we are first) and register the
    // internal subscriber BEFORE starting the pump. isInternal: true keeps the dashboard
    // subscriber out of the single-subscriber overflow accounting, so a slow/broken
    // dashboard mirror only disconnects itself and never faults the session.
    SessionEventDistributor distributor = EnsureDistributorCreated(out bool startNow);
    IEventSubscriberLease lease = distributor.Register(isInternal: true);
    StartPumpIfRequested(distributor, startNow);

    lock (_syncRoot)
    {
        if (ReferenceEquals(_dashboardMirrorCts, mirrorCts))
        {
            _dashboardMirrorLease = lease;

            // Task.Run only queues the loop; it does not run it inline, so holding the
            // lock here is brief and cannot re-enter.
            _dashboardMirrorTask = Task.Run(
                () => RunDashboardMirrorAsync(broadcaster, lease, loopToken),
                CancellationToken.None);
            return;
        }
    }

    // Teardown cleared the mirror fields while we were registering: it never saw this
    // lease, so release it here rather than leaving it registered with no reader.
    lease.Dispose();
}
|
||||
|
||||
// Reads the internal dashboard subscriber's channel and publishes each RAW fanned event
// to the dashboard broadcaster. The dashboard is a first-class distributor subscriber
// (Task 6), so it sees the session's full raw event activity — NOT the per-gRPC-subscriber
// AfterWorkerSequence filtering that EventStreamService applies at its own boundary. This
// is intentional: the dashboard is a separate LDAP-authenticated monitoring view (per-
// session dashboard ACL is the separate Task 18). Publish is best-effort / never-throw, so
// a slow or broken dashboard cannot fault the session or stall the pump; the bounded
// internal subscriber channel (Task 5 per-subscriber isolation) only disconnects THIS
// mirror on overflow, leaving the session and other subscribers untouched.
//
// The loop never throws to its caller: every exit path is caught here. Each abnormal
// exit is logged (identifiers only) so a dashboard feed that goes dark leaves a trace.
private async Task RunDashboardMirrorAsync(
    IDashboardEventBroadcaster broadcaster,
    IEventSubscriberLease lease,
    CancellationToken cancellationToken)
{
    try
    {
        await foreach (MxEvent mxEvent in lease.Reader
            .ReadAllAsync(cancellationToken)
            .ConfigureAwait(false))
        {
            try
            {
                broadcaster.Publish(SessionId, mxEvent);
            }
            catch (Exception exception)
            {
                // Publish is documented never-throw, but enforce it here too so a future
                // implementation cannot fault the mirror loop. Logs identifiers only.
                _eventStreaming.DistributorLogger.LogDebug(
                    exception,
                    "Dashboard event mirror threw for session {SessionId}; continuing.",
                    SessionId);
            }
        }
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Teardown path: the session is shutting down the mirror.
    }
    catch (SessionManagerException exception)
    {
        // The internal subscriber's channel overflowed and the distributor disconnected
        // it with a terminal overflow fault. That disconnects only the dashboard mirror;
        // the session, pump, and any gRPC subscriber are unaffected. Stop mirroring, but
        // record it: the mirror is not restarted, so without this entry the dashboard
        // feed for this session would go dark with no trace. Logs identifiers only.
        _eventStreaming.DistributorLogger.LogWarning(
            exception,
            "Dashboard event mirror was disconnected by the distributor for session {SessionId}; mirroring stopped.",
            SessionId);
    }
    catch (Exception exception)
    {
        // Source-fault completion (worker event stream terminated abnormally) surfaces
        // here. The session's own fault handling runs via the gRPC path / lifecycle; the
        // mirror just stops. Logs identifiers only.
        _eventStreaming.DistributorLogger.LogDebug(
            exception,
            "Dashboard event mirror loop ended for session {SessionId}.",
            SessionId);
    }
}
|
||||
|
||||
// Builds the per-subscriber backpressure handler the distributor invokes when a
|
||||
@@ -1108,6 +1248,46 @@ public sealed class GatewaySession
|
||||
{
|
||||
}
|
||||
|
||||
// Stop the internal dashboard mirror first: cancel its loop, dispose its lease (which
|
||||
// unregisters its internal distributor subscriber and completes its channel), and
|
||||
// await the loop task. Done BEFORE disposing the distributor and worker client — like
|
||||
// the distributor itself — so the mirror is no longer reading the pump when the pump
|
||||
// and its source (the worker client) tear down.
|
||||
IEventSubscriberLease? dashboardLease;
|
||||
Task? dashboardTask;
|
||||
CancellationTokenSource? dashboardCts;
|
||||
lock (_syncRoot)
|
||||
{
|
||||
dashboardLease = _dashboardMirrorLease;
|
||||
dashboardTask = _dashboardMirrorTask;
|
||||
dashboardCts = _dashboardMirrorCts;
|
||||
_dashboardMirrorLease = null;
|
||||
_dashboardMirrorTask = null;
|
||||
_dashboardMirrorCts = null;
|
||||
}
|
||||
|
||||
if (dashboardCts is not null)
|
||||
{
|
||||
await dashboardCts.CancelAsync().ConfigureAwait(false);
|
||||
}
|
||||
|
||||
dashboardLease?.Dispose();
|
||||
|
||||
if (dashboardTask is not null)
|
||||
{
|
||||
try
|
||||
{
|
||||
await dashboardTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception)
|
||||
{
|
||||
// The mirror loop swallows its own faults; any escape here must not block
|
||||
// disposal. The loop has stopped, which is all teardown requires.
|
||||
}
|
||||
}
|
||||
|
||||
dashboardCts?.Dispose();
|
||||
|
||||
// Stop the event pump and complete every subscriber channel before tearing down the
|
||||
// worker client (the pump's source). DisposeAsync is the single session teardown
|
||||
// point (SessionManager.RemoveSessionAsync awaits it after close), so awaiting it
|
||||
|
||||
Reference in New Issue
Block a user