feat(dashboard): mirror events via SessionEventDistributor subscriber (fixes dark feed without gRPC client)
This commit is contained in:
@@ -167,7 +167,7 @@ bearer). Each hub class is `[Authorize(Policy = HubClientsPolicy)]`.
|
|||||||
|---|---|---|---|---|
|
|---|---|---|---|---|
|
||||||
| `DashboardSnapshotHub` | `/hubs/snapshot` | `DashboardSnapshotPublisher` (BackgroundService consuming `IDashboardSnapshotService.WatchSnapshotsAsync`) | `DashboardSnapshot` | Sent to all connected clients on every snapshot tick; new connections receive the current snapshot synchronously in `OnConnectedAsync`. |
|
| `DashboardSnapshotHub` | `/hubs/snapshot` | `DashboardSnapshotPublisher` (BackgroundService consuming `IDashboardSnapshotService.WatchSnapshotsAsync`) | `DashboardSnapshot` | Sent to all connected clients on every snapshot tick; new connections receive the current snapshot synchronously in `OnConnectedAsync`. |
|
||||||
| `AlarmsHub` | `/hubs/alarms` | `AlarmsHubPublisher` (BackgroundService consuming `IGatewayAlarmService.StreamAsync(filter: null)`) | `AlarmFeedMessage` (`active_alarm` / `snapshot_complete` / `transition`) | Connected clients auto-join `__alarms__`; all clients receive every message. Publisher auto-reconnects every 5s on stream faults. |
|
| `AlarmsHub` | `/hubs/alarms` | `AlarmsHubPublisher` (BackgroundService consuming `IGatewayAlarmService.StreamAsync(filter: null)`) | `AlarmFeedMessage` (`active_alarm` / `snapshot_complete` / `transition`) | Connected clients auto-join `__alarms__`; all clients receive every message. Publisher auto-reconnects every 5s on stream faults. |
|
||||||
| `EventsHub` | `/hubs/events` | `DashboardEventBroadcaster` invoked by `EventStreamService` for each event it forwards to a gRPC client | `MxEvent` | Clients call `SubscribeSession(sessionId)` to join `session:{id}`. Events appear only while a gRPC client is also consuming that session's events — the dashboard is a passive mirror, not a separate worker subscriber. |
|
| `EventsHub` | `/hubs/events` | `DashboardEventBroadcaster` invoked by each session's internal dashboard-mirror subscriber on its `SessionEventDistributor` (registered when the session becomes Ready) | `MxEvent` | Clients call `SubscribeSession(sessionId)` to join `session:{id}`. The dashboard is a first-class distributor subscriber, so it receives the session's events whether or not a gRPC client is streaming. It sees RAW session events — not the per-gRPC-subscriber `AfterWorkerSequence` filtering that `EventStreamService` applies at its own boundary — because the dashboard is a separate LDAP-authenticated monitoring view meant to show the session's full event activity (per-session dashboard ACL is tracked separately). |
|
||||||
|
|
||||||
`DashboardPageBase` opens a `DashboardSnapshotHub` connection via the connection
|
`DashboardPageBase` opens a `DashboardSnapshotHub` connection via the connection
|
||||||
factory in `OnInitializedAsync`, seeds `Snapshot` synchronously from
|
factory in `OnInitializedAsync`, seeds `Snapshot` synchronously from
|
||||||
@@ -184,7 +184,8 @@ Default cadences:
|
|||||||
- snapshot service produces one snapshot per
|
- snapshot service produces one snapshot per
|
||||||
`MxGateway:Dashboard:SnapshotIntervalMilliseconds` (default 1s);
|
`MxGateway:Dashboard:SnapshotIntervalMilliseconds` (default 1s);
|
||||||
- alarm publisher emits on each transition observed by the central monitor;
|
- alarm publisher emits on each transition observed by the central monitor;
|
||||||
- event publisher emits per event forwarded by `StreamEvents`.
|
- event publisher emits per event fanned by the session's `SessionEventDistributor`
|
||||||
|
to its internal dashboard-mirror subscriber (independent of any gRPC `StreamEvents`).
|
||||||
|
|
||||||
Avoid pushing every MXAccess data-change event into a wider broadcast group.
|
Avoid pushing every MXAccess data-change event into a wider broadcast group.
|
||||||
The current design routes events strictly through `session:{id}` groups; the
|
The current design routes events strictly through `session:{id}` groups; the
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ using System.Runtime.CompilerServices;
|
|||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs;
|
|
||||||
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Workers;
|
using ZB.MOM.WW.MxGateway.Server.Workers;
|
||||||
@@ -12,9 +11,7 @@ namespace ZB.MOM.WW.MxGateway.Server.Grpc;
|
|||||||
public sealed class EventStreamService(
|
public sealed class EventStreamService(
|
||||||
ISessionManager sessionManager,
|
ISessionManager sessionManager,
|
||||||
IOptions<GatewayOptions> options,
|
IOptions<GatewayOptions> options,
|
||||||
GatewayMetrics metrics,
|
GatewayMetrics metrics) : IEventStreamService
|
||||||
IDashboardEventBroadcaster dashboardEventBroadcaster,
|
|
||||||
ILogger<EventStreamService> logger) : IEventStreamService
|
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Streams events from a session to the client asynchronously.
|
/// Streams events from a session to the client asynchronously.
|
||||||
@@ -26,8 +23,19 @@ public sealed class EventStreamService(
|
|||||||
/// <see cref="SessionEventDistributor"/> pump. The pump owns the single drain of
|
/// <see cref="SessionEventDistributor"/> pump. The pump owns the single drain of
|
||||||
/// the worker event stream and the worker→public mapping (mirroring the former
|
/// the worker event stream and the worker→public mapping (mirroring the former
|
||||||
/// <c>ProduceEventsAsync</c>); this loop is the per-subscriber boundary that
|
/// <c>ProduceEventsAsync</c>); this loop is the per-subscriber boundary that
|
||||||
/// applies the per-RPC filter (<c>AfterWorkerSequence</c>), the dashboard mirror,
|
/// applies the per-RPC filter (<c>AfterWorkerSequence</c>), queue-depth metrics,
|
||||||
/// queue-depth metrics, and the backpressure/overflow policy.
|
/// and the backpressure/overflow policy.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// Task 6 moved the dashboard mirror OFF this per-RPC loop. The dashboard is now a
|
||||||
|
/// first-class internal subscriber on the session's
|
||||||
|
/// <see cref="SessionEventDistributor"/> (see <c>GatewaySession.StartDashboardMirror</c>),
|
||||||
|
/// so it receives session events even when no gRPC client is streaming. This loop no
|
||||||
|
/// longer mirrors to the dashboard. One deliberate consequence: the dashboard now sees
|
||||||
|
/// RAW session events, not the per-gRPC-subscriber <c>AfterWorkerSequence</c>-filtered
|
||||||
|
/// view this loop applies — the dashboard is a separate LDAP-authenticated monitoring
|
||||||
|
/// view that should see the session's full event activity (per-session dashboard ACL is
|
||||||
|
/// the separate Task 18).
|
||||||
/// </para>
|
/// </para>
|
||||||
/// <para>
|
/// <para>
|
||||||
/// Overflow handling (Task 5): the distributor's per-subscriber channel is bounded
|
/// Overflow handling (Task 5): the distributor's per-subscriber channel is bounded
|
||||||
@@ -97,34 +105,12 @@ public sealed class EventStreamService(
|
|||||||
|
|
||||||
// Per-RPC filter stays at the subscriber boundary: each request may resume
|
// Per-RPC filter stays at the subscriber boundary: each request may resume
|
||||||
// from a different AfterWorkerSequence, so the shared pump fans raw events and
|
// from a different AfterWorkerSequence, so the shared pump fans raw events and
|
||||||
// this loop drops the ones at or below the caller's watermark. Filtered events
|
// this loop drops the ones at or below the caller's watermark.
|
||||||
// are not mirrored to the dashboard, matching the pre-Task-4 ordering where
|
|
||||||
// the skip ran before the dashboard Publish.
|
|
||||||
if (mxEvent.WorkerSequence <= afterWorkerSequence)
|
if (mxEvent.WorkerSequence <= afterWorkerSequence)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mirror the event to the dashboard EventsHub group for this session.
|
|
||||||
// Fire-and-forget — broadcast errors must not affect the source gRPC stream.
|
|
||||||
// Server-041: IDashboardEventBroadcaster documents Publish as never-throw,
|
|
||||||
// but we enforce that at the seam too so a future implementation that adds
|
|
||||||
// synchronous validation or a serializer hop cannot fault this loop and end
|
|
||||||
// the client's gRPC stream. (Task 6 will move this tap onto its own
|
|
||||||
// distributor subscriber; for Task 4 it coexists here, firing once per
|
|
||||||
// delivered event for the single subscriber exactly as before.)
|
|
||||||
try
|
|
||||||
{
|
|
||||||
dashboardEventBroadcaster.Publish(session.SessionId, mxEvent);
|
|
||||||
}
|
|
||||||
catch (Exception ex)
|
|
||||||
{
|
|
||||||
logger.LogDebug(
|
|
||||||
ex,
|
|
||||||
"Dashboard event mirror threw for session {SessionId}; continuing.",
|
|
||||||
session.SessionId);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Queue-depth gauge tracks events the pump has fanned into this subscriber's
|
// Queue-depth gauge tracks events the pump has fanned into this subscriber's
|
||||||
// channel but the client has not yet consumed — the same "buffered, not yet
|
// channel but the client has not yet consumed — the same "buffered, not yet
|
||||||
// delivered" quantity the pre-Task-4 per-RPC channel reported. The bounded
|
// delivered" quantity the pre-Task-4 per-RPC channel reported. The bounded
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
|
using Microsoft.Extensions.Logging;
|
||||||
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Grpc;
|
using ZB.MOM.WW.MxGateway.Server.Grpc;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Workers;
|
using ZB.MOM.WW.MxGateway.Server.Workers;
|
||||||
@@ -21,6 +23,10 @@ public sealed class GatewaySession
|
|||||||
private int _activeEventSubscriberCount;
|
private int _activeEventSubscriberCount;
|
||||||
private SessionEventDistributor? _eventDistributor;
|
private SessionEventDistributor? _eventDistributor;
|
||||||
private bool _eventDistributorStarted;
|
private bool _eventDistributorStarted;
|
||||||
|
private bool _dashboardMirrorStarted;
|
||||||
|
private IEventSubscriberLease? _dashboardMirrorLease;
|
||||||
|
private Task? _dashboardMirrorTask;
|
||||||
|
private CancellationTokenSource? _dashboardMirrorCts;
|
||||||
private readonly Dictionary<(int ServerHandle, int ItemHandle), SessionItemRegistration> _items = [];
|
private readonly Dictionary<(int ServerHandle, int ItemHandle), SessionItemRegistration> _items = [];
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -350,9 +356,22 @@ public sealed class GatewaySession
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Transitions the session to the Ready state.
|
/// Transitions the session to the Ready state.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
/// <remarks>
|
||||||
|
/// On becoming Ready the session starts its internal dashboard mirror (Task 6) when a
|
||||||
|
/// dashboard broadcaster was supplied. The mirror registers an internal subscriber on
|
||||||
|
/// the distributor and starts the pump <em>before</em> any gRPC client attaches, so the
|
||||||
|
/// dashboard EventsHub receives session events even with no gRPC subscriber streaming —
|
||||||
|
/// fixing the "dark feed" where the dashboard only saw events while a gRPC client was
|
||||||
|
/// actively streaming. Registering the internal subscriber BEFORE
|
||||||
|
/// <see cref="SessionEventDistributor.StartAsync"/> also avoids the Task 4 hazard where
|
||||||
|
/// starting the pump at Ready with zero subscribers drained a fast-completing worker
|
||||||
|
/// stream into nothing and left a later subscriber hanging: there is now always a
|
||||||
|
/// subscriber (the dashboard one) registered before the pump starts.
|
||||||
|
/// </remarks>
|
||||||
public void MarkReady()
|
public void MarkReady()
|
||||||
{
|
{
|
||||||
TransitionTo(SessionState.Ready);
|
TransitionTo(SessionState.Ready);
|
||||||
|
StartDashboardMirror();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Constructs and starts the distributor exactly once, registering the subscriber under
|
// Constructs and starts the distributor exactly once, registering the subscriber under
|
||||||
@@ -369,8 +388,24 @@ public sealed class GatewaySession
|
|||||||
// the start so the very first subscriber sees the stream from its beginning.
|
// the start so the very first subscriber sees the stream from its beginning.
|
||||||
private IEventSubscriberLease StartDistributorAndRegister()
|
private IEventSubscriberLease StartDistributorAndRegister()
|
||||||
{
|
{
|
||||||
SessionEventDistributor distributor;
|
SessionEventDistributor distributor = EnsureDistributorCreated(out bool startNow);
|
||||||
bool startNow = false;
|
|
||||||
|
// Register BEFORE starting the pump so a subscriber is present when the pump begins
|
||||||
|
// draining — no event is fanned to an empty subscriber set and then missed by this
|
||||||
|
// first subscriber. StartAsync only schedules the pump task; it never blocks.
|
||||||
|
IEventSubscriberLease lease = distributor.Register();
|
||||||
|
StartPumpIfRequested(distributor, startNow);
|
||||||
|
|
||||||
|
return lease;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Constructs the distributor exactly once and reports whether THIS caller is the one
|
||||||
|
// that should start the pump (i.e. it observed the unstarted state and claimed the
|
||||||
|
// start). Both the construction and the started-flag flip happen under _syncRoot so two
|
||||||
|
// concurrent callers (e.g. MarkReady's dashboard mirror and a racing first
|
||||||
|
// AttachEventSubscriber) agree on a single distributor and a single start.
|
||||||
|
private SessionEventDistributor EnsureDistributorCreated(out bool startNow)
|
||||||
|
{
|
||||||
lock (_syncRoot)
|
lock (_syncRoot)
|
||||||
{
|
{
|
||||||
if (_eventDistributor is null)
|
if (_eventDistributor is null)
|
||||||
@@ -387,28 +422,133 @@ public sealed class GatewaySession
|
|||||||
CreateOverflowHandler(eventOptions.BackpressurePolicy));
|
CreateOverflowHandler(eventOptions.BackpressurePolicy));
|
||||||
}
|
}
|
||||||
|
|
||||||
distributor = _eventDistributor;
|
startNow = false;
|
||||||
if (!_eventDistributorStarted)
|
if (!_eventDistributorStarted)
|
||||||
{
|
{
|
||||||
_eventDistributorStarted = true;
|
_eventDistributorStarted = true;
|
||||||
startNow = true;
|
startNow = true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Register BEFORE starting the pump so a subscriber is present when the pump begins
|
return _eventDistributor;
|
||||||
// draining — no event is fanned to an empty subscriber set and then missed by this
|
}
|
||||||
// first subscriber. StartAsync only schedules the pump task; it never blocks.
|
}
|
||||||
IEventSubscriberLease lease = distributor.Register();
|
|
||||||
if (startNow)
|
private static void StartPumpIfRequested(SessionEventDistributor distributor, bool startNow)
|
||||||
|
{
|
||||||
|
if (!startNow)
|
||||||
{
|
{
|
||||||
// StartAsync only schedules the pump via Task.Run and returns a completed task;
|
return;
|
||||||
// it does not perform any async I/O itself. The sync-over-async call here is
|
|
||||||
// therefore safe and will not deadlock. Do not make StartAsync truly async
|
|
||||||
// (i.e., await real I/O before returning) without also changing this call site.
|
|
||||||
distributor.StartAsync(CancellationToken.None).GetAwaiter().GetResult();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return lease;
|
// StartAsync only schedules the pump via Task.Run and returns a completed task;
|
||||||
|
// it does not perform any async I/O itself. The sync-over-async call here is
|
||||||
|
// therefore safe and will not deadlock. Do not make StartAsync truly async
|
||||||
|
// (i.e., await real I/O before returning) without also changing this call site.
|
||||||
|
distributor.StartAsync(CancellationToken.None).GetAwaiter().GetResult();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Registers the gateway-owned internal dashboard subscriber on the distributor and starts
|
||||||
|
// a background loop that mirrors every fanned event to the dashboard broadcaster. Called
|
||||||
|
// once when the session becomes Ready (idempotent). The internal subscriber is registered
|
||||||
|
// BEFORE the pump starts (see StartDistributorAndRegister / EnsureDistributorCreated), so
|
||||||
|
// a subscriber is always present at pump start — the dashboard receives events with no
|
||||||
|
// gRPC subscriber attached, and the Task 4 "zero-subscriber drain into the void" hang
|
||||||
|
// cannot occur. No-op when no dashboard broadcaster was supplied (unit tests).
|
||||||
|
private void StartDashboardMirror()
|
||||||
|
{
|
||||||
|
IDashboardEventBroadcaster? broadcaster = _eventStreaming.DashboardBroadcaster;
|
||||||
|
if (broadcaster is null)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
SessionEventDistributor distributor;
|
||||||
|
CancellationToken loopToken;
|
||||||
|
lock (_syncRoot)
|
||||||
|
{
|
||||||
|
if (_dashboardMirrorStarted || _state is SessionState.Closing or SessionState.Closed or SessionState.Faulted)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
_dashboardMirrorStarted = true;
|
||||||
|
_dashboardMirrorCts = new CancellationTokenSource();
|
||||||
|
loopToken = _dashboardMirrorCts.Token;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the distributor (claiming the start if we are first) and register the
|
||||||
|
// internal subscriber BEFORE starting the pump. isInternal: true keeps the dashboard
|
||||||
|
// subscriber out of the single-subscriber overflow accounting, so a slow/broken
|
||||||
|
// dashboard mirror only disconnects itself and never faults the session.
|
||||||
|
distributor = EnsureDistributorCreated(out bool startNow);
|
||||||
|
IEventSubscriberLease lease = distributor.Register(isInternal: true);
|
||||||
|
StartPumpIfRequested(distributor, startNow);
|
||||||
|
|
||||||
|
lock (_syncRoot)
|
||||||
|
{
|
||||||
|
_dashboardMirrorLease = lease;
|
||||||
|
}
|
||||||
|
|
||||||
|
_dashboardMirrorTask = Task.Run(
|
||||||
|
() => RunDashboardMirrorAsync(broadcaster, lease, loopToken),
|
||||||
|
CancellationToken.None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads the internal dashboard subscriber's channel and publishes each RAW fanned event
|
||||||
|
// to the dashboard broadcaster. The dashboard is a first-class distributor subscriber
|
||||||
|
// (Task 6), so it sees the session's full raw event activity — NOT the per-gRPC-subscriber
|
||||||
|
// AfterWorkerSequence filtering that EventStreamService applies at its own boundary. This
|
||||||
|
// is intentional: the dashboard is a separate LDAP-authenticated monitoring view (per-
|
||||||
|
// session dashboard ACL is the separate Task 18). Publish is best-effort / never-throw, so
|
||||||
|
// a slow or broken dashboard cannot fault the session or stall the pump; the bounded
|
||||||
|
// internal subscriber channel (Task 5 per-subscriber isolation) only disconnects THIS
|
||||||
|
// mirror on overflow, leaving the session and other subscribers untouched.
|
||||||
|
private async Task RunDashboardMirrorAsync(
|
||||||
|
IDashboardEventBroadcaster broadcaster,
|
||||||
|
IEventSubscriberLease lease,
|
||||||
|
CancellationToken cancellationToken)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
await foreach (MxEvent mxEvent in lease.Reader
|
||||||
|
.ReadAllAsync(cancellationToken)
|
||||||
|
.ConfigureAwait(false))
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
broadcaster.Publish(SessionId, mxEvent);
|
||||||
|
}
|
||||||
|
catch (Exception exception)
|
||||||
|
{
|
||||||
|
// Publish is documented never-throw, but enforce it here too so a future
|
||||||
|
// implementation cannot fault the mirror loop. Logs identifiers only.
|
||||||
|
_eventStreaming.DistributorLogger.LogDebug(
|
||||||
|
exception,
|
||||||
|
"Dashboard event mirror threw for session {SessionId}; continuing.",
|
||||||
|
SessionId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||||
|
{
|
||||||
|
// Teardown path: the session is shutting down the mirror.
|
||||||
|
}
|
||||||
|
catch (SessionManagerException)
|
||||||
|
{
|
||||||
|
// The internal subscriber's channel overflowed and the distributor disconnected
|
||||||
|
// it with a terminal overflow fault. That disconnects only the dashboard mirror;
|
||||||
|
// the session, pump, and any gRPC subscriber are unaffected. Stop mirroring.
|
||||||
|
}
|
||||||
|
catch (Exception exception)
|
||||||
|
{
|
||||||
|
// Source-fault completion (worker event stream terminated abnormally) surfaces
|
||||||
|
// here. The session's own fault handling runs via the gRPC path / lifecycle; the
|
||||||
|
// mirror just stops. Logs identifiers only.
|
||||||
|
_eventStreaming.DistributorLogger.LogDebug(
|
||||||
|
exception,
|
||||||
|
"Dashboard event mirror loop ended for session {SessionId}.",
|
||||||
|
SessionId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Builds the per-subscriber backpressure handler the distributor invokes when a
|
// Builds the per-subscriber backpressure handler the distributor invokes when a
|
||||||
@@ -1108,6 +1248,46 @@ public sealed class GatewaySession
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stop the internal dashboard mirror first: cancel its loop, dispose its lease (which
|
||||||
|
// unregisters its internal distributor subscriber and completes its channel), and
|
||||||
|
// await the loop task. Done BEFORE disposing the distributor and worker client — like
|
||||||
|
// the distributor itself — so the mirror is no longer reading the pump when the pump
|
||||||
|
// and its source (the worker client) tear down.
|
||||||
|
IEventSubscriberLease? dashboardLease;
|
||||||
|
Task? dashboardTask;
|
||||||
|
CancellationTokenSource? dashboardCts;
|
||||||
|
lock (_syncRoot)
|
||||||
|
{
|
||||||
|
dashboardLease = _dashboardMirrorLease;
|
||||||
|
dashboardTask = _dashboardMirrorTask;
|
||||||
|
dashboardCts = _dashboardMirrorCts;
|
||||||
|
_dashboardMirrorLease = null;
|
||||||
|
_dashboardMirrorTask = null;
|
||||||
|
_dashboardMirrorCts = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dashboardCts is not null)
|
||||||
|
{
|
||||||
|
await dashboardCts.CancelAsync().ConfigureAwait(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
dashboardLease?.Dispose();
|
||||||
|
|
||||||
|
if (dashboardTask is not null)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
await dashboardTask.ConfigureAwait(false);
|
||||||
|
}
|
||||||
|
catch (Exception)
|
||||||
|
{
|
||||||
|
// The mirror loop swallows its own faults; any escape here must not block
|
||||||
|
// disposal. The loop has stopped, which is all teardown requires.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dashboardCts?.Dispose();
|
||||||
|
|
||||||
// Stop the event pump and complete every subscriber channel before tearing down the
|
// Stop the event pump and complete every subscriber channel before tearing down the
|
||||||
// worker client (the pump's source). DisposeAsync is the single session teardown
|
// worker client (the pump's source). DisposeAsync is the single session teardown
|
||||||
// point (SessionManager.RemoveSessionAsync awaits it after close), so awaiting it
|
// point (SessionManager.RemoveSessionAsync awaits it after close), so awaiting it
|
||||||
|
|||||||
@@ -240,7 +240,19 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
|||||||
/// subscriber and completes its channel without disturbing the pump or other
|
/// subscriber and completes its channel without disturbing the pump or other
|
||||||
/// subscribers.
|
/// subscribers.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public IEventSubscriberLease Register()
|
/// <param name="isInternal">
|
||||||
|
/// <see langword="true"/> for a gateway-owned internal subscriber (Task 6: the
|
||||||
|
/// session's dashboard mirror) that must NOT participate in the single-subscriber
|
||||||
|
/// overflow accounting. An internal subscriber is excluded from the
|
||||||
|
/// <c>isOnlySubscriber</c> count, so a lone external gRPC subscriber still reports
|
||||||
|
/// <c>isOnlySubscriber == true</c> (preserving legacy FailFast session-fault
|
||||||
|
/// behavior) even while the dashboard subscriber is attached; and an internal
|
||||||
|
/// subscriber that itself overflows always reports <c>isOnlySubscriber == false</c>,
|
||||||
|
/// so a slow/broken dashboard can never fault the session — it is merely
|
||||||
|
/// disconnected from the mirror. Defaults to <see langword="false"/> (external
|
||||||
|
/// subscriber) so every existing call site is unchanged.
|
||||||
|
/// </param>
|
||||||
|
public IEventSubscriberLease Register(bool isInternal = false)
|
||||||
{
|
{
|
||||||
// The pump is the single writer for this channel; readers are single-consumer
|
// The pump is the single writer for this channel; readers are single-consumer
|
||||||
// (one gRPC stream / dashboard subscriber). Synchronous continuations are
|
// (one gRPC stream / dashboard subscriber). Synchronous continuations are
|
||||||
@@ -265,7 +277,7 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
|||||||
});
|
});
|
||||||
|
|
||||||
long id = Interlocked.Increment(ref _nextSubscriberId);
|
long id = Interlocked.Increment(ref _nextSubscriberId);
|
||||||
Subscriber subscriber = new(id, channel);
|
Subscriber subscriber = new(id, channel, isInternal);
|
||||||
|
|
||||||
// The disposed check AND the map add happen under the same lock with no await
|
// The disposed check AND the map add happen under the same lock with no await
|
||||||
// in between. DisposeAsync sets _disposed=true under this same lock before it
|
// in between. DisposeAsync sets _disposed=true under this same lock before it
|
||||||
@@ -411,7 +423,14 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
|||||||
// race window opens — a concurrent second registration could cause Count to read as 1
|
// race window opens — a concurrent second registration could cause Count to read as 1
|
||||||
// here even with two subscribers, producing a false FailFast that faults a shared
|
// here even with two subscribers, producing a false FailFast that faults a shared
|
||||||
// session. Resolve before enabling multi-subscriber.
|
// session. Resolve before enabling multi-subscriber.
|
||||||
bool isOnlySubscriber = _subscribers.Count == 1;
|
//
|
||||||
|
// Task 6: the gateway-owned internal dashboard subscriber is excluded from this
|
||||||
|
// accounting. (a) An internal subscriber that overflows is NEVER the "only subscriber"
|
||||||
|
// — a slow/broken dashboard must never fault the session, only disconnect its own
|
||||||
|
// mirror. (b) Internal subscribers are excluded from the count, so a lone external
|
||||||
|
// gRPC subscriber still reports isOnlySubscriber==true and preserves the legacy
|
||||||
|
// FailFast session-fault behavior even while the dashboard mirror is attached.
|
||||||
|
bool isOnlySubscriber = !subscriber.IsInternal && CountExternalSubscribers() == 1;
|
||||||
|
|
||||||
_logger.LogDebug(
|
_logger.LogDebug(
|
||||||
"Event distributor disconnecting subscriber {SubscriberId} in session {SessionId} after queue overflow (worker sequence {WorkerSequence}).",
|
"Event distributor disconnecting subscriber {SubscriberId} in session {SessionId} after queue overflow (worker sequence {WorkerSequence}).",
|
||||||
@@ -446,6 +465,22 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Counts external (non-internal) subscribers. Drives the isOnlySubscriber FailFast
|
||||||
|
// decision so the gateway-owned internal dashboard subscriber never inflates the count.
|
||||||
|
private int CountExternalSubscribers()
|
||||||
|
{
|
||||||
|
int count = 0;
|
||||||
|
foreach (Subscriber subscriber in _subscribers.Values)
|
||||||
|
{
|
||||||
|
if (!subscriber.IsInternal)
|
||||||
|
{
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
private void CompleteAllSubscribers(Exception? error)
|
private void CompleteAllSubscribers(Exception? error)
|
||||||
{
|
{
|
||||||
foreach (Subscriber subscriber in _subscribers.Values)
|
foreach (Subscriber subscriber in _subscribers.Values)
|
||||||
@@ -593,11 +628,15 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
|||||||
|
|
||||||
private readonly record struct ReplayEntry(MxEvent Event, DateTimeOffset RetainedAt);
|
private readonly record struct ReplayEntry(MxEvent Event, DateTimeOffset RetainedAt);
|
||||||
|
|
||||||
private sealed class Subscriber(long id, Channel<MxEvent> channel)
|
private sealed class Subscriber(long id, Channel<MxEvent> channel, bool isInternal)
|
||||||
{
|
{
|
||||||
public long Id { get; } = id;
|
public long Id { get; } = id;
|
||||||
|
|
||||||
public Channel<MxEvent> Channel { get; } = channel;
|
public Channel<MxEvent> Channel { get; } = channel;
|
||||||
|
|
||||||
|
// True for the gateway-owned internal dashboard subscriber. Excluded from the
|
||||||
|
// single-subscriber overflow accounting so it cannot fault the session.
|
||||||
|
public bool IsInternal { get; } = isInternal;
|
||||||
}
|
}
|
||||||
|
|
||||||
private sealed class SubscriberLease(SessionEventDistributor distributor, Subscriber subscriber)
|
private sealed class SubscriberLease(SessionEventDistributor distributor, Subscriber subscriber)
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
using Microsoft.Extensions.Logging.Abstractions;
|
using Microsoft.Extensions.Logging.Abstractions;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Grpc;
|
using ZB.MOM.WW.MxGateway.Server.Grpc;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||||
|
|
||||||
@@ -30,17 +31,25 @@ namespace ZB.MOM.WW.MxGateway.Server.Sessions;
|
|||||||
/// fault. Carrying it here keeps the distributor decoupled from the metrics type while
|
/// fault. Carrying it here keeps the distributor decoupled from the metrics type while
|
||||||
/// preserving the observability the pre-epic per-RPC overflow path emitted.
|
/// preserving the observability the pre-epic per-RPC overflow path emitted.
|
||||||
/// </param>
|
/// </param>
|
||||||
|
/// <param name="DashboardBroadcaster">
|
||||||
|
/// Sink the session's internal dashboard mirror loop (Task 6) publishes raw session
|
||||||
|
/// <c>MxEvent</c>s to. When non-null the session registers an internal distributor
|
||||||
|
/// subscriber on becoming Ready and mirrors every fanned event to the dashboard
|
||||||
|
/// EventsHub group regardless of whether a gRPC client is streaming. When null
|
||||||
|
/// (unit tests that don't exercise the dashboard mirror) no mirror is started.
|
||||||
|
/// </param>
|
||||||
public sealed record SessionEventStreaming(
|
public sealed record SessionEventStreaming(
|
||||||
MxAccessGrpcMapper Mapper,
|
MxAccessGrpcMapper Mapper,
|
||||||
EventOptions EventOptions,
|
EventOptions EventOptions,
|
||||||
ILogger<SessionEventDistributor> DistributorLogger,
|
ILogger<SessionEventDistributor> DistributorLogger,
|
||||||
TimeProvider TimeProvider,
|
TimeProvider TimeProvider,
|
||||||
GatewayMetrics Metrics)
|
GatewayMetrics Metrics,
|
||||||
|
IDashboardEventBroadcaster? DashboardBroadcaster = null)
|
||||||
{
|
{
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Defaults used when a session is constructed without explicit streaming
|
/// Defaults used when a session is constructed without explicit streaming
|
||||||
/// dependencies (unit tests). Uses a fresh mapper, default event options, a no-op
|
/// dependencies (unit tests). Uses a fresh mapper, default event options, a no-op
|
||||||
/// logger, the system clock, and a fresh metrics sink.
|
/// logger, the system clock, a fresh metrics sink, and no dashboard mirror.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public static SessionEventStreaming Default { get; } = new(
|
public static SessionEventStreaming Default { get; } = new(
|
||||||
new MxAccessGrpcMapper(),
|
new MxAccessGrpcMapper(),
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ public sealed class SessionManager : ISessionManager
|
|||||||
private readonly SemaphoreSlim _sessionSlots;
|
private readonly SemaphoreSlim _sessionSlots;
|
||||||
private readonly Grpc.MxAccessGrpcMapper _eventMapper;
|
private readonly Grpc.MxAccessGrpcMapper _eventMapper;
|
||||||
private readonly ILogger<SessionEventDistributor> _distributorLogger;
|
private readonly ILogger<SessionEventDistributor> _distributorLogger;
|
||||||
|
private readonly Dashboard.Hubs.IDashboardEventBroadcaster? _dashboardEventBroadcaster;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Initializes a new instance of <see cref="SessionManager"/>.
|
/// Initializes a new instance of <see cref="SessionManager"/>.
|
||||||
@@ -39,6 +40,12 @@ public sealed class SessionManager : ISessionManager
|
|||||||
/// <param name="logger">Logger.</param>
|
/// <param name="logger">Logger.</param>
|
||||||
/// <param name="eventMapper">Mapper used by each session's event distributor to map worker events to public events.</param>
|
/// <param name="eventMapper">Mapper used by each session's event distributor to map worker events to public events.</param>
|
||||||
/// <param name="distributorLogger">Logger passed to each session's event distributor pump.</param>
|
/// <param name="distributorLogger">Logger passed to each session's event distributor pump.</param>
|
||||||
|
/// <param name="dashboardEventBroadcaster">
|
||||||
|
/// Dashboard SignalR fan-out sink. Each session registers an internal distributor
|
||||||
|
/// subscriber (Task 6) that mirrors raw session events to this broadcaster, so the
|
||||||
|
/// dashboard receives events regardless of whether a gRPC client is streaming. Null in
|
||||||
|
/// unit tests that do not exercise the dashboard mirror.
|
||||||
|
/// </param>
|
||||||
public SessionManager(
|
public SessionManager(
|
||||||
ISessionRegistry registry,
|
ISessionRegistry registry,
|
||||||
ISessionWorkerClientFactory workerClientFactory,
|
ISessionWorkerClientFactory workerClientFactory,
|
||||||
@@ -47,7 +54,8 @@ public sealed class SessionManager : ISessionManager
|
|||||||
TimeProvider? timeProvider = null,
|
TimeProvider? timeProvider = null,
|
||||||
ILogger<SessionManager>? logger = null,
|
ILogger<SessionManager>? logger = null,
|
||||||
Grpc.MxAccessGrpcMapper? eventMapper = null,
|
Grpc.MxAccessGrpcMapper? eventMapper = null,
|
||||||
ILogger<SessionEventDistributor>? distributorLogger = null)
|
ILogger<SessionEventDistributor>? distributorLogger = null,
|
||||||
|
Dashboard.Hubs.IDashboardEventBroadcaster? dashboardEventBroadcaster = null)
|
||||||
{
|
{
|
||||||
_registry = registry ?? throw new ArgumentNullException(nameof(registry));
|
_registry = registry ?? throw new ArgumentNullException(nameof(registry));
|
||||||
_workerClientFactory = workerClientFactory ?? throw new ArgumentNullException(nameof(workerClientFactory));
|
_workerClientFactory = workerClientFactory ?? throw new ArgumentNullException(nameof(workerClientFactory));
|
||||||
@@ -57,6 +65,7 @@ public sealed class SessionManager : ISessionManager
|
|||||||
_logger = logger ?? NullLogger<SessionManager>.Instance;
|
_logger = logger ?? NullLogger<SessionManager>.Instance;
|
||||||
_eventMapper = eventMapper ?? new Grpc.MxAccessGrpcMapper();
|
_eventMapper = eventMapper ?? new Grpc.MxAccessGrpcMapper();
|
||||||
_distributorLogger = distributorLogger ?? NullLogger<SessionEventDistributor>.Instance;
|
_distributorLogger = distributorLogger ?? NullLogger<SessionEventDistributor>.Instance;
|
||||||
|
_dashboardEventBroadcaster = dashboardEventBroadcaster;
|
||||||
_options = options.Value;
|
_options = options.Value;
|
||||||
_sessionSlots = new SemaphoreSlim(_options.Sessions.MaxSessions, _options.Sessions.MaxSessions);
|
_sessionSlots = new SemaphoreSlim(_options.Sessions.MaxSessions, _options.Sessions.MaxSessions);
|
||||||
}
|
}
|
||||||
@@ -451,7 +460,8 @@ public sealed class SessionManager : ISessionManager
|
|||||||
_options.Events,
|
_options.Events,
|
||||||
_distributorLogger,
|
_distributorLogger,
|
||||||
_timeProvider,
|
_timeProvider,
|
||||||
_metrics);
|
_metrics,
|
||||||
|
_dashboardEventBroadcaster);
|
||||||
|
|
||||||
return new GatewaySession(
|
return new GatewaySession(
|
||||||
sessionId,
|
sessionId,
|
||||||
|
|||||||
@@ -268,14 +268,13 @@ public sealed class GatewayEndToEndFakeWorkerSmokeTests
|
|||||||
workerClientFactory,
|
workerClientFactory,
|
||||||
options,
|
options,
|
||||||
_metrics,
|
_metrics,
|
||||||
logger: NullLogger<SessionManager>.Instance);
|
logger: NullLogger<SessionManager>.Instance,
|
||||||
|
dashboardEventBroadcaster: NullDashboardEventBroadcaster.Instance);
|
||||||
MxAccessGrpcMapper mapper = new();
|
MxAccessGrpcMapper mapper = new();
|
||||||
EventStreamService eventStreamService = new(
|
EventStreamService eventStreamService = new(
|
||||||
sessionManager,
|
sessionManager,
|
||||||
options,
|
options,
|
||||||
_metrics,
|
_metrics);
|
||||||
NullDashboardEventBroadcaster.Instance,
|
|
||||||
NullLogger<EventStreamService>.Instance);
|
|
||||||
|
|
||||||
Service = new MxAccessGatewayService(
|
Service = new MxAccessGatewayService(
|
||||||
sessionManager,
|
sessionManager,
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ using ZB.MOM.WW.MxGateway.Server.Grpc;
|
|||||||
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||||
using ZB.MOM.WW.MxGateway.Server.Workers;
|
using ZB.MOM.WW.MxGateway.Server.Workers;
|
||||||
using ZB.MOM.WW.MxGateway.Tests.TestSupport;
|
|
||||||
|
|
||||||
namespace ZB.MOM.WW.MxGateway.Tests.Gateway.Grpc;
|
namespace ZB.MOM.WW.MxGateway.Tests.Gateway.Grpc;
|
||||||
|
|
||||||
@@ -301,81 +300,11 @@ public sealed class EventStreamServiceTests
|
|||||||
Assert.Equal(1, metrics.GetSnapshot().Faults);
|
Assert.Equal(1, metrics.GetSnapshot().Faults);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Tests-026 regression: <see cref="EventStreamService.StreamEventsAsync"/>
|
|
||||||
/// must mirror every yielded event to the
|
|
||||||
/// <see cref="ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs.IDashboardEventBroadcaster"/>
|
|
||||||
/// seam (the only path that fans events out to dashboard SignalR clients).
|
|
||||||
/// A regression that silently dropped the <c>Publish</c> call — e.g. an
|
|
||||||
/// <c>if</c> accidentally added around it, or the broadcaster ctor
|
|
||||||
/// parameter being removed — would have produced no failing test before
|
|
||||||
/// this fixture existed. The recording fake captures every call and we
|
|
||||||
/// assert one publish per yielded event, with the correct session id and
|
|
||||||
/// preserved <c>WorkerSequence</c>.
|
|
||||||
/// </summary>
|
|
||||||
[Fact]
|
|
||||||
public async Task StreamEventsAsync_PublishesEachEventToDashboardBroadcaster()
|
|
||||||
{
|
|
||||||
FakeWorkerClient workerClient = new();
|
|
||||||
GatewaySession session = CreateReadySession(workerClient);
|
|
||||||
RecordingDashboardEventBroadcaster recordingBroadcaster = new();
|
|
||||||
EventStreamService service = CreateService(
|
|
||||||
new FakeSessionManager(session),
|
|
||||||
dashboardEventBroadcaster: recordingBroadcaster);
|
|
||||||
workerClient.Events.Add(CreateWorkerEvent(sequence: 7, MxEventFamily.OnDataChange));
|
|
||||||
workerClient.Events.Add(CreateWorkerEvent(sequence: 8, MxEventFamily.OnWriteComplete));
|
|
||||||
workerClient.CompleteAfterConfiguredEvents = true;
|
|
||||||
|
|
||||||
List<MxEvent> events = await CollectEventsAsync(service, session.SessionId);
|
|
||||||
|
|
||||||
Assert.Equal([7UL, 8UL], events.Select(mxEvent => mxEvent.WorkerSequence).ToArray());
|
|
||||||
IReadOnlyList<DashboardEventCapture> captures = recordingBroadcaster.Captures;
|
|
||||||
Assert.Equal(2, captures.Count);
|
|
||||||
Assert.All(captures, capture => Assert.Equal(session.SessionId, capture.SessionId));
|
|
||||||
Assert.Equal([7UL, 8UL], captures.Select(capture => capture.MxEvent.WorkerSequence).ToArray());
|
|
||||||
Assert.Equal(MxEventFamily.OnDataChange, captures[0].MxEvent.Family);
|
|
||||||
Assert.Equal(MxEventFamily.OnWriteComplete, captures[1].MxEvent.Family);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Server-041 regression: <see cref="EventStreamService"/> must not
|
|
||||||
/// abort the gRPC stream when the dashboard broadcaster throws.
|
|
||||||
/// <c>IDashboardEventBroadcaster.Publish</c> is documented as
|
|
||||||
/// best-effort and never-throw, but the gRPC consumer cannot rely on
|
|
||||||
/// implementation discipline alone — the seam itself swallows the
|
|
||||||
/// fault and logs at debug, mirroring the broadcaster's own
|
|
||||||
/// continuation handler. Without the wrap, the producer loop would
|
|
||||||
/// surface the exception and the client would see a faulted stream
|
|
||||||
/// for a dashboard-mirror failure.
|
|
||||||
/// </summary>
|
|
||||||
[Fact]
|
|
||||||
public async Task StreamEventsAsync_WhenDashboardBroadcasterThrows_StillYieldsEventsAndDoesNotFaultSession()
|
|
||||||
{
|
|
||||||
FakeWorkerClient workerClient = new();
|
|
||||||
GatewaySession session = CreateReadySession(workerClient);
|
|
||||||
using GatewayMetrics metrics = new();
|
|
||||||
ThrowingDashboardEventBroadcaster throwingBroadcaster = new();
|
|
||||||
EventStreamService service = CreateService(
|
|
||||||
new FakeSessionManager(session),
|
|
||||||
metrics,
|
|
||||||
dashboardEventBroadcaster: throwingBroadcaster);
|
|
||||||
workerClient.Events.Add(CreateWorkerEvent(sequence: 1, MxEventFamily.OnDataChange));
|
|
||||||
workerClient.Events.Add(CreateWorkerEvent(sequence: 2, MxEventFamily.OnDataChange));
|
|
||||||
workerClient.CompleteAfterConfiguredEvents = true;
|
|
||||||
|
|
||||||
List<MxEvent> events = await CollectEventsAsync(service, session.SessionId);
|
|
||||||
|
|
||||||
Assert.Equal([1UL, 2UL], events.Select(mxEvent => mxEvent.WorkerSequence).ToArray());
|
|
||||||
Assert.Equal(2, throwingBroadcaster.PublishAttempts);
|
|
||||||
Assert.NotEqual(SessionState.Faulted, session.State);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static EventStreamService CreateService(
|
private static EventStreamService CreateService(
|
||||||
FakeSessionManager sessionManager,
|
FakeSessionManager sessionManager,
|
||||||
GatewayMetrics? metrics = null,
|
GatewayMetrics? metrics = null,
|
||||||
int queueCapacity = 8,
|
int queueCapacity = 8,
|
||||||
EventBackpressurePolicy backpressurePolicy = EventBackpressurePolicy.FailFast,
|
EventBackpressurePolicy backpressurePolicy = EventBackpressurePolicy.FailFast)
|
||||||
ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs.IDashboardEventBroadcaster? dashboardEventBroadcaster = null)
|
|
||||||
{
|
{
|
||||||
return new EventStreamService(
|
return new EventStreamService(
|
||||||
sessionManager,
|
sessionManager,
|
||||||
@@ -387,24 +316,7 @@ public sealed class EventStreamServiceTests
|
|||||||
BackpressurePolicy = backpressurePolicy,
|
BackpressurePolicy = backpressurePolicy,
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
metrics ?? new GatewayMetrics(),
|
metrics ?? new GatewayMetrics());
|
||||||
dashboardEventBroadcaster ?? NullDashboardEventBroadcaster.Instance,
|
|
||||||
NullLogger<EventStreamService>.Instance);
|
|
||||||
}
|
|
||||||
|
|
||||||
private sealed class ThrowingDashboardEventBroadcaster : ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs.IDashboardEventBroadcaster
|
|
||||||
{
|
|
||||||
/// <summary>Gets the count of publish attempts.</summary>
|
|
||||||
public int PublishAttempts { get; private set; }
|
|
||||||
|
|
||||||
/// <summary>Increments the attempt count and throws a simulated failure.</summary>
|
|
||||||
/// <param name="sessionId">The session identifier.</param>
|
|
||||||
/// <param name="mxEvent">The event to publish.</param>
|
|
||||||
public void Publish(string sessionId, MxEvent mxEvent)
|
|
||||||
{
|
|
||||||
PublishAttempts++;
|
|
||||||
throw new InvalidOperationException("simulated dashboard broadcaster failure");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static async Task<List<MxEvent>> CollectEventsAsync(
|
private static async Task<List<MxEvent>> CollectEventsAsync(
|
||||||
|
|||||||
@@ -0,0 +1,316 @@
|
|||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using Microsoft.Extensions.Logging.Abstractions;
|
||||||
|
using Microsoft.Extensions.Options;
|
||||||
|
using ZB.MOM.WW.MxGateway.Contracts;
|
||||||
|
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Grpc;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||||
|
using ZB.MOM.WW.MxGateway.Server.Workers;
|
||||||
|
using ZB.MOM.WW.MxGateway.Tests.TestSupport;
|
||||||
|
|
||||||
|
namespace ZB.MOM.WW.MxGateway.Tests.Gateway.Sessions;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Task 6 regression tests for the internal dashboard mirror. The dashboard is a
|
||||||
|
/// first-class subscriber on the session's <see cref="SessionEventDistributor"/>, so it
|
||||||
|
/// receives session events whether or not a gRPC client is streaming — fixing the
|
||||||
|
/// "dark feed" where the dashboard only saw events while a gRPC client was actively
|
||||||
|
/// streaming (the inline per-RPC tap removed by this task).
|
||||||
|
/// </summary>
|
||||||
|
public sealed class GatewaySessionDashboardMirrorTests
|
||||||
|
{
|
||||||
|
private static readonly TimeSpan TestTimeout = TimeSpan.FromSeconds(5);
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The KEY bug-fix test: the dashboard broadcaster receives session events even when
|
||||||
|
/// NO gRPC <c>StreamEvents</c> subscriber is attached. The session is driven to Ready
|
||||||
|
/// with a fake worker emitting events; only the internal dashboard subscriber exists.
|
||||||
|
/// Before Task 6 the mirror lived inside the per-RPC gRPC loop, so with no gRPC
|
||||||
|
/// subscriber the dashboard saw nothing.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task DashboardMirror_ReceivesEvents_WithNoGrpcSubscriber()
|
||||||
|
{
|
||||||
|
FakeWorkerClient workerClient = new();
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(10, MxEventFamily.OnDataChange));
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(11, MxEventFamily.OnWriteComplete));
|
||||||
|
workerClient.CompleteAfterConfiguredEvents = true;
|
||||||
|
RecordingDashboardEventBroadcaster broadcaster = new();
|
||||||
|
|
||||||
|
await using GatewaySession session = CreateSession(workerClient, broadcaster);
|
||||||
|
session.AttachWorkerClient(workerClient);
|
||||||
|
|
||||||
|
// MarkReady starts the internal dashboard mirror; no gRPC subscriber is ever attached.
|
||||||
|
session.MarkReady();
|
||||||
|
|
||||||
|
await WaitUntilAsync(() => broadcaster.Captures.Count == 2);
|
||||||
|
|
||||||
|
IReadOnlyList<DashboardEventCapture> captures = broadcaster.Captures;
|
||||||
|
Assert.Equal(0, session.ActiveEventSubscriberCount);
|
||||||
|
Assert.Equal([10UL, 11UL], captures.Select(capture => capture.MxEvent.WorkerSequence).ToArray());
|
||||||
|
Assert.All(captures, capture => Assert.Equal(session.SessionId, capture.SessionId));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// A gRPC subscriber and the dashboard both receive every event concurrently. The
|
||||||
|
/// gRPC path is no longer the dashboard's source — both read independent leases fed by
|
||||||
|
/// the single distributor pump.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task DashboardMirror_AndGrpcSubscriber_BothReceiveEvents()
|
||||||
|
{
|
||||||
|
FakeWorkerClient workerClient = new();
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(1, MxEventFamily.OnDataChange));
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(2, MxEventFamily.OnDataChange));
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(3, MxEventFamily.OnWriteComplete));
|
||||||
|
workerClient.CompleteAfterConfiguredEvents = true;
|
||||||
|
RecordingDashboardEventBroadcaster broadcaster = new();
|
||||||
|
|
||||||
|
await using GatewaySession session = CreateSession(workerClient, broadcaster);
|
||||||
|
session.AttachWorkerClient(workerClient);
|
||||||
|
session.MarkReady();
|
||||||
|
|
||||||
|
EventStreamService service = new(
|
||||||
|
new SingleSessionManager(session),
|
||||||
|
Options.Create(new GatewayOptions { Events = new EventOptions { QueueCapacity = 8 } }),
|
||||||
|
new GatewayMetrics());
|
||||||
|
|
||||||
|
List<MxEvent> grpcEvents = [];
|
||||||
|
await foreach (MxEvent mxEvent in service
|
||||||
|
.StreamEventsAsync(new StreamEventsRequest { SessionId = session.SessionId }, CancellationToken.None)
|
||||||
|
.WithCancellation(CancellationToken.None))
|
||||||
|
{
|
||||||
|
grpcEvents.Add(mxEvent);
|
||||||
|
}
|
||||||
|
|
||||||
|
await WaitUntilAsync(() => broadcaster.Captures.Count == 3);
|
||||||
|
|
||||||
|
Assert.Equal([1UL, 2UL, 3UL], grpcEvents.Select(mxEvent => mxEvent.WorkerSequence).ToArray());
|
||||||
|
Assert.Equal([1UL, 2UL, 3UL], broadcaster.Captures.Select(capture => capture.MxEvent.WorkerSequence).ToArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Task 4 hazard guard: starting the pump at Ready with a fast-completing worker stream
|
||||||
|
/// and zero subscribers used to drain into nothing and leave a later subscriber hanging.
|
||||||
|
/// Now the dashboard subscriber is registered BEFORE the pump starts, so even a worker
|
||||||
|
/// stream that completes immediately delivers every event to the dashboard with no hang.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task DashboardMirror_FastCompletingWorkerStream_DeliversAllEventsWithoutHang()
|
||||||
|
{
|
||||||
|
FakeWorkerClient workerClient = new();
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(1, MxEventFamily.OnDataChange));
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(2, MxEventFamily.OnDataChange));
|
||||||
|
workerClient.CompleteAfterConfiguredEvents = true;
|
||||||
|
RecordingDashboardEventBroadcaster broadcaster = new();
|
||||||
|
|
||||||
|
await using GatewaySession session = CreateSession(workerClient, broadcaster);
|
||||||
|
session.AttachWorkerClient(workerClient);
|
||||||
|
session.MarkReady();
|
||||||
|
|
||||||
|
await WaitUntilAsync(() => broadcaster.Captures.Count == 2);
|
||||||
|
Assert.Equal([1UL, 2UL], broadcaster.Captures.Select(capture => capture.MxEvent.WorkerSequence).ToArray());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The dashboard Publish must be never-throw at the seam too: a throwing broadcaster
|
||||||
|
/// must not fault the session or stop the mirror from continuing past the failure.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task DashboardMirror_WhenBroadcasterThrows_DoesNotFaultSessionAndKeepsMirroring()
|
||||||
|
{
|
||||||
|
FakeWorkerClient workerClient = new();
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(1, MxEventFamily.OnDataChange));
|
||||||
|
workerClient.Events.Add(CreateWorkerEvent(2, MxEventFamily.OnDataChange));
|
||||||
|
workerClient.CompleteAfterConfiguredEvents = true;
|
||||||
|
ThrowingDashboardEventBroadcaster broadcaster = new();
|
||||||
|
|
||||||
|
await using GatewaySession session = CreateSession(workerClient, broadcaster);
|
||||||
|
session.AttachWorkerClient(workerClient);
|
||||||
|
session.MarkReady();
|
||||||
|
|
||||||
|
await WaitUntilAsync(() => broadcaster.PublishAttempts == 2);
|
||||||
|
Assert.NotEqual(SessionState.Faulted, session.State);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// The internal dashboard subscriber must NOT count against the single-subscriber
|
||||||
|
/// guard: a gRPC subscriber can still attach while the dashboard mirror is running.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task DashboardMirror_DoesNotCountAgainstSingleSubscriberGuard()
|
||||||
|
{
|
||||||
|
FakeWorkerClient workerClient = new();
|
||||||
|
RecordingDashboardEventBroadcaster broadcaster = new();
|
||||||
|
|
||||||
|
await using GatewaySession session = CreateSession(workerClient, broadcaster);
|
||||||
|
session.AttachWorkerClient(workerClient);
|
||||||
|
session.MarkReady();
|
||||||
|
|
||||||
|
Assert.Equal(0, session.ActiveEventSubscriberCount);
|
||||||
|
using IEventSubscriberLease lease = session.AttachEventSubscriber(allowMultipleSubscribers: false);
|
||||||
|
Assert.Equal(1, session.ActiveEventSubscriberCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static GatewaySession CreateSession(
|
||||||
|
IWorkerClient workerClient,
|
||||||
|
IDashboardEventBroadcaster broadcaster)
|
||||||
|
{
|
||||||
|
return new GatewaySession(
|
||||||
|
sessionId: "session-dashboard-mirror",
|
||||||
|
backendName: GatewayContractInfo.DefaultBackendName,
|
||||||
|
pipeName: "mxaccess-gateway-1-session-dashboard-mirror",
|
||||||
|
nonce: "nonce",
|
||||||
|
clientIdentity: "client-1",
|
||||||
|
ownerKeyId: null,
|
||||||
|
clientSessionName: "test-session",
|
||||||
|
clientCorrelationId: "client-correlation-1",
|
||||||
|
commandTimeout: TimeSpan.FromSeconds(5),
|
||||||
|
startupTimeout: TimeSpan.FromSeconds(5),
|
||||||
|
shutdownTimeout: TimeSpan.FromSeconds(5),
|
||||||
|
leaseDuration: TimeSpan.FromMinutes(30),
|
||||||
|
openedAt: DateTimeOffset.UtcNow,
|
||||||
|
eventStreaming: new SessionEventStreaming(
|
||||||
|
new MxAccessGrpcMapper(),
|
||||||
|
new EventOptions { QueueCapacity = 8 },
|
||||||
|
NullLogger<SessionEventDistributor>.Instance,
|
||||||
|
TimeProvider.System,
|
||||||
|
new GatewayMetrics(),
|
||||||
|
broadcaster));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static WorkerEvent CreateWorkerEvent(ulong sequence, MxEventFamily family)
|
||||||
|
{
|
||||||
|
MxEvent mxEvent = new()
|
||||||
|
{
|
||||||
|
SessionId = "session-dashboard-mirror",
|
||||||
|
Family = family,
|
||||||
|
WorkerSequence = sequence,
|
||||||
|
};
|
||||||
|
|
||||||
|
switch (family)
|
||||||
|
{
|
||||||
|
case MxEventFamily.OnDataChange:
|
||||||
|
mxEvent.OnDataChange = new OnDataChangeEvent();
|
||||||
|
break;
|
||||||
|
case MxEventFamily.OnWriteComplete:
|
||||||
|
mxEvent.OnWriteComplete = new OnWriteCompleteEvent();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new WorkerEvent { Event = mxEvent };
|
||||||
|
}
|
||||||
|
|
||||||
|
private static async Task WaitUntilAsync(Func<bool> predicate)
|
||||||
|
{
|
||||||
|
using CancellationTokenSource cancellationTokenSource = new(TestTimeout);
|
||||||
|
while (!predicate())
|
||||||
|
{
|
||||||
|
await Task.Delay(TimeSpan.FromMilliseconds(10), cancellationTokenSource.Token);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private sealed class ThrowingDashboardEventBroadcaster : IDashboardEventBroadcaster
|
||||||
|
{
|
||||||
|
private int _publishAttempts;
|
||||||
|
|
||||||
|
public int PublishAttempts => Volatile.Read(ref _publishAttempts);
|
||||||
|
|
||||||
|
public void Publish(string sessionId, MxEvent mxEvent)
|
||||||
|
{
|
||||||
|
Interlocked.Increment(ref _publishAttempts);
|
||||||
|
throw new InvalidOperationException("simulated dashboard broadcaster failure");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private sealed class SingleSessionManager(GatewaySession session) : ISessionManager
|
||||||
|
{
|
||||||
|
public Task<GatewaySession> OpenSessionAsync(
|
||||||
|
SessionOpenRequest request,
|
||||||
|
string? clientIdentity,
|
||||||
|
string? ownerKeyId,
|
||||||
|
CancellationToken cancellationToken) => Task.FromResult(session);
|
||||||
|
|
||||||
|
public bool TryGetSession(string sessionId, out GatewaySession gatewaySession)
|
||||||
|
{
|
||||||
|
gatewaySession = session;
|
||||||
|
return string.Equals(sessionId, session.SessionId, StringComparison.Ordinal);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Task<WorkerCommandReply> InvokeAsync(
|
||||||
|
string sessionId,
|
||||||
|
WorkerCommand command,
|
||||||
|
CancellationToken cancellationToken) => Task.FromResult(new WorkerCommandReply());
|
||||||
|
|
||||||
|
public IAsyncEnumerable<WorkerEvent> ReadEventsAsync(
|
||||||
|
string sessionId,
|
||||||
|
CancellationToken cancellationToken) => session.ReadEventsAsync(cancellationToken);
|
||||||
|
|
||||||
|
public Task<SessionCloseResult> CloseSessionAsync(
|
||||||
|
string sessionId,
|
||||||
|
CancellationToken cancellationToken) =>
|
||||||
|
Task.FromResult(new SessionCloseResult(sessionId, SessionState.Closed, AlreadyClosed: false));
|
||||||
|
|
||||||
|
public Task<SessionCloseResult> KillWorkerAsync(
|
||||||
|
string sessionId,
|
||||||
|
string reason,
|
||||||
|
CancellationToken cancellationToken) =>
|
||||||
|
Task.FromResult(new SessionCloseResult(sessionId, SessionState.Closed, AlreadyClosed: false));
|
||||||
|
|
||||||
|
public Task<int> CloseExpiredLeasesAsync(
|
||||||
|
DateTimeOffset now,
|
||||||
|
CancellationToken cancellationToken) => Task.FromResult(0);
|
||||||
|
|
||||||
|
public Task ShutdownAsync(CancellationToken cancellationToken) => Task.CompletedTask;
|
||||||
|
}
|
||||||
|
|
||||||
|
private sealed class FakeWorkerClient : IWorkerClient
|
||||||
|
{
|
||||||
|
public List<WorkerEvent> Events { get; } = [];
|
||||||
|
|
||||||
|
public bool CompleteAfterConfiguredEvents { get; set; }
|
||||||
|
|
||||||
|
public string SessionId { get; } = "session-dashboard-mirror";
|
||||||
|
|
||||||
|
public int? ProcessId { get; } = 1234;
|
||||||
|
|
||||||
|
public WorkerClientState State { get; } = WorkerClientState.Ready;
|
||||||
|
|
||||||
|
public DateTimeOffset LastHeartbeatAt { get; } = DateTimeOffset.UtcNow;
|
||||||
|
|
||||||
|
public Task StartAsync(CancellationToken cancellationToken) => Task.CompletedTask;
|
||||||
|
|
||||||
|
public Task<WorkerCommandReply> InvokeAsync(
|
||||||
|
WorkerCommand command,
|
||||||
|
TimeSpan timeout,
|
||||||
|
CancellationToken cancellationToken) => Task.FromResult(new WorkerCommandReply());
|
||||||
|
|
||||||
|
public async IAsyncEnumerable<WorkerEvent> ReadEventsAsync(
|
||||||
|
[EnumeratorCancellation] CancellationToken cancellationToken)
|
||||||
|
{
|
||||||
|
foreach (WorkerEvent workerEvent in Events)
|
||||||
|
{
|
||||||
|
cancellationToken.ThrowIfCancellationRequested();
|
||||||
|
yield return workerEvent;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (CompleteAfterConfiguredEvents)
|
||||||
|
{
|
||||||
|
yield break;
|
||||||
|
}
|
||||||
|
|
||||||
|
await Task.Delay(Timeout.InfiniteTimeSpan, cancellationToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Task ShutdownAsync(TimeSpan timeout, CancellationToken cancellationToken) => Task.CompletedTask;
|
||||||
|
|
||||||
|
public void Kill(string reason)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public ValueTask DisposeAsync() => ValueTask.CompletedTask;
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user