281e00b300
Remove the per-call allowMultipleSubscribers param from AttachEventSubscriber and derive the mode internally from _eventStreaming.AllowMultipleEventSubscribers — the same source SessionEventDistributor uses for singleSubscriberMode — so the two can never structurally diverge. The maxSubscribers cap param is kept because MaxEventSubscribersPerSession lives in SessionOptions, which the session does not hold directly (only EventOptions flows through SessionEventStreaming). Other nits: - SubscriberCount XML doc clarifies it includes internal subscribers and differs from GatewaySession.ActiveEventSubscriberCount (external/gRPC only). - SingleSubscriberMode_LoneExternalOverflow test: add Assert.Equal(1, observedSet) guard before the value assertion so the test cannot pass vacuously if the handler never fired. - GatewayOptionsValidator.ValidateSessions: add explanatory code comment documenting why !AllowMultipleEventSubscribers && MaxEventSubscribersPerSession > 1 is NOT rejected as a hard error (the default config ships with this combination; the cap is simply unused in single-subscriber mode, not a behavior bug). - GatewaySession.DetachEventSubscriber: add Debug.Assert before the clamp so a genuine double-decrement surfaces in debug builds.
148 lines
7.2 KiB
C#
148 lines
7.2 KiB
C#
using System.Runtime.CompilerServices;
|
|
using Microsoft.Extensions.Options;
|
|
using ZB.MOM.WW.MxGateway.Contracts.Proto;
|
|
using ZB.MOM.WW.MxGateway.Server.Configuration;
|
|
using ZB.MOM.WW.MxGateway.Server.Metrics;
|
|
using ZB.MOM.WW.MxGateway.Server.Sessions;
|
|
using ZB.MOM.WW.MxGateway.Server.Workers;
|
|
|
|
namespace ZB.MOM.WW.MxGateway.Server.Grpc;
|
|
|
|
/// <summary>
/// Streams a gateway session's events to a caller as an asynchronous sequence by
/// attaching a subscriber lease to the session and reading that lease's channel.
/// </summary>
/// <param name="sessionManager">Used to resolve the session named in the request.</param>
/// <param name="options">Gateway options; supplies <c>Sessions.MaxEventSubscribersPerSession</c>.</param>
/// <param name="metrics">Metrics sink for queue depth, faults, and stream disconnects.</param>
public sealed class EventStreamService(
    ISessionManager sessionManager,
    IOptions<GatewayOptions> options,
    GatewayMetrics metrics) : IEventStreamService
{
    /// <summary>
    /// Streams events from a session to the client asynchronously.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Task 4 rewired this from a per-RPC channel that drained the session directly
    /// to reading the subscriber's lease channel fed by the session's single
    /// <see cref="SessionEventDistributor"/> pump. The pump owns the single drain of
    /// the worker event stream and the worker→public mapping (mirroring the former
    /// <c>ProduceEventsAsync</c>); this loop is the per-subscriber boundary that
    /// applies the per-RPC filter (<c>AfterWorkerSequence</c>), queue-depth metrics,
    /// and the backpressure/overflow policy.
    /// </para>
    /// <para>
    /// Task 6 moved the dashboard mirror OFF this per-RPC loop. The dashboard is now a
    /// first-class internal subscriber on the session's
    /// <see cref="SessionEventDistributor"/> (see <c>GatewaySession.StartDashboardMirror</c>),
    /// so it receives session events even when no gRPC client is streaming. This loop no
    /// longer mirrors to the dashboard. One deliberate consequence: the dashboard now sees
    /// RAW session events, not the per-gRPC-subscriber <c>AfterWorkerSequence</c>-filtered
    /// view this loop applies — the dashboard is a separate LDAP-authenticated monitoring
    /// view that should see the session's full event activity (per-session dashboard ACL is
    /// the separate Task 18).
    /// </para>
    /// <para>
    /// Overflow handling (Task 5): the distributor's per-subscriber channel is bounded
    /// and the pump writes non-blocking. When this subscriber's channel is full the pump
    /// applies the per-subscriber backpressure policy and completes this subscriber's
    /// channel with a <see cref="SessionManagerException"/>
    /// (<see cref="SessionManagerErrorCode.EventQueueOverflow"/>). That terminal fault
    /// surfaces here when the reader's <c>MoveNextAsync</c> throws, and — like the
    /// pre-epic per-RPC overflow — it propagates to the gRPC client unchanged. The
    /// overflow metric, and (in the legacy single-subscriber FailFast case) the session
    /// fault + fault metric, are recorded by the distributor's overflow handler so the
    /// session, the pump, and other subscribers are isolated from this subscriber's
    /// slowness.
    /// </para>
    /// <para>
    /// Cleanup is layered so that every release step runs even if an earlier one throws:
    /// the reader is disposed first, then the subscriber lease, then this stream's
    /// contribution to the queue-depth gauge is removed and the disconnect is recorded.
    /// </para>
    /// </remarks>
    /// <param name="request">
    /// Stream events request; supplies the session id and the <c>AfterWorkerSequence</c>
    /// watermark at or below which events are dropped.
    /// </param>
    /// <param name="cancellationToken">Cancellation token flowed into the channel reader.</param>
    /// <returns>Async enumerable of MX events whose worker sequence is above the watermark.</returns>
    /// <exception cref="SessionManagerException">
    /// Thrown with <see cref="SessionManagerErrorCode.SessionNotFound"/> when the requested
    /// session cannot be resolved.
    /// </exception>
    /// <exception cref="WorkerClientException">
    /// Rethrown when reading the subscriber channel surfaces a worker fault, after the
    /// session has been marked faulted and the fault metric recorded.
    /// </exception>
    public async IAsyncEnumerable<MxEvent> StreamEventsAsync(
        StreamEventsRequest request,
        [EnumeratorCancellation] CancellationToken cancellationToken)
    {
        if (!sessionManager.TryGetSession(request.SessionId, out GatewaySession? session) || session is null)
        {
            throw new SessionManagerException(
                SessionManagerErrorCode.SessionNotFound,
                $"Session {request.SessionId} was not found.");
        }

        // No `using` here — subscriber.Dispose() is called exactly once on the cleanup
        // path below, which also disposes the reader. A `using` declaration would add a
        // second Dispose on the same path and double-decrement the session subscriber count.
        // The subscriber mode (single vs. multi) is derived inside AttachEventSubscriber from
        // the session's own SessionEventStreaming.AllowMultipleEventSubscribers field — the
        // same source the distributor uses — so the two cannot diverge.
        IEventSubscriberLease subscriber = session.AttachEventSubscriber(
            options.Value.Sessions.MaxEventSubscribersPerSession);

        int streamQueueDepth = 0;
        ulong afterWorkerSequence = request.AfterWorkerSequence;
        IAsyncEnumerator<MxEvent> reader = subscriber.Reader
            .ReadAllAsync(cancellationToken)
            .GetAsyncEnumerator(cancellationToken);

        try
        {
            while (true)
            {
                // The read is wrapped in its own try/catch (rather than using
                // `await foreach`) because C# does not allow `yield return` inside a
                // try block that has a catch clause.
                MxEvent mxEvent;
                try
                {
                    if (!await reader.MoveNextAsync().ConfigureAwait(false))
                    {
                        break;
                    }

                    mxEvent = reader.Current;
                }
                catch (WorkerClientException workerException)
                {
                    // The distributor pump completes every subscriber channel with the source
                    // fault when the worker event stream terminates abnormally; that surfaces
                    // here. Mirror the pre-Task-4 ProduceEventsAsync behavior: fault the
                    // session and record the metric, then propagate the terminal fault to the
                    // gRPC client.
                    session.MarkFaulted(workerException.Message);
                    metrics.Fault(WorkerClientErrorCode.WorkerFaulted.ToString());
                    throw;
                }

                // Per-RPC filter stays at the subscriber boundary: each request may resume
                // from a different AfterWorkerSequence, so the shared pump fans raw events and
                // this loop drops the ones at or below the caller's watermark.
                if (mxEvent.WorkerSequence <= afterWorkerSequence)
                {
                    continue;
                }

                // Queue-depth gauge tracks events the pump has fanned into this subscriber's
                // channel but the client has not yet consumed — the same "buffered, not yet
                // delivered" quantity the pre-Task-4 per-RPC channel reported. The bounded
                // subscriber channel supports counting, so reconcile the gauge to the current
                // backlog; falling back to a no-op delta if a channel ever cannot count.
                int backlog = subscriber.Reader.CanCount ? subscriber.Reader.Count : streamQueueDepth;
                int delta = backlog - streamQueueDepth;
                if (delta != 0)
                {
                    streamQueueDepth = backlog;
                    metrics.AdjustGrpcEventStreamQueueDepth(delta);
                }

                yield return mxEvent;
            }
        }
        finally
        {
            try
            {
                await reader.DisposeAsync().ConfigureAwait(false);
            }
            finally
            {
                // Runs even if disposing the reader throws, so the lease is never leaked
                // and the gauge never keeps this stream's contribution.
                ReleaseSubscriber(subscriber, streamQueueDepth);
            }
        }
    }

    /// <summary>
    /// Disposes the subscriber lease, then removes this stream's contribution from the
    /// queue-depth gauge and records the disconnect. The metric updates run even when
    /// disposing the lease throws.
    /// </summary>
    /// <param name="subscriber">Lease obtained from <c>AttachEventSubscriber</c>; disposed exactly once here.</param>
    /// <param name="streamQueueDepth">Amount this stream last added to the queue-depth gauge.</param>
    private void ReleaseSubscriber(IEventSubscriberLease subscriber, int streamQueueDepth)
    {
        try
        {
            subscriber.Dispose();
        }
        finally
        {
            if (streamQueueDepth != 0)
            {
                metrics.AdjustGrpcEventStreamQueueDepth(-streamQueueDepth);
            }

            metrics.StreamDisconnected("Detached");
        }
    }
}
|