feat(sessions): multi-subscriber cap enforcement + mode-gated FailFast

This commit is contained in:
Joseph Doherty
2026-06-15 15:32:08 -04:00
parent 7b12eebbd1
commit ac42783e36
12 changed files with 399 additions and 81 deletions
@@ -69,7 +69,8 @@ public sealed class EventStreamService(
// block below, which also disposes the reader. A `using` declaration would add a
// second Dispose on the same path and double-decrement the session subscriber count.
IEventSubscriberLease subscriber = session.AttachEventSubscriber(
options.Value.Sessions.AllowMultipleEventSubscribers);
options.Value.Sessions.AllowMultipleEventSubscribers,
options.Value.Sessions.MaxEventSubscribersPerSession);
int streamQueueDepth = 0;
ulong afterWorkerSequence = request.AfterWorkerSequence;
@@ -949,6 +949,7 @@ public sealed class MxAccessGatewayService(
SessionManagerErrorCode.SessionNotFound => StatusCode.NotFound,
SessionManagerErrorCode.SessionNotReady => StatusCode.FailedPrecondition,
SessionManagerErrorCode.EventSubscriberAlreadyActive => StatusCode.ResourceExhausted,
SessionManagerErrorCode.EventSubscriberLimitReached => StatusCode.ResourceExhausted,
SessionManagerErrorCode.EventQueueOverflow => StatusCode.ResourceExhausted,
SessionManagerErrorCode.SessionLimitExceeded => StatusCode.ResourceExhausted,
SessionManagerErrorCode.OpenFailed => StatusCode.Unavailable,
@@ -419,7 +419,8 @@ public sealed class GatewaySession
eventOptions.ReplayRetentionSeconds,
_eventStreaming.DistributorLogger,
_eventStreaming.TimeProvider,
CreateOverflowHandler(eventOptions.BackpressurePolicy));
CreateOverflowHandler(eventOptions.BackpressurePolicy),
singleSubscriberMode: !_eventStreaming.AllowMultipleEventSubscribers);
}
startNow = false;
@@ -680,14 +681,36 @@ public sealed class GatewaySession
/// <summary>
/// Attaches an event subscriber and returns a lease whose
/// <see cref="IEventSubscriberLease.Reader"/> reads the fanned public
/// <see cref="MxEvent"/>s for this subscriber. The single-subscriber guard
/// (Tasks 7/8 relax it) is unchanged: with multi-subscriber disabled a second
/// attach is rejected. The returned lease, when disposed, unregisters the
/// distributor subscriber AND decrements the active-subscriber count.
/// <see cref="MxEvent"/>s for this subscriber. The returned lease, when disposed,
/// unregisters the distributor subscriber AND decrements the active-subscriber count.
/// </summary>
/// <param name="allowMultipleSubscribers">If true, allows multiple concurrent event subscribers.</param>
public IEventSubscriberLease AttachEventSubscriber(bool allowMultipleSubscribers)
/// <param name="allowMultipleSubscribers">
/// When <see langword="false"/>, single-subscriber mode: a second concurrent EXTERNAL
/// subscriber is rejected with <see cref="SessionManagerErrorCode.EventSubscriberAlreadyActive"/>.
/// When <see langword="true"/>, multi-subscriber mode: up to
/// <paramref name="maxSubscribers"/> concurrent EXTERNAL subscribers are allowed; the
/// next attach is rejected with
/// <see cref="SessionManagerErrorCode.EventSubscriberLimitReached"/>.
/// </param>
/// <param name="maxSubscribers">
/// Maximum concurrent external subscribers in multi-subscriber mode
/// (<c>MxGateway:Sessions:MaxEventSubscribersPerSession</c>). Ignored when
/// <paramref name="allowMultipleSubscribers"/> is <see langword="false"/> (the effective
/// cap is then 1). The gateway-owned internal dashboard subscriber is registered
/// directly on the distributor and is NOT counted here, so it never consumes cap budget.
/// </param>
/// <remarks>
/// The count-check-and-increment runs atomically under <c>_syncRoot</c>, so two
/// concurrent attaches racing toward the cap can never both succeed past it. On
/// distributor-register failure the count is rolled back (see the catch below).
/// </remarks>
public IEventSubscriberLease AttachEventSubscriber(bool allowMultipleSubscribers, int maxSubscribers)
{
// Effective cap: 1 in single-subscriber mode, otherwise the configured maximum
// (clamped to at least 1 so a misconfigured non-positive value can never deadlock
// attaches in multi-subscriber mode).
int effectiveCap = allowMultipleSubscribers ? Math.Max(1, maxSubscribers) : 1;
lock (_syncRoot)
{
if (_state != SessionState.Ready || _workerClient?.State != WorkerClientState.Ready)
@@ -697,11 +720,15 @@ public sealed class GatewaySession
$"Session {SessionId} is not ready for event streaming. Current state is {_state}.");
}
if (!allowMultipleSubscribers && _activeEventSubscriberCount > 0)
if (_activeEventSubscriberCount >= effectiveCap)
{
throw new SessionManagerException(
SessionManagerErrorCode.EventSubscriberAlreadyActive,
$"Session {SessionId} already has an active event stream subscriber.");
throw allowMultipleSubscribers
? new SessionManagerException(
SessionManagerErrorCode.EventSubscriberLimitReached,
$"Session {SessionId} has reached its maximum of {effectiveCap} concurrent event stream subscribers.")
: new SessionManagerException(
SessionManagerErrorCode.EventSubscriberAlreadyActive,
$"Session {SessionId} already has an active event stream subscriber.");
}
_activeEventSubscriberCount++;
@@ -13,12 +13,16 @@ namespace ZB.MOM.WW.MxGateway.Server.Sessions;
/// regardless of what the handler does.
/// </summary>
/// <param name="isOnlySubscriber">
/// <see langword="true"/> when the overflowing subscriber is the sole registered
/// subscriber at the moment of overflow (legacy single-subscriber mode). FailFast faults
/// the session only in this case; with multiple subscribers FailFast degrades to a
/// per-subscriber disconnect so one slow consumer never faults a session shared by others.
/// Always <see langword="false"/> for internal subscribers (the dashboard mirror) because
/// <see cref="SessionEventDistributor"/> excludes them from the external-subscriber count.
/// <see langword="true"/> when FailFast is allowed to fault the whole session for this
/// overflow. As of Task 8 this is gated on the SESSION MODE, not a live count: it is
/// <see langword="true"/> only for an external subscriber in single-subscriber mode
/// (<c>AllowMultipleEventSubscribers == false</c>), where at most one external subscriber
/// can ever exist. In multi-subscriber mode it is always <see langword="false"/>, so
/// FailFast degrades to a per-subscriber disconnect and one slow consumer never faults a
/// session shared by others; gating on the fixed mode also removes the Task 5 race where a
/// concurrent registration could make a count snapshot falsely report a sole subscriber.
/// Always <see langword="false"/> for internal subscribers (the dashboard mirror) so a
/// slow/broken dashboard can never fault the session.
/// </param>
/// <param name="isInternal">
/// <see langword="true"/> when the overflowing subscriber is the gateway-owned internal
@@ -40,8 +44,10 @@ public delegate void SubscriberOverflowHandler(bool isOnlySubscriber, bool isInt
/// policy (Task 5) is implemented here: a slow subscriber overflows only its own
/// bounded channel and the pump applies the policy to that subscriber alone (see
/// <see cref="SubscriberOverflowHandler"/> and <c>OnSubscriberOverflow</c>), leaving
/// the pump, the session, and other subscribers running. The class does not yet
/// remove the single-subscriber guard (Tasks 7/8). The ring buffer supports capacity
/// the pump, the session, and other subscribers running. Task 8 made the
/// FailFast-faults-session decision mode-gated: it fires only in single-subscriber
/// mode (<c>singleSubscriberMode</c>), so multi-subscriber FailFast always degrades to
/// a per-subscriber disconnect — see <c>OnSubscriberOverflow</c>. The ring buffer supports capacity
/// eviction (oldest entry dropped when the count exceeds
/// <c>replayBufferCapacity</c>) and age eviction (entries older than
/// <c>replayRetentionSeconds</c> dropped on the next append or query), and is
@@ -83,6 +89,7 @@ public sealed class SessionEventDistributor : IAsyncDisposable
private readonly string _sessionId;
private readonly Func<CancellationToken, IAsyncEnumerable<MxEvent>> _eventSourceFactory;
private readonly int _subscriberQueueCapacity;
private readonly bool _singleSubscriberMode;
private readonly SubscriberOverflowHandler? _overflowHandler;
private readonly TimeSpan _shutdownTimeout;
private readonly ILogger<SessionEventDistributor> _logger;
@@ -134,7 +141,8 @@ public sealed class SessionEventDistributor : IAsyncDisposable
Func<CancellationToken, IAsyncEnumerable<MxEvent>> eventSourceFactory,
int subscriberQueueCapacity,
ILogger<SessionEventDistributor> logger,
SubscriberOverflowHandler? overflowHandler = null)
SubscriberOverflowHandler? overflowHandler = null,
bool singleSubscriberMode = true)
: this(
sessionId,
eventSourceFactory,
@@ -143,7 +151,8 @@ public sealed class SessionEventDistributor : IAsyncDisposable
replayRetentionSeconds: 0,
logger,
TimeProvider.System,
overflowHandler)
overflowHandler,
singleSubscriberMode)
{
}
@@ -181,6 +190,17 @@ public sealed class SessionEventDistributor : IAsyncDisposable
/// handler. When <see langword="null"/> (unit/skeleton use) the offending subscriber is
/// still disconnected but no metric/fault side effect runs.
/// </param>
/// <param name="singleSubscriberMode">
/// <see langword="true"/> when the owning session is in single-subscriber mode
/// (<c>AllowMultipleEventSubscribers == false</c>). This gates the FailFast
/// session-fault decision in <c>OnSubscriberOverflow</c>: an external subscriber that
/// overflows reports <c>isOnlySubscriber == true</c> (legacy FailFast faults the
/// session) ONLY in single-subscriber mode. In multi-subscriber mode it is always
/// <see langword="false"/>, so FailFast degrades to a per-subscriber disconnect and a
/// transient registration race can never falsely fault a shared session (Task 8;
/// resolves the Task 5 REVISIT race). Defaults to <see langword="true"/> so existing
/// call sites and unit tests keep legacy single-subscriber FailFast behavior.
/// </param>
public SessionEventDistributor(
string sessionId,
Func<CancellationToken, IAsyncEnumerable<MxEvent>> eventSourceFactory,
@@ -189,7 +209,8 @@ public sealed class SessionEventDistributor : IAsyncDisposable
double replayRetentionSeconds,
ILogger<SessionEventDistributor> logger,
TimeProvider timeProvider,
SubscriberOverflowHandler? overflowHandler = null)
SubscriberOverflowHandler? overflowHandler = null,
bool singleSubscriberMode = true)
{
ArgumentException.ThrowIfNullOrWhiteSpace(sessionId);
ArgumentNullException.ThrowIfNull(eventSourceFactory);
@@ -202,6 +223,7 @@ public sealed class SessionEventDistributor : IAsyncDisposable
_sessionId = sessionId;
_eventSourceFactory = eventSourceFactory;
_subscriberQueueCapacity = subscriberQueueCapacity;
_singleSubscriberMode = singleSubscriberMode;
_overflowHandler = overflowHandler;
_shutdownTimeout = DefaultShutdownTimeout;
_replayBufferCapacity = replayBufferCapacity;
@@ -416,28 +438,25 @@ public sealed class SessionEventDistributor : IAsyncDisposable
// slow consumer must not fault a session shared by other healthy subscribers.
private void OnSubscriberOverflow(Subscriber subscriber, ulong workerSequence)
{
// Snapshot whether this is the sole subscriber BEFORE we unregister it. This drives
// the FailFast-fault-session-vs-disconnect decision: FailFast only faults the session
// when the overflowing subscriber is the sole subscriber.
// Decide whether FailFast may fault the whole session for this overflow. This is the
// "isOnlySubscriber" signal the legacy single-subscriber FailFast path keys on.
//
// This snapshot is safe in v1 because AllowMultipleEventSubscribers=false is enforced
// by the validator and the single-subscriber guard in AttachEventSubscriber — a
// concurrent second registration is impossible, so the false-FailFast race (two
// subscribers, one overflows, Count reads as 1 after the other concurrently unregisters,
// FailFast wrongly faults the session) cannot occur today.
// Task 8 resolution of the Task 5/7 REVISIT race: gate this on the SESSION MODE
// (_singleSubscriberMode), NOT on a live count snapshot. The old
// `CountExternalSubscribers() == 1` snapshot raced once multi-subscriber became real —
// a concurrent second registration/unregistration could make the count read as 1 with
// two subscribers actually present, producing a false FailFast that faults a shared
// session. The mode is fixed for the session's lifetime, so reading it is race-free:
// - single-subscriber mode: at most one external subscriber can ever exist (the
// AttachEventSubscriber guard enforces it), so an overflowing external subscriber
// IS the sole subscriber — preserve the legacy FailFast session-fault behavior.
// - multi-subscriber mode: never fault the shared session; FailFast degrades to a
// per-subscriber disconnect so one slow consumer cannot punish healthy ones.
//
// REVISIT (Task 7/8): when multi-subscriber is enabled the guard is removed and the
// race window opens — a concurrent second registration could cause Count to read as 1
// here even with two subscribers, producing a false FailFast that faults a shared
// session. Resolve before enabling multi-subscriber.
//
// Task 6: the gateway-owned internal dashboard subscriber is excluded from this
// accounting. (a) An internal subscriber that overflows is NEVER the "only subscriber"
// — a slow/broken dashboard must never fault the session, only disconnect its own
// mirror. (b) Internal subscribers are excluded from the count, so a lone external
// gRPC subscriber still reports isOnlySubscriber==true and preserves the legacy
// FailFast session-fault behavior even while the dashboard mirror is attached.
bool isOnlySubscriber = !subscriber.IsInternal && CountExternalSubscribers() == 1;
// Task 6: the gateway-owned internal dashboard subscriber is excluded — an internal
// subscriber that overflows is NEVER the "only subscriber", so a slow/broken dashboard
// can only disconnect its own mirror and never fault the session.
bool isOnlySubscriber = !subscriber.IsInternal && _singleSubscriberMode;
_logger.LogDebug(
"Event distributor disconnecting subscriber {SubscriberId} in session {SessionId} after queue overflow (worker sequence {WorkerSequence}).",
@@ -473,22 +492,6 @@ public sealed class SessionEventDistributor : IAsyncDisposable
}
}
// Counts external (non-internal) subscribers. Drives the isOnlySubscriber FailFast
// decision so the gateway-owned internal dashboard subscriber never inflates the count.
private int CountExternalSubscribers()
{
int count = 0;
foreach (Subscriber subscriber in _subscribers.Values)
{
if (!subscriber.IsInternal)
{
count++;
}
}
return count;
}
private void CompleteAllSubscribers(Exception? error)
{
foreach (Subscriber subscriber in _subscribers.Values)
@@ -38,13 +38,24 @@ namespace ZB.MOM.WW.MxGateway.Server.Sessions;
/// EventsHub group regardless of whether a gRPC client is streaming. When null
/// (unit tests that don't exercise the dashboard mirror) no mirror is started.
/// </param>
/// <param name="AllowMultipleEventSubscribers">
/// The session's effective multi-subscriber mode (Task 8). Carried here so the session
/// can pass it to its <see cref="SessionEventDistributor"/> at construction — the
/// distributor is created at <c>MarkReady</c> (for the dashboard mirror) before any gRPC
/// subscriber attaches, so the mode cannot be learned from a later
/// <c>AttachEventSubscriber</c> call. The distributor gates its FailFast session-fault
/// decision on this mode (single-subscriber only) instead of a live count snapshot,
/// closing the Task 5 false-FailFast race. Defaults to <see langword="false"/>
/// (single-subscriber) so existing call sites and unit tests are unchanged.
/// </param>
public sealed record SessionEventStreaming(
MxAccessGrpcMapper Mapper,
EventOptions EventOptions,
ILogger<SessionEventDistributor> DistributorLogger,
TimeProvider TimeProvider,
GatewayMetrics Metrics,
IDashboardEventBroadcaster? DashboardBroadcaster = null)
IDashboardEventBroadcaster? DashboardBroadcaster = null,
bool AllowMultipleEventSubscribers = false)
{
/// <summary>
/// Defaults used when a session is constructed without explicit streaming
@@ -461,7 +461,8 @@ public sealed class SessionManager : ISessionManager
_distributorLogger,
_timeProvider,
_metrics,
_dashboardEventBroadcaster);
_dashboardEventBroadcaster,
_options.Sessions.AllowMultipleEventSubscribers);
return new GatewaySession(
sessionId,
@@ -5,6 +5,7 @@ public enum SessionManagerErrorCode
SessionNotFound,
SessionNotReady,
EventSubscriberAlreadyActive,
EventSubscriberLimitReached,
EventQueueOverflow,
SessionLimitExceeded,
OpenFailed,
@@ -151,7 +151,7 @@ public sealed class GatewaySessionDashboardMirrorTests
session.MarkReady();
Assert.Equal(0, session.ActiveEventSubscriberCount);
using IEventSubscriberLease lease = session.AttachEventSubscriber(allowMultipleSubscribers: false);
using IEventSubscriberLease lease = session.AttachEventSubscriber(allowMultipleSubscribers: false, maxSubscribers: 1);
Assert.Equal(1, session.ActiveEventSubscriberCount);
}
@@ -2,10 +2,12 @@ using System.Runtime.CompilerServices;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.MxGateway.Contracts.Proto;
using ZB.MOM.WW.MxGateway.Server.Configuration;
using ZB.MOM.WW.MxGateway.Server.Dashboard.Hubs;
using ZB.MOM.WW.MxGateway.Server.Grpc;
using ZB.MOM.WW.MxGateway.Server.Metrics;
using ZB.MOM.WW.MxGateway.Server.Sessions;
using ZB.MOM.WW.MxGateway.Server.Workers;
using ZB.MOM.WW.MxGateway.Tests.TestSupport;
namespace ZB.MOM.WW.MxGateway.Tests.Gateway.Sessions;
@@ -185,7 +187,7 @@ public sealed class GatewaySessionTests
{
// Attach one subscriber; this increments _activeEventSubscriberCount to 1.
IEventSubscriberLease lease = session.AttachEventSubscriber(
allowMultipleSubscribers: false);
allowMultipleSubscribers: false, maxSubscribers: 1);
// Race Concurrency threads all calling Dispose() on the same lease.
// Only one must actually run DetachEventSubscriber.
@@ -211,7 +213,7 @@ public sealed class GatewaySessionTests
// Observable contract: a fresh single subscriber must now be attachable
// (i.e., the guard _activeEventSubscriberCount > 0 is false).
IEventSubscriberLease next = session.AttachEventSubscriber(
allowMultipleSubscribers: false);
allowMultipleSubscribers: false, maxSubscribers: 1);
next.Dispose();
Assert.Equal(0, session.ActiveEventSubscriberCount);
}
@@ -220,6 +222,220 @@ public sealed class GatewaySessionTests
await session.DisposeAsync();
}
/// <summary>
/// Task 8 regression. Single-subscriber mode rejects a SECOND concurrent external
/// attach with <see cref="SessionManagerErrorCode.EventSubscriberAlreadyActive"/> —
/// the legacy guard is preserved unchanged when multi-subscriber is disabled.
/// </summary>
[Fact]
public async Task AttachEventSubscriber_SingleMode_SecondAttachThrowsAlreadyActive()
{
FakeWorkerClient workerClient = new();
GatewaySession session = CreateReadySessionWithEventStreaming(workerClient);
using IEventSubscriberLease first = session.AttachEventSubscriber(
allowMultipleSubscribers: false, maxSubscribers: 8);
SessionManagerException exception = Assert.Throws<SessionManagerException>(
() => session.AttachEventSubscriber(allowMultipleSubscribers: false, maxSubscribers: 8));
Assert.Equal(SessionManagerErrorCode.EventSubscriberAlreadyActive, exception.ErrorCode);
Assert.Equal(1, session.ActiveEventSubscriberCount);
await session.CloseAsync("test-done", CancellationToken.None);
await session.DisposeAsync();
}
/// <summary>
/// Task 8. Multi-subscriber mode allows exactly <c>cap</c> concurrent external
/// subscribers; the (cap+1)-th attach throws
/// <see cref="SessionManagerErrorCode.EventSubscriberLimitReached"/>.
/// </summary>
[Fact]
public async Task AttachEventSubscriber_MultiMode_AttachesUpToCapThenThrowsLimitReached()
{
const int Cap = 4;
FakeWorkerClient workerClient = new();
GatewaySession session = CreateReadySessionWithEventStreaming(
workerClient,
allowMultipleEventSubscribers: true);
List<IEventSubscriberLease> leases = [];
for (int i = 0; i < Cap; i++)
{
leases.Add(session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap));
}
Assert.Equal(Cap, session.ActiveEventSubscriberCount);
SessionManagerException exception = Assert.Throws<SessionManagerException>(
() => session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap));
Assert.Equal(SessionManagerErrorCode.EventSubscriberLimitReached, exception.ErrorCode);
Assert.Equal(Cap, session.ActiveEventSubscriberCount);
foreach (IEventSubscriberLease lease in leases)
{
lease.Dispose();
}
await session.CloseAsync("test-done", CancellationToken.None);
await session.DisposeAsync();
}
/// <summary>
/// Task 8. The gateway-owned INTERNAL dashboard subscriber must NOT consume cap
/// budget: with the dashboard mirror running, the full cap of external subscribers is
/// still attachable.
/// </summary>
[Fact]
public async Task AttachEventSubscriber_MultiMode_DashboardMirrorDoesNotConsumeCap()
{
const int Cap = 3;
FakeWorkerClient workerClient = new();
RecordingDashboardEventBroadcaster broadcaster = new();
GatewaySession session = CreateReadySessionWithEventStreaming(
workerClient,
allowMultipleEventSubscribers: true,
dashboardBroadcaster: broadcaster);
// The internal dashboard mirror registered on MarkReady is NOT counted as an external
// subscriber, so the external active count starts at zero.
Assert.Equal(0, session.ActiveEventSubscriberCount);
List<IEventSubscriberLease> leases = [];
for (int i = 0; i < Cap; i++)
{
leases.Add(session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap));
}
Assert.Equal(Cap, session.ActiveEventSubscriberCount);
// The (cap+1)-th still fails: the dashboard mirror did not eat a slot.
SessionManagerException exception = Assert.Throws<SessionManagerException>(
() => session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap));
Assert.Equal(SessionManagerErrorCode.EventSubscriberLimitReached, exception.ErrorCode);
foreach (IEventSubscriberLease lease in leases)
{
lease.Dispose();
}
await session.CloseAsync("test-done", CancellationToken.None);
await session.DisposeAsync();
}
/// <summary>
/// Task 8 concurrency. Many concurrent attaches in multi-subscriber mode must never
/// exceed the cap: the count-check-and-increment is atomic under <c>_syncRoot</c>, so
/// exactly <c>cap</c> attaches succeed and the rest throw
/// <see cref="SessionManagerErrorCode.EventSubscriberLimitReached"/>. The observed
/// count never goes above the cap.
/// </summary>
[Fact]
public async Task AttachEventSubscriber_MultiMode_ConcurrentAttaches_NeverExceedCap()
{
const int Cap = 5;
const int Attempts = 32;
TimeSpan testTimeout = TimeSpan.FromSeconds(10);
FakeWorkerClient workerClient = new();
GatewaySession session = CreateReadySessionWithEventStreaming(
workerClient,
allowMultipleEventSubscribers: true);
using SemaphoreSlim gate = new(0);
int successCount = 0;
int limitReachedCount = 0;
int maxObservedCount = 0;
IEventSubscriberLease?[] leases = new IEventSubscriberLease?[Attempts];
Task[] tasks = new Task[Attempts];
for (int t = 0; t < Attempts; t++)
{
int index = t;
tasks[index] = Task.Run(async () =>
{
await gate.WaitAsync(testTimeout);
try
{
IEventSubscriberLease lease = session.AttachEventSubscriber(
allowMultipleSubscribers: true, maxSubscribers: Cap);
leases[index] = lease;
Interlocked.Increment(ref successCount);
}
catch (SessionManagerException exception)
when (exception.ErrorCode == SessionManagerErrorCode.EventSubscriberLimitReached)
{
Interlocked.Increment(ref limitReachedCount);
}
int observed = session.ActiveEventSubscriberCount;
int previousMax;
do
{
previousMax = Volatile.Read(ref maxObservedCount);
if (observed <= previousMax)
{
break;
}
}
while (Interlocked.CompareExchange(ref maxObservedCount, observed, previousMax) != previousMax);
});
}
gate.Release(Attempts);
await Task.WhenAll(tasks).WaitAsync(testTimeout);
Assert.Equal(Cap, successCount);
Assert.Equal(Attempts - Cap, limitReachedCount);
Assert.Equal(Cap, session.ActiveEventSubscriberCount);
Assert.True(maxObservedCount <= Cap, $"Observed count {maxObservedCount} exceeded cap {Cap}.");
foreach (IEventSubscriberLease? lease in leases)
{
lease?.Dispose();
}
await session.CloseAsync("test-done", CancellationToken.None);
await session.DisposeAsync();
}
/// <summary>
/// Task 8. Disposing a subscriber frees a cap slot so a fresh attach succeeds, and a
/// double-dispose does not double-free the slot (count integrity preserved).
/// </summary>
[Fact]
public async Task AttachEventSubscriber_MultiMode_DisposeFreesSlotAndDoubleDisposeIsIdempotent()
{
const int Cap = 2;
FakeWorkerClient workerClient = new();
GatewaySession session = CreateReadySessionWithEventStreaming(
workerClient,
allowMultipleEventSubscribers: true);
IEventSubscriberLease a = session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap);
IEventSubscriberLease b = session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap);
Assert.Equal(Cap, session.ActiveEventSubscriberCount);
// At cap: next attach is rejected.
Assert.Throws<SessionManagerException>(
() => session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap));
// Dispose one — and dispose it twice. The second dispose must not double-free.
a.Dispose();
a.Dispose();
Assert.Equal(1, session.ActiveEventSubscriberCount);
// Exactly one slot is free, so exactly one fresh attach succeeds.
using IEventSubscriberLease c = session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap);
Assert.Equal(Cap, session.ActiveEventSubscriberCount);
Assert.Throws<SessionManagerException>(
() => session.AttachEventSubscriber(allowMultipleSubscribers: true, maxSubscribers: Cap));
b.Dispose();
await session.CloseAsync("test-done", CancellationToken.None);
await session.DisposeAsync();
}
private static GatewaySession CreateReadySession(IWorkerClient workerClient)
{
GatewaySession session = new(
@@ -241,7 +457,10 @@ public sealed class GatewaySessionTests
return session;
}
private static GatewaySession CreateReadySessionWithEventStreaming(IWorkerClient workerClient)
private static GatewaySession CreateReadySessionWithEventStreaming(
IWorkerClient workerClient,
bool allowMultipleEventSubscribers = false,
IDashboardEventBroadcaster? dashboardBroadcaster = null)
{
GatewaySession session = new(
sessionId: "session-test-concurrent",
@@ -262,7 +481,9 @@ public sealed class GatewaySessionTests
new EventOptions { QueueCapacity = 8 },
NullLogger<SessionEventDistributor>.Instance,
TimeProvider.System,
new GatewayMetrics()));
new GatewayMetrics(),
dashboardBroadcaster,
allowMultipleEventSubscribers));
session.AttachWorkerClient(workerClient);
session.MarkReady();
return session;
@@ -355,7 +355,8 @@ public sealed class SessionEventDistributorTests
Interlocked.Increment(ref overflowCalls);
Volatile.Write(ref observedIsOnlySubscriberValue, isOnlySubscriber);
Volatile.Write(ref observedIsOnlySubscriberSet, 1);
});
},
singleSubscriberMode: false);
await distributor.StartAsync(CancellationToken.None);
// Slow subscriber: registered but never read, so its capacity-2 channel fills.
@@ -377,7 +378,7 @@ public sealed class SessionEventDistributorTests
async () => await DrainUntilFaultAsync(slow.Reader));
Assert.Equal(SessionManagerErrorCode.EventQueueOverflow, fault.ErrorCode);
// Two subscribers were registered at overflow time, so isOnlySubscriber is false.
// Multi-subscriber mode, so isOnlySubscriber is always false (Task 8 mode-gating).
// Use Interlocked.Read / Volatile.Read so the test-thread reads are ordered after the
// pump-thread writes, avoiding a data race by the C# memory model.
Assert.Equal(1, Volatile.Read(ref overflowCalls));
@@ -395,15 +396,12 @@ public sealed class SessionEventDistributorTests
public async Task SlowSubscriberOverflow_WithMultipleSubscribers_HandlerSeesIsOnlySubscriberFalse_OtherKeepsReceiving()
{
// Distributor-level pin for "FailFast with multiple subscribers degrades to
// disconnect-only (no session fault)": when the overflowing subscriber is NOT the
// sole subscriber, isOnlySubscriber is false, so a FailFast-wired handler must NOT
// fault the session. This test drives the distributor directly (without GatewaySession)
// with two subscribers and a FailFast-style overflow handler seam, overflows the slow
// one, and asserts (a) isOnlySubscriber==false, (b) the other subscriber keeps
// receiving, and (c) the pump keeps running — all without a GatewaySession.
//
// TODO(Task 8): add a GatewaySession-level "session stays Ready" assertion once
// multi-subscriber config is enabled by the Tasks 7/8 validator/guard change.
// disconnect-only (no session fault)": in multi-subscriber mode isOnlySubscriber is
// always false (Task 8 mode-gating), so a FailFast-wired handler must NOT fault the
// session. This test drives the distributor directly (without GatewaySession) in
// multi-subscriber mode with two subscribers and a FailFast-style overflow handler
// seam, overflows the slow one, and asserts (a) isOnlySubscriber==false, (b) the other
// subscriber keeps receiving, and (c) the pump keeps running.
Channel<MxEvent> source = Channel.CreateUnbounded<MxEvent>();
bool handlerFiredWithFalse = false;
bool sessionFaultWouldBeCalled = false; // tracks if a FailFast path would fault
@@ -427,7 +425,8 @@ public sealed class SessionEventDistributorTests
// Single-subscriber: FailFast would fault the session — must not happen here.
Volatile.Write(ref sessionFaultWouldBeCalled, true);
}
});
},
singleSubscriberMode: false);
await distributor.StartAsync(CancellationToken.None);
// Slow subscriber: never reads, so capacity-2 channel overflows quickly.
@@ -520,6 +519,56 @@ public sealed class SessionEventDistributorTests
"isInternal must be true for a subscriber registered with isInternal: true.");
}
[Fact]
public async Task SingleSubscriberMode_LoneExternalOverflow_HandlerSeesIsOnlySubscriberTrue()
{
// Task 8 mode-gating: in single-subscriber mode a lone external subscriber that
// overflows reports isOnlySubscriber==true, so the legacy FailFast session-fault path
// is preserved. The decision is gated on the fixed session mode, NOT a live count, so
// it is race-free.
Channel<MxEvent> source = Channel.CreateUnbounded<MxEvent>();
int observedSet = 0;
bool observedValue = false;
await using SessionEventDistributor distributor = new(
"session-single-sub",
ct => source.Reader.ReadAllAsync(ct),
subscriberQueueCapacity: 2,
replayBufferCapacity: 0,
replayRetentionSeconds: 0,
NullLogger<SessionEventDistributor>.Instance,
TimeProvider.System,
(isOnlySubscriber, _) =>
{
Volatile.Write(ref observedValue, isOnlySubscriber);
Volatile.Write(ref observedSet, 1);
},
singleSubscriberMode: true);
await distributor.StartAsync(CancellationToken.None);
using IEventSubscriberLease external = distributor.Register();
for (ulong sequence = 1; sequence <= 10; sequence++)
{
source.Writer.TryWrite(Event(sequence));
}
SessionManagerException fault = await Assert.ThrowsAsync<SessionManagerException>(
async () => await DrainUntilFaultAsync(external.Reader));
Assert.Equal(SessionManagerErrorCode.EventQueueOverflow, fault.ErrorCode);
await Task.Run(async () =>
{
using CancellationTokenSource cts = new(ReadTimeout);
while (Volatile.Read(ref observedSet) == 0)
{
await Task.Delay(10, cts.Token);
}
});
Assert.True(Volatile.Read(ref observedValue),
"isOnlySubscriber must be true for a lone external subscriber in single-subscriber mode.");
}
private static async Task DrainUntilFaultAsync(ChannelReader<MxEvent> reader)
{
// Drains any buffered events, then surfaces the channel's completion fault (if any)
@@ -743,7 +743,7 @@ public sealed class SessionManagerTests
GatewaySession session = await manager.OpenSessionAsync(CreateOpenRequest(), "client-1", ownerKeyId: null, CancellationToken.None);
DateTimeOffset now = DateTimeOffset.UtcNow;
session.ExtendLease(now.AddSeconds(-1));
using IDisposable eventSubscriber = session.AttachEventSubscriber(allowMultipleSubscribers: false);
using IDisposable eventSubscriber = session.AttachEventSubscriber(allowMultipleSubscribers: false, maxSubscribers: 1);
int closedCount = await manager.CloseExpiredLeasesAsync(now, CancellationToken.None);