fix(sessions): close SessionEventDistributor dispose/register races + add overflow logging
This commit is contained in:
@@ -43,9 +43,17 @@ namespace ZB.MOM.WW.MxGateway.Server.Sessions;
|
||||
/// </remarks>
|
||||
public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// Bounded wait for the pump to stop during disposal. A source factory that
|
||||
/// ignores cancellation must not hang dispose forever; after this window the
|
||||
/// pump is abandoned and subscribers are completed anyway.
|
||||
/// </summary>
|
||||
private static readonly TimeSpan DefaultShutdownTimeout = TimeSpan.FromSeconds(5);
|
||||
|
||||
private readonly string _sessionId;
|
||||
private readonly Func<CancellationToken, IAsyncEnumerable<MxEvent>> _eventSourceFactory;
|
||||
private readonly int _subscriberQueueCapacity;
|
||||
private readonly TimeSpan _shutdownTimeout;
|
||||
private readonly ILogger<SessionEventDistributor> _logger;
|
||||
private readonly ConcurrentDictionary<long, Subscriber> _subscribers = new();
|
||||
private readonly CancellationTokenSource _shutdownCts = new();
|
||||
@@ -84,6 +92,7 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
_sessionId = sessionId;
|
||||
_eventSourceFactory = eventSourceFactory;
|
||||
_subscriberQueueCapacity = subscriberQueueCapacity;
|
||||
_shutdownTimeout = DefaultShutdownTimeout;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
@@ -123,14 +132,15 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
/// </summary>
|
||||
public IEventSubscriberLease Register()
|
||||
{
|
||||
lock (_lifecycleLock)
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
}
|
||||
|
||||
// The pump is the single writer for this channel; readers are single-consumer
|
||||
// (one gRPC stream / dashboard subscriber). Synchronous continuations are
|
||||
// disabled so a slow reader can never stall the pump on its completion.
|
||||
//
|
||||
// FullMode is Wait but the pump currently writes with TryWrite (drop-on-full):
|
||||
// these are deliberately opposite policies and only a placeholder. Task 5 owns
|
||||
// the overflow policy and will reconcile them by either switching the pump to
|
||||
// WriteAsync (true backpressure, honouring Wait) or changing this to a Drop mode.
|
||||
// Do not "fix" the mismatch here — leave the decision to Task 5.
|
||||
Channel<MxEvent> channel = Channel.CreateBounded<MxEvent>(
|
||||
new BoundedChannelOptions(_subscriberQueueCapacity)
|
||||
{
|
||||
@@ -142,10 +152,18 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
|
||||
long id = Interlocked.Increment(ref _nextSubscriberId);
|
||||
Subscriber subscriber = new(id, channel);
|
||||
_subscribers[id] = subscriber;
|
||||
|
||||
// Disposal between the add and a concurrent DisposeAsync: DisposeAsync drains
|
||||
// and completes every subscriber currently in the map, so this entry is covered.
|
||||
// The disposed check AND the map add happen under the same lock with no await
|
||||
// in between. DisposeAsync sets _disposed=true under this same lock before it
|
||||
// calls CompleteAllSubscribers, so once disposal has begun no further subscriber
|
||||
// can be added — closing the Register-after-DisposeAsync window that would
|
||||
// otherwise leave a subscriber's channel never completed.
|
||||
lock (_lifecycleLock)
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
_subscribers[id] = subscriber;
|
||||
}
|
||||
|
||||
return new SubscriberLease(this, subscriber);
|
||||
}
|
||||
|
||||
@@ -172,20 +190,34 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
|
||||
if (pumpTask is not null)
|
||||
{
|
||||
try
|
||||
// Bound the wait: a source factory that ignores cancellation would otherwise
|
||||
// hang dispose forever. If the pump does not stop in time we log and proceed
|
||||
// to complete subscribers anyway; DisposeAsync must not throw on this path.
|
||||
Task completed = await Task.WhenAny(pumpTask, Task.Delay(_shutdownTimeout)).ConfigureAwait(false);
|
||||
if (!ReferenceEquals(completed, pumpTask))
|
||||
{
|
||||
await pumpTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
exception,
|
||||
"Event distributor pump faulted during shutdown for session {SessionId}.",
|
||||
_logger.LogWarning(
|
||||
"Event distributor pump did not stop within {ShutdownTimeoutSeconds}s for session {SessionId}; completing subscribers and abandoning the pump.",
|
||||
_shutdownTimeout.TotalSeconds,
|
||||
_sessionId);
|
||||
}
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
await pumpTask.ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
exception,
|
||||
"Event distributor pump faulted during shutdown for session {SessionId}.",
|
||||
_sessionId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CompleteAllSubscribers(error: null);
|
||||
@@ -209,7 +241,17 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
// drop / disconnect / fault that one subscriber). For the Task 2
|
||||
// skeleton, a non-blocking TryWrite that silently drops on a full
|
||||
// channel is the placeholder so one slow reader never stalls the pump.
|
||||
subscriber.Channel.Writer.TryWrite(mxEvent);
|
||||
if (!subscriber.Channel.Writer.TryWrite(mxEvent))
|
||||
{
|
||||
// Visibility only — Task 5 owns the actual drop/backpressure policy.
|
||||
// Logs identifiers (worker sequence, subscriber id, session) only,
|
||||
// never the event payload or tag values.
|
||||
_logger.LogDebug(
|
||||
"Event distributor dropped event (worker sequence {WorkerSequence}) for subscriber {SubscriberId} in session {SessionId}: channel full.",
|
||||
mxEvent.WorkerSequence,
|
||||
subscriber.Id,
|
||||
_sessionId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,7 +263,9 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
}
|
||||
catch (Exception exception)
|
||||
{
|
||||
_logger.LogDebug(
|
||||
// Unexpected source fault (not the shutdown-cancellation path above) — visible
|
||||
// by default so an event stream silently dying is not lost in Debug noise.
|
||||
_logger.LogError(
|
||||
exception,
|
||||
"Event distributor source faulted for session {SessionId}.",
|
||||
_sessionId);
|
||||
@@ -255,19 +299,17 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
private sealed class SubscriberLease(SessionEventDistributor distributor, Subscriber subscriber)
|
||||
: IEventSubscriberLease
|
||||
{
|
||||
private bool _disposed;
|
||||
private int _leaseDisposed;
|
||||
|
||||
public ChannelReader<MxEvent> Reader => subscriber.Channel.Reader;
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (_disposed)
|
||||
// Atomic check-and-set so concurrent Dispose calls unregister at most once.
|
||||
if (Interlocked.Exchange(ref _leaseDisposed, 1) == 0)
|
||||
{
|
||||
return;
|
||||
distributor.Unregister(subscriber);
|
||||
}
|
||||
|
||||
_disposed = true;
|
||||
distributor.Unregister(subscriber);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,6 +100,18 @@ public sealed class SessionEventDistributorTests
|
||||
await AssertCompletedAsync(leaseB.Reader);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Register_AfterDispose_ThrowsObjectDisposedException()
|
||||
{
|
||||
Channel<MxEvent> source = Channel.CreateUnbounded<MxEvent>();
|
||||
SessionEventDistributor distributor = CreateDistributor(source.Reader);
|
||||
await distributor.StartAsync(CancellationToken.None);
|
||||
|
||||
await distributor.DisposeAsync().AsTask().WaitAsync(ReadTimeout);
|
||||
|
||||
Assert.Throws<ObjectDisposedException>(() => distributor.Register());
|
||||
}
|
||||
|
||||
private static SessionEventDistributor CreateDistributor(ChannelReader<MxEvent> source)
|
||||
=> new(
|
||||
"session-test",
|
||||
|
||||
Reference in New Issue
Block a user