fix: resolve code-review findings (locally verified)
Server-054/055/056, Contracts-020/021/022, Tests-036/038/039, IntegrationTests-030/031/032 (+033 deferred to live rig), Client.Dotnet-026/028/029 (+027 won't-fix), Client.Go-030..034, Client.Python-032..036, Client.Rust-033..038. Key fix: SessionEventDistributor orphaned a subscriber that registered after the pump completed but before disposal (Server-056) -> register paths now complete late registrants under _lifecycleLock; regression test added. The racy dashboard-mirror gRPC test was made deterministic (Tests-039). Verified green locally: gateway Tests targeted classes (GatewaySession, SessionEventDistributor, GatewayOptionsValidator, ProtobufContractRoundTrip, GatewaySessionDashboardMirror) + dotnet/go/python/rust client suites.
This commit is contained in:
@@ -25,6 +25,12 @@ public sealed class GatewaySession
|
||||
private readonly TimeSpan _detachGrace;
|
||||
private readonly TimeSpan _workerReadyWaitTimeout;
|
||||
private DateTimeOffset? _detachedAtUtc;
|
||||
// True once at least one external subscriber attached SUCCESSFULLY. Detach-grace's
|
||||
// "last subscriber dropped" stamp (see DetachEventSubscriber) is gated on this so a
|
||||
// FAILED first attach — which still runs the rollback DetachEventSubscriber from the
|
||||
// attach catch path — does not push a never-subscribed session into the grace window
|
||||
// (Server-055).
|
||||
private bool _everHadEventSubscriber;
|
||||
private SessionEventDistributor? _eventDistributor;
|
||||
private bool _eventDistributorStarted;
|
||||
private bool _dashboardMirrorStarted;
|
||||
@@ -842,6 +848,7 @@ public sealed class GatewaySession
|
||||
try
|
||||
{
|
||||
IEventSubscriberLease distributorLease = StartDistributorAndRegister();
|
||||
MarkEventSubscriberAttached();
|
||||
return new EventSubscriberLease(this, distributorLease);
|
||||
}
|
||||
catch
|
||||
@@ -906,6 +913,7 @@ public sealed class GatewaySession
|
||||
out ulong oldestAvailableSequence,
|
||||
out ulong liveResumeSequence);
|
||||
|
||||
MarkEventSubscriberAttached();
|
||||
return new EventSubscriberReplayAttachment(
|
||||
new EventSubscriberLease(this, distributorLease),
|
||||
replayedEvents,
|
||||
@@ -920,6 +928,17 @@ public sealed class GatewaySession
|
||||
}
|
||||
}
|
||||
|
||||
// Records that an external subscriber attached successfully. Gates the detach-grace
|
||||
// "last subscriber dropped" stamp so a FAILED first attach (which still rolls back via
|
||||
// DetachEventSubscriber) never pushes a never-subscribed session into grace (Server-055).
|
||||
private void MarkEventSubscriberAttached()
|
||||
{
|
||||
// Write the flag under _syncRoot: the detach-grace condition that reads
// _everHadEventSubscriber is described elsewhere in this file as serializing on
// _syncRoot, so the write here must use the same lock to be visible to it.
lock (_syncRoot)
|
||||
{
|
||||
// One-way latch: set on a successful attach; no code visible here clears it.
_everHadEventSubscriber = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Invokes a worker command synchronously and returns the reply.
|
||||
/// </summary>
|
||||
@@ -1862,7 +1881,12 @@ public sealed class GatewaySession
|
||||
// Closing/Closed/Faulted there is nothing to retain. This is the detach→grace-start
|
||||
// transition; it shares _syncRoot with the reattach→grace-cancel write above and the
|
||||
// sweeper's IsDetachGraceExpired read, so the three serialize.
|
||||
if (_detachGrace > TimeSpan.Zero
|
||||
// Only stamp a detach that mirrors a prior SUCCESSFUL attach. The attach catch path
|
||||
// calls this same method to roll back a reserved slot when the FIRST attach failed
|
||||
// before any subscriber registered; that never-subscribed session must not enter the
|
||||
// grace window (Server-055).
|
||||
if (_everHadEventSubscriber
|
||||
&& _detachGrace > TimeSpan.Zero
|
||||
&& _activeEventSubscriberCount == 0
|
||||
&& _state is not (SessionState.Closing or SessionState.Closed or SessionState.Faulted))
|
||||
{
|
||||
|
||||
@@ -116,6 +116,17 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
private bool _started;
|
||||
private bool _disposed;
|
||||
|
||||
// Set once the pump has run its final CompleteAllSubscribers sweep — the event source
|
||||
// completed or faulted and the pump exited. Guarded by _lifecycleLock together with the
|
||||
// subscriber add. A subscriber that registers AFTER this point but BEFORE DisposeAsync
|
||||
// (the source ended but the session is not yet torn down) would otherwise be added with a
|
||||
// channel the now-exited pump never completes, hanging its reader forever. The register
|
||||
// paths complete such a late registrant's channel immediately with the same terminal
|
||||
// state. _completionError carries the terminal exception (source fault) or null (graceful
|
||||
// source completion), mirroring what the final CompleteAllSubscribers passed.
|
||||
private bool _completed;
|
||||
private Exception? _completionError;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a per-session event distributor.
|
||||
/// </summary>
|
||||
@@ -304,6 +315,16 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
_subscribers[subscriber.Id] = subscriber;
|
||||
|
||||
// Close the register-after-pump-completion window: if the pump already ran its
|
||||
// final CompleteAllSubscribers (source completed/faulted) but the distributor is
|
||||
// not yet disposed, no further completion sweep will run, so complete this late
|
||||
// registrant's channel now with the same terminal state instead of leaving its
|
||||
// reader hanging.
|
||||
if (_completed)
|
||||
{
|
||||
subscriber.Channel.Writer.TryComplete(_completionError);
|
||||
}
|
||||
}
|
||||
|
||||
return new SubscriberLease(this, subscriber);
|
||||
@@ -450,6 +471,14 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
_subscribers[id] = subscriber;
|
||||
|
||||
// Same register-after-pump-completion guard as Register: a resume that races in
|
||||
// after the source already ended still gets its retained replay batch (snapshot
|
||||
// above), but its live channel must be completed now since the pump is gone.
|
||||
if (_completed)
|
||||
{
|
||||
subscriber.Channel.Writer.TryComplete(_completionError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -628,9 +657,21 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
|
||||
// Completes the channel writer of every currently registered subscriber with `error`
// (null for a graceful end) and records the terminal state in _completed /
// _completionError so the register paths can complete late registrants themselves.
private void CompleteAllSubscribers(Exception? error)
|
||||
{
|
||||
// NOTE(review): this listing is a diff hunk whose +/- markers were lost. The next line
// looks like the PRE-change loop header (the same foreach reappears inside the lock
// below, and two declarations of `subscriber` could not coexist) — confirm against the
// repository before treating it as live code.
foreach (Subscriber subscriber in _subscribers.Values)
|
||||
// Record the terminal state AND complete the current subscribers under _lifecycleLock
|
||||
// so this serializes with the subscriber-add in Register/RegisterWithReplay: a
|
||||
// subscriber added before this runs is in the map and completed by the loop; one that
|
||||
// races in afterward sees _completed and completes its own channel in the register
|
||||
// path. Exactly one of the two completes each subscriber. TryComplete is non-blocking
|
||||
// and (channels use AllowSynchronousContinuations=false) runs no continuation inline,
|
||||
// so holding the lock across the loop cannot stall or re-enter.
|
||||
lock (_lifecycleLock)
|
||||
{
|
||||
// NOTE(review): likely the PRE-change loop body kept by the marker-less diff; its
// post-change counterpart is the TryComplete call inside the foreach below — confirm.
subscriber.Channel.Writer.TryComplete(error);
|
||||
// Publish the terminal state before sweeping, all within the same lock hold, so a
// registrant that acquires _lifecycleLock afterwards observes both fields.
_completed = true;
|
||||
_completionError = error;
|
||||
foreach (Subscriber subscriber in _subscribers.Values)
|
||||
{
|
||||
subscriber.Channel.Writer.TryComplete(error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user