feat(sessions): replay-on-reconnect with ReplayGap sentinel

This commit is contained in:
Joseph Doherty
2026-06-16 07:22:19 -04:00
parent 042f5e3d82
commit 36ab8d15f1
7 changed files with 736 additions and 29 deletions
@@ -71,17 +71,80 @@ public sealed class EventStreamService(
// The subscriber mode (single vs. multi) is derived inside AttachEventSubscriber from
// the session's own SessionEventStreaming.AllowMultipleEventSubscribers field — the
// same source the distributor uses — so the two cannot diverge.
IEventSubscriberLease subscriber = session.AttachEventSubscriber(
options.Value.Sessions.MaxEventSubscribersPerSession);
//
// Reconnect/resume (Task 12): when AfterWorkerSequence > 0 the client is resuming, so
// attach via the replay variant that atomically snapshots the replay ring AND registers
// the live subscriber under one lock. That single critical section is the crux of the
// no-gap/no-duplicate handoff: every replayed event has sequence <= LiveResumeSequence
// and every live event delivered below is filtered to sequence > LiveResumeSequence, so
// an event that was both replayed and (racing the registration) fanned into the live
// channel is dropped exactly once, while no newer event is skipped. See
// SessionEventDistributor.RegisterWithReplay for the full argument.
//
// AfterWorkerSequence == 0 (fresh stream, not a resume) keeps the pre-Task-12 behavior:
// a plain attach, no replay, no sentinel, and the live filter watermark stays 0.
ulong afterWorkerSequence = request.AfterWorkerSequence;
IEventSubscriberLease subscriber;
IReadOnlyList<MxEvent> replayedEvents = [];
bool replayGap = false;
ulong oldestAvailableSequence = 0;
if (afterWorkerSequence > 0)
{
EventSubscriberReplayAttachment attachment = session.AttachEventSubscriberWithReplay(
options.Value.Sessions.MaxEventSubscribersPerSession,
afterWorkerSequence);
subscriber = attachment.Lease;
replayedEvents = attachment.ReplayedEvents;
replayGap = attachment.Gap;
oldestAvailableSequence = attachment.OldestAvailableSequence;
// The live filter resumes strictly after the last replayed sequence (or, when
// nothing was replayed, after the requested watermark). This is what makes the
// handoff free of duplicates: anything <= this watermark was already replayed.
afterWorkerSequence = attachment.LiveResumeSequence;
}
else
{
subscriber = session.AttachEventSubscriber(
options.Value.Sessions.MaxEventSubscribersPerSession);
}
int streamQueueDepth = 0;
ulong afterWorkerSequence = request.AfterWorkerSequence;
IAsyncEnumerator<MxEvent> reader = subscriber.Reader
.ReadAllAsync(cancellationToken)
.GetAsyncEnumerator(cancellationToken);
try
{
// Emit order for a resume: the ReplayGap sentinel FIRST (only when events were
// evicted), then the still-retained replay batch, then live. The sentinel is an
// explicit documented control signal (not a synthesized MXAccess event) and is
// delivered ONLY to this resuming subscriber — it is never fanned to other
// subscribers and never appears in DrainEventsReply (that path is untouched).
if (replayGap)
{
yield return CreateReplayGapSentinel(
request.SessionId,
request.AfterWorkerSequence,
oldestAvailableSequence);
}
foreach (MxEvent replayedEvent in replayedEvents)
{
// Replayed events pass through the SAME per-item filter the live loop applies,
// so a constrained/resuming caller never sees a replayed event it could not
// have seen live. The watermark dropped events at/below the requested
// AfterWorkerSequence; the snapshot already excluded those, but this keeps the
// filter identical for replay and live.
if (replayedEvent.WorkerSequence <= request.AfterWorkerSequence)
{
continue;
}
yield return replayedEvent;
}
while (true)
{
MxEvent mxEvent;
@@ -144,4 +207,24 @@ public sealed class EventStreamService(
metrics.StreamDisconnected("Detached");
}
}
// Creates the one ReplayGap control sentinel that heads a resumed StreamEvents stream when
// the requested AfterWorkerSequence is older than the oldest event still retained (events
// were evicted). Following the proto contract (MxEvent.replay_gap), only the session id and
// the populated ReplayGap are set here: family stays UNSPECIFIED and there is no body and no
// per-item field. The sentinel is a documented control signal, not a synthesized MXAccess
// event, so emitting it stays within the no-synthesis rule.
private static MxEvent CreateReplayGapSentinel(
    string sessionId,
    ulong requestedAfterSequence,
    ulong oldestAvailableSequence)
{
    // Session id first, gap payload second: the same assignment order as an object initializer.
    MxEvent sentinel = new() { SessionId = sessionId };
    sentinel.ReplayGap = new ReplayGap
    {
        RequestedAfterSequence = requestedAfterSequence,
        OldestAvailableSequence = oldestAvailableSequence,
    };
    return sentinel;
}
}
@@ -0,0 +1,43 @@
using ZB.MOM.WW.MxGateway.Contracts.Proto;
namespace ZB.MOM.WW.MxGateway.Server.Sessions;
/// <summary>
/// The result of a reconnect/resume attach
/// (<see cref="GatewaySession.AttachEventSubscriberWithReplay"/>, Task 12): the live
/// subscriber lease plus the replay batch and resume watermarks snapshotted atomically
/// with the registration, so the replay→live handoff has no gap and no duplicate.
/// </summary>
/// <remarks>
/// The consumer is expected to emit, in this order: the <c>ReplayGap</c> sentinel (only when
/// <paramref name="Gap"/> is <see langword="true"/>), then <paramref name="ReplayedEvents"/>,
/// then live events read from <paramref name="Lease"/> filtered to sequences strictly greater
/// than <paramref name="LiveResumeSequence"/>.
/// <para>
/// This is a value type, so <c>default(EventSubscriberReplayAttachment)</c> carries a
/// <see langword="null"/> <paramref name="Lease"/> and <paramref name="ReplayedEvents"/>; the
/// "never null" guarantees below describe instances returned by the attach call, not the
/// default value.
/// </para>
/// </remarks>
/// <param name="Lease">
/// The live event subscriber lease. Disposing it unregisters the distributor subscriber
/// and decrements the session's active-subscriber count, exactly as a fresh attach.
/// </param>
/// <param name="ReplayedEvents">
/// Retained events with worker sequence strictly greater than the requested
/// <c>afterSequence</c>, in ascending order. These must be yielded (after the optional
/// gap sentinel) before live events. Never null; empty when nothing newer is retained.
/// </param>
/// <param name="Gap">
/// <see langword="true"/> when events between the requested <c>afterSequence</c> and the
/// oldest retained event were already evicted, so the client missed unrecoverable events.
/// When <see langword="true"/> the caller emits a <c>ReplayGap</c> sentinel before the
/// replay batch.
/// </param>
/// <param name="OldestAvailableSequence">
/// The oldest worker sequence still retained and replayable; <c>0</c> when nothing is
/// retained. Populates the <c>ReplayGap.oldest_available_sequence</c> field. Meaningful
/// only when <paramref name="Gap"/> is <see langword="true"/>.
/// </param>
/// <param name="LiveResumeSequence">
/// The worker sequence the live channel must resume strictly after: the highest replayed
/// sequence, or the requested <c>afterSequence</c> when nothing was replayed. The caller
/// applies this as the per-subscriber live filter so any event both replayed and fanned
/// into the live channel is dropped exactly once (no duplicate) while every newer event
/// is delivered (no gap).
/// </param>
public readonly record struct EventSubscriberReplayAttachment(
IEventSubscriberLease Lease,
IReadOnlyList<MxEvent> ReplayedEvents,
bool Gap,
ulong OldestAvailableSequence,
ulong LiveResumeSequence);
@@ -433,6 +433,32 @@ public sealed class GatewaySession
return lease;
}
// Resume-path sibling of StartDistributorAndRegister (Task 12). Delegates to
// SessionEventDistributor.RegisterWithReplay, which captures the retained events newer than
// afterSequence and registers the live subscriber as one atomic step under the distributor's
// replay lock; that atomicity is what keeps the replay→live handoff free of gaps and
// duplicates. As on the fresh-attach path, the pump is only started AFTER registration, so the
// very first subscriber on a freshly-Ready session still sees the stream from its beginning.
private IEventSubscriberLease StartDistributorAndRegisterWithReplay(
    ulong afterSequence,
    out IReadOnlyList<MxEvent> replayedEvents,
    out bool gap,
    out ulong oldestAvailableSequence,
    out ulong liveResumeSequence)
{
    // shouldStartPump is true only for the caller that claimed the pump start.
    SessionEventDistributor eventDistributor = EnsureDistributorCreated(out bool shouldStartPump);

    // Register first: snapshot and live registration happen together inside the distributor.
    IEventSubscriberLease registration = eventDistributor.RegisterWithReplay(
        afterSequence: afterSequence,
        replayedEvents: out replayedEvents,
        gap: out gap,
        oldestAvailableSequence: out oldestAvailableSequence,
        liveResumeSequence: out liveResumeSequence);

    // Only now may the pump begin fanning events out.
    StartPumpIfRequested(eventDistributor, shouldStartPump);

    return registration;
}
// Constructs the distributor exactly once and reports whether THIS caller is the one
// that should start the pump (i.e. it observed the unstarted state and claimed the
// start). Both the construction and the started-flag flip happen under _syncRoot so two
@@ -811,6 +837,75 @@ public sealed class GatewaySession
}
}
/// <summary>
/// Resume-capable counterpart of <see cref="AttachEventSubscriber"/> (Task 12). Claims a
/// subscriber slot on the session, then captures the retained events newer than
/// <paramref name="afterSequence"/> and registers the live subscriber in one atomic
/// distributor operation, so a reconnecting client can replay what it missed and continue
/// live with no gap and no duplicate across the handoff.
/// </summary>
/// <param name="maxSubscribers">
/// See <see cref="AttachEventSubscriber"/>. Only consulted when the session allows multiple
/// event subscribers, and floored at 1; otherwise the cap is exactly 1.
/// </param>
/// <param name="afterSequence">
/// The last worker sequence the resuming client already observed. Replay yields events
/// strictly newer than this; the caller must filter the live channel to events strictly
/// newer than <see cref="EventSubscriberReplayAttachment.LiveResumeSequence"/>.
/// </param>
/// <returns>
/// The lease together with the replay batch, gap flag, and resume watermarks. See
/// <see cref="SessionEventDistributor.RegisterWithReplay"/> for the no-gap/no-duplicate
/// guarantee.
/// </returns>
/// <exception cref="SessionManagerException">
/// The session or its worker client is not Ready, or the subscriber cap is already reached.
/// </exception>
public EventSubscriberReplayAttachment AttachEventSubscriberWithReplay(int maxSubscribers, ulong afterSequence)
{
    bool multiSubscriberMode = _eventStreaming.AllowMultipleEventSubscribers;
    int subscriberCap = 1;
    if (multiSubscriberMode)
    {
        subscriberCap = Math.Max(1, maxSubscribers);
    }

    // Readiness check and slot reservation happen together under the session lock.
    lock (_syncRoot)
    {
        bool notReady = _state != SessionState.Ready || _workerClient?.State != WorkerClientState.Ready;
        if (notReady)
        {
            throw new SessionManagerException(
                SessionManagerErrorCode.SessionNotReady,
                $"Session {SessionId} is not ready for event streaming. Current state is {_state}.");
        }

        bool capReached = _activeEventSubscriberCount >= subscriberCap;
        if (capReached && multiSubscriberMode)
        {
            throw new SessionManagerException(
                SessionManagerErrorCode.EventSubscriberLimitReached,
                $"Session {SessionId} has reached its maximum of {subscriberCap} concurrent event stream subscribers.");
        }

        if (capReached)
        {
            throw new SessionManagerException(
                SessionManagerErrorCode.EventSubscriberAlreadyActive,
                $"Session {SessionId} already has an active event stream subscriber.");
        }

        _activeEventSubscriberCount++;
        _detachedAtUtc = null;
    }

    try
    {
        IEventSubscriberLease innerLease = StartDistributorAndRegisterWithReplay(
            afterSequence,
            out IReadOnlyList<MxEvent> replayBatch,
            out bool gapDetected,
            out ulong oldestRetained,
            out ulong resumeAfter);

        EventSubscriberLease sessionLease = new(this, innerLease);
        return new EventSubscriberReplayAttachment(
            sessionLease,
            replayBatch,
            gapDetected,
            oldestRetained,
            resumeAfter);
    }
    catch
    {
        // The slot was reserved above; give it back before surfacing the failure.
        DetachEventSubscriber();
        throw;
    }
}
/// <summary>
/// Invokes a worker command synchronously and returns the reply.
/// </summary>
@@ -287,30 +287,14 @@ public sealed class SessionEventDistributor : IAsyncDisposable
/// </param>
public IEventSubscriberLease Register(bool isInternal = false)
{
// The pump is the single writer for this channel; readers are single-consumer
// (one gRPC stream / dashboard subscriber). Synchronous continuations are
// disabled so a slow reader can never stall the pump on its completion.
//
// The pump MUST stay non-blocking: it writes with the non-blocking TryWrite so one
// slow reader can never stall the single pump that feeds every subscriber. FullMode
// is deliberately Wait — NOT because the pump ever blocks (it never calls the blocking
// WriteAsync overload), but because Wait is the only BoundedChannelFullMode under
// which TryWrite returns false when the channel is full. That false return IS the
// overflow signal the pump needs to apply the per-subscriber backpressure policy. The
// Drop* modes would make TryWrite silently succeed-and-drop, hiding overflow and
// re-introducing the silent data loss this task removes. So: Wait mode + TryWrite =
// a non-blocking pump that still detects a full subscriber channel.
Channel<MxEvent> channel = Channel.CreateBounded<MxEvent>(
new BoundedChannelOptions(_subscriberQueueCapacity)
{
SingleReader = true,
SingleWriter = true,
FullMode = BoundedChannelFullMode.Wait,
AllowSynchronousContinuations = false,
});
Channel<MxEvent> channel = CreateSubscriberChannel();
// Interlocked.Increment hands out the subscriber id atomically, so no lock is needed here.
long id = Interlocked.Increment(ref _nextSubscriberId);
Subscriber subscriber = new(id, channel, isInternal);
// RegisterSubscriber performs the disposed check and the map add under _lifecycleLock.
return RegisterSubscriber(subscriber);
}
private IEventSubscriberLease RegisterSubscriber(Subscriber subscriber)
{
// The disposed check AND the map add happen under the same lock with no await
// in between. DisposeAsync sets _disposed=true under this same lock before it
@@ -320,7 +304,152 @@ public sealed class SessionEventDistributor : IAsyncDisposable
lock (_lifecycleLock)
{
ObjectDisposedException.ThrowIf(_disposed, this);
_subscribers[id] = subscriber;
_subscribers[subscriber.Id] = subscriber;
}
return new SubscriberLease(this, subscriber);
}
// Builds the bounded channel that backs one subscriber. There is exactly one writer (the
// pump) and one reader (a single gRPC stream / dashboard subscriber), and synchronous
// continuations are turned off so a slow reader can never stall the pump on its completion.
//
// The pump must never block: it only ever uses the non-blocking TryWrite, never the blocking
// WriteAsync overload, so one slow reader cannot hold up the single pump feeding everyone.
// FullMode is Wait on purpose. Wait is the only BoundedChannelFullMode under which TryWrite
// returns false on a full channel, and that false return is the overflow signal the pump
// needs in order to apply the per-subscriber backpressure policy. Any Drop* mode would let
// TryWrite silently succeed-and-drop, hiding overflow and bringing back silent data loss.
// In short: Wait mode + TryWrite = a non-blocking pump that still detects a full channel.
private Channel<MxEvent> CreateSubscriberChannel()
{
    BoundedChannelOptions channelOptions = new(_subscriberQueueCapacity)
    {
        SingleReader = true,
        SingleWriter = true,
        FullMode = BoundedChannelFullMode.Wait,
        AllowSynchronousContinuations = false,
    };

    return Channel.CreateBounded<MxEvent>(channelOptions);
}
/// <summary>
/// Atomically snapshots the replay ring for events newer than
/// <paramref name="afterSequence"/> AND registers a live subscriber, so the
/// replay→live handoff has no gap and no duplicate (Task 12 reconnect/resume).
/// </summary>
/// <param name="afterSequence">
/// The last worker sequence the reconnecting client already observed. Replay returns
/// events strictly newer than this; the live channel is filtered (by the caller) to
/// events strictly newer than the last replayed sequence.
/// </param>
/// <param name="replayedEvents">
/// The retained events newer than <paramref name="afterSequence"/>, in ascending
/// sequence order. Never null; empty when nothing newer is retained.
/// </param>
/// <param name="gap">
/// <see langword="true"/> when events between <paramref name="afterSequence"/> and the
/// oldest retained event were already evicted (capacity/age), so the client missed
/// events that can no longer be replayed and must re-snapshot. Mirrors
/// <see cref="TryGetReplayFrom"/> gap semantics.
/// </param>
/// <param name="oldestAvailableSequence">
/// The oldest worker sequence still retained and replayable. <c>0</c> when nothing is
/// retained. Meaningful to the caller only when <paramref name="gap"/> is
/// <see langword="true"/> (it populates the ReplayGap sentinel's
/// <c>oldest_available_sequence</c>).
/// </param>
/// <param name="liveResumeSequence">
/// The worker sequence the live channel must resume strictly after: the highest
/// replayed sequence, or <paramref name="afterSequence"/> when nothing was replayed.
/// The caller MUST apply this as the per-subscriber live filter so any event that was
/// both replayed here and subsequently fanned into this subscriber's live channel is
/// dropped exactly once (no duplicate), while every newer event is delivered (no gap).
/// </param>
/// <param name="isInternal">
/// <see langword="true"/> for a gateway-owned internal subscriber. See
/// <see cref="Register"/>.
/// </param>
/// <remarks>
/// <para>
/// <b>Why this is atomic and the handoff is correct.</b> The replay snapshot and the
/// subscriber registration both run inside the SAME <c>_replayLock</c> critical
/// section. The pump appends each event to the replay buffer under <c>_replayLock</c>
/// <em>before</em> fanning it to subscribers (outside the lock). Therefore, relative
/// to this method's critical section, for every event E:
/// </para>
/// <list type="bullet">
/// <item>
/// If the pump appended E before this critical section, E is in
/// <paramref name="replayedEvents"/> (when newer than
/// <paramref name="afterSequence"/>). The pump's fan-out of E may race the
/// registration: if it writes E to this new channel too, E's sequence is
/// <c>&lt;= liveResumeSequence</c>, so the caller's live filter DROPS it — no
/// duplicate.
/// </item>
/// <item>
/// If the pump appends E after this critical section, E is NOT in the snapshot,
/// but this subscriber is already registered, so the pump fans E into the live
/// channel with sequence <c>&gt; liveResumeSequence</c> — delivered as live, no
/// gap.
/// </item>
/// </list>
/// <para>
/// Lock ordering: this is the only path that holds both <c>_replayLock</c> and
/// <c>_lifecycleLock</c>; it always takes <c>_replayLock</c> first then
/// <c>_lifecycleLock</c>. No other path acquires both, so there is no inversion.
/// </para>
/// </remarks>
public IEventSubscriberLease RegisterWithReplay(
ulong afterSequence,
out IReadOnlyList<MxEvent> replayedEvents,
out bool gap,
out ulong oldestAvailableSequence,
out ulong liveResumeSequence,
bool isInternal = false)
{
Channel<MxEvent> channel = CreateSubscriberChannel();
long id = Interlocked.Increment(ref _nextSubscriberId);
Subscriber subscriber = new(id, channel, isInternal);
// Snapshot replay AND register under a single _replayLock section so the live channel
// begins exactly where the replay snapshot ends — see the remarks for the no-gap /
// no-duplicate argument. _lifecycleLock is nested inside (consistent ordering) only to
// honor the disposed check and the same add semantics as Register.
lock (_replayLock)
{
EvictAged();
List<MxEvent> newer = [];
ulong highestReplayed = afterSequence;
if (_replayBuffer.Count == 0)
{
oldestAvailableSequence = 0;
gap = _anyEventSeen && afterSequence < _highestSequenceSeen;
}
else
{
oldestAvailableSequence = _replayBuffer.First!.Value.Event.WorkerSequence;
gap = oldestAvailableSequence > 0 && afterSequence < oldestAvailableSequence - 1;
foreach (ReplayEntry entry in _replayBuffer)
{
if (entry.Event.WorkerSequence > afterSequence)
{
newer.Add(entry.Event);
highestReplayed = entry.Event.WorkerSequence;
}
}
}
replayedEvents = newer;
liveResumeSequence = highestReplayed;
lock (_lifecycleLock)
{
ObjectDisposedException.ThrowIf(_disposed, this);
_subscribers[id] = subscriber;
}
}
return new SubscriberLease(this, subscriber);