feat(sessions): replay-on-reconnect with ReplayGap sentinel
This commit is contained in:
@@ -71,17 +71,80 @@ public sealed class EventStreamService(
|
||||
// The subscriber mode (single vs. multi) is derived inside AttachEventSubscriber from
|
||||
// the session's own SessionEventStreaming.AllowMultipleEventSubscribers field — the
|
||||
// same source the distributor uses — so the two cannot diverge.
|
||||
IEventSubscriberLease subscriber = session.AttachEventSubscriber(
|
||||
options.Value.Sessions.MaxEventSubscribersPerSession);
|
||||
//
|
||||
// Reconnect/resume (Task 12): when AfterWorkerSequence > 0 the client is resuming, so
|
||||
// attach via the replay variant that atomically snapshots the replay ring AND registers
|
||||
// the live subscriber under one lock. That single critical section is the crux of the
|
||||
// no-gap/no-duplicate handoff: every replayed event has sequence <= LiveResumeSequence
|
||||
// and every live event delivered below is filtered to sequence > LiveResumeSequence, so
|
||||
// an event that was both replayed and (racing the registration) fanned into the live
|
||||
// channel is dropped exactly once, while no newer event is skipped. See
|
||||
// SessionEventDistributor.RegisterWithReplay for the full argument.
|
||||
//
|
||||
// AfterWorkerSequence == 0 (fresh stream, not a resume) keeps the pre-Task-12 behavior:
|
||||
// a plain attach, no replay, no sentinel, and the live filter watermark stays 0.
|
||||
ulong afterWorkerSequence = request.AfterWorkerSequence;
|
||||
IEventSubscriberLease subscriber;
|
||||
IReadOnlyList<MxEvent> replayedEvents = [];
|
||||
bool replayGap = false;
|
||||
ulong oldestAvailableSequence = 0;
|
||||
|
||||
if (afterWorkerSequence > 0)
|
||||
{
|
||||
EventSubscriberReplayAttachment attachment = session.AttachEventSubscriberWithReplay(
|
||||
options.Value.Sessions.MaxEventSubscribersPerSession,
|
||||
afterWorkerSequence);
|
||||
subscriber = attachment.Lease;
|
||||
replayedEvents = attachment.ReplayedEvents;
|
||||
replayGap = attachment.Gap;
|
||||
oldestAvailableSequence = attachment.OldestAvailableSequence;
|
||||
|
||||
// The live filter resumes strictly after the last replayed sequence (or, when
|
||||
// nothing was replayed, after the requested watermark). This is what makes the
|
||||
// handoff free of duplicates: anything <= this watermark was already replayed.
|
||||
afterWorkerSequence = attachment.LiveResumeSequence;
|
||||
}
|
||||
else
|
||||
{
|
||||
subscriber = session.AttachEventSubscriber(
|
||||
options.Value.Sessions.MaxEventSubscribersPerSession);
|
||||
}
|
||||
|
||||
int streamQueueDepth = 0;
|
||||
ulong afterWorkerSequence = request.AfterWorkerSequence;
|
||||
IAsyncEnumerator<MxEvent> reader = subscriber.Reader
|
||||
.ReadAllAsync(cancellationToken)
|
||||
.GetAsyncEnumerator(cancellationToken);
|
||||
|
||||
try
|
||||
{
|
||||
// Emit order for a resume: the ReplayGap sentinel FIRST (only when events were
|
||||
// evicted), then the still-retained replay batch, then live. The sentinel is an
|
||||
// explicit documented control signal (not a synthesized MXAccess event) and is
|
||||
// delivered ONLY to this resuming subscriber — it is never fanned to other
|
||||
// subscribers and never appears in DrainEventsReply (that path is untouched).
|
||||
if (replayGap)
|
||||
{
|
||||
yield return CreateReplayGapSentinel(
|
||||
request.SessionId,
|
||||
request.AfterWorkerSequence,
|
||||
oldestAvailableSequence);
|
||||
}
|
||||
|
||||
foreach (MxEvent replayedEvent in replayedEvents)
|
||||
{
|
||||
// Replayed events pass through the SAME per-item filter the live loop applies,
|
||||
// so a constrained/resuming caller never sees a replayed event it could not
|
||||
// have seen live. The watermark check drops events at/below the requested
|
||||
// AfterWorkerSequence; the snapshot already excluded those, but this keeps the
|
||||
// filter identical for replay and live.
|
||||
if (replayedEvent.WorkerSequence <= request.AfterWorkerSequence)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
yield return replayedEvent;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
MxEvent mxEvent;
|
||||
@@ -144,4 +207,24 @@ public sealed class EventStreamService(
|
||||
metrics.StreamDisconnected("Detached");
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the single ReplayGap control sentinel emitted at the head of a resumed
|
||||
// StreamEvents stream when the requested AfterWorkerSequence predates the oldest event
|
||||
// still retained (events were evicted). Per the proto contract (MxEvent.replay_gap),
|
||||
// the sentinel carries the session id and the populated ReplayGap, with family
|
||||
// UNSPECIFIED, no body, and no per-item fields. It is a documented control signal — NOT a
|
||||
// synthesized MXAccess event — so emitting it does not violate the no-synthesis rule.
|
||||
private static MxEvent CreateReplayGapSentinel(
    string sessionId,
    ulong requestedAfterSequence,
    ulong oldestAvailableSequence)
{
    // Gap payload: the watermark the caller asked to resume after, paired with the
    // oldest sequence value supplied by the caller as still available.
    ReplayGap gap = new()
    {
        RequestedAfterSequence = requestedAfterSequence,
        OldestAvailableSequence = oldestAvailableSequence,
    };

    // Only the session id and the gap payload are set here; every other MxEvent
    // member is left at whatever a freshly constructed instance provides.
    return new MxEvent
    {
        SessionId = sessionId,
        ReplayGap = gap,
    };
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user