feat(sessions): replay-on-reconnect with ReplayGap sentinel

This commit is contained in:
Joseph Doherty
2026-06-16 07:22:19 -04:00
parent 042f5e3d82
commit 36ab8d15f1
7 changed files with 736 additions and 29 deletions
@@ -71,17 +71,80 @@ public sealed class EventStreamService(
// The subscriber mode (single vs. multi) is derived inside AttachEventSubscriber from
// the session's own SessionEventStreaming.AllowMultipleEventSubscribers field — the
// same source the distributor uses — so the two cannot diverge.
IEventSubscriberLease subscriber = session.AttachEventSubscriber(
options.Value.Sessions.MaxEventSubscribersPerSession);
//
// Reconnect/resume (Task 12): when AfterWorkerSequence > 0 the client is resuming, so
// attach via the replay variant that atomically snapshots the replay ring AND registers
// the live subscriber under one lock. That single critical section is the crux of the
// no-gap/no-duplicate handoff: every replayed event has sequence <= LiveResumeSequence
// and every live event delivered below is filtered to sequence > LiveResumeSequence, so
// an event that was both replayed and (racing the registration) fanned into the live
// channel is dropped exactly once, while no newer event is skipped. See
// SessionEventDistributor.RegisterWithReplay for the full argument.
//
// AfterWorkerSequence == 0 (fresh stream, not a resume) keeps the pre-Task-12 behavior:
// a plain attach, no replay, no sentinel, and the live filter watermark stays 0.
ulong afterWorkerSequence = request.AfterWorkerSequence;
IEventSubscriberLease subscriber;
IReadOnlyList<MxEvent> replayedEvents = [];
bool replayGap = false;
ulong oldestAvailableSequence = 0;
if (afterWorkerSequence > 0)
{
EventSubscriberReplayAttachment attachment = session.AttachEventSubscriberWithReplay(
options.Value.Sessions.MaxEventSubscribersPerSession,
afterWorkerSequence);
subscriber = attachment.Lease;
replayedEvents = attachment.ReplayedEvents;
replayGap = attachment.Gap;
oldestAvailableSequence = attachment.OldestAvailableSequence;
// The live filter resumes strictly after the last replayed sequence (or, when
// nothing was replayed, after the requested watermark). This is what makes the
// handoff free of duplicates: anything <= this watermark was already replayed.
afterWorkerSequence = attachment.LiveResumeSequence;
}
else
{
subscriber = session.AttachEventSubscriber(
options.Value.Sessions.MaxEventSubscribersPerSession);
}
int streamQueueDepth = 0;
ulong afterWorkerSequence = request.AfterWorkerSequence;
IAsyncEnumerator<MxEvent> reader = subscriber.Reader
.ReadAllAsync(cancellationToken)
.GetAsyncEnumerator(cancellationToken);
try
{
// Emit order for a resume: the ReplayGap sentinel FIRST (only when events were
// evicted), then the still-retained replay batch, then live. The sentinel is an
// explicit documented control signal (not a synthesized MXAccess event) and is
// delivered ONLY to this resuming subscriber — it is never fanned to other
// subscribers and never appears in DrainEventsReply (that path is untouched).
if (replayGap)
{
yield return CreateReplayGapSentinel(
request.SessionId,
request.AfterWorkerSequence,
oldestAvailableSequence);
}
foreach (MxEvent replayedEvent in replayedEvents)
{
// Replayed events pass through the SAME per-item filter the live loop applies,
// so a constrained/resuming caller never sees a replayed event it could not
// have seen live. The watermark check below drops events at or below the requested
// AfterWorkerSequence; the snapshot already excludes those, so the check is redundant
// here, but keeping it makes the filter identical for replay and live.
if (replayedEvent.WorkerSequence <= request.AfterWorkerSequence)
{
continue;
}
yield return replayedEvent;
}
while (true)
{
MxEvent mxEvent;
@@ -144,4 +207,24 @@ public sealed class EventStreamService(
metrics.StreamDisconnected("Detached");
}
}
// Factory for the one-off ReplayGap control sentinel that is yielded at the very start of a
// resumed StreamEvents stream when the caller's AfterWorkerSequence is older than the oldest
// event still retained (i.e. some events were evicted and cannot be replayed).
//
// Only two fields are populated here: the session id and the ReplayGap payload (the sequence
// the caller asked to resume after, and the oldest sequence that is still available). Every
// other MxEvent field is left at its default, which per the proto contract
// (MxEvent.replay_gap) means family UNSPECIFIED, no body, and no per-item fields.
//
// This is a documented control signal, NOT a synthesized MXAccess event, so emitting it does
// not violate the no-synthesis rule.
private static MxEvent CreateReplayGapSentinel(
    string sessionId,
    ulong requestedAfterSequence,
    ulong oldestAvailableSequence)
{
    // Assign SessionId first, then the gap payload — same order as a nested initializer.
    MxEvent sentinel = new() { SessionId = sessionId };
    sentinel.ReplayGap = new ReplayGap
    {
        RequestedAfterSequence = requestedAfterSequence,
        OldestAvailableSequence = oldestAvailableSequence,
    };
    return sentinel;
}
}