feat(sessions): add bounded replay ring buffer to SessionEventDistributor
This commit is contained in:
@@ -55,10 +55,24 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
private readonly int _subscriberQueueCapacity;
|
||||
private readonly TimeSpan _shutdownTimeout;
|
||||
private readonly ILogger<SessionEventDistributor> _logger;
|
||||
private readonly TimeProvider _timeProvider;
|
||||
private readonly ConcurrentDictionary<long, Subscriber> _subscribers = new();
|
||||
private readonly CancellationTokenSource _shutdownCts = new();
|
||||
private readonly object _lifecycleLock = new();
|
||||
|
||||
// Replay ring buffer. Appended on the pump thread and queried from arbitrary
|
||||
// threads via TryGetReplayFrom, so every access is under _replayLock. The deque
|
||||
// keeps events in ascending WorkerSequence order (the pump fans in source order),
|
||||
// so the oldest retained event is always at the front. Capacity == 0 disables
|
||||
// retention; RetentionSeconds <= 0 disables age-based eviction.
|
||||
private readonly int _replayBufferCapacity;
|
||||
private readonly TimeSpan _replayRetention;
|
||||
private readonly bool _ageEvictionEnabled;
|
||||
private readonly LinkedList<ReplayEntry> _replayBuffer = new();
|
||||
private readonly object _replayLock = new();
|
||||
private bool _anyEventSeen;
|
||||
private ulong _highestSequenceSeen;
|
||||
|
||||
private long _nextSubscriberId;
|
||||
private Task? _pumpTask;
|
||||
private bool _started;
|
||||
@@ -78,22 +92,80 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
/// queue capacity shape used today.
|
||||
/// </param>
|
||||
/// <param name="logger">Logger for pump lifecycle diagnostics.</param>
|
||||
/// <remarks>
|
||||
/// This overload disables the replay ring buffer (capacity 0). Use the overload
|
||||
/// taking replay parameters to retain events for reconnect/reattach replay.
|
||||
/// </remarks>
|
||||
public SessionEventDistributor(
|
||||
string sessionId,
|
||||
Func<CancellationToken, IAsyncEnumerable<MxEvent>> eventSourceFactory,
|
||||
int subscriberQueueCapacity,
|
||||
ILogger<SessionEventDistributor> logger)
|
||||
: this(
|
||||
sessionId,
|
||||
eventSourceFactory,
|
||||
subscriberQueueCapacity,
|
||||
replayBufferCapacity: 0,
|
||||
replayRetentionSeconds: 0,
|
||||
logger,
|
||||
TimeProvider.System)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a per-session event distributor with a bounded replay ring buffer.
|
||||
/// </summary>
|
||||
/// <param name="sessionId">Owning session id, used only for logging context.</param>
|
||||
/// <param name="eventSourceFactory">
|
||||
/// Factory producing the session's event stream given a cancellation token.
|
||||
/// The pump consumes this exactly once. See the type remarks for the seam Task 4
|
||||
/// plugs into.
|
||||
/// </param>
|
||||
/// <param name="subscriberQueueCapacity">
|
||||
/// Bounded capacity of each per-subscriber channel. Mirrors the gRPC event-stream
|
||||
/// queue capacity shape used today.
|
||||
/// </param>
|
||||
/// <param name="replayBufferCapacity">
|
||||
/// Maximum number of events retained for replay. The oldest retained event is
|
||||
/// evicted once this count is exceeded. <c>0</c> disables retention entirely.
|
||||
/// </param>
|
||||
/// <param name="replayRetentionSeconds">
|
||||
/// Maximum age, in seconds, of a retained event. Entries older than this are
|
||||
/// evicted regardless of capacity. <c>0</c> (or less) disables age-based eviction.
|
||||
/// </param>
|
||||
/// <param name="logger">Logger for pump lifecycle diagnostics.</param>
|
||||
/// <param name="timeProvider">
|
||||
/// Clock used to timestamp and age-evict replay entries. Inject a fake to make
|
||||
/// age-eviction deterministic in tests.
|
||||
/// </param>
|
||||
public SessionEventDistributor(
|
||||
string sessionId,
|
||||
Func<CancellationToken, IAsyncEnumerable<MxEvent>> eventSourceFactory,
|
||||
int subscriberQueueCapacity,
|
||||
int replayBufferCapacity,
|
||||
double replayRetentionSeconds,
|
||||
ILogger<SessionEventDistributor> logger,
|
||||
TimeProvider timeProvider)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(sessionId);
|
||||
ArgumentNullException.ThrowIfNull(eventSourceFactory);
|
||||
ArgumentOutOfRangeException.ThrowIfLessThan(subscriberQueueCapacity, 1);
|
||||
ArgumentOutOfRangeException.ThrowIfNegative(replayBufferCapacity);
|
||||
ArgumentOutOfRangeException.ThrowIfNegative(replayRetentionSeconds);
|
||||
ArgumentNullException.ThrowIfNull(logger);
|
||||
ArgumentNullException.ThrowIfNull(timeProvider);
|
||||
|
||||
_sessionId = sessionId;
|
||||
_eventSourceFactory = eventSourceFactory;
|
||||
_subscriberQueueCapacity = subscriberQueueCapacity;
|
||||
_shutdownTimeout = DefaultShutdownTimeout;
|
||||
_replayBufferCapacity = replayBufferCapacity;
|
||||
_ageEvictionEnabled = replayRetentionSeconds > 0;
|
||||
_replayRetention = _ageEvictionEnabled
|
||||
? TimeSpan.FromSeconds(replayRetentionSeconds)
|
||||
: TimeSpan.Zero;
|
||||
_logger = logger;
|
||||
_timeProvider = timeProvider;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -232,6 +304,12 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
.WithCancellation(cancellationToken)
|
||||
.ConfigureAwait(false))
|
||||
{
|
||||
// Retain for replay BEFORE fan-out so a reconnecting subscriber that
|
||||
// queries between fan-out and its own read still sees this event. Order
|
||||
// is preserved: the pump is the single appender and events arrive in
|
||||
// source order.
|
||||
AppendToReplayBuffer(mxEvent);
|
||||
|
||||
// Enumerating a ConcurrentDictionary's Values never throws on concurrent
|
||||
// add/remove; a subscriber registered mid-iteration may miss this event,
|
||||
// which matches "late subscribers see events after they register".
|
||||
@@ -289,6 +367,132 @@ public sealed class SessionEventDistributor : IAsyncDisposable
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the retained events with <see cref="MxEvent.WorkerSequence"/> strictly
|
||||
/// greater than <paramref name="afterSequence"/>, in ascending sequence order, so a
|
||||
/// reconnecting or reattaching subscriber can replay what it missed.
|
||||
/// </summary>
|
||||
/// <param name="afterSequence">
|
||||
/// The last worker sequence the caller already observed. Only events newer than this
|
||||
/// are returned.
|
||||
/// </param>
|
||||
/// <param name="events">
|
||||
/// The retained events newer than <paramref name="afterSequence"/>, in order. Never
|
||||
/// null; empty when nothing newer is retained.
|
||||
/// </param>
|
||||
/// <param name="gap">
|
||||
/// <see langword="true"/> when events between <paramref name="afterSequence"/> and the
|
||||
/// oldest retained event were already evicted (by capacity or age), meaning the caller
|
||||
/// missed events that can no longer be replayed and must re-snapshot. When
|
||||
/// <see langword="true"/>, whatever IS still retained is still returned via
|
||||
/// <paramref name="events"/>.
|
||||
/// </param>
|
||||
/// <returns>
|
||||
/// Always <see langword="true"/> — the out parameters fully describe the result. The
|
||||
/// return value exists for a fluent call shape and future extension.
|
||||
/// </returns>
|
||||
/// <remarks>
|
||||
/// <para>Gap semantics, by buffer state:</para>
|
||||
/// <list type="bullet">
|
||||
/// <item>
|
||||
/// Buffer non-empty: <paramref name="gap"/> is <see langword="true"/> iff
|
||||
/// <paramref name="afterSequence"/> is below the oldest retained sequence minus
|
||||
/// one (i.e. at least one event newer than <paramref name="afterSequence"/> but
|
||||
/// older than the oldest retained was evicted). When
|
||||
/// <paramref name="afterSequence"/> equals or exceeds the newest retained
|
||||
/// sequence the caller is fully caught up: empty list, no gap.
|
||||
/// </item>
|
||||
/// <item>
|
||||
/// Buffer empty (retention disabled, nothing seen yet, or everything evicted):
|
||||
/// empty list, and <paramref name="gap"/> is <see langword="true"/> iff
|
||||
/// <paramref name="afterSequence"/> is below the highest sequence ever seen —
|
||||
/// i.e. the caller is behind but nothing is retained to replay. If no event has
|
||||
/// ever been seen, or the caller is already at/ahead of the highest seen, there
|
||||
/// is nothing to miss: no gap.
|
||||
/// </item>
|
||||
/// </list>
|
||||
/// </remarks>
|
||||
public bool TryGetReplayFrom(ulong afterSequence, out IReadOnlyList<MxEvent> events, out bool gap)
|
||||
{
|
||||
lock (_replayLock)
|
||||
{
|
||||
EvictAged();
|
||||
|
||||
if (_replayBuffer.Count == 0)
|
||||
{
|
||||
events = [];
|
||||
// Nothing retained. The caller missed events only if it is behind the
|
||||
// highest sequence ever seen (and we have seen at least one event).
|
||||
gap = _anyEventSeen && afterSequence < _highestSequenceSeen;
|
||||
return true;
|
||||
}
|
||||
|
||||
ulong oldestRetained = _replayBuffer.First!.Value.Event.WorkerSequence;
|
||||
|
||||
// A gap exists when at least one event newer than afterSequence was evicted,
|
||||
// i.e. afterSequence sits below the oldest-retained-minus-one boundary.
|
||||
gap = afterSequence + 1 < oldestRetained;
|
||||
|
||||
List<MxEvent> newer = [];
|
||||
foreach (ReplayEntry entry in _replayBuffer)
|
||||
{
|
||||
if (entry.Event.WorkerSequence > afterSequence)
|
||||
{
|
||||
newer.Add(entry.Event);
|
||||
}
|
||||
}
|
||||
|
||||
events = newer;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private void AppendToReplayBuffer(MxEvent mxEvent)
|
||||
{
|
||||
lock (_replayLock)
|
||||
{
|
||||
_anyEventSeen = true;
|
||||
if (mxEvent.WorkerSequence > _highestSequenceSeen)
|
||||
{
|
||||
_highestSequenceSeen = mxEvent.WorkerSequence;
|
||||
}
|
||||
|
||||
// Capacity 0 disables retention: track the highest-seen sequence (so replay
|
||||
// can still report a gap) but keep no events.
|
||||
if (_replayBufferCapacity == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
_replayBuffer.AddLast(new ReplayEntry(mxEvent, _timeProvider.GetUtcNow()));
|
||||
|
||||
// Capacity eviction: drop oldest until within bound.
|
||||
while (_replayBuffer.Count > _replayBufferCapacity)
|
||||
{
|
||||
_replayBuffer.RemoveFirst();
|
||||
}
|
||||
|
||||
EvictAged();
|
||||
}
|
||||
}
|
||||
|
||||
// Must be called under _replayLock. Drops entries older than the retention window.
|
||||
private void EvictAged()
|
||||
{
|
||||
if (!_ageEvictionEnabled || _replayBuffer.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
DateTimeOffset cutoff = _timeProvider.GetUtcNow() - _replayRetention;
|
||||
while (_replayBuffer.First is { } first && first.Value.RetainedAt < cutoff)
|
||||
{
|
||||
_replayBuffer.RemoveFirst();
|
||||
}
|
||||
}
|
||||
|
||||
private readonly record struct ReplayEntry(MxEvent Event, DateTimeOffset RetainedAt);
|
||||
|
||||
private sealed class Subscriber(long id, Channel<MxEvent> channel)
|
||||
{
|
||||
public long Id { get; } = id;
|
||||
|
||||
Reference in New Issue
Block a user