feat(sessions): detach-grace retention window for reconnect

This commit is contained in:
Joseph Doherty
2026-06-16 06:15:46 -04:00
parent 85e4334bb7
commit db95f8644f
10 changed files with 346 additions and 6 deletions
@@ -181,6 +181,10 @@ public sealed class GatewayOptionsValidator : OptionsValidatorBase<GatewayOption
options.MaxEventSubscribersPerSession,
"MxGateway:Sessions:MaxEventSubscribersPerSession must be greater than zero.",
builder);
AddIfNegative(
options.DetachGraceSeconds,
"MxGateway:Sessions:DetachGraceSeconds must be zero or greater (0 disables detach-grace retention).",
builder);
// NOTE: We intentionally do NOT reject !AllowMultipleEventSubscribers &&
// MaxEventSubscribersPerSession > 1 as a hard validation error here. The default
@@ -23,6 +23,21 @@ public sealed class SessionOptions
/// <summary>
/// Gets the interval, in seconds, for sweeping expired session leases. Defaults to
/// <c>30</c>.
/// </summary>
public int LeaseSweepIntervalSeconds { get; init; } = 30;
/// <summary>
/// Gets the detach-grace retention window, in seconds, that a session is kept alive
/// after its last external (gRPC) event-stream subscriber drops, so a client can
/// reconnect to it. Defaults to <c>30</c>.
/// </summary>
/// <remarks>
/// <para>
/// While within the window the session stays in <c>Ready</c> and remains usable; if no
/// new external subscriber attaches before the window elapses, the lease monitor closes
/// the session exactly as it closes an expired lease, using the close reason
/// <c>detach-grace-expired</c> instead of <c>lease-expired</c> so the two can be told
/// apart. The gateway-owned internal dashboard subscriber does not count as an external
/// subscriber, so a session whose only remaining subscriber is the dashboard mirror
/// still enters detach-grace.
/// </para>
/// <para>
/// A value of <c>0</c> disables retention: the session reverts to the original behavior
/// of lingering only until its normal lease expires. Negative values are rejected by
/// options validation. The reconnect/replay itself is implemented separately (Task 12);
/// this option controls retention and expiry only.
/// </para>
/// <para>
/// NOTE(review): expiry is evaluated by the session sweep rather than by a dedicated
/// timer, so the effective window is presumably this value plus up to one
/// <see cref="LeaseSweepIntervalSeconds"/> — confirm against the sweep scheduler.
/// </para>
/// </remarks>
public int DetachGraceSeconds { get; init; } = 30;
/// <summary>
/// Gets a value indicating whether multiple event subscribers are allowed per session.
/// </summary>
@@ -22,6 +22,8 @@ public sealed class GatewaySession
private DateTimeOffset? _leaseExpiresAt;
private bool _closeStarted;
private int _activeEventSubscriberCount;
// Detach-grace retention window. The constructor clamps any non-positive value to
// TimeSpan.Zero, and TimeSpan.Zero means detach-grace is disabled.
private readonly TimeSpan _detachGrace;
// Timestamp at which the session entered detach-grace (stamped when an event subscriber
// detaches and the external subscriber count is zero), or null when no window is in
// flight. Reset to null whenever an external subscriber attaches. Read under _syncRoot.
private DateTimeOffset? _detachedAtUtc;
private SessionEventDistributor? _eventDistributor;
private bool _eventDistributorStarted;
private bool _dashboardMirrorStarted;
@@ -103,6 +105,16 @@ public sealed class GatewaySession
/// session directly still get a working distributor. Production passes the
/// DI-resolved dependencies.
/// </param>
/// <param name="detachGrace">
/// Retention window kept after the last external (gRPC) event subscriber drops, so a
/// client can reconnect (Task 12). When the window is positive and the active external
/// subscriber count falls to zero, the session stays <see cref="SessionState.Ready"/>
/// and records a detached timestamp; the lease monitor closes it once the window
/// elapses with no subscriber having re-attached. <see cref="TimeSpan.Zero"/> (the
/// default) disables retention and preserves the original lease-only expiry behavior.
/// The clock comes from <paramref name="eventStreaming"/>'s
/// <see cref="SessionEventStreaming.TimeProvider"/> so the timer is unit-testable.
/// </param>
public GatewaySession(
string sessionId,
string backendName,
@@ -117,7 +129,8 @@ public sealed class GatewaySession
TimeSpan shutdownTimeout,
TimeSpan leaseDuration,
DateTimeOffset openedAt,
SessionEventStreaming? eventStreaming = null)
SessionEventStreaming? eventStreaming = null,
TimeSpan detachGrace = default)
{
if (string.IsNullOrWhiteSpace(sessionId))
{
@@ -155,6 +168,7 @@ public sealed class GatewaySession
_lastClientActivityAt = openedAt;
_leaseExpiresAt = openedAt + leaseDuration;
_eventStreaming = eventStreaming ?? SessionEventStreaming.Default;
_detachGrace = detachGrace > TimeSpan.Zero ? detachGrace : TimeSpan.Zero;
}
/// <summary>
@@ -300,6 +314,25 @@ public sealed class GatewaySession
}
}
/// <summary>
/// Gets the UTC instant at which this session began its detach-grace retention window,
/// or <see langword="null"/> when no window is currently in flight.
/// </summary>
/// <remarks>
/// The value is stamped when the last external event subscriber drops while a positive
/// detach-grace is configured, and is cleared again as soon as an external subscriber
/// re-attaches. It is always <see langword="null"/> when detach-grace is disabled
/// (<c>DetachGraceSeconds == 0</c>).
/// </remarks>
public DateTimeOffset? DetachedAtUtc
{
    get
    {
        // Take a snapshot under the lock and hand back the copy, so the read cannot
        // interleave with a concurrent write to the field.
        DateTimeOffset? snapshot;
        lock (_syncRoot)
        {
            snapshot = _detachedAtUtc;
        }

        return snapshot;
    }
}
/// <summary>
/// Attaches the worker client for this session.
/// </summary>
@@ -679,6 +712,28 @@ public sealed class GatewaySession
}
}
/// <summary>
/// Reports whether this session has used up its detach-grace retention window and is
/// therefore eligible to be closed by the lease monitor, which closes such a session
/// exactly as it closes an expired lease.
/// </summary>
/// <remarks>
/// The window counts as elapsed only when every one of these holds:
/// <list type="bullet">
/// <item><description>detach-grace is enabled (the configured window is positive);</description></item>
/// <item><description>no external event subscriber is currently attached;</description></item>
/// <item><description>the session has a detached timestamp, i.e. it actually entered detach-grace;</description></item>
/// <item><description>at least the configured window has passed since that timestamp (the comparison is inclusive).</description></item>
/// </list>
/// Because the detached timestamp is cleared on re-attach, a session with an attached
/// external subscriber is never reported as expired.
/// </remarks>
/// <param name="now">Current timestamp for comparison.</param>
/// <returns>
/// <see langword="true"/> when the detach-grace window has elapsed; otherwise
/// <see langword="false"/>.
/// </returns>
public bool IsDetachGraceExpired(DateTimeOffset now)
{
    lock (_syncRoot)
    {
        // Feature switched off: nothing can ever expire.
        if (_detachGrace <= TimeSpan.Zero)
        {
            return false;
        }

        // An external subscriber is attached, so the session is not detached.
        if (_activeEventSubscriberCount != 0)
        {
            return false;
        }

        // The session never entered a detach-grace window (or it was cancelled).
        if (_detachedAtUtc is not { } detachedAt)
        {
            return false;
        }

        TimeSpan elapsed = now - detachedAt;
        return elapsed >= _detachGrace;
    }
}
/// <summary>
/// Attaches an event subscriber and returns a lease whose
/// <see cref="IEventSubscriberLease.Reader"/> reads the fanned public
@@ -733,6 +788,12 @@ public sealed class GatewaySession
}
_activeEventSubscriberCount++;
// An external subscriber (re)attached: cancel any in-flight detach-grace window so
// the lease monitor no longer treats this session as eligible for grace-expiry
// close. This is the reattach→grace-cancel transition; it races the sweeper's
// IsDetachGraceExpired read, and both run under _syncRoot so they serialize.
_detachedAtUtc = null;
}
// Construct/start the distributor and register this subscriber. Done outside the
@@ -1502,6 +1563,24 @@ public sealed class GatewaySession
{
_activeEventSubscriberCount--;
}
// When the LAST external subscriber drops and detach-grace is enabled, retain the
// session instead of letting it linger only on the (long) lease: stamp the detached
// time so the lease monitor can close it once the grace window elapses. The session
// stays in its current (Ready) state and remains usable, so a reconnecting subscriber
// (Task 12) re-attaches normally. The gateway-owned internal dashboard subscriber is
// NOT counted in _activeEventSubscriberCount (it registers on the distributor with
// isInternal: true), so a session whose only remaining subscriber is the dashboard
// mirror still enters grace. Only stamp while the session is alive — once
// Closing/Closed/Faulted there is nothing to retain. This is the detach→grace-start
// transition; it shares _syncRoot with the reattach→grace-cancel write above and the
// sweeper's IsDetachGraceExpired read, so the three serialize.
if (_detachGrace > TimeSpan.Zero
&& _activeEventSubscriberCount == 0
&& _state is not (SessionState.Closing or SessionState.Closed or SessionState.Faulted))
{
_detachedAtUtc = _eventStreaming.TimeProvider.GetUtcNow();
}
}
}
@@ -17,6 +17,7 @@ public sealed class SessionManager : ISessionManager
public const string DefaultCloseReason = "client-close";
public const string GatewayShutdownReason = "gateway-shutdown";
/// <summary>
/// Close reason used by the session sweep when a session's normal lease has expired.
/// </summary>
public const string LeaseExpiredReason = "lease-expired";
/// <summary>
/// Close reason used by the session sweep when a session's detach-grace retention
/// window has elapsed. The sweep checks lease expiry first, so a session that satisfies
/// both conditions is closed with <see cref="LeaseExpiredReason"/> instead.
/// </summary>
public const string DetachGraceExpiredReason = "detach-grace-expired";
private readonly ISessionRegistry _registry;
private readonly ISessionWorkerClientFactory _workerClientFactory;
@@ -295,12 +296,22 @@ public sealed class SessionManager : ISessionManager
int closedCount = 0;
foreach (GatewaySession session in _registry.Snapshot())
{
if (!session.IsLeaseExpired(now))
// A session is swept when its normal lease has expired OR its detach-grace
// retention window has elapsed (last external subscriber dropped and no client
// reconnected within DetachGraceSeconds). The detach-grace close is the same
// teardown as a lease-expiry close; only the reason differs so operators can tell
// a short reconnect-window expiry from a long idle-lease expiry in logs/metrics.
string? reason = session.IsLeaseExpired(now)
? LeaseExpiredReason
: session.IsDetachGraceExpired(now)
? DetachGraceExpiredReason
: null;
if (reason is null)
{
continue;
}
await CloseSessionCoreAsync(session, LeaseExpiredReason, cancellationToken).ConfigureAwait(false);
await CloseSessionCoreAsync(session, reason, cancellationToken).ConfigureAwait(false);
closedCount++;
}
@@ -478,7 +489,8 @@ public sealed class SessionManager : ISessionManager
shutdownTimeout,
leaseDuration,
openedAt,
eventStreaming);
eventStreaming,
TimeSpan.FromSeconds(Math.Max(0, _options.Sessions.DetachGraceSeconds)));
}
private static string CreateClientCorrelationId(
@@ -46,6 +46,7 @@
"MaxPendingCommandsPerSession": 128,
"DefaultLeaseSeconds": 1800,
"LeaseSweepIntervalSeconds": 30,
"DetachGraceSeconds": 30,
"AllowMultipleEventSubscribers": false,
"MaxEventSubscribersPerSession": 8
},