Fix reliability findings

This commit is contained in:
Joseph Doherty
2026-04-28 06:27:01 -04:00
parent 907aa49aea
commit b0041c5d18
9 changed files with 233 additions and 21 deletions
@@ -66,6 +66,8 @@ public sealed class EventStreamService(
{
await streamCts.CancelAsync().ConfigureAwait(false);
subscriber.Dispose();
Interlocked.Exchange(ref streamQueueDepth, 0);
metrics.SetGrpcEventStreamQueueDepth(0);
metrics.StreamDisconnected("Detached");
try
@@ -101,6 +101,17 @@ public sealed class GatewayMetrics : IDisposable
_sessionsClosedCounter.Add(1);
}
/// <summary>
/// Records that a session has been removed by lowering the open-session count by one.
/// </summary>
/// <remarks>
/// The update is performed while holding <c>_syncRoot</c>. When the count is already
/// zero or lower it is left untouched, so this method never pushes it below zero.
/// </remarks>
public void SessionRemoved()
{
    lock (_syncRoot)
    {
        // Nothing left to account for; leave the count as it is.
        if (_openSessions <= 0)
        {
            return;
        }

        _openSessions -= 1;
    }
}
public void WorkerStarted(TimeSpan startupDuration)
{
lock (_syncRoot)
@@ -184,8 +184,11 @@ public sealed class SessionManager : ISessionManager
exception,
"Graceful shutdown failed for session {SessionId}; killing worker.",
session.SessionId);
session.KillWorker(GatewayShutdownReason);
await RemoveSessionAsync(session).ConfigureAwait(false);
if (_registry.TryGet(session.SessionId, out _))
{
session.KillWorker(GatewayShutdownReason);
await RemoveSessionAsync(session).ConfigureAwait(false);
}
}
}
}
@@ -210,7 +213,13 @@ public sealed class SessionManager : ISessionManager
catch (Exception exception)
{
session.MarkFaulted(exception.Message);
if (!wasClosed)
{
_metrics.SessionRemoved();
}
_metrics.Fault(SessionManagerErrorCode.CloseFailed.ToString());
await RemoveSessionAsync(session).ConfigureAwait(false);
throw new SessionManagerException(
SessionManagerErrorCode.CloseFailed,
$"Failed to close session {session.SessionId}.",