Fix remaining reliability findings

This commit is contained in:
Joseph Doherty
2026-04-28 06:38:05 -04:00
parent b0041c5d18
commit 047d875fe6
8 changed files with 200 additions and 217 deletions
@@ -46,8 +46,8 @@ public sealed class EventStreamService(
eventQueue.Writer,
() =>
{
int depth = Interlocked.Increment(ref streamQueueDepth);
metrics.SetGrpcEventStreamQueueDepth(depth);
Interlocked.Increment(ref streamQueueDepth);
metrics.AdjustGrpcEventStreamQueueDepth(1);
},
streamCts.Token);
@@ -55,8 +55,8 @@ public sealed class EventStreamService(
{
await foreach (MxEvent mxEvent in eventQueue.Reader.ReadAllAsync(cancellationToken).ConfigureAwait(false))
{
int depth = Math.Max(0, Interlocked.Decrement(ref streamQueueDepth));
metrics.SetGrpcEventStreamQueueDepth(depth);
Interlocked.Decrement(ref streamQueueDepth);
metrics.AdjustGrpcEventStreamQueueDepth(-1);
yield return mxEvent;
}
@@ -66,9 +66,6 @@ public sealed class EventStreamService(
{
await streamCts.CancelAsync().ConfigureAwait(false);
subscriber.Dispose();
Interlocked.Exchange(ref streamQueueDepth, 0);
metrics.SetGrpcEventStreamQueueDepth(0);
metrics.StreamDisconnected("Detached");
try
{
@@ -84,6 +81,14 @@ public sealed class EventStreamService(
"Event stream producer stopped for session {SessionId}.",
request.SessionId);
}
int remainingDepth = Interlocked.Exchange(ref streamQueueDepth, 0);
if (remainingDepth > 0)
{
metrics.AdjustGrpcEventStreamQueueDepth(-remainingDepth);
}
metrics.StreamDisconnected("Detached");
}
}
@@ -232,6 +232,14 @@ public sealed class GatewayMetrics : IDisposable
}
}
/// <summary>
/// Applies a signed change to the tracked gRPC event stream queue depth.
/// </summary>
/// <param name="delta">
/// Amount to add to the current depth; pass a negative value to reduce it.
/// </param>
/// <remarks>
/// The update happens while holding <c>_syncRoot</c>, and the stored value is
/// floored at zero so it is never left negative.
/// </remarks>
public void AdjustGrpcEventStreamQueueDepth(int delta)
{
    lock (_syncRoot)
    {
        var adjustedDepth = _grpcEventStreamQueueDepth + delta;

        // Clamp instead of storing a negative depth.
        _grpcEventStreamQueueDepth = adjustedDepth < 0 ? 0 : adjustedDepth;
    }
}
public void RemoveSessionEvents(string sessionId)
{
_eventsBySession.TryRemove(sessionId, out _);
+39 -20
View File
@@ -376,30 +376,49 @@ public sealed class GatewaySession
await _closeLock.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
if (_state is SessionState.Closed)
try
{
return new SessionCloseResult(SessionId, SessionState.Closed, AlreadyClosed: true);
if (_state is SessionState.Closed)
{
return new SessionCloseResult(SessionId, SessionState.Closed, AlreadyClosed: true);
}
bool alreadyClosing = _closeStarted;
_closeStarted = true;
_state = SessionState.Closing;
if (_workerClient is not null)
{
try
{
await _workerClient.ShutdownAsync(ShutdownTimeout, cancellationToken).ConfigureAwait(false);
}
catch (Exception exception)
{
try
{
_workerClient.Kill(reason);
}
catch (Exception killException)
{
throw new SessionCloseStartedException(
$"Session {SessionId} close failed after worker shutdown started.",
new AggregateException(exception, killException));
}
throw;
}
}
_state = SessionState.Closed;
return new SessionCloseResult(SessionId, SessionState.Closed, alreadyClosing);
}
bool alreadyClosing = _closeStarted;
_closeStarted = true;
_state = SessionState.Closing;
if (_workerClient is not null)
catch (Exception exception) when (exception is not SessionCloseStartedException)
{
try
{
await _workerClient.ShutdownAsync(ShutdownTimeout, cancellationToken).ConfigureAwait(false);
}
catch
{
_workerClient.Kill(reason);
throw;
}
throw new SessionCloseStartedException(
$"Session {SessionId} close failed after the close lock was acquired.",
exception);
}
_state = SessionState.Closed;
return new SessionCloseResult(SessionId, SessionState.Closed, alreadyClosing);
}
finally
{
@@ -0,0 +1,11 @@
namespace MxGateway.Server.Sessions;
/// <summary>
/// Exception type that wraps a failure together with the exception that caused it.
/// The messages used at its throw sites describe a session close that failed
/// after the close had already begun.
/// </summary>
/// <param name="message">Human-readable description of the close failure.</param>
/// <param name="innerException">The underlying exception that caused the failure.</param>
internal sealed class SessionCloseStartedException(string message, Exception innerException)
    : Exception(message, innerException)
{
}
@@ -210,7 +210,11 @@ public sealed class SessionManager : ISessionManager
await RemoveSessionAsync(session).ConfigureAwait(false);
return result;
}
catch (Exception exception)
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
throw;
}
catch (SessionCloseStartedException exception)
{
session.MarkFaulted(exception.Message);
if (!wasClosed)