Improve gateway reliability and client e2e coverage

This commit is contained in:
Joseph Doherty
2026-04-28 06:11:18 -04:00
parent 4fc355b357
commit 907aa49aea
25 changed files with 1153 additions and 83 deletions
@@ -114,6 +114,37 @@ public sealed class EventStreamServiceTests
Assert.Equal(1, metrics.GetSnapshot().Faults);
}
[Fact]
public async Task StreamEventsAsync_WhenStreamQueueOverflowsWithDisconnectPolicy_LeavesSessionReady()
{
FakeWorkerClient workerClient = new();
GatewaySession session = CreateReadySession(workerClient);
using GatewayMetrics metrics = new();
EventStreamService service = CreateService(
new FakeSessionManager(session),
metrics,
queueCapacity: 1,
backpressurePolicy: EventBackpressurePolicy.DisconnectSubscriber);
workerClient.Events.Add(CreateWorkerEvent(sequence: 1, MxEventFamily.OnDataChange));
workerClient.Events.Add(CreateWorkerEvent(sequence: 2, MxEventFamily.OnDataChange));
workerClient.Events.Add(CreateWorkerEvent(sequence: 3, MxEventFamily.OnDataChange));
workerClient.CompleteAfterConfiguredEvents = true;
await using IAsyncEnumerator<MxEvent> subscriber = service
.StreamEventsAsync(CreateRequest(session.SessionId), CancellationToken.None)
.GetAsyncEnumerator();
Assert.True(await subscriber.MoveNextAsync().AsTask().WaitAsync(TestTimeout));
SessionManagerException exception = await Assert.ThrowsAsync<SessionManagerException>(
async () => await subscriber.MoveNextAsync().AsTask().WaitAsync(TestTimeout));
Assert.Equal(SessionManagerErrorCode.EventQueueOverflow, exception.ErrorCode);
Assert.Equal(SessionState.Ready, session.State);
GatewayMetricsSnapshot snapshot = metrics.GetSnapshot();
Assert.Equal(1, snapshot.QueueOverflows);
Assert.Equal(0, snapshot.Faults);
Assert.Equal(1, snapshot.StreamDisconnects);
}
[Fact]
public async Task StreamEventsAsync_DoesNotSynthesizeOperationComplete()
{
@@ -157,7 +188,8 @@ public sealed class EventStreamServiceTests
private static EventStreamService CreateService(
FakeSessionManager sessionManager,
GatewayMetrics? metrics = null,
int queueCapacity = 8)
int queueCapacity = 8,
EventBackpressurePolicy backpressurePolicy = EventBackpressurePolicy.FailFast)
{
return new EventStreamService(
sessionManager,
@@ -166,6 +198,7 @@ public sealed class EventStreamServiceTests
Events = new EventOptions
{
QueueCapacity = queueCapacity,
BackpressurePolicy = backpressurePolicy,
},
}),
new MxAccessGrpcMapper(),
@@ -65,13 +65,33 @@ public sealed class SessionWorkerClientFactoryFakeWorkerTests
Assert.True(launcher.Process.IsDisposed);
}
private static GatewayOptions CreateOptions()
[Fact]
public async Task CreateAsync_WhenFakeWorkerNeverSendsReady_TimesOutAndKillsWorker()
{
NeverReadyWorkerProcessLauncher launcher = new();
using GatewayMetrics metrics = new();
SessionWorkerClientFactory factory = new(
launcher,
Options.Create(CreateOptions(startupTimeoutSeconds: 1)),
metrics,
NullLoggerFactory.Instance);
GatewaySession session = CreateSession(startupTimeout: TimeSpan.FromSeconds(1));
TimeoutException exception = await Assert.ThrowsAsync<TimeoutException>(
async () => await factory.CreateAsync(session, CancellationToken.None).WaitAsync(TestTimeout));
Assert.Contains("did not complete startup", exception.Message);
Assert.Equal(1, launcher.Process.KillCount);
Assert.True(launcher.Process.IsDisposed);
}
private static GatewayOptions CreateOptions(int startupTimeoutSeconds = 5)
{
return new GatewayOptions
{
Worker = new WorkerOptions
{
StartupTimeoutSeconds = 5,
StartupTimeoutSeconds = startupTimeoutSeconds,
ShutdownTimeoutSeconds = 5,
HeartbeatIntervalSeconds = 30,
HeartbeatGraceSeconds = 30,
@@ -84,7 +104,7 @@ public sealed class SessionWorkerClientFactoryFakeWorkerTests
};
}
private static GatewaySession CreateSession()
private static GatewaySession CreateSession(TimeSpan? startupTimeout = null)
{
return new GatewaySession(
FakeWorkerHarness.DefaultSessionId,
@@ -94,7 +114,7 @@ public sealed class SessionWorkerClientFactoryFakeWorkerTests
"test-client",
"fake-worker-session-test",
"client-correlation-1",
TestTimeout,
startupTimeout ?? TestTimeout,
TestTimeout,
TestTimeout,
DateTimeOffset.UtcNow);
@@ -172,6 +192,38 @@ public sealed class SessionWorkerClientFactoryFakeWorkerTests
}
}
private sealed class NeverReadyWorkerProcessLauncher : IWorkerProcessLauncher
{
public FakeWorkerProcess Process { get; } = new(processId: 4680);
public Task<WorkerProcessHandle> LaunchAsync(
WorkerProcessLaunchRequest request,
CancellationToken cancellationToken = default)
{
_ = RunWorkerAsync(request, cancellationToken);
return Task.FromResult(CreateHandle(Process));
}
private async Task RunWorkerAsync(
WorkerProcessLaunchRequest request,
CancellationToken cancellationToken)
{
await using FakeWorkerHarness harness = await FakeWorkerHarness.ConnectToGatewayPipeAsync(
request.SessionId,
request.Nonce,
request.PipeName,
request.ProtocolVersion,
cancellationToken: cancellationToken).ConfigureAwait(false);
_ = await harness.ReadGatewayEnvelopeAsync(cancellationToken).ConfigureAwait(false);
await harness.SendWorkerHelloAsync(
workerProcessId: Process.Id,
workerProtocolVersion: request.ProtocolVersion,
cancellationToken: cancellationToken).ConfigureAwait(false);
await Task.Delay(Timeout.InfiniteTimeSpan, cancellationToken).ConfigureAwait(false);
}
}
private static WorkerProcessHandle CreateHandle(IWorkerProcess process)
{
return new WorkerProcessHandle(
@@ -166,7 +166,8 @@ public sealed class WorkerClientTests
await pipePair.DisposeWorkerSideAsync();
await WaitUntilAsync(
() => client.State == WorkerClientState.Faulted,
() => client.State == WorkerClientState.Faulted
&& metrics.GetSnapshot().WorkersRunning == 0,
TestTimeout);
GatewayMetricsSnapshot snapshot = metrics.GetSnapshot();
@@ -174,6 +175,22 @@ public sealed class WorkerClientTests
Assert.Equal(1, snapshot.WorkerExits);
}
[Fact]
public async Task DisposeAsync_WhenPipeReadIsBlocked_ReturnsWithinBoundedTimeout()
{
await using PipePair pipePair = await PipePair.CreateAsync();
WorkerClient client = CreateClient(pipePair);
await CompleteHandshakeAsync(client, pipePair);
DateTimeOffset startedAt = DateTimeOffset.UtcNow;
await client.DisposeAsync().AsTask().WaitAsync(TestTimeout);
TimeSpan elapsed = DateTimeOffset.UtcNow - startedAt;
Assert.True(
elapsed < TimeSpan.FromSeconds(4),
$"DisposeAsync took {elapsed.TotalMilliseconds:N0}ms.");
}
[Fact]
public async Task ReadLoop_WhenHeartbeatArrives_UpdatesLastHeartbeatAndWorkerProcess()
{
@@ -60,4 +60,22 @@ public sealed class GatewayMetricsTests
Assert.Equal("depth", exception.ParamName);
}
[Fact]
public void RemoveSessionEvents_RemovesOnlyThatSession()
{
using GatewayMetrics metrics = new();
metrics.EventReceived("session-1", "OnDataChange");
metrics.EventReceived("session-2", "OnWriteComplete");
metrics.RemoveSessionEvents("session-1");
GatewayMetricsSnapshot snapshot = metrics.GetSnapshot();
Assert.Equal(2, snapshot.EventsReceived);
Assert.False(snapshot.EventsBySession.ContainsKey("session-1"));
Assert.Equal(1, snapshot.EventsBySession["session-2"]);
Assert.Equal(1, snapshot.EventsByFamily["OnDataChange"]);
Assert.Equal(1, snapshot.EventsByFamily["OnWriteComplete"]);
}
}