From bf73985481d69d6e3562256bff1acdf4be20db43 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 15:19:59 -0400 Subject: [PATCH] Fix hanging and timing-fragile WorkerClient event-channel tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Server-032 change made event-channel overflow wait EventChannelFullModeTimeout before faulting, instead of faulting instantly. Two pre-existing overflow tests were not updated and left EventChannelFullModeTimeout at its 5s default, which races the 5s TestTimeout: ReadLoop_WhenEventQueueOverflows_FaultsClient and ReadLoop_WhenClientFaults_KillsOwnedWorkerProcess. Pin it to 50ms in both so overflow faults promptly. EnqueueWorkerEvent_WhenChannelFullPastTimeout_FaultsWithRichDiagnostic wrote 6 events into a 4-slot channel, but the worker client faults while reading the 5th and its read loop then stops — the 6th event is never drained and the test's pipe write for it blocks forever on a full OS pipe buffer, hanging the test host. Write exactly 5 (4 to fill plus 1 to overflow) as the test comment already intends, and bound the post-fault event drain with TestTimeout so a future regression fails instead of hanging. No production change: the Server-031/032 WorkerClient logic is correct — these were test-only defects. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Gateway/Workers/WorkerClientTests.cs | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/MxGateway.Tests/Gateway/Workers/WorkerClientTests.cs b/src/MxGateway.Tests/Gateway/Workers/WorkerClientTests.cs index 6370de6..e53f297 100644 --- a/src/MxGateway.Tests/Gateway/Workers/WorkerClientTests.cs +++ b/src/MxGateway.Tests/Gateway/Workers/WorkerClientTests.cs @@ -128,6 +128,7 @@ public sealed class WorkerClientTests new WorkerClientOptions { EventChannelCapacity = 1, + EventChannelFullModeTimeout = TimeSpan.FromMilliseconds(50), HeartbeatGrace = TimeSpan.FromSeconds(30), HeartbeatCheckInterval = TimeSpan.FromSeconds(30), }); @@ -163,6 +164,7 @@ public sealed class WorkerClientTests new WorkerClientOptions { EventChannelCapacity = 1, + EventChannelFullModeTimeout = TimeSpan.FromMilliseconds(50), HeartbeatGrace = TimeSpan.FromSeconds(30), HeartbeatCheckInterval = TimeSpan.FromSeconds(30), }, @@ -483,10 +485,13 @@ public sealed class WorkerClientTests }); await CompleteHandshakeAsync(client, pipePair); - // Fill the channel plus one to force the overflow path. The gateway - // never opens a StreamEvents consumer so the events stay in the - // bounded channel. - for (ulong sequence = 1; sequence <= 6; sequence++) + // Fill the 4-slot channel and write exactly one more to force the + // overflow path. The gateway never opens a StreamEvents consumer, so + // the events stay buffered. Exactly five events are written: the + // worker client faults while reading the fifth, after which its read + // loop stops — a sixth event would never be drained and its pipe + // write would block forever on a full OS pipe buffer. 
+ for (ulong sequence = 1; sequence <= 5; sequence++) { await pipePair.WorkerWriter.WriteAsync( CreateEventEnvelope(sequence: sequence, MxEventFamily.OnDataChange)); @@ -499,10 +504,13 @@ public sealed class WorkerClientTests Assert.Equal(WorkerClientState.Faulted, client.State); // Reading the events channel after fault throws the propagated - // WorkerClientException carrying the rich diagnostic message. + // WorkerClientException carrying the rich diagnostic message. The + // drain is bounded by TestTimeout so a regression that leaves the + // channel uncompleted fails the test instead of hanging it. + using CancellationTokenSource drainTimeout = new(TestTimeout); WorkerClientException fault = await Assert.ThrowsAsync<WorkerClientException>(async () => { - await foreach (WorkerEvent _ in client.ReadEventsAsync(CancellationToken.None)) + await foreach (WorkerEvent _ in client.ReadEventsAsync(drainTimeout.Token)) { } });