fix: resolve code-review findings (locally verified)
Server-054/055/056, Contracts-020/021/022, Tests-036/038/039, IntegrationTests-030/031/032 (+033 deferred to live rig), Client.Dotnet-026/028/029 (+027 won't-fix), Client.Go-030..034, Client.Python-032..036, Client.Rust-033..038. Key fix: SessionEventDistributor orphaned a subscriber that registered after the pump completed but before disposal (Server-056); the register paths now complete late registrants under _lifecycleLock, and a regression test was added. The racy dashboard-mirror gRPC test was made deterministic (Tests-039). Verified green locally: gateway Tests targeted classes (GatewaySession, SessionEventDistributor, GatewayOptionsValidator, ProtobufContractRoundTrip, GatewaySessionDashboardMirror) plus the dotnet/go/python/rust client suites.
This commit is contained in:
@@ -393,4 +393,91 @@ public sealed class GatewayOptionsValidatorTests
|
||||
ValidateOptionsResult result = new GatewayOptionsValidator().Validate(null, options);
|
||||
Assert.True(result.Succeeded);
|
||||
}
|
||||
|
||||
/// <summary>
/// A <c>DetachGraceSeconds</c> of -1 must make validation fail, and one of the reported
/// failures must mention the <c>MxGateway:Sessions:DetachGraceSeconds</c> setting.
/// </summary>
[Fact]
public void Validate_Fails_WhenDetachGraceSecondsIsNegative()
{
    // Arrange: start from ValidOptions() and hand CloneWithSessions a negative grace value.
    GatewayOptions baseline = ValidOptions();
    SessionOptions negativeGrace = new() { DetachGraceSeconds = -1 };
    GatewayOptions candidate = CloneWithSessions(baseline, negativeGrace);
    GatewayOptionsValidator validator = new();

    // Act
    ValidateOptionsResult outcome = validator.Validate(null, candidate);

    // Assert: Failures is only dereferenced after Failed has been confirmed.
    Assert.True(outcome.Failed);
    Assert.Contains(
        outcome.Failures!,
        failure => failure.Contains("MxGateway:Sessions:DetachGraceSeconds"));
}
|
||||
|
||||
/// <summary>
/// Zero is the boundary value for <c>DetachGraceSeconds</c>: validation must still succeed.
/// </summary>
[Fact]
public void Validate_Succeeds_WhenDetachGraceSecondsIsZero()
{
    // Arrange: start from ValidOptions() and hand CloneWithSessions a zero grace value.
    GatewayOptions baseline = ValidOptions();
    SessionOptions zeroGrace = new() { DetachGraceSeconds = 0 };
    GatewayOptions candidate = CloneWithSessions(baseline, zeroGrace);
    GatewayOptionsValidator validator = new();

    // Act
    ValidateOptionsResult outcome = validator.Validate(null, candidate);

    // Assert
    Assert.True(outcome.Succeeded);
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// ReplayBufferCapacity / ReplayRetentionSeconds validation
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// <summary>
/// Builds a new <see cref="GatewayOptions"/> whose <c>Events</c> section is
/// <paramref name="events"/> and whose Authentication, Ldap, Worker, Sessions, Dashboard,
/// Protocol, Alarms and Tls sections are taken from <paramref name="source"/>.
/// </summary>
/// <param name="source">Options instance whose non-event sections are reused as-is (by reference).</param>
/// <param name="events">Event options to place in the returned instance.</param>
/// <returns>The newly constructed options instance.</returns>
private static GatewayOptions CloneWithEvents(GatewayOptions source, EventOptions events)
{
    GatewayOptions clone = new()
    {
        Authentication = source.Authentication,
        Ldap = source.Ldap,
        Worker = source.Worker,
        Sessions = source.Sessions,

        // The one section that differs from the source.
        Events = events,

        Dashboard = source.Dashboard,
        Protocol = source.Protocol,
        Alarms = source.Alarms,
        Tls = source.Tls,
    };

    return clone;
}
|
||||
|
||||
/// <summary>
/// A <c>ReplayBufferCapacity</c> of -1 must make validation fail, and one of the reported
/// failures must mention the <c>MxGateway:Events:ReplayBufferCapacity</c> setting.
/// </summary>
[Fact]
public void Validate_Fails_WhenReplayBufferCapacityIsNegative()
{
    // Arrange: start from ValidOptions() and swap in an event section with a negative capacity.
    GatewayOptions baseline = ValidOptions();
    EventOptions negativeCapacity = new() { ReplayBufferCapacity = -1 };
    GatewayOptions candidate = CloneWithEvents(baseline, negativeCapacity);
    GatewayOptionsValidator validator = new();

    // Act
    ValidateOptionsResult outcome = validator.Validate(null, candidate);

    // Assert: Failures is only dereferenced after Failed has been confirmed.
    Assert.True(outcome.Failed);
    Assert.Contains(
        outcome.Failures!,
        failure => failure.Contains("MxGateway:Events:ReplayBufferCapacity"));
}
|
||||
|
||||
/// <summary>
/// Zero is the boundary value for <c>ReplayBufferCapacity</c>: validation must still succeed.
/// </summary>
[Fact]
public void Validate_Succeeds_WhenReplayBufferCapacityIsZero()
{
    // Arrange: start from ValidOptions() and swap in an event section with zero capacity.
    GatewayOptions baseline = ValidOptions();
    EventOptions zeroCapacity = new() { ReplayBufferCapacity = 0 };
    GatewayOptions candidate = CloneWithEvents(baseline, zeroCapacity);
    GatewayOptionsValidator validator = new();

    // Act
    ValidateOptionsResult outcome = validator.Validate(null, candidate);

    // Assert
    Assert.True(outcome.Succeeded);
}
|
||||
|
||||
/// <summary>
/// A <c>ReplayRetentionSeconds</c> of -1 must make validation fail, and one of the reported
/// failures must mention the <c>MxGateway:Events:ReplayRetentionSeconds</c> setting.
/// </summary>
[Fact]
public void Validate_Fails_WhenReplayRetentionSecondsIsNegative()
{
    // Arrange: start from ValidOptions() and swap in an event section with negative retention.
    GatewayOptions baseline = ValidOptions();
    EventOptions negativeRetention = new() { ReplayRetentionSeconds = -1 };
    GatewayOptions candidate = CloneWithEvents(baseline, negativeRetention);
    GatewayOptionsValidator validator = new();

    // Act
    ValidateOptionsResult outcome = validator.Validate(null, candidate);

    // Assert: Failures is only dereferenced after Failed has been confirmed.
    Assert.True(outcome.Failed);
    Assert.Contains(
        outcome.Failures!,
        failure => failure.Contains("MxGateway:Events:ReplayRetentionSeconds"));
}
|
||||
|
||||
/// <summary>
/// Zero is the boundary value for <c>ReplayRetentionSeconds</c>: validation must still succeed.
/// </summary>
[Fact]
public void Validate_Succeeds_WhenReplayRetentionSecondsIsZero()
{
    // Arrange: start from ValidOptions() and swap in an event section with zero retention.
    GatewayOptions baseline = ValidOptions();
    EventOptions zeroRetention = new() { ReplayRetentionSeconds = 0 };
    GatewayOptions candidate = CloneWithEvents(baseline, zeroRetention);
    GatewayOptionsValidator validator = new();

    // Act
    ValidateOptionsResult outcome = validator.Validate(null, candidate);

    // Assert
    Assert.True(outcome.Succeeded);
}
|
||||
}
|
||||
|
||||
@@ -1543,4 +1543,49 @@ public sealed class ProtobufContractRoundTripTests
|
||||
Assert.Equal(AlarmProviderMode.Subtag, parsed.OnAlarmProviderModeChanged.Mode);
|
||||
Assert.Equal(unchecked((int)0x80004005), parsed.OnAlarmProviderModeChanged.Hresult);
|
||||
}
|
||||
|
||||
/// <summary>
/// Pins the wire contract of the replay-gap sentinel and checks that it survives a
/// serialize/parse round trip. Specifically:
/// <c>MxEvent.replay_gap</c> must be field 14;
/// <c>ReplayGap.requested_after_sequence</c> must be field 1 and
/// <c>ReplayGap.oldest_available_sequence</c> field 2 (number and name both checked through
/// the <see cref="ReplayGap"/> descriptor); and an <see cref="MxEvent"/> carrying only a
/// <see cref="ReplayGap"/> must parse back equal to the original with
/// <see cref="MxEvent.BodyCase"/> still <see cref="MxEvent.BodyOneofCase.None"/>, because
/// replay_gap is not a member of the body oneof.
/// </summary>
[Fact]
public void MxEvent_RoundTripsReplayGapSentinelAndPinsFieldNumbers()
{
    // Wire number of the ReplayGap field on MxEvent.
    Assert.Equal(14, MxEvent.ReplayGapFieldNumber);

    // Wire numbers and proto names of the two ReplayGap sub-fields, looked up by number.
    var requestedAfterField = ReplayGap.Descriptor.Fields[ReplayGap.RequestedAfterSequenceFieldNumber];
    Assert.Equal(1, requestedAfterField.FieldNumber);
    Assert.Equal("requested_after_sequence", requestedAfterField.Name);

    var oldestAvailableField = ReplayGap.Descriptor.Fields[ReplayGap.OldestAvailableSequenceFieldNumber];
    Assert.Equal(2, oldestAvailableField.FieldNumber);
    Assert.Equal("oldest_available_sequence", oldestAvailableField.Name);

    // Sentinel event: replay_gap populated, no body oneof member set.
    var gap = new ReplayGap
    {
        RequestedAfterSequence = 150,
        OldestAvailableSequence = 200,
    };
    var sentinel = new MxEvent
    {
        SessionId = "session-1",
        WorkerSequence = 0,
        ReplayGap = gap,
    };

    byte[] wireBytes = sentinel.ToByteArray();
    var roundTripped = MxEvent.Parser.ParseFrom(wireBytes);

    Assert.Equal(sentinel, roundTripped);

    // Setting replay_gap must not select any body oneof case.
    Assert.Equal(MxEvent.BodyOneofCase.None, roundTripped.BodyCase);

    Assert.NotNull(roundTripped.ReplayGap);
    Assert.Equal(150UL, roundTripped.ReplayGap.RequestedAfterSequence);
    Assert.Equal(200UL, roundTripped.ReplayGap.OldestAvailableSequence);
}
|
||||
}
|
||||
|
||||
+41
-5
@@ -67,6 +67,12 @@ public sealed class GatewaySessionDashboardMirrorTests
|
||||
workerClient.Events.Add(CreateWorkerEvent(2, MxEventFamily.OnDataChange));
|
||||
workerClient.Events.Add(CreateWorkerEvent(3, MxEventFamily.OnWriteComplete));
|
||||
workerClient.CompleteAfterConfiguredEvents = true;
|
||||
// Hold the worker stream until BOTH subscribers are attached so neither misses an event.
|
||||
// MarkReady registers the internal dashboard subscriber and starts the pump, which then
|
||||
// blocks on the gate; the gRPC subscriber attaches below; only then is the finite stream
|
||||
// released. Without this gate the pump can drain all three events before the gRPC
|
||||
// subscriber registers — a register-vs-pump race that otherwise makes this test flaky.
|
||||
workerClient.HoldEventsUntilReleased();
|
||||
RecordingDashboardEventBroadcaster broadcaster = new();
|
||||
|
||||
await using GatewaySession session = CreateSession(workerClient, broadcaster);
|
||||
@@ -79,13 +85,22 @@ public sealed class GatewaySessionDashboardMirrorTests
|
||||
new GatewayMetrics());
|
||||
|
||||
List<MxEvent> grpcEvents = [];
|
||||
await foreach (MxEvent mxEvent in service
|
||||
.StreamEventsAsync(new StreamEventsRequest { SessionId = session.SessionId }, CancellationToken.None)
|
||||
.WithCancellation(CancellationToken.None))
|
||||
Task grpcReader = Task.Run(async () =>
|
||||
{
|
||||
grpcEvents.Add(mxEvent);
|
||||
}
|
||||
await foreach (MxEvent mxEvent in service
|
||||
.StreamEventsAsync(new StreamEventsRequest { SessionId = session.SessionId }, CancellationToken.None)
|
||||
.WithCancellation(CancellationToken.None))
|
||||
{
|
||||
grpcEvents.Add(mxEvent);
|
||||
}
|
||||
});
|
||||
|
||||
// The gRPC subscriber counts against ActiveEventSubscriberCount (the internal dashboard
|
||||
// mirror does not), so count == 1 confirms it has attached. Only then release the stream.
|
||||
await WaitUntilAsync(() => session.ActiveEventSubscriberCount == 1);
|
||||
workerClient.ReleaseEvents();
|
||||
|
||||
await grpcReader.WaitAsync(TestTimeout);
|
||||
await WaitUntilAsync(() => broadcaster.Captures.Count == 3);
|
||||
|
||||
Assert.Equal([1UL, 2UL, 3UL], grpcEvents.Select(mxEvent => mxEvent.WorkerSequence).ToArray());
|
||||
@@ -280,6 +295,24 @@ public sealed class GatewaySessionDashboardMirrorTests
|
||||
|
||||
public bool CompleteAfterConfiguredEvents { get; set; }
|
||||
|
||||
// Gate that ReadEventsAsync awaits before it yields anything. It starts out released, so
// tests that never call HoldEventsUntilReleased() are unaffected. A test that needs every
// subscriber attached before a finite, fast-completing stream drains calls
// HoldEventsUntilReleased() first and ReleaseEvents() once the subscribers are in place
// (avoids a register-vs-pump race).
private TaskCompletionSource _releaseGate = CreateReleasedGate();

// Creates a gate that is already signalled, i.e. one that does not block readers.
private static TaskCompletionSource CreateReleasedGate()
{
    TaskCompletionSource gate = new(TaskCreationOptions.RunContinuationsAsynchronously);
    gate.SetResult();
    return gate;
}

// Arms the gate so the event stream blocks until ReleaseEvents() is called.
public void HoldEventsUntilReleased()
{
    // Only swap in a fresh gate when the current one has already been released. Replacing a
    // gate that is still pending would strand any reader already awaiting it, because
    // ReleaseEvents() signals only the gate currently stored in the field.
    if (_releaseGate.Task.IsCompleted)
    {
        _releaseGate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
    }
}

// Signals the current gate. TrySetResult makes repeated calls (or a call on the
// default, already-released gate) a harmless no-op.
public void ReleaseEvents() => _releaseGate.TrySetResult();
|
||||
|
||||
public string SessionId { get; } = "session-dashboard-mirror";
|
||||
|
||||
public int? ProcessId { get; } = 1234;
|
||||
@@ -298,6 +331,9 @@ public sealed class GatewaySessionDashboardMirrorTests
|
||||
public async IAsyncEnumerable<WorkerEvent> ReadEventsAsync(
|
||||
[EnumeratorCancellation] CancellationToken cancellationToken)
|
||||
{
|
||||
// Block before yielding any event until released (ungated by default).
|
||||
await _releaseGate.Task.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||
|
||||
foreach (WorkerEvent workerEvent in Events)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
@@ -545,6 +545,43 @@ public sealed class GatewaySessionTests
|
||||
Assert.False(session.IsDetachGraceExpired(clock.GetUtcNow()));
|
||||
}
|
||||
|
||||
/// <summary>
/// Server-055 regression. When the very first attach FAILS (so the distributor never
/// registered a subscriber), the session must not enter the detach-grace window. The
/// failure path's <c>DetachEventSubscriber</c> rollback returns the reserved slot count to 0
/// but must leave <c>DetachedAtUtc</c> unset, since "last subscriber dropped" only has
/// meaning after a subscriber was successfully registered. A freshly-Ready session in this
/// situation therefore stays out of grace and never becomes sweep-eligible on that basis.
/// </summary>
[Fact]
public async Task DetachGrace_FailedFirstAttach_DoesNotEnterGrace()
{
    FakeTimeProvider fakeClock = new(DateTimeOffset.UtcNow);
    FakeWorkerClient worker = new();

    // A queue capacity of 0 makes the distributor constructor throw
    // ArgumentOutOfRangeException inside StartDistributorAndRegister. The first
    // AttachEventSubscriber call therefore fails AFTER reserving a slot, which drives the
    // catch → DetachEventSubscriber rollback path under test.
    await using GatewaySession session = CreateReadySessionWithDetachGrace(
        worker,
        fakeClock,
        detachGrace: TimeSpan.FromSeconds(30),
        queueCapacity: 0);

    Assert.ThrowsAny<ArgumentException>(
        () => session.AttachEventSubscriber(maxSubscribers: 1));

    // Slot reservation rolled back, session still Ready, and — because no subscriber ever
    // attached successfully — no detach timestamp recorded.
    Assert.Equal(SessionState.Ready, session.State);
    Assert.Equal(0, session.ActiveEventSubscriberCount);
    Assert.Null(session.DetachedAtUtc);

    // However far the clock moves, the session must not report an expired detach-grace.
    fakeClock.Advance(TimeSpan.FromHours(1));
    DateTimeOffset oneHourLater = fakeClock.GetUtcNow();
    Assert.False(session.IsDetachGraceExpired(oneHourLater));
}
|
||||
|
||||
/// <summary>
|
||||
/// Task 11. The gateway-owned internal dashboard subscriber must NOT keep a session out
|
||||
/// of detach-grace: with only the dashboard mirror attached (and no external gRPC
|
||||
@@ -618,7 +655,8 @@ public sealed class GatewaySessionTests
|
||||
IWorkerClient workerClient,
|
||||
TimeProvider timeProvider,
|
||||
TimeSpan detachGrace,
|
||||
IDashboardEventBroadcaster? dashboardBroadcaster = null)
|
||||
IDashboardEventBroadcaster? dashboardBroadcaster = null,
|
||||
int queueCapacity = 8)
|
||||
{
|
||||
GatewaySession session = new(
|
||||
sessionId: "session-test-detach-grace",
|
||||
@@ -636,7 +674,7 @@ public sealed class GatewaySessionTests
|
||||
openedAt: timeProvider.GetUtcNow(),
|
||||
eventStreaming: new SessionEventStreaming(
|
||||
new MxAccessGrpcMapper(),
|
||||
new EventOptions { QueueCapacity = 8 },
|
||||
new EventOptions { QueueCapacity = queueCapacity },
|
||||
NullLogger<SessionEventDistributor>.Instance,
|
||||
timeProvider,
|
||||
new GatewayMetrics(),
|
||||
|
||||
@@ -702,16 +702,71 @@ public sealed class SessionEventDistributorTests
|
||||
/// <summary>
/// Discards everything buffered on <paramref name="reader"/> and keeps waiting until the
/// channel's completion fault surfaces. Callers expect this helper to exit by throwing
/// that fault.
/// </summary>
/// <param name="reader">Subscriber channel that is expected to end with a fault.</param>
/// <remarks>
/// Each wait is bounded by <c>ReadTimeout</c>. If the channel instead completes gracefully
/// (<c>WaitToReadAsync</c> yields <c>false</c>), the helper awaits
/// <see cref="ChannelReader{T}.Completion"/> to surface any exception stored there and then
/// fails the test, so it cannot spin forever on a channel that ends without an exception.
/// </remarks>
private static async Task DrainUntilFaultAsync(ChannelReader<MxEvent> reader)
{
    while (true)
    {
        // Wait exactly once per iteration and act on the result; a second, unchecked wait
        // would only discard the information about graceful completion.
        bool hasMore = await reader.WaitToReadAsync().AsTask().WaitAsync(ReadTimeout);
        if (!hasMore)
        {
            // Graceful completion — propagate any stored exception, then fail.
            await reader.Completion;
            Assert.Fail("DrainUntilFaultAsync: channel completed gracefully (no fault).");

            // Assert.Fail throws; the return only makes the exit explicit to the reader.
            return;
        }

        // Throw away the buffered tail so the next wait observes completion or the fault.
        while (reader.TryRead(out _))
        {
        }
    }
}
|
||||
|
||||
/// <summary>
/// Regression: registering a subscriber AFTER the pump has completed (its event source
/// finished) but BEFORE the distributor is disposed must yield a channel that is completed
/// right away rather than one left open forever. By that point the pump has already run
/// its final <c>CompleteAllSubscribers</c> sweep and exited, so without the
/// register-after-completion guard the late subscriber's reader would hang indefinitely.
/// The problem was observed as an order-dependent hang in
/// <c>GatewaySessionDashboardMirrorTests</c>, where a gRPC subscriber attached after a
/// fast-completing worker stream had already drained.
/// </summary>
[Fact]
public async Task Register_AfterSourceCompletes_CompletesLateSubscriberInsteadOfHanging()
{
    Channel<MxEvent> upstream = Channel.CreateUnbounded<MxEvent>();
    await using SessionEventDistributor distributor = CreateDistributor(upstream.Reader);
    await distributor.StartAsync(CancellationToken.None);

    // The early subscriber exists only so the test can observe the pump's final sweep.
    using IEventSubscriberLease early = distributor.Register();

    // Completing the source lets the pump drain it, run CompleteAllSubscribers, and exit.
    upstream.Writer.Complete();

    // Once the early reader has run to completion the pump's sweep is known to have
    // happened, so any registration from here on is in the register-after-completion window.
    using (CancellationTokenSource earlyTimeout = new(ReadTimeout))
    {
        await foreach (MxEvent _ in early.Reader.ReadAllAsync(earlyTimeout.Token))
        {
        }
    }

    // Register after the pump is done. The new channel must already be completed, so the
    // bounded read ends on its own; ReadTimeout turns a regression into a quick
    // OperationCanceledException instead of an indefinite hang.
    using IEventSubscriberLease late = distributor.Register();
    using (CancellationTokenSource lateTimeout = new(ReadTimeout))
    {
        await foreach (MxEvent _ in late.Reader.ReadAllAsync(lateTimeout.Token))
        {
        }

        Assert.False(lateTimeout.IsCancellationRequested);
    }
}
|
||||
|
||||
/// <summary>
/// Convenience overload: creates a distributor over <paramref name="source"/> with a
/// replay buffer capacity of 1024 and a replay retention of 300 seconds.
/// </summary>
/// <param name="source">Reader the distributor is created over.</param>
/// <returns>The distributor produced by the three-argument overload.</returns>
private static SessionEventDistributor CreateDistributor(ChannelReader<MxEvent> source)
{
    const int defaultReplayBufferCapacity = 1024;
    const int defaultReplayRetentionSeconds = 300;

    return CreateDistributor(
        source,
        replayBufferCapacity: defaultReplayBufferCapacity,
        replayRetentionSeconds: defaultReplayRetentionSeconds);
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user