Resolve Tests-027..031: flake root cause + coverage gaps

Tests-027  GatewayMetrics exposes its internal Meter; the
           StreamEvents_WhenEventIsWritten_RecordsSendDuration listener
           now filters by ReferenceEquals(instrument.Meter, metrics.Meter)
           instead of Meter.Name, so parallel tests with their own
           GatewayMetrics no longer cross-contaminate the families list.
Tests-028  FakeWorkerClient.Kill now captures LastKillReason;
           SessionManager.KillWorkerAsync tests pin the reason
           propagation end-to-end and cover the blank/null guard. The
           DashboardSessionAdminService kill test pins the literal
           dashboard-admin-kill reason.
Tests-029  Added CloseSessionAsync_BlankSessionId_ReturnsFailure to mirror
           the existing KillWorkerAsync blank-id coverage.
Tests-030  DeleteAsync_WhenStoreRefuses_ReportsFriendlyError renamed and
           extended to assert the dashboard-delete-key audit row with
           Details = not-found-or-active. Added
           DeleteAsync_BlankKeyId_ReturnsFailure.
Tests-031  DashboardSnapshotPublisher reconnect test now measures the
           gap from the first throw inside the fake (firstThrowAt) to
           secondSubscribeAt, isolating Task.Delay from StartAsync /
           scheduling overhead.

All resolved on 2026-05-24; 512/512 gateway tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-24 09:28:54 -04:00
parent 430187c28b
commit 6bae5ea3a3
7 changed files with 255 additions and 18 deletions
@@ -194,7 +194,17 @@ public sealed class MxAccessGatewayServiceTests
Assert.Equal("session-1", sessionManager.LastReadEventsSessionId);
}
/// <summary>Verifies that StreamEvents records send duration metrics when an event is written.</summary>
/// <summary>
/// Verifies that <c>StreamEvents</c> records the send-duration histogram per event.
///
/// <para>Tests-027 (concurrency flake): the listener must filter by the specific
/// <see cref="System.Diagnostics.Metrics.Meter"/> instance owned by this test, not by the process-shared
/// <see cref="GatewayMetrics.MeterName"/>. Otherwise a parallel test that constructs its own
/// <see cref="GatewayMetrics"/> and records <c>mxgateway.events.stream_send.duration</c> would
/// cross-contaminate <c>families</c> and break the equality assertion below. See the companion
/// <see cref="StreamEvents_RecordSendDurationListener_IgnoresMeasurementsFromOtherMetersWithSameName"/>
/// regression for the cross-talk reproduction.</para>
/// </summary>
[Fact]
public async Task StreamEvents_WhenEventIsWritten_RecordsSendDuration()
{
@@ -203,7 +213,7 @@ public sealed class MxAccessGatewayServiceTests
List<string> families = [];
listener.InstrumentPublished = (instrument, meterListener) =>
{
if (instrument.Meter.Name == GatewayMetrics.MeterName
if (ReferenceEquals(instrument.Meter, metrics.Meter)
&& instrument.Name == "mxgateway.events.stream_send.duration")
{
meterListener.EnableMeasurementEvents(instrument);
@@ -212,7 +222,8 @@ public sealed class MxAccessGatewayServiceTests
listener.SetMeasurementEventCallback<double>(
(instrument, measurement, tags, _) =>
{
if (instrument.Name != "mxgateway.events.stream_send.duration")
if (!ReferenceEquals(instrument.Meter, metrics.Meter)
|| instrument.Name != "mxgateway.events.stream_send.duration")
{
return;
}
@@ -239,6 +250,69 @@ public sealed class MxAccessGatewayServiceTests
Assert.Equal([MxEventFamily.OnDataChange.ToString()], families);
}
/// <summary>
/// Tests-027 regression: a <see cref="MeterListener"/> that filters by the specific
/// <see cref="System.Diagnostics.Metrics.Meter"/> instance (via <see cref="object.ReferenceEquals"/>)
/// must NOT observe measurements recorded on a different <see cref="GatewayMetrics"/> that shares
/// the same <see cref="GatewayMetrics.MeterName"/>. This is the cross-talk vector that previously
/// caused <c>StreamEvents_WhenEventIsWritten_RecordsSendDuration</c> to fail intermittently when
/// run in parallel with another test recording the same histogram.
///
/// <para>The test enables the <c>mxgateway.events.stream_send.duration</c> instrument on every meter
/// that publishes it, records one measurement on a sibling <see cref="GatewayMetrics"/>, then streams
/// a single event through the service under test. Only <c>family</c> tag values from measurements
/// whose meter is the instance under test are collected and asserted.</para>
/// </summary>
[Fact]
public async Task StreamEvents_RecordSendDurationListener_IgnoresMeasurementsFromOtherMetersWithSameName()
{
// Two independent GatewayMetrics instances: the one wired into the service, and a sibling
// standing in for a parallel test's metrics.
using GatewayMetrics metricsUnderTest = new();
using GatewayMetrics otherMetrics = new();
using MeterListener listener = new();
List<string> families = [];
listener.InstrumentPublished = (instrument, meterListener) =>
{
// Subscribe to the stream_send histogram on BOTH meters so the listener
// would observe a cross-talk measurement if the callback did not filter.
// Deliberately no Meter check here: filtering is left entirely to the callback below.
if (instrument.Name == "mxgateway.events.stream_send.duration")
{
meterListener.EnableMeasurementEvents(instrument);
}
};
listener.SetMeasurementEventCallback<double>(
(instrument, measurement, tags, _) =>
{
// The filter under test: drop anything not recorded on metricsUnderTest's own Meter
// instance, even when the instrument name matches.
if (!ReferenceEquals(instrument.Meter, metricsUnderTest.Meter)
|| instrument.Name != "mxgateway.events.stream_send.duration")
{
return;
}
// Only the "family" tag is captured; the measured duration itself is not asserted on.
foreach (KeyValuePair<string, object?> tag in tags)
{
if (tag.Key == "family" && tag.Value is string family)
{
families.Add(family);
}
}
});
listener.Start();
// Simulate the cross-talk: another test's GatewayMetrics records a value
// before the test-under-test does its single event publish. The listener
// must filter this out by Meter reference.
otherMetrics.RecordEventStreamSend(MxEventFamily.OnWriteComplete.ToString(), TimeSpan.FromMilliseconds(123));
// NOTE(review): CreateWorkerEvent is defined outside this view; the assertion below
// implies it produces an OnDataChange event - confirm against the helper.
FakeSessionManager sessionManager = new();
sessionManager.Events.Add(CreateWorkerEvent("session-1", workerSequence: 2));
MxAccessGatewayService service = CreateService(sessionManager, metrics: metricsUnderTest);
RecordingServerStreamWriter<MxEvent> writer = new();
await service.StreamEvents(
new StreamEventsRequest { SessionId = "session-1" },
writer,
new TestServerCallContext());
// Only the test-under-test's OnDataChange recording should be observed —
// the OnWriteComplete recording on the sibling meter must NOT leak through.
Assert.Equal([MxEventFamily.OnDataChange.ToString()], families);
}
/// <summary>Verifies that CloseSession throws InvalidArgument when session ID is blank.</summary>
[Fact]
public async Task CloseSession_WithBlankSessionId_ThrowsInvalidArgument()