fix(tests): stabilize three flaky tests under parallel full-solution load

#1 EventPumpBoundedChannelTests.Tags_metrics_with_client_name_for_multi_driver_hosts:
Replace fixed Task.Delay(100) with a poll-until-condition loop (5 s
timeout, 25 ms poll) so the test waits until the galaxy.events.received
measurement for galaxy.client=Driver-X actually lands in the listener.
Also add lock(captured) in the MeterListener callback and around every
read of captured, since Counter.Add() fires the callback on the RunAsync
background thread while the test thread reads the collection.

#2 VirtualTagEngineTests.Upstream_change_triggers_cascade_through_two_levels:
After waiting for B=15.0, also await WaitForConditionAsync for C=30.0
before asserting C. The cascade runs B then C sequentially under the
_evalGate semaphore; the prior code could read C while its evaluation
had not yet acquired the gate.

#3 ThreeUserInteropMatrixTests.Admin_Resolves_All_Five_Groups_From_LDAP:
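Wrap the AuthenticateAsync call in a CancellationTokenSource linked to
TestContext.Current.CancellationToken with a 15 s per-attempt timeout, and
retry once if that timeout fires, so a transient GLAuth latency spike
under parallel test load does not fail the test with a cancellation
before the LDAP bind/search completes. Cancellation of the test's own
token is not retried.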

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-18 05:50:07 -04:00
parent 41f133a337
commit 392b219233
3 changed files with 63 additions and 10 deletions

View File

@@ -68,9 +68,15 @@ public sealed class VirtualTagEngineTests
engine.Read("B").Value.ShouldBe(11.0);
engine.Read("C").Value.ShouldBe(22.0);
// Change upstream — cascade should recompute B (11→15.0) then C (30.0)
// Change upstream — cascade should recompute B (11→15.0) then C (30.0).
// Both B and C are updated in the same CascadeAsync call (topological order:
// B then C), but we must wait for each independently: the WaitForConditionAsync
// on B returns as soon as _valueCache["B"] is set (before the semaphore is
// released for C's evaluation), so asserting C immediately after the B-wait
// races against C's still-in-progress evaluation. Wait for C explicitly.
up.Push("A", 5.0);
await WaitForConditionAsync(() => Equals(engine.Read("B").Value, 15.0));
await WaitForConditionAsync(() => Equals(engine.Read("C").Value, 30.0));
engine.Read("B").Value.ShouldBe(15.0);
engine.Read("C").Value.ShouldBe(30.0);
}

View File

@@ -87,9 +87,11 @@ public sealed class EventPumpBoundedChannelTests
{
if (instr.Meter.Name == EventPump.MeterName) l.EnableMeasurementEvents(instr);
};
// The callback fires on the thread that calls Counter.Add() — that is the
// RunAsync background Task. Use lock(captured) everywhere to avoid torn reads.
listener.SetMeasurementEventCallback<long>((instr, _, tags, _) =>
{
captured.Add((instr.Name, tags.ToArray()));
lock (captured) { captured.Add((instr.Name, tags.ToArray())); }
});
listener.Start();
@@ -101,17 +103,40 @@ public sealed class EventPumpBoundedChannelTests
{
pump.Start();
await subscriber.EmitAsync(7, 42.0);
await Task.Delay(100);
// Poll until at least one galaxy.events.received measurement tagged
// galaxy.client=Driver-X lands in the listener, rather than using a
// fixed delay that races under parallel test load on a busy box.
var deadline = DateTime.UtcNow.AddSeconds(5);
bool found = false;
while (DateTime.UtcNow < deadline)
{
listener.RecordObservableInstruments();
bool hasMatch;
lock (captured)
{
hasMatch = captured.Any(c =>
c.Instrument == "galaxy.events.received" &&
c.Tags.Any(t => t.Key == "galaxy.client" &&
string.Equals((string?)t.Value, "Driver-X", StringComparison.Ordinal)));
}
if (hasMatch) { found = true; break; }
await Task.Delay(25);
}
_ = found; // assertion happens below after dispose
}
// The static Meter is shared across all EventPump instances in the test
// assembly; xUnit may run other pump tests in parallel and their
// measurements land on the same listener. Filter to our pump's tag value.
var ours = captured
List<(string Instrument, KeyValuePair<string, object?>[] Tags)> ours;
lock (captured)
{
ours = captured
.Where(c => c.Tags.Any(t => t.Key == "galaxy.client"
&& string.Equals((string?)t.Value, "Driver-X", StringComparison.Ordinal)))
.ToList();
}
ours.ShouldNotBeEmpty(
"at least one measurement from this test's pump must carry galaxy.client=Driver-X");

View File

@@ -207,9 +207,31 @@ public sealed class ThreeUserInteropMatrixTests
// pins the resolution explicitly in strict mode.
if (!GlauthReachable()) Assert.Skip("GLAuth unreachable at localhost:3893.");
var auth = await NewAuthenticator().AuthenticateAsync("admin", "admin123", TestContext.Current.CancellationToken);
// Under parallel full-solution test load, GLAuth on localhost can be slow to
// respond. Give each attempt its own timeout (LdapTimeoutSeconds) via a
// CancellationTokenSource linked to the runner's token, so cancellation by the
// runner still propagates, and retry once when only the per-attempt timeout
// fired to absorb transient latency spikes.
const int LdapTimeoutSeconds = 15;
UserAuthResult? auth = null;
for (var attempt = 0; attempt < 2; attempt++)
{
using var cts = CancellationTokenSource.CreateLinkedTokenSource(
TestContext.Current.CancellationToken);
cts.CancelAfter(TimeSpan.FromSeconds(LdapTimeoutSeconds));
try
{
auth = await NewAuthenticator().AuthenticateAsync("admin", "admin123", cts.Token);
break; // success — no retry needed
}
catch (OperationCanceledException) when (!TestContext.Current.CancellationToken.IsCancellationRequested)
{
if (attempt == 1) throw; // second attempt also timed out — let it fail
// First attempt timed out under load; retry once with a fresh token.
}
}
auth.Success.ShouldBeTrue();
auth.ShouldNotBeNull();
auth!.Success.ShouldBeTrue();
auth.Groups.ShouldContain("ReadOnly");
auth.Groups.ShouldContain("WriteOperate");
auth.Groups.ShouldContain("WriteTune");