diff --git a/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs b/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs index b16a2698..160dcf50 100644 --- a/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs +++ b/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs @@ -108,6 +108,29 @@ public class NotificationOutboxActorDispatchTests : TestKit .Returns(new[] { config }); } + /// + /// Drives a sweep to completion by re-telling + /// on every poll iteration until holds. This is the durable + /// barrier for chained-sweep tests: the in-flight guard (_dispatching) is only + /// lowered when a sweep's round-trips back + /// to the actor — AFTER the side-effect the assertion observes — so a single one-shot tick + /// can race the still-raised guard and be silently dropped (the dispatcher's intended + /// overlap protection), permanently stalling the count. A dropped tick is never retried, so + /// merely widening the wait cannot recover it. Re-telling each poll is idempotent (a tick + /// landing while the guard is up is harmlessly dropped) and the assertion's exact-count + /// strength is preserved unchanged. + /// + private void DrivePollingTick(IActorRef actor, Action assertion) + { + AwaitAssert( + () => + { + actor.Tell(InternalMessages.DispatchTick.Instance); + assertion(); + }, + duration: TimeSpan.FromSeconds(10)); + } + [Fact] public void DispatchTick_ClaimsDueNotifications_AndInvokesAdapter() { @@ -254,20 +277,48 @@ public class NotificationOutboxActorDispatchTests : TestKit SetupSmtpRetryPolicy(maxRetries: 5, retryDelay: TimeSpan.FromMinutes(1)); // GetDueAsync throws on every call: the dispatch pass's task could fault if the // failure were not handled, which would leave _dispatching stuck true forever. + // Count every claim via a thread-safe counter so the assertions can reason about + // the number of sweeps that actually ran without depending on NSubstitute's + // exact-count matchers (see the de-race note below). + var claimAttempts = 0; _outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any()) - .Returns>(_ => throw new InvalidOperationException("db down")); + .Returns>(_ => + { + Interlocked.Increment(ref claimAttempts); + throw new InvalidOperationException("db down"); + }); var actor = CreateActor([]); // First tick: the pass faults internally but must still clear the in-flight guard. actor.Tell(InternalMessages.DispatchTick.Instance); - AwaitAssert(() => _outboxRepository.Received(1).GetDueAsync( - Arg.Any(), Arg.Any(), Arg.Any())); + AwaitAssert( + () => Assert.True(Volatile.Read(ref claimAttempts) >= 1), + duration: TimeSpan.FromSeconds(10)); - // Second tick after the first completes: if the guard had wedged, this would be - // dropped and GetDueAsync would still show only one call. - actor.Tell(InternalMessages.DispatchTick.Instance); - AwaitAssert(() => _outboxRepository.Received(2).GetDueAsync( - Arg.Any(), Arg.Any(), Arg.Any())); + // The guard must clear after the faulted sweep so a fresh tick runs another sweep. + // + // De-race (S11): the in-flight guard (_dispatching) is only lowered when the + // faulted sweep's DispatchComplete round-trips back to the actor — which happens + // AFTER the GetDueAsync side-effect the barrier above observes. So a single second + // tick sent right after that barrier can race the guard while it is still up and + // be silently dropped (the dispatcher's intended overlap protection), leaving the + // claim count stuck at one forever — a dropped tick is never retried, so widening + // the timeout cannot recover it. Re-tell the tick on every poll iteration instead: + // a tick that lands while the guard is still up is harmlessly dropped, and the + // first tick that lands after the guard lowers runs a fresh sweep. The assertion + // is "at least two sweeps ran" rather than "exactly two": that is strictly the + // wedge invariant under test (a wedged guard would pin the count at one no matter + // how many ticks arrive), and an at-least assertion is immune to the surplus ticks + // the re-tell driver may land under CPU starvation. + AwaitAssert( + () => + { + actor.Tell(InternalMessages.DispatchTick.Instance); + Assert.True( + Volatile.Read(ref claimAttempts) >= 2, + "the in-flight guard wedged: a second dispatch tick never ran a fresh sweep"); + }, + duration: TimeSpan.FromSeconds(10)); } [Fact] @@ -410,8 +461,21 @@ public class NotificationOutboxActorDispatchTests : TestKit // so we don't mutate the shared _outboxRepository field that other tests in // this class configure differently. var outboxRepository = Substitute.For(); + // De-race (S11): hand out a fresh due notification for the FIRST THREE claims, then + // an empty batch forever. This caps the deliverable work — and therefore the + // UpdateAsync count — at exactly three, no matter how many dispatch ticks the + // driver below fires. Without this ceiling the exact-count barrier is doubly + // brittle: a single one-shot tick that races the in-flight guard is silently + // dropped (stalling the count below three), while a tick-per-poll driver lets + // queued ticks pile up under CPU starvation and overshoot three. Capping the + // claimable batch lets the driver re-tell ticks idempotently — surplus sweeps + // claim nothing and never call UpdateAsync — so Received(3) is a stable target + // that, once reached, cannot be exceeded. + var dueClaims = 0; outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any()) - .Returns(_ => new[] { MakeNotification() }); + .Returns(_ => Interlocked.Increment(ref dueClaims) <= 3 + ? new[] { MakeNotification() } + : Array.Empty()); // Counting factory: increments each time the DI container resolves an // INotificationDeliveryAdapter. Pre-fix this would have ticked once per @@ -433,22 +497,20 @@ public class NotificationOutboxActorDispatchTests : TestKit new NoOpCentralAuditWriter(), NullLogger.Instance))); - // Fire three sweeps end-to-end. Each waits on the previous via the - // in-flight guard, so the UpdateAsync count climbs monotonically. - actor.Tell(InternalMessages.DispatchTick.Instance); - AwaitAssert(() => outboxRepository.Received(1).UpdateAsync( + // Drive dispatch sweeps until all three claimable notifications have been + // delivered (Received(3).UpdateAsync). DrivePollingTick re-tells the tick on + // every poll iteration; the GetDueAsync cap above makes that idempotent (a + // surplus sweep claims an empty batch and calls UpdateAsync zero times), so + // the count climbs to exactly three and then holds there — Received(3) is a + // stable ceiling rather than a fragile one-shot target. This proves multiple + // dispatch sweeps ran across the actor's lifetime, which is the precondition + // for the resolve-adapters-once assertion that follows. + DrivePollingTick(actor, () => outboxRepository.Received(3).UpdateAsync( Arg.Any(), Arg.Any())); - actor.Tell(InternalMessages.DispatchTick.Instance); - AwaitAssert(() => outboxRepository.Received(2).UpdateAsync( - Arg.Any(), Arg.Any())); - - actor.Tell(InternalMessages.DispatchTick.Instance); - AwaitAssert(() => outboxRepository.Received(3).UpdateAsync( - Arg.Any(), Arg.Any())); - - // The adapter resolution must have happened EXACTLY ONCE despite three - // dispatch sweeps. Pre-fix this would have been 3 (or more). + // The adapter resolution must have happened EXACTLY ONCE despite the multiple + // dispatch sweeps just driven. Pre-fix this would have been once per sweep. + // This is the real invariant under test (NotificationOutbox-006). Assert.Equal(1, resolutionCount); }