diff --git a/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs b/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs
index b16a2698..160dcf50 100644
--- a/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs
+++ b/tests/ZB.MOM.WW.ScadaBridge.NotificationOutbox.Tests/NotificationOutboxActorDispatchTests.cs
@@ -108,6 +108,29 @@ public class NotificationOutboxActorDispatchTests : TestKit
.Returns(new[] { config });
}
+ ///
+ /// Drives a sweep to completion by re-telling
+ /// on every poll iteration until holds. This is the durable
+ /// barrier for chained-sweep tests: the in-flight guard (_dispatching) is only
+ /// lowered when a sweep's round-trips back
+ /// to the actor — AFTER the side-effect the assertion observes — so a single one-shot tick
+ /// can race the still-raised guard and be silently dropped (the dispatcher's intended
+ /// overlap protection), permanently stalling the count. A dropped tick is never retried, so
+ /// merely widening the wait cannot recover it. Re-telling each poll is idempotent (a tick
+ /// landing while the guard is up is harmlessly dropped) and the assertion's exact-count
+ /// strength is preserved unchanged.
+ ///
+ private void DrivePollingTick(IActorRef actor, Action assertion)
+ {
+ AwaitAssert(
+ () =>
+ {
+ actor.Tell(InternalMessages.DispatchTick.Instance);
+ assertion();
+ },
+ duration: TimeSpan.FromSeconds(10));
+ }
+
[Fact]
public void DispatchTick_ClaimsDueNotifications_AndInvokesAdapter()
{
@@ -254,20 +277,48 @@ public class NotificationOutboxActorDispatchTests : TestKit
SetupSmtpRetryPolicy(maxRetries: 5, retryDelay: TimeSpan.FromMinutes(1));
// GetDueAsync throws on every call: the dispatch pass's task could fault if the
// failure were not handled, which would leave _dispatching stuck true forever.
+ // Count every claim via a thread-safe counter so the assertions can reason about
+ // the number of sweeps that actually ran without depending on NSubstitute's
+ // exact-count matchers (see the de-race note below).
+ var claimAttempts = 0;
_outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any())
- .Returns>(_ => throw new InvalidOperationException("db down"));
+ .Returns>(_ =>
+ {
+ Interlocked.Increment(ref claimAttempts);
+ throw new InvalidOperationException("db down");
+ });
var actor = CreateActor([]);
// First tick: the pass faults internally but must still clear the in-flight guard.
actor.Tell(InternalMessages.DispatchTick.Instance);
- AwaitAssert(() => _outboxRepository.Received(1).GetDueAsync(
- Arg.Any(), Arg.Any(), Arg.Any()));
+ AwaitAssert(
+ () => Assert.True(Volatile.Read(ref claimAttempts) >= 1),
+ duration: TimeSpan.FromSeconds(10));
- // Second tick after the first completes: if the guard had wedged, this would be
- // dropped and GetDueAsync would still show only one call.
- actor.Tell(InternalMessages.DispatchTick.Instance);
- AwaitAssert(() => _outboxRepository.Received(2).GetDueAsync(
- Arg.Any(), Arg.Any(), Arg.Any()));
+ // The guard must clear after the faulted sweep so a fresh tick runs another sweep.
+ //
+ // De-race (S11): the in-flight guard (_dispatching) is only lowered when the
+ // faulted sweep's DispatchComplete round-trips back to the actor — which happens
+ // AFTER the GetDueAsync side-effect the barrier above observes. So a single second
+ // tick sent right after that barrier can race the guard while it is still up and
+ // be silently dropped (the dispatcher's intended overlap protection), leaving the
+ // claim count stuck at one forever — a dropped tick is never retried, so widening
+ // the timeout cannot recover it. Re-tell the tick on every poll iteration instead:
+ // a tick that lands while the guard is still up is harmlessly dropped, and the
+ // first tick that lands after the guard lowers runs a fresh sweep. The assertion
+ // is "at least two sweeps ran" rather than "exactly two": that is strictly the
+ // wedge invariant under test (a wedged guard would pin the count at one no matter
+ // how many ticks arrive), and an at-least assertion is immune to the surplus ticks
+ // the re-tell driver may land under CPU starvation.
+ AwaitAssert(
+ () =>
+ {
+ actor.Tell(InternalMessages.DispatchTick.Instance);
+ Assert.True(
+ Volatile.Read(ref claimAttempts) >= 2,
+ "the in-flight guard wedged: a second dispatch tick never ran a fresh sweep");
+ },
+ duration: TimeSpan.FromSeconds(10));
}
[Fact]
@@ -410,8 +461,21 @@ public class NotificationOutboxActorDispatchTests : TestKit
// so we don't mutate the shared _outboxRepository field that other tests in
// this class configure differently.
var outboxRepository = Substitute.For();
+ // De-race (S11): hand out a fresh due notification for the FIRST THREE claims, then
+ // an empty batch forever. This caps the deliverable work — and therefore the
+ // UpdateAsync count — at exactly three, no matter how many dispatch ticks the
+ // driver below fires. Without this ceiling the exact-count barrier is doubly
+ // brittle: a single one-shot tick that races the in-flight guard is silently
+ // dropped (stalling the count below three), while a tick-per-poll driver lets
+ // queued ticks pile up under CPU starvation and overshoot three. Capping the
+ // claimable batch lets the driver re-tell ticks idempotently — surplus sweeps
+ // claim nothing and never call UpdateAsync — so Received(3) is a stable target
+ // that, once reached, cannot be exceeded.
+ var dueClaims = 0;
outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any())
- .Returns(_ => new[] { MakeNotification() });
+ .Returns(_ => Interlocked.Increment(ref dueClaims) <= 3
+ ? new[] { MakeNotification() }
+ : Array.Empty());
// Counting factory: increments each time the DI container resolves an
// INotificationDeliveryAdapter. Pre-fix this would have ticked once per
@@ -433,22 +497,20 @@ public class NotificationOutboxActorDispatchTests : TestKit
new NoOpCentralAuditWriter(),
NullLogger.Instance)));
- // Fire three sweeps end-to-end. Each waits on the previous via the
- // in-flight guard, so the UpdateAsync count climbs monotonically.
- actor.Tell(InternalMessages.DispatchTick.Instance);
- AwaitAssert(() => outboxRepository.Received(1).UpdateAsync(
+ // Drive dispatch sweeps until all three claimable notifications have been
+ // delivered (Received(3).UpdateAsync). DrivePollingTick re-tells the tick on
+ // every poll iteration; the GetDueAsync cap above makes that idempotent (a
+ // surplus sweep claims an empty batch and calls UpdateAsync zero times), so
+ // the count climbs to exactly three and then holds there — Received(3) is a
+ // stable ceiling rather than a fragile one-shot target. This proves multiple
+ // dispatch sweeps ran across the actor's lifetime, which is the precondition
+ // for the resolve-adapters-once assertion that follows.
+ DrivePollingTick(actor, () => outboxRepository.Received(3).UpdateAsync(
Arg.Any(), Arg.Any()));
- actor.Tell(InternalMessages.DispatchTick.Instance);
- AwaitAssert(() => outboxRepository.Received(2).UpdateAsync(
- Arg.Any(), Arg.Any()));
-
- actor.Tell(InternalMessages.DispatchTick.Instance);
- AwaitAssert(() => outboxRepository.Received(3).UpdateAsync(
- Arg.Any(), Arg.Any()));
-
- // The adapter resolution must have happened EXACTLY ONCE despite three
- // dispatch sweeps. Pre-fix this would have been 3 (or more).
+ // The adapter resolution must have happened EXACTLY ONCE despite the multiple
+ // dispatch sweeps just driven. Pre-fix this would have been once per sweep.
+ // This is the real invariant under test (NotificationOutbox-006).
Assert.Equal(1, resolutionCount);
}