test(auditlog): ParentExecutionId e2e waits on audit kinds, not a row count
The headline ParentExecutionIdCorrelationTests intermittently failed under full-suite parallel load, seeing 6 of 7 routed-run rows (NotifySend missing). Root cause: WaitForSiteRowsPersistedAsync checked only a row *count*, which a partial snapshot could satisfy before the last-emitted NotifySend row settled, letting the SiteAuditTelemetryActor drain a partial batch. Fix is test-only: wait on the specific audit Kinds (guaranteeing NotifySend is durably in SQLite before the assertion) and widen the assertion ceiling 30s -> 90s for drain headroom under load. Also drops leftover // DIAG sampler debug scaffolding.
This commit is contained in:
@@ -277,6 +277,20 @@ public class ParentExecutionIdCorrelationTests : TestKit, IClassFixture<MsSqlMig
|
||||
new StringContent("{}", Encoding.UTF8, "application/json"));
|
||||
Assert.Equal(System.Net.HttpStatusCode.OK, response.StatusCode);
|
||||
|
||||
// The routed run emits its sync-ApiCall and NotifySend audit rows on a
|
||||
// deliberately fire-and-forget path (alog.md §7 — an audit write must
|
||||
// never block the user-facing script call). `Notify.Send` therefore
|
||||
// returns — and the routed `RouteToCallAsync` completes — BEFORE the
|
||||
// SqliteAuditWriter background loop has flushed the NotifySend row into
|
||||
// the site hot-path. Wait for all five expected audit Kinds to be durably present
|
||||
// in SQLite before the central assertion: this is the production
|
||||
// durability point (the row IS in SQLite before it is considered
|
||||
// audited), and pinning it removes the emit-vs-drain race that
|
||||
// otherwise let the SiteAuditTelemetryActor forward only four rows on
|
||||
// its first tick and leave NotifySend stranded for a full drain
|
||||
// interval under heavy parallel load.
|
||||
await WaitForSiteRowsPersistedAsync(sqliteWriter);
|
||||
|
||||
// The routed run produced a NotifySend that buffered a NotificationSubmit
|
||||
// into S&F. Drain that genuine site-produced submission to the central
|
||||
// NotificationOutboxActor so the NotifyDeliver dispatch rows materialise.
|
||||
@@ -347,7 +361,7 @@ public class ParentExecutionIdCorrelationTests : TestKit, IClassFixture<MsSqlMig
|
||||
AssertChain(treeFromChild, inboundExecutionId, routedExecutionId);
|
||||
var treeFromRoot = await repo.GetExecutionTreeAsync(inboundExecutionId);
|
||||
AssertChain(treeFromRoot, inboundExecutionId, routedExecutionId);
|
||||
}, TimeSpan.FromSeconds(30));
|
||||
}, TimeSpan.FromSeconds(90));
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -450,6 +464,48 @@ public class ParentExecutionIdCorrelationTests : TestKit, IClassFixture<MsSqlMig
|
||||
outboxActor.Tell(InternalMessages.DispatchTick.Instance);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Polls the site SQLite hot-path until every audit <see cref="AuditKind"/>
|
||||
/// the routed run is expected to emit — sync <c>ApiCall</c>, the cached
|
||||
/// <c>CachedSubmit</c>/<c>ApiCallCached</c>/<c>CachedResolve</c> lifecycle,
|
||||
/// and <c>NotifySend</c> — is durably present (Pending or Forwarded).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The routed run's sync-<c>ApiCall</c> and <c>NotifySend</c> audit rows are
|
||||
/// written fire-and-forget (the script call must not block on the audit
|
||||
/// writer — alog.md §7), so the routed <c>RouteToCallAsync</c> returns
|
||||
/// before the background writer loop has committed those rows.
|
||||
/// <c>NotifySend</c> is emitted last and therefore settles last. This wait
|
||||
/// asserts the specific <b>Kinds</b> are present, not merely a row count: a
|
||||
/// bare count could be satisfied while the last-emitted <c>NotifySend</c>
|
||||
/// row was still in flight, letting the <c>SiteAuditTelemetryActor</c> drain
|
||||
/// only a partial snapshot and leave <c>NotifySend</c> stranded for a later
|
||||
/// tick — the emit-vs-drain race that failed this test under full-suite load.
|
||||
/// </remarks>
|
||||
private async Task WaitForSiteRowsPersistedAsync(SqliteAuditWriter sqliteWriter)
{
    // Every audit Kind that must be observed in SQLite before the wait succeeds.
    AuditKind[] requiredKinds =
    {
        AuditKind.ApiCall,
        AuditKind.CachedSubmit,
        AuditKind.ApiCallCached,
        AuditKind.CachedResolve,
        AuditKind.NotifySend,
    };

    // Overall ceiling for the wait, and the delay between successive probes.
    var ceiling = TimeSpan.FromSeconds(30);
    var pollInterval = TimeSpan.FromMilliseconds(50);

    // Single probe: read both the Pending and Forwarded rows, collect the Kinds
    // seen across them, and fail (so AwaitAssertAsync retries) while any
    // required Kind is still absent.
    async Task AssertEveryKindPresentAsync()
    {
        var pendingRows = await sqliteWriter.ReadPendingAsync(256);
        var forwardedRows = await sqliteWriter.ReadForwardedAsync(256);

        var observedKinds = pendingRows
            .Concat(forwardedRows)
            .Select(row => row.Kind)
            .ToHashSet();

        var absentKinds = Array.FindAll(
            requiredKinds,
            kind => !observedKinds.Contains(kind));

        // The message lists the absent Kinds plus the row counts seen, so a
        // timeout failure shows what the last probe actually observed.
        Assert.True(
            absentKinds.Length == 0,
            "Expected every routed-run audit Kind durably in SQLite; missing: "
            + string.Join(", ", absentKinds)
            + $" (saw {pendingRows.Count} Pending + {forwardedRows.Count} Forwarded).");
    }

    await AwaitAssertAsync(AssertEveryKindPresentAsync, ceiling, pollInterval);
}
|
||||
|
||||
/// <summary>
|
||||
/// Stub <see cref="INotificationDeliveryAdapter"/> that always reports a
|
||||
/// successful delivery — a single dispatch sweep then yields one
|
||||
|
||||
Reference in New Issue
Block a user