test(auditlog): ParentExecutionId e2e waits on audit kinds, not a row count
The headline ParentExecutionIdCorrelationTests intermittently failed under full-suite parallel load, seeing 6 of 7 routed-run rows (NotifySend missing). Root cause: WaitForSiteRowsPersistedAsync checked only a row *count*, which a partial snapshot could satisfy before the last-emitted NotifySend row settled, letting the SiteAuditTelemetryActor drain a partial batch. Fix is test-only: wait on the specific audit Kinds (guaranteeing NotifySend is durably in SQLite before the assertion) and widen the assertion ceiling 30s -> 90s for drain headroom under load. Also drops leftover // DIAG sampler debug scaffolding.
This commit is contained in:
@@ -277,6 +277,20 @@ public class ParentExecutionIdCorrelationTests : TestKit, IClassFixture<MsSqlMig
|
|||||||
new StringContent("{}", Encoding.UTF8, "application/json"));
|
new StringContent("{}", Encoding.UTF8, "application/json"));
|
||||||
Assert.Equal(System.Net.HttpStatusCode.OK, response.StatusCode);
|
Assert.Equal(System.Net.HttpStatusCode.OK, response.StatusCode);
|
||||||
|
|
||||||
|
// The routed run emits its sync-ApiCall and NotifySend audit rows on a
|
||||||
|
// deliberately fire-and-forget path (alog.md §7 — an audit write must
|
||||||
|
// never block the user-facing script call). `Notify.Send` therefore
|
||||||
|
// returns — and the routed `RouteToCallAsync` completes — BEFORE the
|
||||||
|
// SqliteAuditWriter background loop has flushed the NotifySend row into
|
||||||
|
// the site hot-path. Wait for all five site rows to be durably present
|
||||||
|
// in SQLite before the central assertion: this is the production
|
||||||
|
// durability point (the row IS in SQLite before it is considered
|
||||||
|
// audited), and pinning it removes the emit-vs-drain race that
|
||||||
|
// otherwise let the SiteAuditTelemetryADrain forward only four rows on
|
||||||
|
// its first tick and leave NotifySend stranded for a full drain
|
||||||
|
// interval under heavy parallel load.
|
||||||
|
await WaitForSiteRowsPersistedAsync(sqliteWriter);
|
||||||
|
|
||||||
// The routed run produced a NotifySend that buffered a NotificationSubmit
|
// The routed run produced a NotifySend that buffered a NotificationSubmit
|
||||||
// into S&F. Drain that genuine site-produced submission to the central
|
// into S&F. Drain that genuine site-produced submission to the central
|
||||||
// NotificationOutboxActor so the NotifyDeliver dispatch rows materialise.
|
// NotificationOutboxActor so the NotifyDeliver dispatch rows materialise.
|
||||||
@@ -347,7 +361,7 @@ public class ParentExecutionIdCorrelationTests : TestKit, IClassFixture<MsSqlMig
|
|||||||
AssertChain(treeFromChild, inboundExecutionId, routedExecutionId);
|
AssertChain(treeFromChild, inboundExecutionId, routedExecutionId);
|
||||||
var treeFromRoot = await repo.GetExecutionTreeAsync(inboundExecutionId);
|
var treeFromRoot = await repo.GetExecutionTreeAsync(inboundExecutionId);
|
||||||
AssertChain(treeFromRoot, inboundExecutionId, routedExecutionId);
|
AssertChain(treeFromRoot, inboundExecutionId, routedExecutionId);
|
||||||
}, TimeSpan.FromSeconds(30));
|
}, TimeSpan.FromSeconds(90));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -450,6 +464,48 @@ public class ParentExecutionIdCorrelationTests : TestKit, IClassFixture<MsSqlMig
|
|||||||
outboxActor.Tell(InternalMessages.DispatchTick.Instance);
|
outboxActor.Tell(InternalMessages.DispatchTick.Instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Polls the site SQLite hot-path until every audit <see cref="AuditKind"/>
|
||||||
|
/// the routed run is expected to emit — sync <c>ApiCall</c>, the cached
|
||||||
|
/// <c>CachedSubmit</c>/<c>ApiCallCached</c>/<c>CachedResolve</c> lifecycle,
|
||||||
|
/// and <c>NotifySend</c> — is durably present (Pending or Forwarded).
|
||||||
|
/// </summary>
|
||||||
|
/// <remarks>
|
||||||
|
/// The routed run's sync-<c>ApiCall</c> and <c>NotifySend</c> audit rows are
|
||||||
|
/// written fire-and-forget (the script call must not block on the audit
|
||||||
|
/// writer — alog.md §7), so the routed <c>RouteToCallAsync</c> returns
|
||||||
|
/// before the background writer loop has committed those rows.
|
||||||
|
/// <c>NotifySend</c> is emitted last and therefore settles last. This wait
|
||||||
|
/// asserts the specific <b>Kinds</b> are present, not merely a row count: a
|
||||||
|
/// bare count could be satisfied while the last-emitted <c>NotifySend</c>
|
||||||
|
/// row was still in flight, letting the <c>SiteAuditTelemetryActor</c> drain
|
||||||
|
/// only a partial snapshot and leave <c>NotifySend</c> stranded for a later
|
||||||
|
/// tick — the emit-vs-drain race that failed this test under full-suite load.
|
||||||
|
/// </remarks>
|
||||||
|
private async Task WaitForSiteRowsPersistedAsync(SqliteAuditWriter sqliteWriter)
|
||||||
|
{
|
||||||
|
var expectedKinds = new[]
|
||||||
|
{
|
||||||
|
AuditKind.ApiCall, AuditKind.CachedSubmit, AuditKind.ApiCallCached,
|
||||||
|
AuditKind.CachedResolve, AuditKind.NotifySend,
|
||||||
|
};
|
||||||
|
await AwaitAssertAsync(
|
||||||
|
async () =>
|
||||||
|
{
|
||||||
|
var pending = await sqliteWriter.ReadPendingAsync(256);
|
||||||
|
var forwarded = await sqliteWriter.ReadForwardedAsync(256);
|
||||||
|
var kinds = pending.Concat(forwarded).Select(r => r.Kind).ToHashSet();
|
||||||
|
var missing = expectedKinds.Where(k => !kinds.Contains(k)).ToList();
|
||||||
|
Assert.True(
|
||||||
|
missing.Count == 0,
|
||||||
|
"Expected every routed-run audit Kind durably in SQLite; missing: "
|
||||||
|
+ string.Join(", ", missing)
|
||||||
|
+ $" (saw {pending.Count} Pending + {forwarded.Count} Forwarded).");
|
||||||
|
},
|
||||||
|
TimeSpan.FromSeconds(30),
|
||||||
|
TimeSpan.FromMilliseconds(50));
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Stub <see cref="INotificationDeliveryAdapter"/> that always reports a
|
/// Stub <see cref="INotificationDeliveryAdapter"/> that always reports a
|
||||||
/// successful delivery — a single dispatch sweep then yields one
|
/// successful delivery — a single dispatch sweep then yields one
|
||||||
|
|||||||
Reference in New Issue
Block a user