refactor(auditlog): GetExecutionTreeAsync recurses over a distinct edge set
This commit is contained in:
@@ -576,18 +576,22 @@ VALUES
|
||||
/// climbs from the supplied node to the root — the last execution id with no
|
||||
/// parent. The loop is capped at <see cref="ExecutionChainMaxDepth"/>
|
||||
/// iterations; a purged/missing parent simply ends the climb early. <b>Walk
|
||||
/// down:</b> a recursive CTE seeded at the root joins
|
||||
/// <c>child.ParentExecutionId = parent.ExecutionId</c> to enumerate every
|
||||
/// descendant, bounded by <c>OPTION (MAXRECURSION 32)</c> — corrupt cyclic
|
||||
/// data raises a <see cref="SqlException"/> (msg 530) rather than spinning.
|
||||
/// down:</b> a recursive CTE over a DISTINCT
|
||||
/// <c>(ExecutionId, ParentExecutionId)</c> edge set, seeded at the root edge
|
||||
/// and joining <c>edge.ParentExecutionId = chain.ExecutionId</c> to
|
||||
/// enumerate every descendant. Recursing over edges rather than raw rows
|
||||
/// keeps the recursion one path wide per execution. It is bounded by
|
||||
/// <c>OPTION (MAXRECURSION n)</c> with n = <see cref="ExecutionChainMaxDepth"/> — corrupt cyclic data raises a
|
||||
/// <see cref="SqlException"/> (msg 530) rather than spinning.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The chain's full execution-id set is the union of the rows'
|
||||
/// <c>ExecutionId</c> and their <c>ParentExecutionId</c>, so an execution
|
||||
/// referenced only as a parent — a "stub" that emitted no rows of its own —
|
||||
/// is included. The final projection LEFT JOINs that id set back to
|
||||
/// <c>AuditLog</c> and <c>GROUP BY</c>s, so a stub yields a node with
|
||||
/// <c>RowCount = 0</c> and empty/null aggregates. The query is SELECT-only
|
||||
/// The chain's full execution-id set is every edge's <c>ExecutionId</c>
|
||||
/// unioned with its non-null <c>ParentExecutionId</c>, so an execution
|
||||
/// referenced only as a parent — a "stub" that emitted no rows of its own,
|
||||
/// and therefore owns no edge of its own — is still included. The final
|
||||
/// projection LEFT JOINs that id set back to <c>AuditLog</c> and
|
||||
/// <c>GROUP BY</c>s, so a stub yields a node with <c>RowCount = 0</c> and
|
||||
/// empty/null aggregates. The query is SELECT-only
|
||||
/// (the audit writer role grants no UPDATE/DELETE — reads are unrestricted).
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
@@ -639,36 +643,61 @@ VALUES
|
||||
}
|
||||
|
||||
// --- Phase 2: walk down from the root via a recursive CTE ---------
|
||||
// Chain : seeded at the root, recursively pulls every distinct
|
||||
// ExecutionId whose rows carry a ParentExecutionId already
|
||||
// in the chain. SELECT DISTINCT in the recursive member is
|
||||
// rejected by SQL Server, so the recursion walks raw rows
|
||||
// and the outer query de-duplicates.
|
||||
// ChainIds: the chain's full execution-id set = every ExecutionId in
|
||||
// Chain UNIONed with every non-null ParentExecutionId — the
|
||||
// UNION pulls in stub parents that emitted no rows.
|
||||
// Final : LEFT JOIN ChainIds back to AuditLog and GROUP BY so a
|
||||
// stub surfaces with RowCount 0 and NULL aggregates.
|
||||
// Edges : a non-recursive, DISTINCT (ExecutionId, ParentExecutionId)
|
||||
// edge set distilled from AuditLog. Recursing over edges
|
||||
// instead of raw rows means an execution with N audit rows
|
||||
// contributes ONE recursion path, not N — MAXRECURSION
|
||||
// bounds depth, not per-level width, so the raw-row form
|
||||
// could fan out badly. One edge per execution because all
|
||||
// rows of an execution share a single ParentExecutionId
|
||||
// (see the MIN(...) note on the final projection).
|
||||
// Chain : seeded with a literal (root id, NULL parent) row, then recursively joins each edge whose
|
||||
// ParentExecutionId is an ExecutionId already in the chain.
|
||||
// Each edge carries its own ParentExecutionId, so the chain
|
||||
// of edges already surfaces every execution id in the tree
|
||||
// — including a row-less stub parent, which appears as the
|
||||
// ParentExecutionId of its child's edge. No separate
|
||||
// join back to AuditLog is needed to find parent ids (ChainIds reads Chain only).
|
||||
// Final : collect every distinct execution id reachable from the
|
||||
// chain (each edge's ExecutionId plus its non-null
|
||||
// ParentExecutionId), LEFT JOIN back to AuditLog and
|
||||
// GROUP BY so a stub parent — which owns no edge of its own
|
||||
// because it emitted no rows — still surfaces as a node with
|
||||
// RowCount 0 and NULL aggregates.
|
||||
var nodes = new List<ExecutionTreeNode>();
|
||||
await using (var downCmd = conn.CreateCommand())
|
||||
{
|
||||
downCmd.CommandText = @"
|
||||
WITH Chain AS (
|
||||
SELECT CAST(@root AS uniqueidentifier) AS ExecutionId
|
||||
downCmd.CommandText = $@"
|
||||
WITH Edges AS (
|
||||
SELECT DISTINCT ExecutionId, ParentExecutionId
|
||||
FROM dbo.AuditLog
|
||||
WHERE ExecutionId IS NOT NULL
|
||||
),
|
||||
Chain AS (
|
||||
-- Anchor: the root execution id, seeded as a literal so
|
||||
-- it is present even when the root is a row-less stub
|
||||
-- (a purged/no-action parent owns no edge of its own).
|
||||
-- The root is parentless by construction — the upward
|
||||
-- walk stopped there — so its ParentExecutionId is NULL.
|
||||
SELECT CAST(@root AS uniqueidentifier) AS ExecutionId,
|
||||
CAST(NULL AS uniqueidentifier) AS ParentExecutionId
|
||||
UNION ALL
|
||||
SELECT a.ExecutionId
|
||||
FROM dbo.AuditLog a
|
||||
INNER JOIN Chain c ON a.ParentExecutionId = c.ExecutionId
|
||||
WHERE a.ExecutionId IS NOT NULL
|
||||
SELECT e.ExecutionId, e.ParentExecutionId
|
||||
FROM Edges e
|
||||
INNER JOIN Chain c ON e.ParentExecutionId = c.ExecutionId
|
||||
),
|
||||
ChainIds AS (
|
||||
SELECT DISTINCT ExecutionId FROM Chain
|
||||
SELECT ExecutionId FROM Chain
|
||||
UNION
|
||||
SELECT DISTINCT a.ParentExecutionId
|
||||
FROM dbo.AuditLog a
|
||||
INNER JOIN Chain c ON a.ExecutionId = c.ExecutionId
|
||||
WHERE a.ParentExecutionId IS NOT NULL
|
||||
SELECT ParentExecutionId FROM Chain
|
||||
WHERE ParentExecutionId IS NOT NULL
|
||||
)
|
||||
-- ParentExecutionId / SourceSiteId / SourceInstanceId are
|
||||
-- derived via MIN: every audit row of one execution carries
|
||||
-- the SAME ParentExecutionId (and source identity) — it is
|
||||
-- stamped once per script run / inbound request — so MIN
|
||||
-- simply picks that one shared value, it is not collapsing a
|
||||
-- genuine disagreement across rows.
|
||||
SELECT
|
||||
ids.ExecutionId AS [ExecutionId],
|
||||
MIN(a.ParentExecutionId) AS [ParentExecutionId],
|
||||
@@ -686,7 +715,7 @@ VALUES
|
||||
FROM ChainIds ids
|
||||
LEFT JOIN dbo.AuditLog a ON a.ExecutionId = ids.ExecutionId
|
||||
GROUP BY ids.ExecutionId
|
||||
OPTION (MAXRECURSION 32);";
|
||||
OPTION (MAXRECURSION {ExecutionChainMaxDepth});";
|
||||
|
||||
var pRoot = downCmd.CreateParameter();
|
||||
pRoot.ParameterName = "@root";
|
||||
|
||||
@@ -770,18 +770,28 @@ public class AuditLogRepositoryTests : IClassFixture<MsSqlMigrationFixture>
|
||||
|
||||
// A 3-level chain: root -> mid -> leaf. Each execution emits two rows so
|
||||
// RowCount aggregation is exercised; the child rows carry the parent's
|
||||
// ExecutionId as ParentExecutionId.
|
||||
// ExecutionId as ParentExecutionId. Each execution is given a DISTINCT
|
||||
// channel, and its two rows carry DISTINCT statuses and timestamps, so
|
||||
// the per-node Channels/Statuses sets and the FirstOccurred/LastOccurred
|
||||
// span are meaningfully asserted (not all-defaults).
|
||||
var rootExec = Guid.NewGuid();
|
||||
var midExec = Guid.NewGuid();
|
||||
var leafExec = Guid.NewGuid();
|
||||
|
||||
var t0 = new DateTime(2026, 10, 5, 9, 0, 0, DateTimeKind.Utc);
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0, executionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(1), executionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(2), executionId: midExec, parentExecutionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(3), executionId: midExec, parentExecutionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(4), executionId: leafExec, parentExecutionId: midExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(5), executionId: leafExec, parentExecutionId: midExec));
|
||||
var rootT0 = t0;
|
||||
var rootT1 = t0.AddMinutes(1);
|
||||
var midT0 = t0.AddMinutes(2);
|
||||
var midT1 = t0.AddMinutes(3);
|
||||
var leafT0 = t0.AddMinutes(4);
|
||||
var leafT1 = t0.AddMinutes(5);
|
||||
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: rootT0, channel: AuditChannel.ApiOutbound, status: AuditStatus.Submitted, executionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: rootT1, channel: AuditChannel.ApiOutbound, status: AuditStatus.Delivered, executionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: midT0, channel: AuditChannel.DbOutbound, status: AuditStatus.Submitted, executionId: midExec, parentExecutionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: midT1, channel: AuditChannel.DbOutbound, status: AuditStatus.Failed, executionId: midExec, parentExecutionId: rootExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: leafT0, channel: AuditChannel.Notification, status: AuditStatus.Submitted, executionId: leafExec, parentExecutionId: midExec));
|
||||
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: leafT1, channel: AuditChannel.Notification, status: AuditStatus.Parked, executionId: leafExec, parentExecutionId: midExec));
|
||||
|
||||
var expected = new[] { rootExec, midExec, leafExec };
|
||||
|
||||
@@ -807,6 +817,37 @@ public class AuditLogRepositoryTests : IClassFixture<MsSqlMigrationFixture>
|
||||
Assert.Equal(2, root.RowCount);
|
||||
Assert.Equal(2, mid.RowCount);
|
||||
Assert.Equal(2, leaf.RowCount);
|
||||
|
||||
// Each populated node aggregates its own rows' channels and
|
||||
// statuses — distinct per execution, so a regression that mixes
|
||||
// executions or drops the per-id aggregate would be caught.
|
||||
Assert.Equal(
|
||||
new[] { nameof(AuditChannel.ApiOutbound) },
|
||||
root.Channels);
|
||||
Assert.Equal(
|
||||
new[] { nameof(AuditChannel.DbOutbound) },
|
||||
mid.Channels);
|
||||
Assert.Equal(
|
||||
new[] { nameof(AuditChannel.Notification) },
|
||||
leaf.Channels);
|
||||
|
||||
Assert.True(
|
||||
new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Delivered) }
|
||||
.ToHashSet().SetEquals(root.Statuses));
|
||||
Assert.True(
|
||||
new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Failed) }
|
||||
.ToHashSet().SetEquals(mid.Statuses));
|
||||
Assert.True(
|
||||
new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Parked) }
|
||||
.ToHashSet().SetEquals(leaf.Statuses));
|
||||
|
||||
// Each populated node's timestamp span covers exactly its two rows.
|
||||
Assert.Equal(rootT0, root.FirstOccurredAtUtc);
|
||||
Assert.Equal(rootT1, root.LastOccurredAtUtc);
|
||||
Assert.Equal(midT0, mid.FirstOccurredAtUtc);
|
||||
Assert.Equal(midT1, mid.LastOccurredAtUtc);
|
||||
Assert.Equal(leafT0, leaf.FirstOccurredAtUtc);
|
||||
Assert.Equal(leafT1, leaf.LastOccurredAtUtc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user