refactor(auditlog): GetExecutionTreeAsync recurses over a distinct edge set

This commit is contained in:
Joseph Doherty
2026-05-21 18:29:48 -04:00
parent 255dd95cd9
commit 252bf0a970
2 changed files with 110 additions and 40 deletions

View File

@@ -576,18 +576,22 @@ VALUES
/// climbs from the supplied node to the root — the last execution id with no
/// parent. The loop is capped at <see cref="ExecutionChainMaxDepth"/>
/// iterations; a purged/missing parent simply ends the climb early. <b>Walk
/// down:</b> a recursive CTE seeded at the root joins
/// <c>child.ParentExecutionId = parent.ExecutionId</c> to enumerate every
/// descendant, bounded by <c>OPTION (MAXRECURSION 32)</c> — corrupt cyclic
/// data raises a <see cref="SqlException"/> (msg 530) rather than spinning.
/// down:</b> a recursive CTE over a DISTINCT
/// <c>(ExecutionId, ParentExecutionId)</c> edge set, seeded at the root edge
/// and joining <c>edge.ParentExecutionId = chain.ExecutionId</c> to
/// enumerate every descendant. Recursing over edges rather than raw rows
/// keeps the recursion one path wide per execution. It is bounded by
/// <c>OPTION (MAXRECURSION 32)</c> — corrupt cyclic data raises a
/// <see cref="SqlException"/> (msg 530) rather than spinning.
/// </para>
/// <para>
/// The chain's full execution-id set is the union of the rows'
/// <c>ExecutionId</c> and their <c>ParentExecutionId</c>, so an execution
/// referenced only as a parent — a "stub" that emitted no rows of its own
/// is included. The final projection LEFT JOINs that id set back to
/// <c>AuditLog</c> and <c>GROUP BY</c>s, so a stub yields a node with
/// <c>RowCount = 0</c> and empty/null aggregates. The query is SELECT-only
/// The chain's full execution-id set is every edge's <c>ExecutionId</c>
/// unioned with its non-null <c>ParentExecutionId</c>, so an execution
/// referenced only as a parent — a "stub" that emitted no rows of its own,
/// and therefore owns no edge of its own — is still included. The final
/// projection LEFT JOINs that id set back to <c>AuditLog</c> and
/// <c>GROUP BY</c>s, so a stub yields a node with <c>RowCount = 0</c> and
/// empty/null aggregates. The query is SELECT-only
/// (the audit writer role grants no UPDATE/DELETE — reads are unrestricted).
/// </para>
/// </remarks>
@@ -639,36 +643,61 @@ VALUES
}
// --- Phase 2: walk down from the root via a recursive CTE ---------
// Chain : seeded at the root, recursively pulls every distinct
// ExecutionId whose rows carry a ParentExecutionId already
// in the chain. SELECT DISTINCT in the recursive member is
// rejected by SQL Server, so the recursion walks raw rows
// and the outer query de-duplicates.
// ChainIds: the chain's full execution-id set = every ExecutionId in
// Chain UNIONed with every non-null ParentExecutionId — the
// UNION pulls in stub parents that emitted no rows.
// Final : LEFT JOIN ChainIds back to AuditLog and GROUP BY so a
// stub surfaces with RowCount 0 and NULL aggregates.
// Edges : a non-recursive, DISTINCT (ExecutionId, ParentExecutionId)
// edge set distilled from AuditLog. Recursing over edges
// instead of raw rows means an execution with N audit rows
// contributes ONE recursion path, not N — MAXRECURSION
// bounds depth, not per-level width, so the raw-row form
// could fan out badly. One edge per execution because all
// rows of an execution share a single ParentExecutionId
// (see the MIN(...) note on the final projection).
// Chain : seeded at the root edge, recursively joins each edge whose
// ParentExecutionId is an ExecutionId already in the chain.
// Each edge carries its own ParentExecutionId, so the chain
// of edges already surfaces every execution id in the tree
// — including a row-less stub parent, which appears as the
// ParentExecutionId of its child's edge. No separate
// union-back CTE is needed.
// Final : collect every distinct execution id reachable from the
// chain (each edge's ExecutionId plus its non-null
// ParentExecutionId), LEFT JOIN back to AuditLog and
// GROUP BY so a stub parent — which owns no edge of its own
// because it emitted no rows — still surfaces as a node with
// RowCount 0 and NULL aggregates.
var nodes = new List<ExecutionTreeNode>();
await using (var downCmd = conn.CreateCommand())
{
downCmd.CommandText = @"
WITH Chain AS (
SELECT CAST(@root AS uniqueidentifier) AS ExecutionId
downCmd.CommandText = $@"
WITH Edges AS (
SELECT DISTINCT ExecutionId, ParentExecutionId
FROM dbo.AuditLog
WHERE ExecutionId IS NOT NULL
),
Chain AS (
-- Anchor: the root execution id, seeded as a literal so
-- it is present even when the root is a row-less stub
-- (a purged/no-action parent owns no edge of its own).
-- The root is parentless by construction — the upward
-- walk stopped there — so its ParentExecutionId is NULL.
SELECT CAST(@root AS uniqueidentifier) AS ExecutionId,
CAST(NULL AS uniqueidentifier) AS ParentExecutionId
UNION ALL
SELECT a.ExecutionId
FROM dbo.AuditLog a
INNER JOIN Chain c ON a.ParentExecutionId = c.ExecutionId
WHERE a.ExecutionId IS NOT NULL
SELECT e.ExecutionId, e.ParentExecutionId
FROM Edges e
INNER JOIN Chain c ON e.ParentExecutionId = c.ExecutionId
),
ChainIds AS (
SELECT DISTINCT ExecutionId FROM Chain
SELECT ExecutionId FROM Chain
UNION
SELECT DISTINCT a.ParentExecutionId
FROM dbo.AuditLog a
INNER JOIN Chain c ON a.ExecutionId = c.ExecutionId
WHERE a.ParentExecutionId IS NOT NULL
SELECT ParentExecutionId FROM Chain
WHERE ParentExecutionId IS NOT NULL
)
-- ParentExecutionId / SourceSiteId / SourceInstanceId are
-- derived via MIN: every audit row of one execution carries
-- the SAME ParentExecutionId (and source identity) — it is
-- stamped once per script run / inbound request — so MIN
-- simply picks that one shared value, it is not collapsing a
-- genuine disagreement across rows.
SELECT
ids.ExecutionId AS [ExecutionId],
MIN(a.ParentExecutionId) AS [ParentExecutionId],
@@ -686,7 +715,7 @@ VALUES
FROM ChainIds ids
LEFT JOIN dbo.AuditLog a ON a.ExecutionId = ids.ExecutionId
GROUP BY ids.ExecutionId
OPTION (MAXRECURSION 32);";
OPTION (MAXRECURSION {ExecutionChainMaxDepth});";
var pRoot = downCmd.CreateParameter();
pRoot.ParameterName = "@root";

View File

@@ -770,18 +770,28 @@ public class AuditLogRepositoryTests : IClassFixture<MsSqlMigrationFixture>
// A 3-level chain: root -> mid -> leaf. Each execution emits two rows so
// RowCount aggregation is exercised; the child rows carry the parent's
// ExecutionId as ParentExecutionId.
// ExecutionId as ParentExecutionId. Each execution is given a DISTINCT
// channel, and its two rows carry DISTINCT statuses and timestamps, so
// the per-node Channels/Statuses sets and the FirstOccurred/LastOccurred
// span are meaningfully asserted (not all-defaults).
var rootExec = Guid.NewGuid();
var midExec = Guid.NewGuid();
var leafExec = Guid.NewGuid();
var t0 = new DateTime(2026, 10, 5, 9, 0, 0, DateTimeKind.Utc);
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0, executionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(1), executionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(2), executionId: midExec, parentExecutionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(3), executionId: midExec, parentExecutionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(4), executionId: leafExec, parentExecutionId: midExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(5), executionId: leafExec, parentExecutionId: midExec));
var rootT0 = t0;
var rootT1 = t0.AddMinutes(1);
var midT0 = t0.AddMinutes(2);
var midT1 = t0.AddMinutes(3);
var leafT0 = t0.AddMinutes(4);
var leafT1 = t0.AddMinutes(5);
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: rootT0, channel: AuditChannel.ApiOutbound, status: AuditStatus.Submitted, executionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: rootT1, channel: AuditChannel.ApiOutbound, status: AuditStatus.Delivered, executionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: midT0, channel: AuditChannel.DbOutbound, status: AuditStatus.Submitted, executionId: midExec, parentExecutionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: midT1, channel: AuditChannel.DbOutbound, status: AuditStatus.Failed, executionId: midExec, parentExecutionId: rootExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: leafT0, channel: AuditChannel.Notification, status: AuditStatus.Submitted, executionId: leafExec, parentExecutionId: midExec));
await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: leafT1, channel: AuditChannel.Notification, status: AuditStatus.Parked, executionId: leafExec, parentExecutionId: midExec));
var expected = new[] { rootExec, midExec, leafExec };
@@ -807,6 +817,37 @@ public class AuditLogRepositoryTests : IClassFixture<MsSqlMigrationFixture>
Assert.Equal(2, root.RowCount);
Assert.Equal(2, mid.RowCount);
Assert.Equal(2, leaf.RowCount);
// Each populated node aggregates its own rows' channels and
// statuses — distinct per execution, so a regression that mixes
// executions or drops the per-id aggregate would be caught.
Assert.Equal(
new[] { nameof(AuditChannel.ApiOutbound) },
root.Channels);
Assert.Equal(
new[] { nameof(AuditChannel.DbOutbound) },
mid.Channels);
Assert.Equal(
new[] { nameof(AuditChannel.Notification) },
leaf.Channels);
Assert.True(
new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Delivered) }
.ToHashSet().SetEquals(root.Statuses));
Assert.True(
new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Failed) }
.ToHashSet().SetEquals(mid.Statuses));
Assert.True(
new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Parked) }
.ToHashSet().SetEquals(leaf.Statuses));
// Each populated node's timestamp span covers exactly its two rows.
Assert.Equal(rootT0, root.FirstOccurredAtUtc);
Assert.Equal(rootT1, root.LastOccurredAtUtc);
Assert.Equal(midT0, mid.FirstOccurredAtUtc);
Assert.Equal(midT1, mid.LastOccurredAtUtc);
Assert.Equal(leafT0, leaf.FirstOccurredAtUtc);
Assert.Equal(leafT1, leaf.LastOccurredAtUtc);
}
}