refactor(auditlog): GetExecutionTreeAsync recurses over a distinct edge set

This commit is contained in:
Joseph Doherty
2026-05-21 18:29:48 -04:00
parent 255dd95cd9
commit 252bf0a970
2 changed files with 110 additions and 40 deletions

View File

@@ -576,18 +576,22 @@ VALUES
/// climbs from the supplied node to the root — the last execution id with no
/// parent. The loop is capped at <see cref="ExecutionChainMaxDepth"/>
/// iterations; a purged/missing parent simply ends the climb early. <b>Walk
/// down:</b> a recursive CTE seeded at the root joins
/// <c>child.ParentExecutionId = parent.ExecutionId</c> to enumerate every
/// descendant, bounded by <c>OPTION (MAXRECURSION 32)</c> — corrupt cyclic
/// data raises a <see cref="SqlException"/> (msg 530) rather than spinning.
/// down:</b> a recursive CTE over a DISTINCT
/// <c>(ExecutionId, ParentExecutionId)</c> edge set, anchored on the root
/// execution id (seeded as a literal row with a null parent) and joining
/// <c>edge.ParentExecutionId = chain.ExecutionId</c> to
/// enumerate every descendant. Recursing over edges rather than raw rows
/// keeps the recursion one path wide per execution. It is bounded by
/// <c>OPTION (MAXRECURSION n)</c>, with <c>n</c> taken from
/// <see cref="ExecutionChainMaxDepth"/> — corrupt cyclic data raises a
/// <see cref="SqlException"/> (msg 530) rather than spinning.
/// </para>
/// <para>
/// The chain's full execution-id set is the union of the rows'
/// <c>ExecutionId</c> and their <c>ParentExecutionId</c>, so an execution
/// referenced only as a parent — a "stub" that emitted no rows of its own
/// is included. The final projection LEFT JOINs that id set back to
/// <c>AuditLog</c> and <c>GROUP BY</c>s, so a stub yields a node with
/// <c>RowCount = 0</c> and empty/null aggregates. The query is SELECT-only
/// The chain's full execution-id set is every edge's <c>ExecutionId</c>
/// unioned with its non-null <c>ParentExecutionId</c>, so an execution
/// referenced only as a parent — a "stub" that emitted no rows of its own,
/// and therefore owns no edge of its own — is still included. The final
/// projection LEFT JOINs that id set back to <c>AuditLog</c> and
/// <c>GROUP BY</c>s, so a stub yields a node with <c>RowCount = 0</c> and
/// empty/null aggregates. The query is SELECT-only
/// (the audit writer role grants no UPDATE/DELETE — reads are unrestricted).
/// </para>
/// </remarks>
@@ -639,36 +643,61 @@ VALUES
}
// --- Phase 2: walk down from the root via a recursive CTE ---------
// Chain : seeded at the root, recursively pulls every distinct
// ExecutionId whose rows carry a ParentExecutionId already
// in the chain. SELECT DISTINCT in the recursive member is
// rejected by SQL Server, so the recursion walks raw rows
// and the outer query de-duplicates.
// ChainIds: the chain's full execution-id set = every ExecutionId in
// Chain UNIONed with every non-null ParentExecutionId — the
// UNION pulls in stub parents that emitted no rows.
// Final : LEFT JOIN ChainIds back to AuditLog and GROUP BY so a
// stub surfaces with RowCount 0 and NULL aggregates.
// Edges : a non-recursive, DISTINCT (ExecutionId, ParentExecutionId)
// edge set distilled from AuditLog. Recursing over edges
// instead of raw rows means an execution with N audit rows
// contributes ONE recursion path, not N — MAXRECURSION
// bounds depth, not per-level width, so the raw-row form
// could fan out badly. One edge per execution because all
// rows of an execution share a single ParentExecutionId
// (see the MIN(...) note on the final projection).
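// Chain : anchored on the root execution id, seeded as a literal row
// with a NULL ParentExecutionId (not read from Edges), then
// recursively joins each edge whose ParentExecutionId is an
// ExecutionId already in the chain. Each edge carries its own
// ParentExecutionId, so the chain of edges already surfaces
// every execution id in the tree.
// ChainIds: the UNION of Chain's ExecutionId column and its non-null
// ParentExecutionId column. It reads Chain only — no join
// back to AuditLog is needed. Because the recursive join
// requires edge.ParentExecutionId = chain.ExecutionId, every
// non-null ParentExecutionId in Chain is already an
// ExecutionId in Chain; a row-less stub root is present
// because of the literal anchor.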
// Final : collect every distinct execution id reachable from the
// chain (each edge's ExecutionId plus its non-null
// ParentExecutionId), LEFT JOIN back to AuditLog and
// GROUP BY so a stub parent — which owns no edge of its own
// because it emitted no rows — still surfaces as a node with
// RowCount 0 and NULL aggregates.
var nodes = new List<ExecutionTreeNode>();
await using (var downCmd = conn.CreateCommand())
{
downCmd.CommandText = @"
WITH Chain AS (
SELECT CAST(@root AS uniqueidentifier) AS ExecutionId
downCmd.CommandText = $@"
WITH Edges AS (
SELECT DISTINCT ExecutionId, ParentExecutionId
FROM dbo.AuditLog
WHERE ExecutionId IS NOT NULL
),
Chain AS (
-- Anchor: the root execution id, seeded as a literal so
-- it is present even when the root is a row-less stub
-- (a purged/no-action parent owns no edge of its own).
-- The root is parentless by construction — the upward
-- walk stopped there — so its ParentExecutionId is NULL.
SELECT CAST(@root AS uniqueidentifier) AS ExecutionId,
CAST(NULL AS uniqueidentifier) AS ParentExecutionId
UNION ALL
SELECT a.ExecutionId
FROM dbo.AuditLog a
INNER JOIN Chain c ON a.ParentExecutionId = c.ExecutionId
WHERE a.ExecutionId IS NOT NULL
SELECT e.ExecutionId, e.ParentExecutionId
FROM Edges e
INNER JOIN Chain c ON e.ParentExecutionId = c.ExecutionId
),
ChainIds AS (
SELECT DISTINCT ExecutionId FROM Chain
SELECT ExecutionId FROM Chain
UNION
SELECT DISTINCT a.ParentExecutionId
FROM dbo.AuditLog a
INNER JOIN Chain c ON a.ExecutionId = c.ExecutionId
WHERE a.ParentExecutionId IS NOT NULL
SELECT ParentExecutionId FROM Chain
WHERE ParentExecutionId IS NOT NULL
)
-- ParentExecutionId / SourceSiteId / SourceInstanceId are
-- derived via MIN: every audit row of one execution carries
-- the SAME ParentExecutionId (and source identity) — it is
-- stamped once per script run / inbound request — so MIN
-- simply picks that one shared value, it is not collapsing a
-- genuine disagreement across rows.
SELECT
ids.ExecutionId AS [ExecutionId],
MIN(a.ParentExecutionId) AS [ParentExecutionId],
@@ -686,7 +715,7 @@ VALUES
FROM ChainIds ids
LEFT JOIN dbo.AuditLog a ON a.ExecutionId = ids.ExecutionId
GROUP BY ids.ExecutionId
OPTION (MAXRECURSION 32);";
OPTION (MAXRECURSION {ExecutionChainMaxDepth});";
var pRoot = downCmd.CreateParameter();
pRoot.ParameterName = "@root";