From 252bf0a9704c4ea9dd627d2a4e74ddb2fad0ddbb Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 21 May 2026 18:29:48 -0400 Subject: [PATCH] refactor(auditlog): GetExecutionTreeAsync recurses over a distinct edge set --- .../Repositories/AuditLogRepository.cs | 95 ++++++++++++------- .../Repositories/AuditLogRepositoryTests.cs | 55 +++++++++-- 2 files changed, 110 insertions(+), 40 deletions(-) diff --git a/src/ScadaLink.ConfigurationDatabase/Repositories/AuditLogRepository.cs b/src/ScadaLink.ConfigurationDatabase/Repositories/AuditLogRepository.cs index 85afb46..76d4b47 100644 --- a/src/ScadaLink.ConfigurationDatabase/Repositories/AuditLogRepository.cs +++ b/src/ScadaLink.ConfigurationDatabase/Repositories/AuditLogRepository.cs @@ -576,18 +576,22 @@ VALUES /// climbs from the supplied node to the root — the last execution id with no /// parent. The loop is capped at /// iterations; a purged/missing parent simply ends the climb early. Walk - /// down: a recursive CTE seeded at the root joins - /// child.ParentExecutionId = parent.ExecutionId to enumerate every - /// descendant, bounded by OPTION (MAXRECURSION 32) — corrupt cyclic - /// data raises a (msg 530) rather than spinning. + /// down: a recursive CTE over a DISTINCT + /// (ExecutionId, ParentExecutionId) edge set, seeded at the root edge + /// and joining edge.ParentExecutionId = chain.ExecutionId to + /// enumerate every descendant. Recursing over edges rather than raw rows + /// keeps the recursion one path wide per execution. It is bounded by + /// OPTION (MAXRECURSION 32) — corrupt cyclic data raises a + /// (msg 530) rather than spinning. /// /// - /// The chain's full execution-id set is the union of the rows' - /// ExecutionId and their ParentExecutionId, so an execution - /// referenced only as a parent — a "stub" that emitted no rows of its own — - /// is included. The final projection LEFT JOINs that id set back to - /// AuditLog and GROUP BYs, so a stub yields a node with - /// RowCount = 0 and empty/null aggregates. The query is SELECT-only + /// The chain's full execution-id set is every edge's ExecutionId + /// unioned with its non-null ParentExecutionId, so an execution + /// referenced only as a parent — a "stub" that emitted no rows of its own, + /// and therefore owns no edge of its own — is still included. The final + /// projection LEFT JOINs that id set back to AuditLog and + /// GROUP BYs, so a stub yields a node with RowCount = 0 and + /// empty/null aggregates. The query is SELECT-only /// (the audit writer role grants no UPDATE/DELETE — reads are unrestricted). /// /// @@ -639,36 +643,61 @@ VALUES } // --- Phase 2: walk down from the root via a recursive CTE --------- - // Chain : seeded at the root, recursively pulls every distinct - // ExecutionId whose rows carry a ParentExecutionId already - // in the chain. SELECT DISTINCT in the recursive member is - // rejected by SQL Server, so the recursion walks raw rows - // and the outer query de-duplicates. - // ChainIds: the chain's full execution-id set = every ExecutionId in - // Chain UNIONed with every non-null ParentExecutionId — the - // UNION pulls in stub parents that emitted no rows. - // Final : LEFT JOIN ChainIds back to AuditLog and GROUP BY so a - // stub surfaces with RowCount 0 and NULL aggregates. + // Edges : a non-recursive, DISTINCT (ExecutionId, ParentExecutionId) + // edge set distilled from AuditLog. Recursing over edges + // instead of raw rows means an execution with N audit rows + // contributes ONE recursion path, not N — MAXRECURSION + // bounds depth, not per-level width, so the raw-row form + // could fan out badly. One edge per execution because all + // rows of an execution share a single ParentExecutionId + // (see the MIN(...) note on the final projection). + // Chain : seeded at the root edge, recursively joins each edge whose + // ParentExecutionId is an ExecutionId already in the chain. + // Each edge carries its own ParentExecutionId, so the chain + // of edges already surfaces every execution id in the tree + // — including a row-less stub parent, which appears as the + // ParentExecutionId of its child's edge. No separate + // union-back CTE is needed. + // Final : collect every distinct execution id reachable from the + // chain (each edge's ExecutionId plus its non-null + // ParentExecutionId), LEFT JOIN back to AuditLog and + // GROUP BY so a stub parent — which owns no edge of its own + // because it emitted no rows — still surfaces as a node with + // RowCount 0 and NULL aggregates. var nodes = new List(); await using (var downCmd = conn.CreateCommand()) { - downCmd.CommandText = @" - WITH Chain AS ( - SELECT CAST(@root AS uniqueidentifier) AS ExecutionId + downCmd.CommandText = $@" + WITH Edges AS ( + SELECT DISTINCT ExecutionId, ParentExecutionId + FROM dbo.AuditLog + WHERE ExecutionId IS NOT NULL + ), + Chain AS ( + -- Anchor: the root execution id, seeded as a literal so + -- it is present even when the root is a row-less stub + -- (a purged/no-action parent owns no edge of its own). + -- The root is parentless by construction — the upward + -- walk stopped there — so its ParentExecutionId is NULL. + SELECT CAST(@root AS uniqueidentifier) AS ExecutionId, + CAST(NULL AS uniqueidentifier) AS ParentExecutionId UNION ALL - SELECT a.ExecutionId - FROM dbo.AuditLog a - INNER JOIN Chain c ON a.ParentExecutionId = c.ExecutionId - WHERE a.ExecutionId IS NOT NULL + SELECT e.ExecutionId, e.ParentExecutionId + FROM Edges e + INNER JOIN Chain c ON e.ParentExecutionId = c.ExecutionId ), ChainIds AS ( - SELECT DISTINCT ExecutionId FROM Chain + SELECT ExecutionId FROM Chain UNION - SELECT DISTINCT a.ParentExecutionId - FROM dbo.AuditLog a - INNER JOIN Chain c ON a.ExecutionId = c.ExecutionId - WHERE a.ParentExecutionId IS NOT NULL + SELECT ParentExecutionId FROM Chain + WHERE ParentExecutionId IS NOT NULL ) + -- ParentExecutionId / SourceSiteId / SourceInstanceId are + -- derived via MIN: every audit row of one execution carries + -- the SAME ParentExecutionId (and source identity) — it is + -- stamped once per script run / inbound request — so MIN + -- simply picks that one shared value, it is not collapsing a + -- genuine disagreement across rows. SELECT ids.ExecutionId AS [ExecutionId], MIN(a.ParentExecutionId) AS [ParentExecutionId], @@ -686,7 +715,7 @@ VALUES FROM ChainIds ids LEFT JOIN dbo.AuditLog a ON a.ExecutionId = ids.ExecutionId GROUP BY ids.ExecutionId - OPTION (MAXRECURSION 32);"; + OPTION (MAXRECURSION {ExecutionChainMaxDepth});"; var pRoot = downCmd.CreateParameter(); pRoot.ParameterName = "@root"; diff --git a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/AuditLogRepositoryTests.cs b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/AuditLogRepositoryTests.cs index f4ddc4a..1d7e337 100644 --- a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/AuditLogRepositoryTests.cs +++ b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/AuditLogRepositoryTests.cs @@ -770,18 +770,28 @@ public class AuditLogRepositoryTests : IClassFixture // A 3-level chain: root -> mid -> leaf. Each execution emits two rows so // RowCount aggregation is exercised; the child rows carry the parent's - // ExecutionId as ParentExecutionId. + // ExecutionId as ParentExecutionId. Each execution is given a DISTINCT + // channel, and its two rows carry DISTINCT statuses and timestamps, so + // the per-node Channels/Statuses sets and the FirstOccurred/LastOccurred + // span are meaningfully asserted (not all-defaults). var rootExec = Guid.NewGuid(); var midExec = Guid.NewGuid(); var leafExec = Guid.NewGuid(); var t0 = new DateTime(2026, 10, 5, 9, 0, 0, DateTimeKind.Utc); - await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0, executionId: rootExec)); - await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(1), executionId: rootExec)); - await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(2), executionId: midExec, parentExecutionId: rootExec)); - await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(3), executionId: midExec, parentExecutionId: rootExec)); - await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(4), executionId: leafExec, parentExecutionId: midExec)); - await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: t0.AddMinutes(5), executionId: leafExec, parentExecutionId: midExec)); + var rootT0 = t0; + var rootT1 = t0.AddMinutes(1); + var midT0 = t0.AddMinutes(2); + var midT1 = t0.AddMinutes(3); + var leafT0 = t0.AddMinutes(4); + var leafT1 = t0.AddMinutes(5); + + await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: rootT0, channel: AuditChannel.ApiOutbound, status: AuditStatus.Submitted, executionId: rootExec)); + await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: rootT1, channel: AuditChannel.ApiOutbound, status: AuditStatus.Delivered, executionId: rootExec)); + await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: midT0, channel: AuditChannel.DbOutbound, status: AuditStatus.Submitted, executionId: midExec, parentExecutionId: rootExec)); + await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: midT1, channel: AuditChannel.DbOutbound, status: AuditStatus.Failed, executionId: midExec, parentExecutionId: rootExec)); + await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: leafT0, channel: AuditChannel.Notification, status: AuditStatus.Submitted, executionId: leafExec, parentExecutionId: midExec)); + await repo.InsertIfNotExistsAsync(NewEvent(siteId, occurredAtUtc: leafT1, channel: AuditChannel.Notification, status: AuditStatus.Parked, executionId: leafExec, parentExecutionId: midExec)); var expected = new[] { rootExec, midExec, leafExec }; @@ -807,6 +817,37 @@ public class AuditLogRepositoryTests : IClassFixture Assert.Equal(2, root.RowCount); Assert.Equal(2, mid.RowCount); Assert.Equal(2, leaf.RowCount); + + // Each populated node aggregates its own rows' channels and + // statuses — distinct per execution, so a regression that mixes + // executions or drops the per-id aggregate would be caught. + Assert.Equal( + new[] { nameof(AuditChannel.ApiOutbound) }, + root.Channels); + Assert.Equal( + new[] { nameof(AuditChannel.DbOutbound) }, + mid.Channels); + Assert.Equal( + new[] { nameof(AuditChannel.Notification) }, + leaf.Channels); + + Assert.True( + new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Delivered) } + .ToHashSet().SetEquals(root.Statuses)); + Assert.True( + new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Failed) } + .ToHashSet().SetEquals(mid.Statuses)); + Assert.True( + new[] { nameof(AuditStatus.Submitted), nameof(AuditStatus.Parked) } + .ToHashSet().SetEquals(leaf.Statuses)); + + // Each populated node's timestamp span covers exactly its two rows. + Assert.Equal(rootT0, root.FirstOccurredAtUtc); + Assert.Equal(rootT1, root.LastOccurredAtUtc); + Assert.Equal(midT0, mid.FirstOccurredAtUtc); + Assert.Equal(midT1, mid.LastOccurredAtUtc); + Assert.Equal(leafT0, leaf.FirstOccurredAtUtc); + Assert.Equal(leafT1, leaf.LastOccurredAtUtc); } }