diff --git a/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs b/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs index d23a467..d5c98a4 100644 --- a/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs +++ b/src/ScadaLink.Commons/Messages/Audit/SiteCallQueries.cs @@ -18,6 +18,10 @@ namespace ScadaLink.Commons.Messages.Audit; /// exact-match target filter, consistent with the repository's /// predicate. /// +/// +/// Requested page size. The actor clamps this to the [1, 200] range, so +/// the effective ceiling is 200 rows per page regardless of the value sent. +/// public sealed record SiteCallQueryRequest( string CorrelationId, string? StatusFilter, @@ -39,6 +43,12 @@ public sealed record SiteCallQueryRequest( /// /// none are surfaced here. /// +/// +/// is not called out in the Site Call Audit plan, but +/// it is a real (nullable) +/// column — the last HTTP status code observed for the call — so it is surfaced +/// here for the grid; null for non-HTTP channels or before a first attempt. +/// public sealed record SiteCallSummary( Guid TrackedOperationId, string SourceSite, diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs index 07873fb..fa07c8f 100644 --- a/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallKpiSnapshot.cs @@ -8,7 +8,7 @@ namespace ScadaLink.Commons.Types.Audit; /// Notification Outbox tile layout. /// /// -/// Count of non-terminal rows (Pending + Retrying) — calls +/// Count of non-terminal rows (TerminalAtUtc IS NULL) — calls /// buffered at sites awaiting retry. /// /// Count of rows in the Parked status. @@ -25,7 +25,7 @@ namespace ScadaLink.Commons.Types.Audit; /// null when there are no non-terminal rows. /// /// -/// Count of non-terminal rows (Pending/Retrying) whose +/// Count of non-terminal rows (TerminalAtUtc IS NULL) whose /// is older /// than the supplied stuck cutoff. 
Display-only — no escalation. /// diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs index cf7e7d4..63f0c58 100644 --- a/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallQueryFilter.cs @@ -12,10 +12,25 @@ namespace ScadaLink.Commons.Types.Audit; /// underlying columns are bounded ASCII (varchar) and the Central UI Site Calls /// page exposes them as drop-down filters, not free-text search. /// +/// Restrict to a single channel (exact match). +/// Restrict to a single source site (exact match). +/// Restrict to a single status (exact match). +/// Restrict to a single target (exact match). +/// Inclusive lower bound on CreatedAtUtc. +/// Inclusive upper bound on CreatedAtUtc. +/// +/// When set, restrict to stuck rows: TerminalAtUtc IS NULL AND CreatedAtUtc &lt; +/// StuckCutoffUtc. Both columns are plain (no value converter) and compose +/// directly with the keyset cursor. Mirrors +/// NotificationOutboxFilter.StuckCutoff; +/// keeps the "StuckOnly" filter honest so paging never returns under-filled +/// pages with a non-null next cursor. +/// public sealed record SiteCallQueryFilter( string? Channel = null, string? SourceSite = null, string? Status = null, string? Target = null, DateTime? FromUtc = null, - DateTime? ToUtc = null); + DateTime? ToUtc = null, + DateTime? StuckCutoffUtc = null); diff --git a/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs b/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs index c67c895..c5c8208 100644 --- a/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs +++ b/src/ScadaLink.Commons/Types/Audit/SiteCallSiteKpiSnapshot.cs @@ -7,7 +7,7 @@ namespace ScadaLink.Commons.Types.Audit; /// . /// /// The site identifier these metrics are scoped to. -/// Count of this site's non-terminal rows (Pending + Retrying). +/// Count of this site's non-terminal rows (TerminalAtUtc IS NULL). 
/// Count of this site's rows in the Parked status. /// /// Count of this site's Failed rows whose TerminalAtUtc is at or diff --git a/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs b/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs index 954e490..d90d0d9 100644 --- a/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs +++ b/src/ScadaLink.ConfigurationDatabase/Repositories/SiteCallAuditRepository.cs @@ -164,7 +164,13 @@ WHERE TrackedOperationId = {idText} var fromUtc = filter.FromUtc; var toUtc = filter.ToUtc; + var stuckCutoff = filter.StuckCutoffUtc; + // The stuck predicate (TerminalAtUtc IS NULL AND CreatedAtUtc < cutoff) + // is pushed into SQL here — both columns are plain (no value converter) + // and compose with the keyset cursor, so a StuckOnly page is honest: + // never under-filled with a non-null next cursor. Mirrors how + // NotificationOutboxRepository.QueryAsync applies NotificationOutboxFilter.StuckCutoff. FormattableString sql = $@" SELECT TOP ({paging.PageSize}) TrackedOperationId, Channel, Target, SourceSite, Status, RetryCount, @@ -176,6 +182,7 @@ WHERE ({filter.Channel} IS NULL OR Channel = {filter.Channel}) AND ({filter.Target} IS NULL OR Target = {filter.Target}) AND ({fromUtc} IS NULL OR CreatedAtUtc >= {fromUtc}) AND ({toUtc} IS NULL OR CreatedAtUtc <= {toUtc}) + AND ({stuckCutoff} IS NULL OR (TerminalAtUtc IS NULL AND CreatedAtUtc < {stuckCutoff})) AND ({(hasCursor ? 1 : 0)} = 0 OR CreatedAtUtc < {afterCreated} OR (CreatedAtUtc = {afterCreated} AND TrackedOperationId < {afterIdString})) diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs index a6537dd..11af5ca 100644 --- a/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs @@ -21,10 +21,9 @@ namespace ScadaLink.SiteCallAudit; /// /// /// -/// M3 ships the minimum surface: ingest only. 
Reconciliation, KPIs, and -/// central→site Retry/Discard relay are deferred (per CLAUDE.md scope -/// discipline — Site Call Audit's KPIs and the Retry/Discard relay land in a -/// follow-up). +/// Query, detail and KPIs land in Task 4; reconciliation and the central→site +/// Retry/Discard relay remain deferred (per CLAUDE.md scope discipline — they +/// land in a later follow-up). /// /// /// Per CLAUDE.md "audit-write failure NEVER aborts the user-facing action" — @@ -195,13 +194,20 @@ public class SiteCallAuditActor : ReceiveActor private async Task QueryAsync(SiteCallQueryRequest request, DateTime now) { + var stuckCutoff = now - _options.StuckAgeThreshold; + var filter = new SiteCallQueryFilter( Channel: NullIfBlank(request.ChannelFilter), SourceSite: NullIfBlank(request.SourceSiteFilter), Status: NullIfBlank(request.StatusFilter), Target: NullIfBlank(request.TargetKeyword), FromUtc: request.FromUtc, - ToUtc: request.ToUtc); + ToUtc: request.ToUtc, + // StuckOnly is pushed into the repository SQL via StuckCutoffUtc — + // TerminalAtUtc IS NULL AND CreatedAtUtc < cutoff composes with the + // keyset cursor, so the page is always honest (full pages, no empty + // pages with a non-null next cursor). + StuckCutoffUtc: request.StuckOnly ? stuckCutoff : null); var pageSize = Math.Clamp(request.PageSize, 1, MaxPageSize); var paging = new SiteCallPaging( @@ -214,21 +220,11 @@ public class SiteCallAuditActor : ReceiveActor { var rows = await repository.QueryAsync(filter, paging).ConfigureAwait(false); - var stuckCutoff = now - _options.StuckAgeThreshold; var summaries = rows - // StuckOnly is post-filtered here rather than pushed into the - // repository SQL — the SiteCallQueryFilter has no stuck predicate - // and a status-aware created-before clause does not compose with - // the keyset cursor. The page may therefore return fewer than - // PageSize rows when StuckOnly is set; that is acceptable for a - // display-only filter. 
- .Where(row => !request.StuckOnly || IsStuck(row, stuckCutoff)) .Select(row => ToSummary(row, stuckCutoff)) .ToList(); - // The next-page cursor is the LAST row of the materialised page — - // before StuckOnly post-filtering, so paging still advances even - // when every row on a page was filtered out. + // The next-page cursor is the last row of the materialised page. var cursorRow = rows.Count > 0 ? rows[^1] : null; return new SiteCallQueryResponse( diff --git a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs index 1156420..67d93f2 100644 --- a/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs +++ b/tests/ScadaLink.ConfigurationDatabase.Tests/Repositories/SiteCallAuditRepositoryTests.cs @@ -271,6 +271,67 @@ public class SiteCallAuditRepositoryTests : IClassFixture Assert.Equal(5, allIds.Count); } + [SkippableFact] + public async Task QueryAsync_StuckCutoff_ComposesWithKeysetPaging_NoEmptyPages() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + var site = NewSiteId(); + await using var context = CreateContext(); + var repo = new SiteCallAuditRepository(context); + + // Three stuck rows (non-terminal, created before the cutoff) interleaved + // by CreatedAtUtc with non-stuck rows: recent non-terminal rows and an + // old-but-terminal row. The stuck predicate is pushed into the SQL WHERE + // alongside the keyset cursor, so each page must come back full of stuck + // rows — never under-filled by a post-filter. + var t0 = new DateTime(2026, 5, 20, 8, 0, 0, DateTimeKind.Utc); + var cutoff = t0.AddMinutes(10); + + var stuckIds = new List(); + for (var i = 0; i < 3; i++) + { + var stuckId = TrackedOperationId.New(); + stuckIds.Add(stuckId); + // Stuck: non-terminal, created before the cutoff. 
+ await repo.UpsertAsync(NewRow( + stuckId, sourceSite: site, status: "Attempted", + createdAtUtc: t0.AddMinutes(i))); + // Not stuck: non-terminal but created after the cutoff. + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), sourceSite: site, status: "Attempted", + createdAtUtc: cutoff.AddMinutes(i + 1))); + // Not stuck: created before the cutoff but terminal. + await repo.UpsertAsync(NewRow( + TrackedOperationId.New(), sourceSite: site, status: "Delivered", + createdAtUtc: t0.AddMinutes(i), terminal: true, + terminalAtUtc: t0.AddMinutes(i + 1))); + } + + var filter = new SiteCallQueryFilter(SourceSite: site, StuckCutoffUtc: cutoff); + + var page1 = await repo.QueryAsync(filter, new SiteCallPaging(PageSize: 2)); + Assert.Equal(2, page1.Count); + Assert.All(page1, r => Assert.Null(r.TerminalAtUtc)); + Assert.All(page1, r => Assert.True(r.CreatedAtUtc < cutoff)); + + var cursor1 = page1[^1]; + var page2 = await repo.QueryAsync( + filter, + new SiteCallPaging( + PageSize: 2, + AfterCreatedAtUtc: cursor1.CreatedAtUtc, + AfterId: cursor1.TrackedOperationId)); + // Only the third stuck row remains — no empty trailing page. + Assert.Single(page2); + Assert.Null(page2[0].TerminalAtUtc); + Assert.True(page2[0].CreatedAtUtc < cutoff); + + // Exactly the three stuck rows, no overlap, no non-stuck leakage. 
+ var returned = page1.Concat(page2).Select(r => r.TrackedOperationId).ToHashSet(); + Assert.Equal(stuckIds.ToHashSet(), returned); + } + [SkippableFact] public async Task PurgeTerminalAsync_RemovesTerminalAndOld() { diff --git a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs index 73f3ead..3c39257 100644 --- a/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs +++ b/tests/ScadaLink.SiteCallAudit.Tests/SiteCallAuditActorTests.cs @@ -293,6 +293,65 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture(TimeSpan.FromSeconds(10)); + Assert.True(page1.Success); + // Page is full — two stuck rows, both honestly stuck. + Assert.Equal(2, page1.SiteCalls.Count); + Assert.All(page1.SiteCalls, s => Assert.True(s.IsStuck)); + Assert.NotNull(page1.NextAfterCreatedAtUtc); + + actor.Tell( + new SiteCallQueryRequest( + "corr-stuck-p2", null, siteId, null, null, StuckOnly: true, + null, null, page1.NextAfterCreatedAtUtc, page1.NextAfterId, + PageSize: 2), + TestActor); + var page2 = ExpectMsg(TimeSpan.FromSeconds(10)); + Assert.True(page2.Success); + // Final page — the third stuck row, the only remaining match. + Assert.Single(page2.SiteCalls); + Assert.All(page2.SiteCalls, s => Assert.True(s.IsStuck)); + + // No overlap, exactly the three stuck rows across both pages. + var allIds = page1.SiteCalls.Concat(page2.SiteCalls) + .Select(s => s.TrackedOperationId).ToHashSet(); + Assert.Equal(3, allIds.Count); + } + [SkippableFact] public async Task SiteCallDetailRequest_KnownId_ReturnsFullDetail() {