fix(sitecallaudit): push StuckOnly filter into SQL; doc accuracy fixes

This commit is contained in:
Joseph Doherty
2026-05-21 04:24:16 -04:00
parent e3519fdb39
commit ac1f73cf8a
8 changed files with 168 additions and 20 deletions

View File

@@ -18,6 +18,10 @@ namespace ScadaLink.Commons.Messages.Audit;
/// exact-match target filter, consistent with the repository's
/// <see cref="SiteCallQueryFilter.Target"/> predicate.
/// </remarks>
/// <param name="PageSize">
/// Requested page size. The actor clamps this to the <c>[1, 200]</c> range, so
/// the effective ceiling is 200 rows per page regardless of the value sent.
/// </param>
public sealed record SiteCallQueryRequest(
string CorrelationId,
string? StatusFilter,
@@ -39,6 +43,12 @@ public sealed record SiteCallQueryRequest(
/// <see cref="ScadaLink.Commons.Messages.Notification.NotificationSummary"/>
/// none are surfaced here.
/// </summary>
/// <remarks>
/// <see cref="HttpStatus"/> is not called out in the Site Call Audit plan, but
/// it is a real (nullable) <see cref="ScadaLink.Commons.Entities.Audit.SiteCall"/>
/// column — the last HTTP status code observed for the call — so it is surfaced
/// here for the grid; <c>null</c> for non-HTTP channels or before a first attempt.
/// </remarks>
public sealed record SiteCallSummary(
Guid TrackedOperationId,
string SourceSite,

View File

@@ -8,7 +8,7 @@ namespace ScadaLink.Commons.Types.Audit;
/// Notification Outbox tile layout.
/// </summary>
/// <param name="BufferedCount">
/// Count of non-terminal rows (<c>Pending</c> + <c>Retrying</c>) — calls
/// Count of non-terminal rows (<c>TerminalAtUtc IS NULL</c>) — calls
/// buffered at sites awaiting retry.
/// </param>
/// <param name="ParkedCount">Count of rows in the <c>Parked</c> status.</param>
@@ -25,7 +25,7 @@ namespace ScadaLink.Commons.Types.Audit;
/// <c>null</c> when there are no non-terminal rows.
/// </param>
/// <param name="StuckCount">
/// Count of non-terminal rows (<c>Pending</c>/<c>Retrying</c>) whose
/// Count of non-terminal rows (<c>TerminalAtUtc IS NULL</c>) whose
/// <see cref="ScadaLink.Commons.Entities.Audit.SiteCall.CreatedAtUtc"/> is older
/// than the supplied stuck cutoff. Display-only — no escalation.
/// </param>

View File

@@ -12,10 +12,25 @@ namespace ScadaLink.Commons.Types.Audit;
/// underlying columns are bounded ASCII (varchar) and the Central UI Site Calls
/// page exposes them as drop-down filters, not free-text search.
/// </remarks>
/// <param name="Channel">Restrict to a single channel (exact match).</param>
/// <param name="SourceSite">Restrict to a single source site (exact match).</param>
/// <param name="Status">Restrict to a single status (exact match).</param>
/// <param name="Target">Restrict to a single target (exact match).</param>
/// <param name="FromUtc">Inclusive lower bound on <c>CreatedAtUtc</c>.</param>
/// <param name="ToUtc">Inclusive upper bound on <c>CreatedAtUtc</c>.</param>
/// <param name="StuckCutoffUtc">
/// When set, restrict to stuck rows: <c>TerminalAtUtc IS NULL AND CreatedAtUtc &lt;
/// StuckCutoffUtc</c>. Both columns are plain (no value converter) and compose
/// directly with the keyset cursor. Mirrors
/// <see cref="ScadaLink.Commons.Types.Notifications.NotificationOutboxFilter.StuckCutoff"/>;
/// keeps the "StuckOnly" filter honest so paging never returns under-filled
/// pages with a non-null next cursor.
/// </param>
public sealed record SiteCallQueryFilter(
string? Channel = null,
string? SourceSite = null,
string? Status = null,
string? Target = null,
DateTime? FromUtc = null,
DateTime? ToUtc = null);
DateTime? ToUtc = null,
DateTime? StuckCutoffUtc = null);

View File

@@ -7,7 +7,7 @@ namespace ScadaLink.Commons.Types.Audit;
/// <see cref="ScadaLink.Commons.Types.Notifications.SiteNotificationKpiSnapshot"/>.
/// </summary>
/// <param name="SourceSite">The site identifier these metrics are scoped to.</param>
/// <param name="BufferedCount">Count of this site's non-terminal rows (<c>Pending</c> + <c>Retrying</c>).</param>
/// <param name="BufferedCount">Count of this site's non-terminal rows (<c>TerminalAtUtc IS NULL</c>).</param>
/// <param name="ParkedCount">Count of this site's rows in the <c>Parked</c> status.</param>
/// <param name="FailedLastInterval">
/// Count of this site's <c>Failed</c> rows whose <c>TerminalAtUtc</c> is at or

View File

@@ -164,7 +164,13 @@ WHERE TrackedOperationId = {idText}
var fromUtc = filter.FromUtc;
var toUtc = filter.ToUtc;
var stuckCutoff = filter.StuckCutoffUtc;
// The stuck predicate (TerminalAtUtc IS NULL AND CreatedAtUtc < cutoff)
// is pushed into SQL here — both columns are plain (no value converter)
// and compose with the keyset cursor, so a StuckOnly page is honest:
// never under-filled with a non-null next cursor. Mirrors how
// NotificationOutboxRepository.QueryAsync applies NotificationOutboxFilter.StuckCutoff.
FormattableString sql = $@"
SELECT TOP ({paging.PageSize})
TrackedOperationId, Channel, Target, SourceSite, Status, RetryCount,
@@ -176,6 +182,7 @@ WHERE ({filter.Channel} IS NULL OR Channel = {filter.Channel})
AND ({filter.Target} IS NULL OR Target = {filter.Target})
AND ({fromUtc} IS NULL OR CreatedAtUtc >= {fromUtc})
AND ({toUtc} IS NULL OR CreatedAtUtc <= {toUtc})
AND ({stuckCutoff} IS NULL OR (TerminalAtUtc IS NULL AND CreatedAtUtc < {stuckCutoff}))
AND ({(hasCursor ? 1 : 0)} = 0
OR CreatedAtUtc < {afterCreated}
OR (CreatedAtUtc = {afterCreated} AND TrackedOperationId < {afterIdString}))

View File

@@ -21,10 +21,9 @@ namespace ScadaLink.SiteCallAudit;
/// </summary>
/// <remarks>
/// <para>
/// M3 ships the minimum surface: ingest only. Reconciliation, KPIs, and
/// central→site Retry/Discard relay are deferred (per CLAUDE.md scope
/// discipline — Site Call Audit's KPIs and the Retry/Discard relay land in a
/// follow-up).
/// Query, detail and KPIs land in Task 4; reconciliation and the central→site
/// Retry/Discard relay remain deferred (per CLAUDE.md scope discipline — they
/// land in a later follow-up).
/// </para>
/// <para>
/// Per CLAUDE.md "audit-write failure NEVER aborts the user-facing action" —
@@ -195,13 +194,20 @@ public class SiteCallAuditActor : ReceiveActor
private async Task<SiteCallQueryResponse> QueryAsync(SiteCallQueryRequest request, DateTime now)
{
var stuckCutoff = now - _options.StuckAgeThreshold;
var filter = new SiteCallQueryFilter(
Channel: NullIfBlank(request.ChannelFilter),
SourceSite: NullIfBlank(request.SourceSiteFilter),
Status: NullIfBlank(request.StatusFilter),
Target: NullIfBlank(request.TargetKeyword),
FromUtc: request.FromUtc,
ToUtc: request.ToUtc);
ToUtc: request.ToUtc,
// StuckOnly is pushed into the repository SQL via StuckCutoffUtc —
// TerminalAtUtc IS NULL AND CreatedAtUtc < cutoff composes with the
// keyset cursor, so the page is always honest (full pages, no empty
// pages with a non-null next cursor).
StuckCutoffUtc: request.StuckOnly ? stuckCutoff : null);
var pageSize = Math.Clamp(request.PageSize, 1, MaxPageSize);
var paging = new SiteCallPaging(
@@ -214,21 +220,11 @@ public class SiteCallAuditActor : ReceiveActor
{
var rows = await repository.QueryAsync(filter, paging).ConfigureAwait(false);
var stuckCutoff = now - _options.StuckAgeThreshold;
var summaries = rows
// StuckOnly is post-filtered here rather than pushed into the
// repository SQL — the SiteCallQueryFilter has no stuck predicate
// and a status-aware created-before clause does not compose with
// the keyset cursor. The page may therefore return fewer than
// PageSize rows when StuckOnly is set; that is acceptable for a
// display-only filter.
.Where(row => !request.StuckOnly || IsStuck(row, stuckCutoff))
.Select(row => ToSummary(row, stuckCutoff))
.ToList();
// The next-page cursor is the LAST row of the materialised page
// before StuckOnly post-filtering, so paging still advances even
// when every row on a page was filtered out.
// The next-page cursor is the last row of the materialised page.
var cursorRow = rows.Count > 0 ? rows[^1] : null;
return new SiteCallQueryResponse(

View File

@@ -271,6 +271,67 @@ public class SiteCallAuditRepositoryTests : IClassFixture<MsSqlMigrationFixture>
Assert.Equal(5, allIds.Count);
}
[SkippableFact]
public async Task QueryAsync_StuckCutoff_ComposesWithKeysetPaging_NoEmptyPages()
{
Skip.IfNot(_fixture.Available, _fixture.SkipReason);
var site = NewSiteId();
await using var context = CreateContext();
var repo = new SiteCallAuditRepository(context);
// Three stuck rows (non-terminal, created before the cutoff) interleaved
// by CreatedAtUtc with non-stuck rows: recent non-terminal rows and an
// old-but-terminal row. The stuck predicate is pushed into the SQL WHERE
// alongside the keyset cursor, so each page must come back full of stuck
// rows — never under-filled by a post-filter.
var t0 = new DateTime(2026, 5, 20, 8, 0, 0, DateTimeKind.Utc);
var cutoff = t0.AddMinutes(10);
var stuckIds = new List<TrackedOperationId>();
for (var i = 0; i < 3; i++)
{
var stuckId = TrackedOperationId.New();
stuckIds.Add(stuckId);
// Stuck: non-terminal, created before the cutoff.
await repo.UpsertAsync(NewRow(
stuckId, sourceSite: site, status: "Attempted",
createdAtUtc: t0.AddMinutes(i)));
// Not stuck: non-terminal but created after the cutoff.
await repo.UpsertAsync(NewRow(
TrackedOperationId.New(), sourceSite: site, status: "Attempted",
createdAtUtc: cutoff.AddMinutes(i + 1)));
// Not stuck: created before the cutoff but terminal.
await repo.UpsertAsync(NewRow(
TrackedOperationId.New(), sourceSite: site, status: "Delivered",
createdAtUtc: t0.AddMinutes(i), terminal: true,
terminalAtUtc: t0.AddMinutes(i + 1)));
}
var filter = new SiteCallQueryFilter(SourceSite: site, StuckCutoffUtc: cutoff);
var page1 = await repo.QueryAsync(filter, new SiteCallPaging(PageSize: 2));
Assert.Equal(2, page1.Count);
Assert.All(page1, r => Assert.Null(r.TerminalAtUtc));
Assert.All(page1, r => Assert.True(r.CreatedAtUtc < cutoff));
var cursor1 = page1[^1];
var page2 = await repo.QueryAsync(
filter,
new SiteCallPaging(
PageSize: 2,
AfterCreatedAtUtc: cursor1.CreatedAtUtc,
AfterId: cursor1.TrackedOperationId));
// Only the third stuck row remains — no empty trailing page.
Assert.Single(page2);
Assert.Null(page2[0].TerminalAtUtc);
Assert.True(page2[0].CreatedAtUtc < cutoff);
// Exactly the three stuck rows, no overlap, no non-stuck leakage.
var returned = page1.Concat(page2).Select(r => r.TrackedOperationId).ToHashSet();
Assert.Equal(stuckIds.ToHashSet(), returned);
}
[SkippableFact]
public async Task PurgeTerminalAsync_RemovesTerminalAndOld()
{

View File

@@ -293,6 +293,65 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
Assert.True(response.SiteCalls[0].IsStuck);
}
[SkippableFact]
public async Task SiteCallQueryRequest_StuckOnly_PagesAreFull_NoEmptyPagesWithCursor()
{
Skip.IfNot(_fixture.Available, _fixture.SkipReason);
var siteId = NewSiteId();
await using var context = CreateContext();
var repo = new SiteCallAuditRepository(context);
var actor = CreateActor(repo, new SiteCallAuditOptions { StuckAgeThreshold = TimeSpan.FromMinutes(10) });
var now = DateTime.UtcNow;
// Three stuck rows interleaved (by CreatedAtUtc) with three non-stuck
// rows: recent non-terminal and old-but-terminal. With the StuckOnly
// filter pushed into SQL, a page-size-2 query must return exactly the
// stuck rows two-per-page — never an under-filled page with a non-null
// next cursor caused by post-filtering.
for (var i = 0; i < 3; i++)
{
await repo.UpsertAsync(NewRow(
TrackedOperationId.New(), siteId, status: "Attempted",
createdAtUtc: now.AddMinutes(-30 - i)));
await repo.UpsertAsync(NewRow(
TrackedOperationId.New(), siteId, status: "Attempted",
createdAtUtc: now.AddMinutes(-2 - i)));
await repo.UpsertAsync(NewRow(
TrackedOperationId.New(), siteId, status: "Delivered",
createdAtUtc: now.AddMinutes(-40 - i), terminal: true));
}
actor.Tell(
new SiteCallQueryRequest(
"corr-stuck-p1", null, siteId, null, null, StuckOnly: true,
null, null, null, null, PageSize: 2),
TestActor);
var page1 = ExpectMsg<SiteCallQueryResponse>(TimeSpan.FromSeconds(10));
Assert.True(page1.Success);
// Page is full — two stuck rows, both honestly stuck.
Assert.Equal(2, page1.SiteCalls.Count);
Assert.All(page1.SiteCalls, s => Assert.True(s.IsStuck));
Assert.NotNull(page1.NextAfterCreatedAtUtc);
actor.Tell(
new SiteCallQueryRequest(
"corr-stuck-p2", null, siteId, null, null, StuckOnly: true,
null, null, page1.NextAfterCreatedAtUtc, page1.NextAfterId,
PageSize: 2),
TestActor);
var page2 = ExpectMsg<SiteCallQueryResponse>(TimeSpan.FromSeconds(10));
Assert.True(page2.Success);
// Final page — the third stuck row, the only remaining match.
Assert.Single(page2.SiteCalls);
Assert.All(page2.SiteCalls, s => Assert.True(s.IsStuck));
// No overlap, exactly the three stuck rows across both pages.
var allIds = page1.SiteCalls.Concat(page2.SiteCalls)
.Select(s => s.TrackedOperationId).ToHashSet();
Assert.Equal(3, allIds.Count);
}
[SkippableFact]
public async Task SiteCallDetailRequest_KnownId_ReturnsFullDetail()
{