fix(sitecallaudit): UpdatedAtUtc index + per-row pull resilience + UTC-convention + first-cycle test (review)

This commit is contained in:
Joseph Doherty
2026-06-15 10:47:25 -04:00
parent 963e3427da
commit 6b0140dd62
5 changed files with 118 additions and 21 deletions
@@ -41,8 +41,10 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <see cref="StatusCode.DeadlineExceeded"/>, <see cref="StatusCode.Cancelled"/>,
/// bare <see cref="HttpRequestException"/> / <c>SocketException</c>) are caught
/// and collapsed to an empty response so one offline site never sinks the rest
/// of the reconciliation tick. Any other fault (e.g. a malformed reply that
/// fails DTO mapping) is also swallowed to empty: reconciliation is best-effort.
/// of the reconciliation tick. Any other transport/protocol fault is also
/// swallowed to empty: reconciliation is best-effort. Per-row DTO mapping faults
/// (e.g. a single unparseable <c>TrackedOperationId</c>) are handled more
/// narrowly: the offending row is skipped and logged, and the rest of the batch
/// is still returned.
/// </para>
/// <para>
/// <b>Testability.</b> The unary call is reached through the
@@ -138,15 +140,30 @@ public sealed class GrpcPullSiteCallsClient : IPullSiteCallsClient
return Empty;
}
// Map proto DTOs to central SiteCall entities, re-stamp SourceSite from
// the dialed siteId (the site leaves it empty), and order oldest-first by
// UpdatedAtUtc (the wire is already ordered by the site read, but the
// contract is explicit, so sort defensively).
var siteCalls = reply.Operationals
.Select(SiteCallDtoMapper.FromDto)
.Select(sc => sc with { SourceSite = siteId })
.OrderBy(sc => sc.UpdatedAtUtc)
.ToList();
// Map proto DTOs to central SiteCall entities PER-ROW so one malformed
// operational (e.g. an unparseable TrackedOperationId) is skipped+logged
// rather than sinking the whole batch through the outer catch-all. Each
// survivor is re-stamped with SourceSite from the dialed siteId (the site
// leaves it empty).
var siteCalls = new List<SiteCall>(reply.Operationals.Count);
foreach (var dto in reply.Operationals)
{
try
{
var sc = SiteCallDtoMapper.FromDto(dto) with { SourceSite = siteId };
siteCalls.Add(sc);
}
catch (Exception ex)
{
_logger.LogWarning(ex,
"PullSiteCalls dropped a malformed operational row from site {SiteId} (id='{Id}'); continuing with the rest of the batch.",
siteId, dto.TrackedOperationId);
}
}
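// Order oldest-first by UpdatedAtUtc (the wire is already ordered by the
// site read, but the contract is explicit, so sort defensively).
// NOTE(review): List<T>.Sort is an unstable sort, so rows with identical
// UpdatedAtUtc may not keep their wire order; use a stable sort (e.g. LINQ
// OrderBy) if ties must preserve the order the site sent.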
siteCalls.Sort((a, b) => a.UpdatedAtUtc.CompareTo(b.UpdatedAtUtc));
return new PullSiteCallsResponse(siteCalls, reply.MoreAvailable);
}
@@ -457,7 +457,9 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
// sinceUtc defaults to DateTime.MinValue when the wrapper is absent —
// i.e. "pull from the beginning of recorded history", which is the
// intended behaviour for the very first reconciliation cycle.
var since = request.SinceUtc?.ToDateTime().ToUniversalTime() ?? DateTime.MinValue;
var since = request.SinceUtc is not null
? DateTime.SpecifyKind(request.SinceUtc.ToDateTime(), DateTimeKind.Utc)
: DateTime.MinValue;
IReadOnlyList<AuditEvent> events;
try
@@ -537,10 +539,10 @@ public class SiteStreamGrpcServer : SiteStreamService.SiteStreamServiceBase
// since_utc defaults to DateTime.MinValue when the wrapper is absent —
// i.e. "pull from the beginning of recorded history", the intended
// behaviour for the very first reconciliation cycle. ToUniversalTime
// is safe here (the wire value is always a real UTC Timestamp, never the
// unspecified-MinValue the central client guards against on its side).
var since = request.SinceUtc?.ToDateTime().ToUniversalTime() ?? DateTime.MinValue;
// behaviour for the very first reconciliation cycle.
var since = request.SinceUtc is not null
? DateTime.SpecifyKind(request.SinceUtc.ToDateTime(), DateTimeKind.Utc)
: DateTime.MinValue;
IReadOnlyList<SiteCallOperational> operationals;
try
@@ -91,6 +91,8 @@ public class OperationTrackingStore : IOperationTrackingStore, IAsyncDisposable,
);
CREATE INDEX IF NOT EXISTS IX_OperationTracking_Status_Updated
ON OperationTracking (Status, UpdatedAtUtc);
CREATE INDEX IF NOT EXISTS IX_OperationTracking_UpdatedAt
ON OperationTracking (UpdatedAtUtc);
""";
cmd.ExecuteNonQuery();
@@ -370,8 +372,10 @@ public class OperationTrackingStore : IOperationTrackingStore, IAsyncDisposable,
// SiteRuntime-024: like GetStatusAsync, the reconciliation pull opens a
// fresh, ungated read connection so a long-running write never blocks
// central's PullSiteCalls. The query is a bounded, ordered scan over the
// (Status, UpdatedAtUtc) index range — UpdatedAtUtc is the cursor.
// central's PullSiteCalls. The query is a bounded, ordered scan served by
// the standalone IX_OperationTracking_UpdatedAt index — UpdatedAtUtc is
// the cursor. (The composite (Status, UpdatedAtUtc) index cannot satisfy a
// status-less UpdatedAtUtc range scan; this dedicated index does.)
await using var readConnection = new SqliteConnection(_connectionString);
await readConnection.OpenAsync(ct).ConfigureAwait(false);
@@ -390,9 +394,15 @@ public class OperationTrackingStore : IOperationTrackingStore, IAsyncDisposable,
ORDER BY UpdatedAtUtc ASC
LIMIT $batchSize;
""";
cmd.Parameters.AddWithValue(
"$since",
sinceUtc.ToString("o", CultureInfo.InvariantCulture));
// Force UTC kind before formatting so the cursor's "o" text matches the
// 'Z'-suffixed round-trip form the write path persists (DateTime.UtcNow
// .ToString("o")). A first-cycle DateTime.MinValue arrives Unspecified, and
// an Unspecified value's "o" rendering omits the trailing 'Z'. The cursor is
// compared against the stored values as text, so a cursor without 'Z' would
// not line up character-for-character with the stored form of the same instant.
// SpecifyKind only relabels the kind; it does not shift the clock value.
var sinceText = DateTime
.SpecifyKind(sinceUtc, DateTimeKind.Utc)
.ToString("o", CultureInfo.InvariantCulture);
cmd.Parameters.AddWithValue("$since", sinceText);
cmd.Parameters.AddWithValue("$batchSize", batchSize);
var rows = new List<SiteCallOperational>();