feat(sitecallaudit): query, KPI and detail backend for the Site Calls page

This commit is contained in:
Joseph Doherty
2026-05-21 04:14:49 -04:00
parent 6f0d2ca499
commit e3519fdb39
17 changed files with 1514 additions and 18 deletions

View File

@@ -13,6 +13,8 @@
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<!-- BindConfiguration extension for the SiteCallAuditOptions binding. -->
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" />
</ItemGroup>
<ItemGroup>

View File

@@ -7,33 +7,34 @@ namespace ScadaLink.SiteCallAudit;
/// </summary>
/// <remarks>
/// <para>
/// M3 Bundle C shipped the ingest-only minimum surface (the actor itself).
/// This extension now also binds <see cref="SiteCallAuditOptions"/>
/// (stuck-call detection + KPI windowing for the read-side query/KPI
/// handlers). The rest of the DI surface — the reconciliation puller, the
/// central→site Retry/Discard relay and option validators — is still
/// deferred to later follow-ups.
/// </para>
/// <para>
/// The repository (<c>ISiteCallAuditRepository</c>) is registered by
/// <c>ScadaLink.ConfigurationDatabase.ServiceCollectionExtensions.AddConfigurationDatabase</c>,
/// so callers (the Host on the central node) must also call that. The actor's
/// <c>Props</c> are wired up in Host registration (Bundle F) rather than
/// here; the extension itself follows the same shape as the AuditLog and
/// NotificationOutbox composition roots.
/// </para>
/// </remarks>
public static class ServiceCollectionExtensions
{
/// <summary>Configuration section bound to <see cref="SiteCallAuditOptions"/>.</summary>
public const string OptionsSection = "ScadaLink:SiteCallAudit";
/// <summary>
/// Registers Site Call Audit (#22) services: the <see cref="SiteCallAuditOptions"/>
/// binding consumed by the actor's read-side KPI/query handlers. The actor's
/// <c>Props</c> are still constructed inline in Host wiring.
/// </summary>
/// <param name="services">The service collection to register into.</param>
/// <returns>The same <paramref name="services"/> instance, so calls can be chained.</returns>
/// <exception cref="ArgumentNullException"><paramref name="services"/> is <c>null</c>.</exception>
public static IServiceCollection AddSiteCallAudit(this IServiceCollection services)
{
ArgumentNullException.ThrowIfNull(services);
// Bind SiteCallAuditOptions from the OptionsSection configuration section.
// Nothing else is registered here: the actor's Props are built in Host wiring.
services.AddOptions<SiteCallAuditOptions>()
.BindConfiguration(OptionsSection);
return services;
}
}

View File

@@ -1,8 +1,11 @@
using Akka.Actor;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Messages.Audit;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Audit;
namespace ScadaLink.SiteCallAudit;
@@ -42,26 +45,34 @@ namespace ScadaLink.SiteCallAudit;
/// </remarks>
public class SiteCallAuditActor : ReceiveActor
{
/// <summary>
/// Maximum page size honoured by a <see cref="SiteCallQueryRequest"/>; the
/// requested page size is clamped into the range 1..MaxPageSize.
/// </summary>
private const int MaxPageSize = 200;
/// <summary>
/// Root service provider from which a DI scope is created per read message.
/// Assigned only by the <see cref="IServiceProvider"/>-taking constructor.
/// </summary>
private readonly IServiceProvider? _serviceProvider;
/// <summary>
/// Repository instance supplied by the test-mode constructor. When set it is
/// reused for every message and no DI scope is created.
/// </summary>
private readonly ISiteCallAuditRepository? _injectedRepository;
/// <summary>
/// Stuck-age threshold and KPI interval used to derive the cutoffs passed to
/// the read-side query and KPI handlers.
/// </summary>
private readonly SiteCallAuditOptions _options;
/// <summary>Logger supplied at construction.</summary>
private readonly ILogger<SiteCallAuditActor> _logger;
/// <summary>
/// Test-mode constructor — injects a concrete repository instance whose
/// lifetime exceeds the test, so the actor reuses the same instance
/// across every message. Used by Bundle C's MSSQL-backed TestKit fixture.
/// </summary>
/// <param name="repository">Repository reused for every message; no DI scope is created.</param>
/// <param name="logger">Logger for the actor.</param>
/// <param name="options">
/// Optional stuck/KPI windows a test can pin; when omitted a default
/// <see cref="SiteCallAuditOptions"/> instance is used.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="repository"/> or <paramref name="logger"/> is <c>null</c>.
/// </exception>
public SiteCallAuditActor(
    ISiteCallAuditRepository repository,
    ILogger<SiteCallAuditActor> logger,
    SiteCallAuditOptions? options = null)
{
    ArgumentNullException.ThrowIfNull(repository);
    ArgumentNullException.ThrowIfNull(logger);

    _injectedRepository = repository;
    _logger = logger;
    _options = options ?? new SiteCallAuditOptions();

    // RegisterHandlers wires the upsert handler together with the read-side
    // handlers, so the upsert handler must not be registered a second time here.
    RegisterHandlers();
}
/// <summary>
@@ -73,15 +84,33 @@ public class SiteCallAuditActor : ReceiveActor
/// </summary>
public SiteCallAuditActor(
    IServiceProvider serviceProvider,
    SiteCallAuditOptions options,
    ILogger<SiteCallAuditActor> logger)
{
    // Each dependency is null-checked as it is stored, in declaration order,
    // so the first missing argument is the one reported.
    _serviceProvider = serviceProvider ?? throw new ArgumentNullException(nameof(serviceProvider));
    _options = options ?? throw new ArgumentNullException(nameof(options));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));

    // Handlers are attached only once every dependency is in place.
    RegisterHandlers();
}
/// <summary>
/// Attaches every message handler the actor supports. Both constructors call
/// this, so the ingest path (upsert) and the Task 4 read-side (query, detail,
/// global KPI, per-site KPI) are always registered together. The read-side
/// handlers answer an Ask: each one snapshots <c>Sender</c> synchronously and
/// delivers its reply through <c>PipeTo</c>.
/// </summary>
private void RegisterHandlers()
{
    // Ingest path.
    ReceiveAsync<UpsertSiteCallCommand>(command => OnUpsertAsync(command));

    // Read-side paths.
    Receive<SiteCallQueryRequest>(query => HandleQuery(query));
    Receive<SiteCallDetailRequest>(detail => HandleDetail(detail));
    Receive<SiteCallKpiRequest>(kpi => HandleKpi(kpi));
    Receive<PerSiteSiteCallKpiRequest>(perSiteKpi => HandlePerSiteKpi(perSiteKpi));
}
/// <summary>
@@ -137,4 +166,305 @@ public class SiteCallAuditActor : ReceiveActor
scope?.Dispose();
}
}
// ── Task 4: read-side (query / detail / KPI) ──
/// <summary>
/// Entry point for a paginated, filtered query over the <c>SiteCalls</c>
/// table. The actual work happens in <see cref="QueryAsync"/>; this method
/// only snapshots the sender and the current UTC time, then pipes either the
/// query response or — when the task faults — a failure response carrying
/// the base exception message and an empty result list back to that sender.
/// </summary>
private void HandleQuery(SiteCallQueryRequest request)
{
    // Snapshot Sender and the clock synchronously, before any async work starts.
    var replyTo = Sender;
    var utcNow = DateTime.UtcNow;

    // Shape of the reply when the query task faults.
    SiteCallQueryResponse BuildFailure(Exception fault) => new(
        request.CorrelationId,
        Success: false,
        ErrorMessage: fault.GetBaseException().Message,
        SiteCalls: Array.Empty<SiteCallSummary>(),
        NextAfterCreatedAtUtc: null,
        NextAfterId: null);

    var pending = QueryAsync(request, utcNow);
    pending.PipeTo(
        replyTo,
        success: reply => reply,
        failure: fault => BuildFailure(fault));
}
/// <summary>
/// Runs one page of the site-call query: translates the request into a
/// <see cref="SiteCallQueryFilter"/> and a <see cref="SiteCallPaging"/> keyset
/// cursor, fetches the page from a repository obtained via
/// <see cref="ResolveRepository"/>, and maps the rows to summaries.
/// </summary>
/// <param name="request">The query request (filters, page size, cursor, StuckOnly flag).</param>
/// <param name="now">Reference time from which the stuck cutoff is derived.</param>
/// <returns>A successful response holding the mapped rows and the next-page cursor.</returns>
private async Task<SiteCallQueryResponse> QueryAsync(SiteCallQueryRequest request, DateTime now)
{
    // Blank filter strings collapse to null, i.e. "no constraint".
    var criteria = new SiteCallQueryFilter(
        Channel: NullIfBlank(request.ChannelFilter),
        SourceSite: NullIfBlank(request.SourceSiteFilter),
        Status: NullIfBlank(request.StatusFilter),
        Target: NullIfBlank(request.TargetKeyword),
        FromUtc: request.FromUtc,
        ToUtc: request.ToUtc);

    var cursor = new SiteCallPaging(
        PageSize: Math.Clamp(request.PageSize, 1, MaxPageSize),
        AfterCreatedAtUtc: request.AfterCreatedAtUtc,
        AfterId: request.AfterId is { } afterId ? new TrackedOperationId(afterId) : null);

    var (scope, repository) = ResolveRepository();

    // scope is null in test mode; a using statement tolerates a null resource.
    using (scope)
    {
        var page = await repository.QueryAsync(criteria, cursor).ConfigureAwait(false);
        var stuckBefore = now - _options.StuckAgeThreshold;

        // StuckOnly is applied here, after the fetch, instead of in the
        // repository SQL: SiteCallQueryFilter carries no stuck predicate and a
        // status-aware created-before clause does not compose with the keyset
        // cursor. A StuckOnly page can therefore hold fewer than PageSize rows,
        // which is acceptable for a display-only filter.
        var visible = new List<SiteCallSummary>();
        foreach (var call in page)
        {
            if (request.StuckOnly && !IsStuck(call, stuckBefore))
            {
                continue;
            }

            visible.Add(ToSummary(call, stuckBefore));
        }

        // The cursor comes from the last FETCHED row, not the last visible
        // one, so paging keeps advancing even if the whole page was filtered out.
        var lastFetched = page.Count == 0 ? null : page[^1];

        return new SiteCallQueryResponse(
            request.CorrelationId,
            Success: true,
            ErrorMessage: null,
            SiteCalls: visible,
            NextAfterCreatedAtUtc: lastFetched?.CreatedAtUtc,
            NextAfterId: lastFetched?.TrackedOperationId.Value);
    }
}
/// <summary>
/// Entry point for the single-call detail lookup behind the report detail
/// modal. Delegates to <see cref="DetailAsync"/> and pipes its response to
/// the captured sender. If the task faults, the sender instead receives
/// <c>Success=false</c> with the base exception message and no detail; a row
/// that simply does not exist is reported by <see cref="DetailAsync"/> itself
/// as <c>Success=false</c> with a "not found" message.
/// </summary>
private void HandleDetail(SiteCallDetailRequest request)
{
    // Snapshot Sender synchronously, before any async work starts.
    var replyTo = Sender;

    // Shape of the reply when the lookup task faults.
    SiteCallDetailResponse BuildFailure(Exception fault) => new(
        request.CorrelationId,
        Success: false,
        ErrorMessage: fault.GetBaseException().Message,
        Detail: null);

    var pending = DetailAsync(request);
    pending.PipeTo(
        replyTo,
        success: reply => reply,
        failure: fault => BuildFailure(fault));
}
/// <summary>
/// Loads one cached call by its tracked-operation id and maps it to a detail
/// response.
/// </summary>
/// <param name="request">The detail request carrying the id to look up.</param>
/// <returns>
/// <c>Success=true</c> with the mapped detail when the row exists; otherwise
/// <c>Success=false</c> with the message "site call not found".
/// </returns>
private async Task<SiteCallDetailResponse> DetailAsync(SiteCallDetailRequest request)
{
    var (scope, repository) = ResolveRepository();

    // scope is null in test mode; a using statement tolerates a null resource.
    using (scope)
    {
        var key = new TrackedOperationId(request.TrackedOperationId);
        var call = await repository.GetAsync(key).ConfigureAwait(false);
        var found = call is not null;

        return new SiteCallDetailResponse(
            request.CorrelationId,
            Success: found,
            ErrorMessage: found ? null : "site call not found",
            Detail: call is null ? null : ToDetail(call));
    }
}
/// <summary>
/// Entry point for the global KPI snapshot. The stuck cutoff is "now" minus
/// <see cref="SiteCallAuditOptions.StuckAgeThreshold"/> and the lower bound of
/// the failed/delivered window is "now" minus
/// <see cref="SiteCallAuditOptions.KpiInterval"/>. The snapshot — or, when the
/// task faults, an all-zero failure response carrying the base exception
/// message — is piped to the captured sender.
/// </summary>
private void HandleKpi(SiteCallKpiRequest request)
{
    // Snapshot Sender and the clock synchronously, before any async work starts.
    var replyTo = Sender;
    var utcNow = DateTime.UtcNow;

    KpiAsync(
            request.CorrelationId,
            stuckCutoff: utcNow - _options.StuckAgeThreshold,
            intervalSince: utcNow - _options.KpiInterval)
        .PipeTo(
            replyTo,
            success: reply => reply,
            failure: fault => new SiteCallKpiResponse(
                request.CorrelationId,
                Success: false,
                ErrorMessage: fault.GetBaseException().Message,
                BufferedCount: 0,
                ParkedCount: 0,
                FailedLastInterval: 0,
                DeliveredLastInterval: 0,
                OldestPendingAge: null,
                StuckCount: 0));
}
/// <summary>
/// Asks the repository for the global KPI snapshot and wraps it in a
/// successful <see cref="SiteCallKpiResponse"/>.
/// </summary>
/// <param name="correlationId">Correlation id echoed back in the response.</param>
/// <param name="stuckCutoff">Cutoff time passed to the repository for the stuck count.</param>
/// <param name="intervalSince">Lower time bound passed to the repository for the interval KPIs.</param>
private async Task<SiteCallKpiResponse> KpiAsync(
    string correlationId, DateTime stuckCutoff, DateTime intervalSince)
{
    var (scope, repository) = ResolveRepository();

    // scope is null in test mode; a using statement tolerates a null resource.
    using (scope)
    {
        var kpis = await repository
            .ComputeKpisAsync(stuckCutoff, intervalSince)
            .ConfigureAwait(false);

        return new SiteCallKpiResponse(
            correlationId,
            Success: true,
            ErrorMessage: null,
            BufferedCount: kpis.BufferedCount,
            ParkedCount: kpis.ParkedCount,
            FailedLastInterval: kpis.FailedLastInterval,
            DeliveredLastInterval: kpis.DeliveredLastInterval,
            OldestPendingAge: kpis.OldestPendingAge,
            StuckCount: kpis.StuckCount);
    }
}
/// <summary>
/// Entry point for the per-source-site KPI breakdown. Derives the stuck
/// cutoff and interval bound exactly as <see cref="HandleKpi"/> does, then
/// pipes the per-site result — or, when the task faults, a failure response
/// with the base exception message and an empty site list — to the captured
/// sender.
/// </summary>
private void HandlePerSiteKpi(PerSiteSiteCallKpiRequest request)
{
    // Snapshot Sender and the clock synchronously, before any async work starts.
    var replyTo = Sender;
    var utcNow = DateTime.UtcNow;

    PerSiteKpiAsync(
            request.CorrelationId,
            stuckCutoff: utcNow - _options.StuckAgeThreshold,
            intervalSince: utcNow - _options.KpiInterval)
        .PipeTo(
            replyTo,
            success: reply => reply,
            failure: fault => new PerSiteSiteCallKpiResponse(
                request.CorrelationId,
                Success: false,
                ErrorMessage: fault.GetBaseException().Message,
                Sites: Array.Empty<SiteCallSiteKpiSnapshot>()));
}
/// <summary>
/// Asks the repository for the per-site KPI snapshots and wraps them in a
/// successful <see cref="PerSiteSiteCallKpiResponse"/>.
/// </summary>
/// <param name="correlationId">Correlation id echoed back in the response.</param>
/// <param name="stuckCutoff">Cutoff time passed to the repository for the stuck counts.</param>
/// <param name="intervalSince">Lower time bound passed to the repository for the interval KPIs.</param>
private async Task<PerSiteSiteCallKpiResponse> PerSiteKpiAsync(
    string correlationId, DateTime stuckCutoff, DateTime intervalSince)
{
    var (scope, repository) = ResolveRepository();

    // scope is null in test mode; a using statement tolerates a null resource.
    using (scope)
    {
        var perSite = await repository
            .ComputePerSiteKpisAsync(stuckCutoff, intervalSince)
            .ConfigureAwait(false);

        return new PerSiteSiteCallKpiResponse(
            correlationId,
            Success: true,
            ErrorMessage: null,
            Sites: perSite);
    }
}
/// <summary>
/// Resolves an <see cref="ISiteCallAuditRepository"/> for one read message.
/// In test mode the injected instance is returned with a null scope; in
/// production a fresh DI scope is created and returned so the caller can
/// dispose it once the read completes — the same scope-per-message pattern
/// as <see cref="OnUpsertAsync"/>.
/// </summary>
/// <returns>
/// The scope to dispose after the read (<c>null</c> in test mode) together
/// with the repository to use.
/// </returns>
/// <remarks>
/// If the repository cannot be resolved from the new scope, the scope is
/// disposed here before the exception propagates: the caller never receives
/// the scope on that path, so its own cleanup could not release it.
/// </remarks>
private (IServiceScope? Scope, ISiteCallAuditRepository Repository) ResolveRepository()
{
    if (_injectedRepository is not null)
    {
        return (null, _injectedRepository);
    }

    var scope = _serviceProvider!.CreateScope();
    try
    {
        return (scope, scope.ServiceProvider.GetRequiredService<ISiteCallAuditRepository>());
    }
    catch
    {
        // Resolution failed after the scope was created — release it here,
        // then let the original exception surface unchanged.
        scope.Dispose();
        throw;
    }
}
/// <summary>
/// Decides whether a cached call is stuck: it must still be non-terminal and
/// must have been created before <paramref name="stuckCutoff"/>.
/// </summary>
/// <remarks>
/// "Non-terminal" means <see cref="SiteCall.TerminalAtUtc"/> is <c>null</c>.
/// The status string is deliberately not consulted: the <c>SiteCalls</c>
/// operational mirror stores <c>AuditStatus</c>-derived values
/// (<c>Attempted</c>/<c>Delivered</c>/<c>Parked</c>/...) rather than the
/// tracking-lifecycle <c>Pending</c>/<c>Retrying</c> names used by the spec's
/// KPI section, so no status value means "buffered". <c>TerminalAtUtc</c> is
/// the entity's own active/terminal discriminator and is consistent with the
/// repository KPI counts and <c>PurgeTerminalAsync</c>.
/// </remarks>
private static bool IsStuck(SiteCall row, DateTime stuckCutoff)
{
    // A call that already reached a terminal state can never be stuck.
    if (row.TerminalAtUtc is not null)
    {
        return false;
    }

    return row.CreatedAtUtc < stuckCutoff;
}
/// <summary>
/// Maps a <see cref="SiteCall"/> row to the list-view
/// <see cref="SiteCallSummary"/>, computing the stuck flag against
/// <paramref name="stuckCutoff"/>.
/// </summary>
private static SiteCallSummary ToSummary(SiteCall row, DateTime stuckCutoff)
{
    var stuck = IsStuck(row, stuckCutoff);

    return new(
        TrackedOperationId: row.TrackedOperationId.Value,
        SourceSite: row.SourceSite,
        Channel: row.Channel,
        Target: row.Target,
        Status: row.Status,
        RetryCount: row.RetryCount,
        LastError: row.LastError,
        HttpStatus: row.HttpStatus,
        CreatedAtUtc: row.CreatedAtUtc,
        UpdatedAtUtc: row.UpdatedAtUtc,
        TerminalAtUtc: row.TerminalAtUtc,
        IsStuck: stuck);
}
/// <summary>
/// Maps a <see cref="SiteCall"/> row to the full <see cref="SiteCallDetail"/>
/// shown in the detail view. Copies the same fields as the summary mapping
/// except the stuck flag, and additionally the ingest timestamp.
/// </summary>
private static SiteCallDetail ToDetail(SiteCall row) => new(
    TrackedOperationId: row.TrackedOperationId.Value,
    SourceSite: row.SourceSite,
    Channel: row.Channel,
    Target: row.Target,
    Status: row.Status,
    RetryCount: row.RetryCount,
    LastError: row.LastError,
    HttpStatus: row.HttpStatus,
    CreatedAtUtc: row.CreatedAtUtc,
    UpdatedAtUtc: row.UpdatedAtUtc,
    TerminalAtUtc: row.TerminalAtUtc,
    IngestedAtUtc: row.IngestedAtUtc);
/// <summary>
/// Normalises a UI filter string: <c>null</c>, empty and whitespace-only
/// input all become <c>null</c>, which <see cref="SiteCallQueryFilter"/>
/// interprets as a no-op predicate. Any other value is returned unchanged
/// (it is not trimmed).
/// </summary>
private static string? NullIfBlank(string? value)
{
    if (string.IsNullOrWhiteSpace(value))
    {
        return null;
    }

    return value;
}
}

View File

@@ -0,0 +1,26 @@
namespace ScadaLink.SiteCallAudit;
/// <summary>
/// Configuration options for the Site Call Audit (#22) read-side: stuck-call
/// detection and KPI windowing. Mirrors the KPI-relevant subset of
/// <c>NotificationOutboxOptions</c> — the reconciliation, purge and dispatch
/// cadence options the Notification Outbox carries are not part of the Site
/// Call Audit read-side backend and are deliberately omitted here.
/// </summary>
/// <remarks>
/// Bound from the <c>ScadaLink:SiteCallAudit</c> configuration section by
/// <c>ServiceCollectionExtensions.AddSiteCallAudit</c>.
/// </remarks>
public class SiteCallAuditOptions
{
/// <summary>
/// Age past which a non-terminal cached call is considered stuck. The actor
/// treats a call as non-terminal when its <c>TerminalAtUtc</c> is <c>null</c>
/// (not by status string) and as stuck when its <c>CreatedAtUtc</c> is earlier
/// than "now" minus this threshold. Display-only — surfaced as the Stuck KPI
/// and a row badge, with no escalation. Default 10 minutes, intended to match
/// <c>NotificationOutboxOptions.StuckAgeThreshold</c>.
/// </summary>
public TimeSpan StuckAgeThreshold { get; set; } = TimeSpan.FromMinutes(10);
/// <summary>
/// Trailing window used to compute the delivered- and failed-last-interval
/// throughput KPIs: the actor passes "now" minus this interval to the
/// repository as the lower time bound. Default 1 minute, intended to match
/// <c>NotificationOutboxOptions.DeliveredKpiWindow</c>.
/// </summary>
public TimeSpan KpiInterval { get; set; } = TimeSpan.FromMinutes(1);
}