feat(sitecallaudit): query, KPI and detail backend for the Site Calls page

This commit is contained in:
Joseph Doherty
2026-05-21 04:14:49 -04:00
parent 6f0d2ca499
commit e3519fdb39
17 changed files with 1514 additions and 18 deletions

View File

@@ -63,4 +63,27 @@ public interface ISiteCallAuditRepository
/// deleted.
/// </summary>
Task<int> PurgeTerminalAsync(DateTime olderThanUtc, CancellationToken ct = default);
/// <summary>
/// Computes a point-in-time global <see cref="SiteCallKpiSnapshot"/> from the
/// <c>SiteCalls</c> table. Counts are aggregated server-side (no row
/// materialisation): <c>StuckCount</c> uses <paramref name="stuckCutoff"/>;
/// <c>FailedLastInterval</c> / <c>DeliveredLastInterval</c> use
/// <paramref name="intervalSince"/>; the current time for <c>OldestPendingAge</c>
/// is captured inside the method.
/// </summary>
Task<SiteCallKpiSnapshot> ComputeKpisAsync(
DateTime stuckCutoff,
DateTime intervalSince,
CancellationToken ct = default);
/// <summary>
/// Computes a point-in-time <see cref="SiteCallSiteKpiSnapshot"/> per source
/// site. Sites with no <c>SiteCalls</c> rows at all are omitted. The stuck
/// cutoff and interval bounds are interpreted as in <see cref="ComputeKpisAsync"/>.
/// </summary>
Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
DateTime stuckCutoff,
DateTime intervalSince,
CancellationToken ct = default);
}

View File

@@ -0,0 +1,153 @@
using ScadaLink.Commons.Types.Audit;
namespace ScadaLink.Commons.Messages.Audit;
/// <summary>
/// Site Calls UI -> Central: paginated, filtered query over the central
/// <c>SiteCalls</c> table (Site Call Audit #22). All filter fields are optional;
/// <see cref="StuckOnly"/> restricts results to stuck cached calls. Mirrors
/// <see cref="ScadaLink.Commons.Messages.Notification.NotificationOutboxQueryRequest"/>
/// but uses keyset paging (<see cref="AfterCreatedAtUtc"/> + <see cref="AfterId"/>)
/// to match the repository's <c>(CreatedAtUtc DESC, TrackedOperationId DESC)</c>
/// cursor, rather than page numbers.
/// </summary>
/// <remarks>
/// <see cref="ChannelFilter"/> matches the <c>SiteCall.Channel</c> column —
/// <c>"ApiOutbound"</c> or <c>"DbOutbound"</c> (the spec's <c>Kind</c> notion;
/// the entity exposes it as <c>Channel</c>). <see cref="TargetKeyword"/> is an
/// exact-match target filter, consistent with the repository's
/// <see cref="SiteCallQueryFilter.Target"/> predicate.
/// </remarks>
public sealed record SiteCallQueryRequest(
string CorrelationId,
string? StatusFilter,
string? SourceSiteFilter,
string? ChannelFilter,
string? TargetKeyword,
bool StuckOnly,
DateTime? FromUtc,
DateTime? ToUtc,
DateTime? AfterCreatedAtUtc,
Guid? AfterId,
int PageSize);
/// <summary>
/// A single <c>SiteCalls</c> row summarised for the Site Calls UI grid. Carries
/// only the columns the <see cref="ScadaLink.Commons.Entities.Audit.SiteCall"/>
/// entity genuinely exposes — there are no source-instance/script provenance
/// columns on that entity, so unlike
/// <see cref="ScadaLink.Commons.Messages.Notification.NotificationSummary"/>
/// none are surfaced here.
/// </summary>
public sealed record SiteCallSummary(
Guid TrackedOperationId,
string SourceSite,
string Channel,
string Target,
string Status,
int RetryCount,
string? LastError,
int? HttpStatus,
DateTime CreatedAtUtc,
DateTime UpdatedAtUtc,
DateTime? TerminalAtUtc,
bool IsStuck);
/// <summary>
/// Central -> Site Calls UI: paginated response for a <see cref="SiteCallQueryRequest"/>.
/// The keyset cursor of the last row is echoed back as
/// <see cref="NextAfterCreatedAtUtc"/> + <see cref="NextAfterId"/> for the caller
/// to request the following page; both are <c>null</c> when the page was empty.
/// On a repository fault <see cref="Success"/> is <c>false</c>,
/// <see cref="ErrorMessage"/> carries the cause and <see cref="SiteCalls"/> is empty.
/// </summary>
public sealed record SiteCallQueryResponse(
string CorrelationId,
bool Success,
string? ErrorMessage,
IReadOnlyList<SiteCallSummary> SiteCalls,
DateTime? NextAfterCreatedAtUtc,
Guid? NextAfterId);
/// <summary>
/// Site Calls UI -> Central: request for the full detail of a single cached call,
/// for the report detail modal.
/// </summary>
public sealed record SiteCallDetailRequest(
string CorrelationId,
Guid TrackedOperationId);
/// <summary>
/// Central -> Site Calls UI: full detail for one cached call. On a repository
/// fault or missing row, <see cref="Success"/> is <c>false</c> /
/// <see cref="Detail"/> is <c>null</c> and <see cref="ErrorMessage"/> carries
/// the cause.
/// </summary>
public sealed record SiteCallDetailResponse(
string CorrelationId,
bool Success,
string? ErrorMessage,
SiteCallDetail? Detail);
/// <summary>
/// Full <c>SiteCalls</c> row detail for the report detail modal — every field
/// on the <see cref="ScadaLink.Commons.Entities.Audit.SiteCall"/> entity,
/// including <see cref="LastError"/> and the <see cref="IngestedAtUtc"/>
/// timestamp the grid summary omits.
/// </summary>
public sealed record SiteCallDetail(
Guid TrackedOperationId,
string SourceSite,
string Channel,
string Target,
string Status,
int RetryCount,
string? LastError,
int? HttpStatus,
DateTime CreatedAtUtc,
DateTime UpdatedAtUtc,
DateTime? TerminalAtUtc,
DateTime IngestedAtUtc);
/// <summary>
/// Site Calls UI -> Central: request for the global <c>SiteCalls</c> KPI summary.
/// Mirrors <see cref="ScadaLink.Commons.Messages.Notification.NotificationKpiRequest"/>.
/// </summary>
public sealed record SiteCallKpiRequest(
string CorrelationId);
/// <summary>
/// Central -> Site Calls UI: KPI summary for the Site Calls dashboard. On a
/// repository fault <see cref="Success"/> is <c>false</c>,
/// <see cref="ErrorMessage"/> carries the cause, and the KPI fields are
/// zeroed/<c>null</c>.
/// </summary>
public sealed record SiteCallKpiResponse(
string CorrelationId,
bool Success,
string? ErrorMessage,
int BufferedCount,
int ParkedCount,
int FailedLastInterval,
int DeliveredLastInterval,
TimeSpan? OldestPendingAge,
int StuckCount);
/// <summary>
/// Site Calls UI -> Central: request for the per-source-site <c>SiteCalls</c>
/// KPI breakdown. Mirrors
/// <see cref="ScadaLink.Commons.Messages.Notification.PerSiteNotificationKpiRequest"/>.
/// </summary>
public sealed record PerSiteSiteCallKpiRequest(
string CorrelationId);
/// <summary>
/// Central -> Site Calls UI: per-site KPI breakdown for the Site Calls KPIs
/// page. On a repository fault <see cref="Success"/> is <c>false</c>,
/// <see cref="ErrorMessage"/> carries the cause, and <see cref="Sites"/> is empty.
/// </summary>
public sealed record PerSiteSiteCallKpiResponse(
string CorrelationId,
bool Success,
string? ErrorMessage,
IReadOnlyList<SiteCallSiteKpiSnapshot> Sites);

View File

@@ -0,0 +1,38 @@
namespace ScadaLink.Commons.Types.Audit;
/// <summary>
/// Point-in-time operational metrics for the central <c>SiteCalls</c> table
/// (Site Call Audit #22), surfaced on the health dashboard. The cached-call
/// counterpart of <see cref="ScadaLink.Commons.Types.Notifications.NotificationKpiSnapshot"/>;
/// mirrors its shape so the Central UI Site Calls KPI tiles can reuse the
/// Notification Outbox tile layout.
/// </summary>
/// <param name="BufferedCount">
/// Count of non-terminal rows (<c>Pending</c> + <c>Retrying</c>) — calls
/// buffered at sites awaiting retry.
/// </param>
/// <param name="ParkedCount">Count of rows in the <c>Parked</c> status.</param>
/// <param name="FailedLastInterval">
/// Count of <c>Failed</c> rows whose <see cref="ScadaLink.Commons.Entities.Audit.SiteCall.TerminalAtUtc"/>
/// is at or after the supplied "since" timestamp.
/// </param>
/// <param name="DeliveredLastInterval">
/// Count of <c>Delivered</c> rows whose <see cref="ScadaLink.Commons.Entities.Audit.SiteCall.TerminalAtUtc"/>
/// is at or after the supplied "since" timestamp.
/// </param>
/// <param name="OldestPendingAge">
/// Age of the oldest non-terminal row (<c>now - min(CreatedAtUtc)</c>), or
/// <c>null</c> when there are no non-terminal rows.
/// </param>
/// <param name="StuckCount">
/// Count of non-terminal rows (<c>Pending</c>/<c>Retrying</c>) whose
/// <see cref="ScadaLink.Commons.Entities.Audit.SiteCall.CreatedAtUtc"/> is older
/// than the supplied stuck cutoff. Display-only — no escalation.
/// </param>
public sealed record SiteCallKpiSnapshot(
int BufferedCount,
int ParkedCount,
int FailedLastInterval,
int DeliveredLastInterval,
TimeSpan? OldestPendingAge,
int StuckCount);

View File

@@ -0,0 +1,34 @@
namespace ScadaLink.Commons.Types.Audit;
/// <summary>
/// Point-in-time <c>SiteCalls</c> metrics scoped to a single source site. The
/// per-site counterpart of <see cref="SiteCallKpiSnapshot"/>; surfaced in the
/// per-site breakdown table on the Site Calls KPIs page. Mirrors
/// <see cref="ScadaLink.Commons.Types.Notifications.SiteNotificationKpiSnapshot"/>.
/// </summary>
/// <param name="SourceSite">The site identifier these metrics are scoped to.</param>
/// <param name="BufferedCount">Count of this site's non-terminal rows (<c>Pending</c> + <c>Retrying</c>).</param>
/// <param name="ParkedCount">Count of this site's rows in the <c>Parked</c> status.</param>
/// <param name="FailedLastInterval">
/// Count of this site's <c>Failed</c> rows whose <c>TerminalAtUtc</c> is at or
/// after the "since" timestamp.
/// </param>
/// <param name="DeliveredLastInterval">
/// Count of this site's <c>Delivered</c> rows whose <c>TerminalAtUtc</c> is at
/// or after the "since" timestamp.
/// </param>
/// <param name="OldestPendingAge">
/// Age of this site's oldest non-terminal row, or <c>null</c> when it has none.
/// </param>
/// <param name="StuckCount">
/// Count of this site's non-terminal rows whose <c>CreatedAtUtc</c> is older
/// than the stuck cutoff.
/// </param>
public sealed record SiteCallSiteKpiSnapshot(
string SourceSite,
int BufferedCount,
int ParkedCount,
int FailedLastInterval,
int DeliveredLastInterval,
TimeSpan? OldestPendingAge,
int StuckCount);

View File

@@ -2,6 +2,7 @@ using Akka.Actor;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Messages.Artifacts;
using ScadaLink.Commons.Messages.Audit;
using ScadaLink.Commons.Messages.DebugView;
using ScadaLink.Commons.Messages.Deployment;
using ScadaLink.Commons.Messages.Health;
@@ -25,6 +26,7 @@ public class CommunicationService
private readonly ILogger<CommunicationService> _logger;
private IActorRef? _centralCommunicationActor;
private IActorRef? _notificationOutboxProxy;
private IActorRef? _siteCallAuditProxy;
public CommunicationService(
IOptions<CommunicationOptions> options,
@@ -52,6 +54,17 @@ public class CommunicationService
_notificationOutboxProxy = notificationOutboxProxy;
}
/// <summary>
/// Sets the Site Call Audit (#22) singleton proxy reference. Called during
/// actor system startup. The Site Call Audit actor is central-local, so Site
/// Calls read calls Ask this proxy directly (no SiteEnvelope routing), the
/// same pattern as <see cref="SetNotificationOutbox"/>.
/// </summary>
public void SetSiteCallAudit(IActorRef siteCallAuditProxy)
{
_siteCallAuditProxy = siteCallAuditProxy;
}
/// <summary>
/// Triggers an immediate refresh of the site address cache from the database.
/// </summary>
@@ -80,6 +93,15 @@ public class CommunicationService
?? throw new InvalidOperationException("CommunicationService not initialized. NotificationOutbox proxy not set.");
}
/// <summary>
/// Gets the Site Call Audit proxy reference. Throws if not yet initialized.
/// </summary>
private IActorRef GetSiteCallAudit()
{
return _siteCallAuditProxy
?? throw new InvalidOperationException("CommunicationService not initialized. SiteCallAudit proxy not set.");
}
// ── Pattern 1: Instance Deployment ──
public async Task<DeploymentStatusResponse> DeployInstanceAsync(
@@ -295,6 +317,36 @@ public class CommunicationService
return await GetNotificationOutbox().Ask<PerSiteNotificationKpiResponse>(
request, _options.QueryTimeout, cancellationToken);
}
// ── Site Call Audit (central-local actor — Asked directly, no SiteEnvelope) ──
public async Task<SiteCallQueryResponse> QuerySiteCallsAsync(
SiteCallQueryRequest request, CancellationToken cancellationToken = default)
{
return await GetSiteCallAudit().Ask<SiteCallQueryResponse>(
request, _options.QueryTimeout, cancellationToken);
}
public async Task<SiteCallDetailResponse> GetSiteCallDetailAsync(
SiteCallDetailRequest request, CancellationToken cancellationToken = default)
{
return await GetSiteCallAudit().Ask<SiteCallDetailResponse>(
request, _options.QueryTimeout, cancellationToken);
}
public async Task<SiteCallKpiResponse> GetSiteCallKpisAsync(
SiteCallKpiRequest request, CancellationToken cancellationToken = default)
{
return await GetSiteCallAudit().Ask<SiteCallKpiResponse>(
request, _options.QueryTimeout, cancellationToken);
}
public async Task<PerSiteSiteCallKpiResponse> GetPerSiteSiteCallKpisAsync(
PerSiteSiteCallKpiRequest request, CancellationToken cancellationToken = default)
{
return await GetSiteCallAudit().Ask<PerSiteSiteCallKpiResponse>(
request, _options.QueryTimeout, cancellationToken);
}
}
/// <summary>

View File

@@ -201,6 +201,141 @@ ORDER BY CreatedAtUtc DESC, TrackedOperationId DESC;";
ct);
}
// Terminal status string literals for the interval-throughput KPIs. The
// Status column is a plain varchar (no value converter), so these compare
// directly in translated SQL.
//
// NOTE on the "buffered/non-terminal" definition: the SiteCalls operational
// mirror stores AuditStatus-derived strings (Attempted/Delivered/Parked/
// Failed/...), NOT the tracking-lifecycle Pending/Retrying names the spec's
// KPI section uses. There is therefore no Status string that means
// "buffered". The schema-honest predicate for "non-terminal / buffered" is
// TerminalAtUtc IS NULL — consistent with PurgeTerminalAsync's terminal
// predicate and with the SiteCall entity's own contract ("TerminalAtUtc ...
// null while still active"). All buffered / stuck / oldest-pending counts
// below key off TerminalAtUtc, not Status.
private const string StatusParked = "Parked";
private const string StatusDelivered = "Delivered";
private const string StatusFailed = "Failed";
/// <summary>
/// Computes the global KPI snapshot with five server-side aggregate queries
/// against <c>dbo.SiteCalls</c>. No rows are materialised — every count is a
/// translated <c>COUNT</c> and the oldest-pending age is a translated
/// <c>MIN(CreatedAtUtc)</c>. The <c>Status</c> and <c>CreatedAtUtc</c>/<c>TerminalAtUtc</c>
/// columns have no value converter, so the aggregates translate cleanly to
/// SQL Server (unlike the NotificationOutbox's <c>DateTimeOffset</c>-converted
/// column, which forces an order-and-take). "Buffered" / "stuck" key off
/// <c>TerminalAtUtc IS NULL</c> — see the field comments above.
/// </summary>
public async Task<SiteCallKpiSnapshot> ComputeKpisAsync(
DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
var now = DateTime.UtcNow;
var bufferedCount = await _context.SiteCalls
.CountAsync(s => s.TerminalAtUtc == null, ct);
var parkedCount = await _context.SiteCalls
.CountAsync(s => s.Status == StatusParked, ct);
var failedLastInterval = await _context.SiteCalls
.CountAsync(s => s.Status == StatusFailed
&& s.TerminalAtUtc != null
&& s.TerminalAtUtc >= intervalSince, ct);
var deliveredLastInterval = await _context.SiteCalls
.CountAsync(s => s.Status == StatusDelivered
&& s.TerminalAtUtc != null
&& s.TerminalAtUtc >= intervalSince, ct);
var stuckCount = await _context.SiteCalls
.CountAsync(s => s.TerminalAtUtc == null && s.CreatedAtUtc < stuckCutoff, ct);
var nonTerminal = _context.SiteCalls.Where(s => s.TerminalAtUtc == null);
TimeSpan? oldestPendingAge = null;
if (await nonTerminal.AnyAsync(ct))
{
var oldestCreatedAt = await nonTerminal.MinAsync(s => s.CreatedAtUtc, ct);
oldestPendingAge = now - oldestCreatedAt;
}
return new SiteCallKpiSnapshot(
BufferedCount: bufferedCount,
ParkedCount: parkedCount,
FailedLastInterval: failedLastInterval,
DeliveredLastInterval: deliveredLastInterval,
OldestPendingAge: oldestPendingAge,
StuckCount: stuckCount);
}
/// <summary>
/// Computes the per-source-site KPI breakdown. The five counts are
/// <c>GROUP BY SourceSite</c> aggregates; the oldest-pending age is a
/// per-site <c>MIN(CreatedAtUtc)</c> over the (bounded) non-terminal set —
/// all run server-side. A site appears in the result only if it has at
/// least one row matched by one of the count queries. "Buffered" / "stuck"
/// key off <c>TerminalAtUtc IS NULL</c> — see <see cref="ComputeKpisAsync"/>.
/// </summary>
public async Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
var now = DateTime.UtcNow;
var buffered = await CountBySiteAsync(s => s.TerminalAtUtc == null, ct);
var parked = await CountBySiteAsync(s => s.Status == StatusParked, ct);
var failed = await CountBySiteAsync(
s => s.Status == StatusFailed
&& s.TerminalAtUtc != null && s.TerminalAtUtc >= intervalSince, ct);
var delivered = await CountBySiteAsync(
s => s.Status == StatusDelivered
&& s.TerminalAtUtc != null && s.TerminalAtUtc >= intervalSince, ct);
var stuck = await CountBySiteAsync(
s => s.TerminalAtUtc == null && s.CreatedAtUtc < stuckCutoff, ct);
// Oldest non-terminal CreatedAtUtc per site — a server-side GROUP BY MIN.
var oldest = (await _context.SiteCalls
.Where(s => s.TerminalAtUtc == null)
.GroupBy(s => s.SourceSite)
.Select(g => new { Site = g.Key, Oldest = g.Min(s => s.CreatedAtUtc) })
.ToListAsync(ct))
.ToDictionary(x => x.Site, x => x.Oldest);
var siteIds = buffered.Keys
.Concat(parked.Keys).Concat(failed.Keys)
.Concat(delivered.Keys).Concat(stuck.Keys)
.Distinct()
.OrderBy(s => s, StringComparer.Ordinal);
return siteIds.Select(site => new SiteCallSiteKpiSnapshot(
SourceSite: site,
BufferedCount: buffered.GetValueOrDefault(site),
ParkedCount: parked.GetValueOrDefault(site),
FailedLastInterval: failed.GetValueOrDefault(site),
DeliveredLastInterval: delivered.GetValueOrDefault(site),
OldestPendingAge: oldest.TryGetValue(site, out var createdAt)
? now - createdAt
: null,
StuckCount: stuck.GetValueOrDefault(site))).ToList();
}
/// <summary>Counts <c>SiteCalls</c> rows matching <paramref name="predicate"/>, grouped by source site.</summary>
private async Task<Dictionary<string, int>> CountBySiteAsync(
System.Linq.Expressions.Expression<Func<SiteCall, bool>> predicate,
CancellationToken ct)
{
return await _context.SiteCalls
.Where(predicate)
.GroupBy(s => s.SourceSite)
.Select(g => new { Site = g.Key, Count = g.Count() })
.ToDictionaryAsync(x => x.Site, x => x.Count, ct);
}
private static int GetRankOrThrow(string status)
{
if (!StatusRank.TryGetValue(status, out var rank))

View File

@@ -423,10 +423,13 @@ akka {{
// is a scoped EF Core service.
var siteCallAuditLogger = _serviceProvider.GetRequiredService<ILoggerFactory>()
.CreateLogger<ScadaLink.SiteCallAudit.SiteCallAuditActor>();
var siteCallAuditOptions = _serviceProvider
.GetRequiredService<IOptions<ScadaLink.SiteCallAudit.SiteCallAuditOptions>>().Value;
var siteCallAuditSingletonProps = ClusterSingletonManager.Props(
singletonProps: Props.Create(() => new ScadaLink.SiteCallAudit.SiteCallAuditActor(
_serviceProvider,
siteCallAuditOptions,
siteCallAuditLogger)),
terminationMessage: PoisonPill.Instance,
settings: ClusterSingletonManagerSettings.Create(_actorSystem!)
@@ -437,7 +440,12 @@ akka {{
singletonManagerPath: "/user/site-call-audit-singleton",
settings: ClusterSingletonProxySettings.Create(_actorSystem)
.WithSingletonName("site-call-audit"));
_actorSystem.ActorOf(siteCallAuditProxyProps, "site-call-audit-proxy");
var siteCallAuditProxy = _actorSystem.ActorOf(siteCallAuditProxyProps, "site-call-audit-proxy");
// Hand the proxy to the CommunicationService so the Central UI can Ask
// the Site Call Audit actor directly (query, KPIs, detail) — mirrors the
// SetNotificationOutbox wiring above.
commService?.SetSiteCallAudit(siteCallAuditProxy);
_logger.LogInformation("SiteCallAuditActor singleton created");
_logger.LogInformation("Central actors registered. CentralCommunicationActor created.");

View File

@@ -13,6 +13,8 @@
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
<PackageReference Include="Microsoft.Extensions.Options" />
<!-- BindConfiguration extension for the SiteCallAuditOptions binding. -->
<PackageReference Include="Microsoft.Extensions.Options.ConfigurationExtensions" />
</ItemGroup>
<ItemGroup>

View File

@@ -7,33 +7,34 @@ namespace ScadaLink.SiteCallAudit;
/// </summary>
/// <remarks>
/// <para>
/// M3 Bundle C ships the ingest-only minimum surface (the actor itself); the
/// full DI surface — reconciliation puller, KPI projector, central→site
/// Retry/Discard relay, options + validators — is deferred to a follow-up.
/// Binds <see cref="SiteCallAuditOptions"/> (stuck-call detection + KPI
/// windowing for the read-side query/KPI handlers). The reconciliation puller
/// and central→site Retry/Discard relay are still deferred to later follow-ups.
/// </para>
/// <para>
/// The repository (<c>ISiteCallAuditRepository</c>) is registered by
/// <c>ScadaLink.ConfigurationDatabase.ServiceCollectionExtensions.AddConfigurationDatabase</c>,
/// so callers (the Host on the central node) must also call that. The actor's
/// <c>Props</c> are wired up in Host registration (Bundle F); this extension
/// is currently a no-op placeholder kept for symmetry with the AuditLog and
/// NotificationOutbox composition roots — adding it now means consumers can
/// reference the method without re-touching the Host project later.
/// <c>Props</c> are wired up in Host registration.
/// </para>
/// </remarks>
public static class ServiceCollectionExtensions
{
/// <summary>Configuration section bound to <see cref="SiteCallAuditOptions"/>.</summary>
public const string OptionsSection = "ScadaLink:SiteCallAudit";
/// <summary>
/// Registers Site Call Audit (#22) services. Currently a no-op
/// placeholder — Bundle F will populate this with the actor's Props
/// factory + options bindings. The method is exposed now so the Host
/// wiring call already exists at the API boundary.
/// Registers Site Call Audit (#22) services: the <see cref="SiteCallAuditOptions"/>
/// binding consumed by the actor's read-side KPI/query handlers. The actor's
/// <c>Props</c> are still constructed inline in Host wiring.
/// </summary>
public static IServiceCollection AddSiteCallAudit(this IServiceCollection services)
{
ArgumentNullException.ThrowIfNull(services);
// Actor props are constructed in Host wiring (Bundle F). This
// extension is a placeholder for future config + DI.
services.AddOptions<SiteCallAuditOptions>()
.BindConfiguration(OptionsSection);
return services;
}
}

View File

@@ -1,8 +1,11 @@
using Akka.Actor;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Messages.Audit;
using ScadaLink.Commons.Types;
using ScadaLink.Commons.Types.Audit;
namespace ScadaLink.SiteCallAudit;
@@ -42,26 +45,34 @@ namespace ScadaLink.SiteCallAudit;
/// </remarks>
public class SiteCallAuditActor : ReceiveActor
{
/// <summary>Maximum page size honoured by a <see cref="SiteCallQueryRequest"/>.</summary>
private const int MaxPageSize = 200;
private readonly IServiceProvider? _serviceProvider;
private readonly ISiteCallAuditRepository? _injectedRepository;
private readonly SiteCallAuditOptions _options;
private readonly ILogger<SiteCallAuditActor> _logger;
/// <summary>
/// Test-mode constructor — injects a concrete repository instance whose
/// lifetime exceeds the test, so the actor reuses the same instance
/// across every message. Used by Bundle C's MSSQL-backed TestKit fixture.
/// An optional <paramref name="options"/> lets a test pin the stuck/KPI
/// windows; when omitted the production defaults apply.
/// </summary>
public SiteCallAuditActor(
ISiteCallAuditRepository repository,
ILogger<SiteCallAuditActor> logger)
ILogger<SiteCallAuditActor> logger,
SiteCallAuditOptions? options = null)
{
ArgumentNullException.ThrowIfNull(repository);
ArgumentNullException.ThrowIfNull(logger);
_injectedRepository = repository;
_logger = logger;
_options = options ?? new SiteCallAuditOptions();
ReceiveAsync<UpsertSiteCallCommand>(OnUpsertAsync);
RegisterHandlers();
}
/// <summary>
@@ -73,15 +84,33 @@ public class SiteCallAuditActor : ReceiveActor
/// </summary>
public SiteCallAuditActor(
IServiceProvider serviceProvider,
SiteCallAuditOptions options,
ILogger<SiteCallAuditActor> logger)
{
ArgumentNullException.ThrowIfNull(serviceProvider);
ArgumentNullException.ThrowIfNull(options);
ArgumentNullException.ThrowIfNull(logger);
_serviceProvider = serviceProvider;
_options = options;
_logger = logger;
RegisterHandlers();
}
/// <summary>
/// Wires up the message handlers shared by both constructors: the M3
/// ingest path plus the Task 4 read-side (query, detail, global + per-site
/// KPI). All read handlers reply to an Ask, so they capture <c>Sender</c>
/// before the first await and <c>PipeTo</c> the result back.
/// </summary>
private void RegisterHandlers()
{
ReceiveAsync<UpsertSiteCallCommand>(OnUpsertAsync);
Receive<SiteCallQueryRequest>(HandleQuery);
Receive<SiteCallDetailRequest>(HandleDetail);
Receive<SiteCallKpiRequest>(HandleKpi);
Receive<PerSiteSiteCallKpiRequest>(HandlePerSiteKpi);
}
/// <summary>
@@ -137,4 +166,305 @@ public class SiteCallAuditActor : ReceiveActor
scope?.Dispose();
}
}
// ── Task 4: read-side (query / detail / KPI) ──
/// <summary>
/// Handles a paginated, filtered query over the <c>SiteCalls</c> table.
/// Builds a <see cref="SiteCallQueryFilter"/> + <see cref="SiteCallPaging"/>
/// keyset cursor from the request, runs the query on a scoped repository,
/// and pipes the mapped response back to the captured sender. A repository
/// fault yields a failure response with an empty list.
/// </summary>
private void HandleQuery(SiteCallQueryRequest request)
{
var sender = Sender;
var now = DateTime.UtcNow;
QueryAsync(request, now).PipeTo(
sender,
success: response => response,
failure: ex => new SiteCallQueryResponse(
request.CorrelationId,
Success: false,
ErrorMessage: ex.GetBaseException().Message,
SiteCalls: Array.Empty<SiteCallSummary>(),
NextAfterCreatedAtUtc: null,
NextAfterId: null));
}
private async Task<SiteCallQueryResponse> QueryAsync(SiteCallQueryRequest request, DateTime now)
{
var filter = new SiteCallQueryFilter(
Channel: NullIfBlank(request.ChannelFilter),
SourceSite: NullIfBlank(request.SourceSiteFilter),
Status: NullIfBlank(request.StatusFilter),
Target: NullIfBlank(request.TargetKeyword),
FromUtc: request.FromUtc,
ToUtc: request.ToUtc);
var pageSize = Math.Clamp(request.PageSize, 1, MaxPageSize);
var paging = new SiteCallPaging(
PageSize: pageSize,
AfterCreatedAtUtc: request.AfterCreatedAtUtc,
AfterId: request.AfterId is { } id ? new TrackedOperationId(id) : null);
var (scope, repository) = ResolveRepository();
try
{
var rows = await repository.QueryAsync(filter, paging).ConfigureAwait(false);
var stuckCutoff = now - _options.StuckAgeThreshold;
var summaries = rows
// StuckOnly is post-filtered here rather than pushed into the
// repository SQL — the SiteCallQueryFilter has no stuck predicate
// and a status-aware created-before clause does not compose with
// the keyset cursor. The page may therefore return fewer than
// PageSize rows when StuckOnly is set; that is acceptable for a
// display-only filter.
.Where(row => !request.StuckOnly || IsStuck(row, stuckCutoff))
.Select(row => ToSummary(row, stuckCutoff))
.ToList();
// The next-page cursor is the LAST row of the materialised page —
// before StuckOnly post-filtering, so paging still advances even
// when every row on a page was filtered out.
var cursorRow = rows.Count > 0 ? rows[^1] : null;
return new SiteCallQueryResponse(
request.CorrelationId,
Success: true,
ErrorMessage: null,
SiteCalls: summaries,
NextAfterCreatedAtUtc: cursorRow?.CreatedAtUtc,
NextAfterId: cursorRow?.TrackedOperationId.Value);
}
finally
{
scope?.Dispose();
}
}
/// <summary>
/// Handles a full-detail query for a single cached call — backs the report
/// detail modal. A missing row yields <c>Success=false</c> with a "not
/// found" message; a repository fault yields <c>Success=false</c> with the
/// fault message.
/// </summary>
private void HandleDetail(SiteCallDetailRequest request)
{
var sender = Sender;
DetailAsync(request).PipeTo(
sender,
success: response => response,
failure: ex => new SiteCallDetailResponse(
request.CorrelationId,
Success: false,
ErrorMessage: ex.GetBaseException().Message,
Detail: null));
}
private async Task<SiteCallDetailResponse> DetailAsync(SiteCallDetailRequest request)
{
var (scope, repository) = ResolveRepository();
try
{
var row = await repository
.GetAsync(new TrackedOperationId(request.TrackedOperationId))
.ConfigureAwait(false);
if (row is null)
{
return new SiteCallDetailResponse(
request.CorrelationId,
Success: false,
ErrorMessage: "site call not found",
Detail: null);
}
return new SiteCallDetailResponse(
request.CorrelationId,
Success: true,
ErrorMessage: null,
Detail: ToDetail(row));
}
finally
{
scope?.Dispose();
}
}
/// <summary>
/// Handles a global KPI snapshot request, deriving the stuck cutoff from
/// <see cref="SiteCallAuditOptions.StuckAgeThreshold"/> and the
/// failed/delivered interval bound from <see cref="SiteCallAuditOptions.KpiInterval"/>.
/// </summary>
private void HandleKpi(SiteCallKpiRequest request)
{
var sender = Sender;
var now = DateTime.UtcNow;
var stuckCutoff = now - _options.StuckAgeThreshold;
var intervalSince = now - _options.KpiInterval;
KpiAsync(request.CorrelationId, stuckCutoff, intervalSince).PipeTo(
sender,
success: response => response,
failure: ex => new SiteCallKpiResponse(
request.CorrelationId,
Success: false,
ErrorMessage: ex.GetBaseException().Message,
BufferedCount: 0,
ParkedCount: 0,
FailedLastInterval: 0,
DeliveredLastInterval: 0,
OldestPendingAge: null,
StuckCount: 0));
}
private async Task<SiteCallKpiResponse> KpiAsync(
string correlationId, DateTime stuckCutoff, DateTime intervalSince)
{
var (scope, repository) = ResolveRepository();
try
{
var snapshot = await repository
.ComputeKpisAsync(stuckCutoff, intervalSince)
.ConfigureAwait(false);
return new SiteCallKpiResponse(
correlationId,
Success: true,
ErrorMessage: null,
snapshot.BufferedCount,
snapshot.ParkedCount,
snapshot.FailedLastInterval,
snapshot.DeliveredLastInterval,
snapshot.OldestPendingAge,
snapshot.StuckCount);
}
finally
{
scope?.Dispose();
}
}
/// <summary>
/// Handles a per-source-site KPI request, using the same stuck cutoff and
/// interval bound as <see cref="HandleKpi"/>.
/// </summary>
private void HandlePerSiteKpi(PerSiteSiteCallKpiRequest request)
{
var sender = Sender;
var now = DateTime.UtcNow;
var stuckCutoff = now - _options.StuckAgeThreshold;
var intervalSince = now - _options.KpiInterval;
PerSiteKpiAsync(request.CorrelationId, stuckCutoff, intervalSince).PipeTo(
sender,
success: response => response,
failure: ex => new PerSiteSiteCallKpiResponse(
request.CorrelationId,
Success: false,
ErrorMessage: ex.GetBaseException().Message,
Sites: Array.Empty<SiteCallSiteKpiSnapshot>()));
}
private async Task<PerSiteSiteCallKpiResponse> PerSiteKpiAsync(
string correlationId, DateTime stuckCutoff, DateTime intervalSince)
{
var (scope, repository) = ResolveRepository();
try
{
var sites = await repository
.ComputePerSiteKpisAsync(stuckCutoff, intervalSince)
.ConfigureAwait(false);
return new PerSiteSiteCallKpiResponse(
correlationId, Success: true, ErrorMessage: null, sites);
}
finally
{
scope?.Dispose();
}
}
/// <summary>
/// Resolves an <see cref="ISiteCallAuditRepository"/> for one read message.
/// In test mode the injected instance is returned with a null scope; in
/// production a fresh DI scope is created and returned so the caller can
/// dispose it once the read completes — the same scope-per-message pattern
/// as <see cref="OnUpsertAsync"/>.
/// </summary>
private (IServiceScope? Scope, ISiteCallAuditRepository Repository) ResolveRepository()
{
if (_injectedRepository is not null)
{
return (null, _injectedRepository);
}
var scope = _serviceProvider!.CreateScope();
return (scope, scope.ServiceProvider.GetRequiredService<ISiteCallAuditRepository>());
}
/// <summary>
/// A cached call counts as stuck when it is still non-terminal and was
/// created before <paramref name="stuckCutoff"/>. Non-terminal is keyed off
/// <see cref="SiteCall.TerminalAtUtc"/> being <c>null</c> — the
/// <c>SiteCalls</c> operational mirror stores <c>AuditStatus</c>-derived
/// status strings (<c>Attempted</c>/<c>Delivered</c>/<c>Parked</c>/...), not
/// the tracking-lifecycle <c>Pending</c>/<c>Retrying</c> names the spec's
/// KPI section uses, so there is no status string that means "buffered".
/// <c>TerminalAtUtc</c> is the entity's own active/terminal discriminator
/// and is consistent with the repository KPI counts and
/// <c>PurgeTerminalAsync</c>.
/// </summary>
private static bool IsStuck(SiteCall row, DateTime stuckCutoff)
{
return row.TerminalAtUtc is null && row.CreatedAtUtc < stuckCutoff;
}
private static SiteCallSummary ToSummary(SiteCall row, DateTime stuckCutoff)
{
return new SiteCallSummary(
TrackedOperationId: row.TrackedOperationId.Value,
SourceSite: row.SourceSite,
Channel: row.Channel,
Target: row.Target,
Status: row.Status,
RetryCount: row.RetryCount,
LastError: row.LastError,
HttpStatus: row.HttpStatus,
CreatedAtUtc: row.CreatedAtUtc,
UpdatedAtUtc: row.UpdatedAtUtc,
TerminalAtUtc: row.TerminalAtUtc,
IsStuck: IsStuck(row, stuckCutoff));
}
private static SiteCallDetail ToDetail(SiteCall row)
{
return new SiteCallDetail(
TrackedOperationId: row.TrackedOperationId.Value,
SourceSite: row.SourceSite,
Channel: row.Channel,
Target: row.Target,
Status: row.Status,
RetryCount: row.RetryCount,
LastError: row.LastError,
HttpStatus: row.HttpStatus,
CreatedAtUtc: row.CreatedAtUtc,
UpdatedAtUtc: row.UpdatedAtUtc,
TerminalAtUtc: row.TerminalAtUtc,
IngestedAtUtc: row.IngestedAtUtc);
}
/// <summary>
/// Treats an empty/whitespace filter string as "no constraint" — the
/// repository's <see cref="SiteCallQueryFilter"/> interprets <c>null</c> as
/// a no-op predicate, so a blank UI filter must collapse to <c>null</c>.
/// </summary>
private static string? NullIfBlank(string? value)
{
return string.IsNullOrWhiteSpace(value) ? null : value;
}
}

View File

@@ -0,0 +1,26 @@
namespace ScadaLink.SiteCallAudit;
/// <summary>
/// Configuration options for the Site Call Audit (#22) read-side: stuck-call
/// detection and KPI windowing. Mirrors the KPI-relevant subset of
/// <c>NotificationOutboxOptions</c> — the reconciliation, purge and dispatch
/// cadence options the Notification Outbox carries are not part of the Site
/// Call Audit read-side backend and are deliberately omitted here.
/// </summary>
public class SiteCallAuditOptions
{
/// <summary>
/// Age past which a non-terminal cached call (<c>Pending</c>/<c>Retrying</c>)
/// is considered stuck. Display-only — surfaced as the Stuck KPI and a row
/// badge, with no escalation. Default 10 minutes, matching
/// <c>NotificationOutboxOptions.StuckAgeThreshold</c>.
/// </summary>
public TimeSpan StuckAgeThreshold { get; set; } = TimeSpan.FromMinutes(10);
/// <summary>
/// Trailing window used to compute the delivered- and failed-last-interval
/// throughput KPIs. Default 1 minute, matching
/// <c>NotificationOutboxOptions.DeliveredKpiWindow</c>.
/// </summary>
public TimeSpan KpiInterval { get; set; } = TimeSpan.FromMinutes(1);
}