feat(audit): M5.2 per-node stuck-count KPIs (T6) — repo per-node aggregation, actor message pair, CentralUI tiles
This commit is contained in:
@@ -58,3 +58,31 @@
|
||||
{
|
||||
<div class="text-muted small mb-3">Site Call KPIs unavailable: @ErrorMessage</div>
|
||||
}
|
||||
@* ── Per-node stuck/parked sub-table (T6: M5.2 per-node stuck-count KPIs) ── *@
|
||||
@if (HasNodeBreakdown)
|
||||
{
|
||||
<div class="mb-3">
|
||||
<div class="d-flex justify-content-between align-items-center mb-1">
|
||||
<small class="text-muted">By node</small>
|
||||
</div>
|
||||
<table class="table table-sm table-borderless mb-0 site-call-kpi-node-table">
|
||||
<thead class="table-light">
|
||||
<tr>
|
||||
<th class="small py-1">Node</th>
|
||||
<th class="text-end small py-1">Stuck</th>
|
||||
<th class="text-end small py-1">Parked</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var n in PerNodeSnapshots!)
|
||||
{
|
||||
<tr @key="n.SourceNode">
|
||||
<td class="small py-1"><code>@n.SourceNode</code></td>
|
||||
<td class="text-end font-monospace small py-1 @(n.StuckCount > 0 ? "text-warning" : "")">@n.StuckCount</td>
|
||||
<td class="text-end font-monospace small py-1 @(n.ParkedCount > 0 ? "text-danger" : "")">@n.ParkedCount</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using Microsoft.AspNetCore.Components;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Components.Health;
|
||||
|
||||
@@ -59,6 +60,24 @@ public partial class SiteCallKpiTiles
|
||||
/// </summary>
|
||||
[Parameter] public string? ErrorMessage { get; set; }
|
||||
|
||||
/// <summary>
/// Optional per-node KPI rows (T6: M5.2 per-node stuck-count KPIs). The compact
/// node-level stuck/parked sub-table below the main tiles is rendered only when
/// this list is non-null and non-empty <em>and</em> <see cref="PerNodeAvailable"/>
/// is <c>true</c> (see <see cref="HasNodeBreakdown"/>). <c>null</c> means the
/// parent has not loaded it yet or has opted out — the sub-table is suppressed
/// entirely.
/// </summary>
[Parameter] public IReadOnlyList<SiteCallNodeKpiSnapshot>? PerNodeSnapshots { get; set; }

/// <summary>
/// Set by the parent to <c>true</c> when <see cref="PerNodeSnapshots"/> came from
/// a successful query. When <c>false</c> (for example after a load failure) the
/// sub-table is suppressed regardless of the list contents.
/// </summary>
[Parameter] public bool PerNodeAvailable { get; set; }

/// <summary>
/// Whether the per-node sub-table has data to render: the breakdown must be
/// flagged available and contain at least one row.
/// </summary>
internal bool HasNodeBreakdown
{
    get
    {
        // An unavailable breakdown is never shown, even if rows are present.
        if (!PerNodeAvailable)
        {
            return false;
        }

        return PerNodeSnapshots is not null && PerNodeSnapshots.Count > 0;
    }
}
|
||||
|
||||
// ── Buffered tile ───────────────────────────────────────────────────────
|
||||
|
||||
private string BufferedDisplay =>
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
@using ZB.MOM.WW.ScadaBridge.HealthMonitoring
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit
|
||||
@using ZB.MOM.WW.ScadaBridge.Communication
|
||||
@implements IDisposable
|
||||
@inject ICentralHealthAggregator HealthAggregator
|
||||
@@ -65,7 +66,9 @@
|
||||
(buffered / stuck / parked). Refreshed alongside the site states. *@
|
||||
<SiteCallKpiTiles Snapshot="@_siteCallKpi"
|
||||
IsAvailable="@_siteCallKpiAvailable"
|
||||
ErrorMessage="@_siteCallKpiError" />
|
||||
ErrorMessage="@_siteCallKpiError"
|
||||
PerNodeSnapshots="@_siteCallNodeKpis"
|
||||
PerNodeAvailable="@_siteCallNodeKpiAvailable" />
|
||||
|
||||
@* Audit Log (#23) M7 Bundle E — three KPI tiles for the Audit channel
|
||||
(volume / error rate / backlog). Refreshed alongside the site states. *@
|
||||
@@ -378,6 +381,12 @@
|
||||
private bool _siteCallKpiAvailable;
|
||||
private string? _siteCallKpiError;
|
||||
|
||||
// Per-node Site Call KPI breakdown (T6: M5.2 per-node stuck-count KPIs).
|
||||
// Passed to SiteCallKpiTiles as an optional sub-table.
|
||||
private IReadOnlyList<SiteCallNodeKpiSnapshot> _siteCallNodeKpis =
|
||||
Array.Empty<SiteCallNodeKpiSnapshot>();
|
||||
private bool _siteCallNodeKpiAvailable;
|
||||
|
||||
private static bool SiteHasActiveErrors(SiteHealthState state)
|
||||
{
|
||||
var report = state.LatestReport;
|
||||
@@ -415,7 +424,7 @@
|
||||
{
|
||||
_siteStates = HealthAggregator.GetAllSiteStates();
|
||||
await LoadOutboxKpis();
|
||||
await LoadSiteCallKpis();
|
||||
await Task.WhenAll(LoadSiteCallKpis(), LoadSiteCallNodeKpis());
|
||||
await LoadAuditKpis();
|
||||
}
|
||||
|
||||
@@ -474,6 +483,30 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Loads the per-node Site Call KPI breakdown (T6: M5.2). Deliberately
// best-effort: an unsuccessful response or a thrown exception only clears the
// availability flag, which suppresses the per-node sub-table rather than
// degrading the rest of the dashboard. No error text is surfaced.
private async Task LoadSiteCallNodeKpis()
{
    var loaded = false;

    try
    {
        var request = new PerNodeSiteCallKpiRequest(Guid.NewGuid().ToString("N"));
        var reply = await CommunicationService.GetPerNodeSiteCallKpisAsync(request);

        if (reply.Success)
        {
            // The cached rows are replaced only on success; a failed refresh
            // leaves the previous list in place but flags it unavailable.
            _siteCallNodeKpis = reply.Nodes;
            loaded = true;
        }
    }
    catch
    {
        // Intentionally swallowed: any fault is treated as "breakdown unavailable".
    }

    _siteCallNodeKpiAvailable = loaded;
}
|
||||
|
||||
// Tiles show the numeric KPI when available, or an em dash when the outbox
|
||||
// KPI query failed — matching how the page renders other unavailable data.
|
||||
private string OutboxTileValue(int value) =>
|
||||
|
||||
+73
-2
@@ -69,6 +69,51 @@
|
||||
</div>
|
||||
}
|
||||
|
||||
@* ── Per-node breakdown (T6: additive) ── *@
|
||||
<h5 class="mb-2">Per-node breakdown</h5>
|
||||
@if (_perNodeError != null)
|
||||
{
|
||||
<div class="alert alert-warning py-2">Per-node KPIs unavailable: @_perNodeError</div>
|
||||
}
|
||||
else if (_perNode.Count == 0)
|
||||
{
|
||||
<div class="card mb-3">
|
||||
<div class="card-body text-center text-muted py-3">
|
||||
<div class="small">No per-node activity (rows may have a null SourceNode).</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="table-responsive mb-3">
|
||||
<table class="table table-sm table-hover align-middle">
|
||||
<thead class="table-light">
|
||||
<tr>
|
||||
<th>Node</th>
|
||||
<th class="text-end">Queue Depth</th>
|
||||
<th class="text-end">Stuck</th>
|
||||
<th class="text-end">Parked</th>
|
||||
<th class="text-end">Delivered (last interval)</th>
|
||||
<th class="text-end">Oldest Pending Age</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@foreach (var n in _perNode)
|
||||
{
|
||||
<tr @key="n.SourceNode" class="@(n.StuckCount > 0 ? "table-warning" : "")">
|
||||
<td><code>@n.SourceNode</code></td>
|
||||
<td class="text-end font-monospace">@n.QueueDepth</td>
|
||||
<td class="text-end font-monospace @(n.StuckCount > 0 ? "text-warning" : "")">@n.StuckCount</td>
|
||||
<td class="text-end font-monospace @(n.ParkedCount > 0 ? "text-danger" : "")">@n.ParkedCount</td>
|
||||
<td class="text-end font-monospace text-success">@n.DeliveredLastInterval</td>
|
||||
<td class="text-end font-monospace">@FormatAge(n.OldestPendingAge)</td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
}
|
||||
|
||||
@* ── Per-site breakdown ── *@
|
||||
<h5 class="mb-2">Per-site breakdown</h5>
|
||||
@if (_perSiteError != null)
|
||||
@@ -124,6 +169,10 @@
|
||||
private IReadOnlyList<SiteNotificationKpiSnapshot> _perSite = Array.Empty<SiteNotificationKpiSnapshot>();
|
||||
private string? _perSiteError;
|
||||
|
||||
// ── Per-node (T6: M5.2 per-node stuck-count KPIs) ──
|
||||
private IReadOnlyList<NodeNotificationKpiSnapshot> _perNode = Array.Empty<NodeNotificationKpiSnapshot>();
|
||||
private string? _perNodeError;
|
||||
|
||||
private bool _loading;
|
||||
|
||||
protected override async Task OnInitializedAsync()
|
||||
@@ -144,9 +193,9 @@
|
||||
private async Task RefreshAll()
|
||||
{
|
||||
_loading = true;
|
||||
// Race-free despite both tasks mutating component fields: Blazor Server runs
|
||||
// Race-free despite all tasks mutating component fields: Blazor Server runs
|
||||
// every continuation on the circuit's single-threaded synchronization context.
|
||||
await Task.WhenAll(LoadGlobalKpis(), LoadPerSiteKpis());
|
||||
await Task.WhenAll(LoadGlobalKpis(), LoadPerSiteKpis(), LoadPerNodeKpis());
|
||||
_loading = false;
|
||||
}
|
||||
|
||||
@@ -194,6 +243,28 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Loads the per-node notification KPI breakdown. On success the rows are
// replaced and any previous error is cleared; on an unsuccessful response or
// an exception the rows are left untouched and an error message is recorded.
private async Task LoadPerNodeKpis()
{
    string? failure;

    try
    {
        var request = new PerNodeNotificationKpiRequest(Guid.NewGuid().ToString("N"));
        var reply = await CommunicationService.GetPerNodeNotificationKpisAsync(request);

        if (reply.Success)
        {
            _perNode = reply.Nodes;
            failure = null;
        }
        else
        {
            // Fall back to a generic message when the response carries none.
            failure = reply.ErrorMessage ?? "Per-node KPI query failed.";
        }
    }
    catch (Exception ex)
    {
        failure = $"Per-node KPI query failed: {ex.Message}";
    }

    _perNodeError = failure;
}
|
||||
|
||||
private string SiteName(string siteId) =>
|
||||
_sites.FirstOrDefault(s => s.SiteIdentifier == siteId)?.Name ?? siteId;
|
||||
|
||||
|
||||
+13
@@ -100,6 +100,19 @@ public interface INotificationOutboxRepository
|
||||
Task<IReadOnlyList<SiteNotificationKpiSnapshot>> ComputePerSiteKpisAsync(
|
||||
DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Computes a point-in-time <see cref="NodeNotificationKpiSnapshot"/> per originating node.
|
||||
/// Nodes with no notification rows at all are omitted; rows with a <c>NULL</c>
|
||||
/// <c>SourceNode</c> are excluded. The stuck and delivered cutoffs are supplied by the
|
||||
/// caller; the current time used for <c>OldestPendingAge</c> is captured inside the method.
|
||||
/// </summary>
|
||||
/// <param name="stuckCutoff">The time threshold for marking notifications as stuck.</param>
|
||||
/// <param name="deliveredSince">The time threshold for counting delivered notifications.</param>
|
||||
/// <param name="cancellationToken">Cancellation token.</param>
|
||||
/// <returns>A list of per-node KPI snapshots, ordered by node name.</returns>
|
||||
Task<IReadOnlyList<NodeNotificationKpiSnapshot>> ComputePerNodeKpisAsync(
|
||||
DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken cancellationToken = default);
|
||||
|
||||
/// <summary>
|
||||
/// Persists pending changes tracked on the underlying context. Use this when staging
|
||||
/// multiple changes for a single commit; the individual mutating methods on this
|
||||
|
||||
@@ -107,4 +107,19 @@ public interface ISiteCallAuditRepository
|
||||
DateTime stuckCutoff,
|
||||
DateTime intervalSince,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Computes a point-in-time <see cref="SiteCallNodeKpiSnapshot"/> per originating
|
||||
/// node. Nodes with no <c>SiteCalls</c> rows at all are omitted; rows with a
|
||||
/// <c>NULL</c> <c>SourceNode</c> are excluded. The stuck cutoff and interval
|
||||
/// bounds are interpreted as in <see cref="ComputeKpisAsync"/>.
|
||||
/// </summary>
|
||||
/// <param name="stuckCutoff">UTC threshold for classifying a row as stuck.</param>
|
||||
/// <param name="intervalSince">UTC start of the delivered/failed interval window.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A task that resolves to a per-node KPI list; nodes with no rows are omitted.</returns>
|
||||
Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
|
||||
DateTime stuckCutoff,
|
||||
DateTime intervalSince,
|
||||
CancellationToken ct = default);
|
||||
}
|
||||
|
||||
@@ -164,3 +164,24 @@ public sealed record PerSiteSiteCallKpiResponse(
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
IReadOnlyList<SiteCallSiteKpiSnapshot> Sites);
|
||||
|
||||
/// <summary>
|
||||
/// Site Calls UI -> Central: request for the per-node <c>SiteCalls</c>
|
||||
/// KPI breakdown. Mirrors <see cref="PerSiteSiteCallKpiRequest"/> but groups
|
||||
/// by <c>SourceNode</c> instead of <c>SourceSite</c>. Additive — does not
|
||||
/// change per-site behaviour.
|
||||
/// </summary>
|
||||
public sealed record PerNodeSiteCallKpiRequest(
|
||||
string CorrelationId);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Site Calls UI: per-node KPI breakdown for the Site Calls KPIs
|
||||
/// page. On a repository fault <see cref="Success"/> is <c>false</c>,
|
||||
/// <see cref="ErrorMessage"/> carries the cause, and <see cref="Nodes"/> is empty.
|
||||
/// Rows with a <c>NULL</c> <c>SourceNode</c> are excluded from the breakdown.
|
||||
/// </summary>
|
||||
public sealed record PerNodeSiteCallKpiResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
IReadOnlyList<SiteCallNodeKpiSnapshot> Nodes);
|
||||
|
||||
@@ -159,3 +159,23 @@ public record PerSiteNotificationKpiResponse(
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
IReadOnlyList<SiteNotificationKpiSnapshot> Sites);
|
||||
|
||||
/// <summary>
|
||||
/// Outbox UI -> Central: request for the per-node notification outbox KPI breakdown.
|
||||
/// Mirrors <see cref="PerSiteNotificationKpiRequest"/> but groups by <c>SourceNode</c>
|
||||
/// instead of <c>SourceSiteId</c>. Additive — does not change per-site behaviour.
|
||||
/// </summary>
|
||||
public record PerNodeNotificationKpiRequest(
|
||||
string CorrelationId);
|
||||
|
||||
/// <summary>
|
||||
/// Central -> Outbox UI: per-node KPI breakdown for the Notification KPIs page.
|
||||
/// On a repository fault <see cref="Success"/> is <c>false</c>, <see cref="ErrorMessage"/>
|
||||
/// carries the cause, and <see cref="Nodes"/> is empty. Rows with a <c>NULL</c>
|
||||
/// <c>SourceNode</c> are excluded from the breakdown.
|
||||
/// </summary>
|
||||
public record PerNodeNotificationKpiResponse(
|
||||
string CorrelationId,
|
||||
bool Success,
|
||||
string? ErrorMessage,
|
||||
IReadOnlyList<NodeNotificationKpiSnapshot> Nodes);
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
namespace ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
||||
|
||||
/// <summary>
|
||||
/// Point-in-time <c>SiteCalls</c> metrics scoped to a single originating node. The
|
||||
/// per-node counterpart of <see cref="SiteCallSiteKpiSnapshot"/>; surfaced in the
|
||||
/// per-node breakdown table on the Site Calls KPIs page. Mirrors
|
||||
/// <see cref="ZB.MOM.WW.ScadaBridge.Commons.Types.Notifications.NodeNotificationKpiSnapshot"/>.
|
||||
/// </summary>
|
||||
/// <param name="SourceNode">
|
||||
/// The node identifier these metrics are scoped to (e.g. <c>node-a</c>,
|
||||
/// <c>node-b</c>). Rows with a <c>NULL</c> <c>SourceNode</c> are omitted.
|
||||
/// </param>
|
||||
/// <param name="BufferedCount">Count of this node's non-terminal rows (<c>TerminalAtUtc IS NULL</c>).</param>
|
||||
/// <param name="ParkedCount">Count of this node's rows in the <c>Parked</c> status.</param>
|
||||
/// <param name="FailedLastInterval">
|
||||
/// Count of this node's <c>Failed</c> rows whose <c>TerminalAtUtc</c> is at or
|
||||
/// after the "since" timestamp.
|
||||
/// </param>
|
||||
/// <param name="DeliveredLastInterval">
|
||||
/// Count of this node's <c>Delivered</c> rows whose <c>TerminalAtUtc</c> is at
|
||||
/// or after the "since" timestamp.
|
||||
/// </param>
|
||||
/// <param name="OldestPendingAge">
|
||||
/// Age of this node's oldest non-terminal row, or <c>null</c> when it has none.
|
||||
/// </param>
|
||||
/// <param name="StuckCount">
|
||||
/// Count of this node's non-terminal rows whose <c>CreatedAtUtc</c> is older
|
||||
/// than the stuck cutoff.
|
||||
/// </param>
|
||||
public sealed record SiteCallNodeKpiSnapshot(
|
||||
string SourceNode,
|
||||
int BufferedCount,
|
||||
int ParkedCount,
|
||||
int FailedLastInterval,
|
||||
int DeliveredLastInterval,
|
||||
TimeSpan? OldestPendingAge,
|
||||
int StuckCount);
|
||||
@@ -0,0 +1,30 @@
|
||||
namespace ZB.MOM.WW.ScadaBridge.Commons.Types.Notifications;
|
||||
|
||||
/// <summary>
|
||||
/// Point-in-time notification-outbox metrics scoped to a single originating node.
|
||||
/// The per-node counterpart of <see cref="SiteNotificationKpiSnapshot"/>; surfaced
|
||||
/// in the per-node breakdown table on the Notification KPIs page.
|
||||
/// </summary>
|
||||
/// <param name="SourceNode">
|
||||
/// The node identifier these metrics are scoped to (e.g. <c>node-a</c>,
|
||||
/// <c>node-b</c>). Rows with a <c>NULL</c> <c>SourceNode</c> are omitted.
|
||||
/// </param>
|
||||
/// <param name="QueueDepth">Count of this node's non-terminal rows (Pending + Retrying).</param>
|
||||
/// <param name="StuckCount">
|
||||
/// Count of this node's non-terminal rows whose <c>CreatedAt</c> is older than the stuck cutoff.
|
||||
/// </param>
|
||||
/// <param name="ParkedCount">Count of this node's rows in the Parked status.</param>
|
||||
/// <param name="DeliveredLastInterval">
|
||||
/// Count of this node's Delivered rows whose <c>DeliveredAt</c> is at or after the
|
||||
/// "delivered since" timestamp.
|
||||
/// </param>
|
||||
/// <param name="OldestPendingAge">
|
||||
/// Age of this node's oldest non-terminal row, or <c>null</c> when it has none.
|
||||
/// </param>
|
||||
public record NodeNotificationKpiSnapshot(
|
||||
string SourceNode,
|
||||
int QueueDepth,
|
||||
int StuckCount,
|
||||
int ParkedCount,
|
||||
int DeliveredLastInterval,
|
||||
TimeSpan? OldestPendingAge);
|
||||
@@ -525,6 +525,22 @@ public class CommunicationService
|
||||
request, _options.QueryTimeout, cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Requests the per-node KPI breakdown for the notification outbox. The
/// breakdown is keyed by <c>SourceNode</c> (e.g. <c>node-a</c>/<c>node-b</c>);
/// rows with a <c>NULL</c> node are omitted. Additive alongside
/// <see cref="GetPerSiteNotificationKpisAsync"/>.
/// </summary>
/// <param name="request">The per-node notification KPI request.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The per-node notification KPI response.</returns>
public async Task<PerNodeNotificationKpiResponse> GetPerNodeNotificationKpisAsync(
    PerNodeNotificationKpiRequest request, CancellationToken cancellationToken = default)
{
    // The Ask is bounded by the configured query timeout.
    var outbox = GetNotificationOutbox();
    var reply = await outbox.Ask<PerNodeNotificationKpiResponse>(
        request, _options.QueryTimeout, cancellationToken);

    return reply;
}
|
||||
|
||||
// ── Site Call Audit (central-local actor — Asked directly, no SiteEnvelope) ──
|
||||
|
||||
/// <summary>
|
||||
@@ -579,6 +595,21 @@ public class CommunicationService
|
||||
request, _options.QueryTimeout, cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Requests the per-node KPI breakdown for site calls. The breakdown is keyed
/// by <c>SourceNode</c> (e.g. <c>node-a</c>/<c>node-b</c>); rows with a
/// <c>NULL</c> node are omitted. Additive alongside
/// <see cref="GetPerSiteSiteCallKpisAsync"/>.
/// </summary>
/// <param name="request">The per-node site call KPI request.</param>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The per-node site call KPI response.</returns>
public async Task<PerNodeSiteCallKpiResponse> GetPerNodeSiteCallKpisAsync(
    PerNodeSiteCallKpiRequest request, CancellationToken cancellationToken = default)
{
    // The Ask is bounded by the configured query timeout.
    var siteCallAudit = GetSiteCallAudit();
    var reply = await siteCallAudit.Ask<PerNodeSiteCallKpiResponse>(
        request, _options.QueryTimeout, cancellationToken);

    return reply;
}
|
||||
|
||||
/// <summary>
|
||||
/// Task 5 (#22): relays an operator Retry of a parked cached call to its
|
||||
/// owning site. The <c>SiteCallAuditActor</c> is Asked directly (it is
|
||||
|
||||
+73
@@ -300,6 +300,63 @@ VALUES
|
||||
: null)).ToList();
|
||||
}
|
||||
|
||||
/// <inheritdoc />
public async Task<IReadOnlyList<NodeNotificationKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince, CancellationToken cancellationToken = default)
{
    // Single reference instant so every node's OldestPendingAge is measured
    // against the same clock reading.
    var asOf = DateTimeOffset.UtcNow;

    // Rows with a NULL SourceNode (legacy / unstamped) are filtered out by
    // every predicate below — per-node KPIs are only meaningful when the
    // node identity is known. The queries are awaited one after another.
    var depthByNode = await CountByNodeAsync(
        row => (row.Status == NotificationStatus.Pending || row.Status == NotificationStatus.Retrying)
            && row.SourceNode != null,
        cancellationToken);

    var stuckByNode = await CountByNodeAsync(
        row => (row.Status == NotificationStatus.Pending || row.Status == NotificationStatus.Retrying)
            && row.CreatedAt < stuckCutoff
            && row.SourceNode != null,
        cancellationToken);

    var parkedByNode = await CountByNodeAsync(
        row => row.Status == NotificationStatus.Parked && row.SourceNode != null,
        cancellationToken);

    var deliveredByNode = await CountByNodeAsync(
        row => row.Status == NotificationStatus.Delivered
            && row.DeliveredAt != null && row.DeliveredAt >= deliveredSince
            && row.SourceNode != null,
        cancellationToken);

    // Oldest non-terminal CreatedAt per node. The rows are pulled into memory
    // and reduced there — same pattern as ComputePerSiteKpisAsync, because the
    // DateTimeOffset converter makes a SQL Min awkward.
    var pendingRows = await _context.Notifications
        .Where(row => (row.Status == NotificationStatus.Pending
                       || row.Status == NotificationStatus.Retrying)
                      && row.SourceNode != null)
        .Select(row => new { row.SourceNode, row.CreatedAt })
        .ToListAsync(cancellationToken);

    var oldestByNode = pendingRows
        .GroupBy(row => row.SourceNode!)
        .ToDictionary(group => group.Key, group => group.Min(row => row.CreatedAt));

    // Union of every node seen by any count, de-duplicated and ordinally sorted.
    var nodes = new SortedSet<string>(StringComparer.Ordinal);
    nodes.UnionWith(depthByNode.Keys);
    nodes.UnionWith(stuckByNode.Keys);
    nodes.UnionWith(parkedByNode.Keys);
    nodes.UnionWith(deliveredByNode.Keys);

    var snapshots = new List<NodeNotificationKpiSnapshot>(nodes.Count);
    foreach (var node in nodes)
    {
        // A node with no non-terminal rows has no oldest-pending age.
        TimeSpan? oldestAge = oldestByNode.TryGetValue(node, out var firstCreatedAt)
            ? asOf - firstCreatedAt
            : null;

        snapshots.Add(new NodeNotificationKpiSnapshot(
            SourceNode: node,
            QueueDepth: depthByNode.GetValueOrDefault(node),
            StuckCount: stuckByNode.GetValueOrDefault(node),
            ParkedCount: parkedByNode.GetValueOrDefault(node),
            DeliveredLastInterval: deliveredByNode.GetValueOrDefault(node),
            OldestPendingAge: oldestAge));
    }

    return snapshots;
}
|
||||
|
||||
/// <summary>Counts notification rows matching <paramref name="predicate"/>, grouped by source site.</summary>
|
||||
private async Task<Dictionary<string, int>> CountBySiteAsync(
|
||||
System.Linq.Expressions.Expression<Func<Notification, bool>> predicate,
|
||||
@@ -312,6 +369,22 @@ VALUES
|
||||
.ToDictionaryAsync(x => x.Site, x => x.Count, cancellationToken);
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts the notification rows that satisfy <paramref name="predicate"/> and
/// returns the totals keyed by source node. The grouping key is dereferenced
/// with the null-forgiving operator, so the predicate itself must exclude rows
/// whose <c>SourceNode</c> is <c>NULL</c>.
/// </summary>
private async Task<Dictionary<string, int>> CountByNodeAsync(
    System.Linq.Expressions.Expression<Func<Notification, bool>> predicate,
    CancellationToken cancellationToken)
{
    var countsPerNode = _context.Notifications
        .Where(predicate)
        .GroupBy(row => row.SourceNode!)
        .Select(group => new { Node = group.Key, Count = group.Count() });

    var byNode = await countsPerNode.ToDictionaryAsync(
        entry => entry.Node, entry => entry.Count, cancellationToken);

    return byNode;
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<int> SaveChangesAsync(CancellationToken cancellationToken = default)
|
||||
=> await _context.SaveChangesAsync(cancellationToken);
|
||||
|
||||
+71
@@ -324,6 +324,61 @@ ORDER BY CreatedAtUtc DESC, TrackedOperationId DESC;";
|
||||
StuckCount: stuck.GetValueOrDefault(site))).ToList();
|
||||
}
|
||||
|
||||
/// <inheritdoc />
public async Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    // Single reference instant so every node's OldestPendingAge is measured
    // against the same clock reading.
    var asOf = DateTime.UtcNow;

    // Rows with a NULL SourceNode are filtered out by every predicate below,
    // so the GROUP BY key is always non-null — per-node KPIs are only
    // meaningful when the node identity is known.
    var bufferedByNode = await CountByNodeAsync(
        call => call.TerminalAtUtc == null && call.SourceNode != null, ct);

    var parkedByNode = await CountByNodeAsync(
        call => call.Status == StatusParked && call.SourceNode != null, ct);

    var failedByNode = await CountByNodeAsync(
        call => call.Status == StatusFailed
            && call.TerminalAtUtc != null && call.TerminalAtUtc >= intervalSince
            && call.SourceNode != null, ct);

    var deliveredByNode = await CountByNodeAsync(
        call => call.Status == StatusDelivered
            && call.TerminalAtUtc != null && call.TerminalAtUtc >= intervalSince
            && call.SourceNode != null, ct);

    var stuckByNode = await CountByNodeAsync(
        call => call.TerminalAtUtc == null && call.CreatedAtUtc < stuckCutoff
            && call.SourceNode != null, ct);

    // Oldest non-terminal CreatedAtUtc per node, reduced by a grouped Min in
    // the query itself.
    var oldestRows = await _context.SiteCalls
        .Where(call => call.TerminalAtUtc == null && call.SourceNode != null)
        .GroupBy(call => call.SourceNode!)
        .Select(group => new { Node = group.Key, Oldest = group.Min(call => call.CreatedAtUtc) })
        .ToListAsync(ct);

    var oldestByNode = oldestRows.ToDictionary(entry => entry.Node, entry => entry.Oldest);

    // Union of every node seen by any count, de-duplicated and ordinally sorted.
    var nodes = new SortedSet<string>(StringComparer.Ordinal);
    nodes.UnionWith(bufferedByNode.Keys);
    nodes.UnionWith(parkedByNode.Keys);
    nodes.UnionWith(failedByNode.Keys);
    nodes.UnionWith(deliveredByNode.Keys);
    nodes.UnionWith(stuckByNode.Keys);

    var snapshots = new List<SiteCallNodeKpiSnapshot>(nodes.Count);
    foreach (var node in nodes)
    {
        // A node with no non-terminal rows has no oldest-pending age.
        TimeSpan? oldestAge = oldestByNode.TryGetValue(node, out var firstCreatedAt)
            ? asOf - firstCreatedAt
            : null;

        snapshots.Add(new SiteCallNodeKpiSnapshot(
            SourceNode: node,
            BufferedCount: bufferedByNode.GetValueOrDefault(node),
            ParkedCount: parkedByNode.GetValueOrDefault(node),
            FailedLastInterval: failedByNode.GetValueOrDefault(node),
            DeliveredLastInterval: deliveredByNode.GetValueOrDefault(node),
            OldestPendingAge: oldestAge,
            StuckCount: stuckByNode.GetValueOrDefault(node)));
    }

    return snapshots;
}
|
||||
|
||||
/// <summary>Counts <c>SiteCalls</c> rows matching <paramref name="predicate"/>, grouped by source site.</summary>
|
||||
private async Task<Dictionary<string, int>> CountBySiteAsync(
|
||||
System.Linq.Expressions.Expression<Func<SiteCall, bool>> predicate,
|
||||
@@ -336,6 +391,22 @@ ORDER BY CreatedAtUtc DESC, TrackedOperationId DESC;";
|
||||
.ToDictionaryAsync(x => x.Site, x => x.Count, ct);
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts the <c>SiteCalls</c> rows that satisfy <paramref name="predicate"/>
/// and returns the totals keyed by source node. The grouping key is
/// dereferenced with the null-forgiving operator, so the predicate itself must
/// exclude rows whose <c>SourceNode</c> is <c>NULL</c>.
/// </summary>
private async Task<Dictionary<string, int>> CountByNodeAsync(
    System.Linq.Expressions.Expression<Func<SiteCall, bool>> predicate,
    CancellationToken ct)
{
    var countsPerNode = _context.SiteCalls
        .Where(predicate)
        .GroupBy(call => call.SourceNode!)
        .Select(group => new { Node = group.Key, Count = group.Count() });

    var byNode = await countsPerNode.ToDictionaryAsync(
        entry => entry.Node, entry => entry.Count, ct);

    return byNode;
}
|
||||
|
||||
private static int GetRankOrThrow(string status)
|
||||
{
|
||||
if (!StatusRank.TryGetValue(status, out var rank))
|
||||
|
||||
@@ -122,6 +122,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
Receive<DiscardNotificationRequest>(HandleDiscard);
|
||||
Receive<NotificationKpiRequest>(HandleKpiRequest);
|
||||
Receive<PerSiteNotificationKpiRequest>(HandlePerSiteKpiRequest);
|
||||
Receive<PerNodeNotificationKpiRequest>(HandlePerNodeKpiRequest);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -1081,6 +1082,38 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
|
||||
return new PerSiteNotificationKpiResponse(correlationId, Success: true, ErrorMessage: null, sites);
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles a per-node KPI request: derives the stuck cutoff and the delivered
/// window start from a single "now" reading (the same inputs
/// <see cref="HandleKpiRequest"/> uses), starts the per-source-node
/// computation and pipes its outcome back to the requester. A faulted
/// computation is converted into an unsuccessful response carrying the base
/// exception's message and an empty node list. Additive alongside
/// <see cref="HandlePerSiteKpiRequest"/> — does not change per-site behaviour.
/// </summary>
private void HandlePerNodeKpiRequest(PerNodeNotificationKpiRequest request)
{
    // Capture the requester up front; the reply is delivered to it once the
    // asynchronous computation completes.
    var replyTo = Sender;

    var asOf = DateTimeOffset.UtcNow;
    var cutoff = StuckCutoff(asOf);
    var windowStart = asOf - _options.DeliveredKpiWindow;

    var kpiTask = ComputePerNodeKpisAsync(request.CorrelationId, cutoff, windowStart);

    kpiTask.PipeTo(
        replyTo,
        success: response => response,
        failure: ex => new PerNodeNotificationKpiResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: ex.GetBaseException().Message,
            Nodes: Array.Empty<NodeNotificationKpiSnapshot>()));
}
|
||||
|
||||
/// <summary>
/// Resolves an <see cref="INotificationOutboxRepository"/> from a fresh DI
/// scope, computes the per-node KPIs and wraps them in a successful response.
/// Exceptions are not caught here; the caller maps a faulted task to a failure
/// response.
/// </summary>
private async Task<PerNodeNotificationKpiResponse> ComputePerNodeKpisAsync(
    string correlationId, DateTimeOffset stuckCutoff, DateTimeOffset deliveredSince)
{
    // The scope is disposed when the method exits, after the query completes.
    using var scope = _serviceProvider.CreateScope();

    var outboxRepository = scope.ServiceProvider.GetRequiredService<INotificationOutboxRepository>();
    var perNode = await outboxRepository.ComputePerNodeKpisAsync(stuckCutoff, deliveredSince);

    return new PerNodeNotificationKpiResponse(
        correlationId,
        Success: true,
        ErrorMessage: null,
        perNode);
}
|
||||
|
||||
/// <summary>
|
||||
/// The instant before which a still-pending notification counts as stuck — <paramref name="now"/>
|
||||
/// offset back by <see cref="NotificationOutboxOptions.StuckAgeThreshold"/>.
|
||||
|
||||
@@ -239,6 +239,7 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
Receive<SiteCallDetailRequest>(HandleDetail);
|
||||
Receive<SiteCallKpiRequest>(HandleKpi);
|
||||
Receive<PerSiteSiteCallKpiRequest>(HandlePerSiteKpi);
|
||||
Receive<PerNodeSiteCallKpiRequest>(HandlePerNodeKpi);
|
||||
|
||||
// Task 5 (#22): central→site Retry/Discard relay for parked cached calls.
|
||||
Receive<RegisterCentralCommunication>(msg =>
|
||||
@@ -817,6 +818,47 @@ public class SiteCallAuditActor : ReceiveActor
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles a per-node KPI request: derives the stuck cutoff and the interval
/// start from a single UTC "now" reading (the same bounds
/// <see cref="HandleKpi"/> uses), starts the per-node computation and pipes
/// its outcome back to the requester. A faulted computation is converted into
/// an unsuccessful response carrying the base exception's message and an empty
/// node list. Additive alongside <see cref="HandlePerSiteKpi"/> — does not
/// change per-site behaviour.
/// </summary>
private void HandlePerNodeKpi(PerNodeSiteCallKpiRequest request)
{
    // Capture the requester up front; the reply is delivered to it once the
    // asynchronous computation completes.
    var replyTo = Sender;

    var asOf = DateTime.UtcNow;
    var cutoff = asOf - _options.StuckAgeThreshold;
    var windowStart = asOf - _options.KpiInterval;

    var kpiTask = PerNodeKpiAsync(request.CorrelationId, cutoff, windowStart);

    kpiTask.PipeTo(
        replyTo,
        success: response => response,
        failure: ex => new PerNodeSiteCallKpiResponse(
            request.CorrelationId,
            Success: false,
            ErrorMessage: ex.GetBaseException().Message,
            Nodes: Array.Empty<SiteCallNodeKpiSnapshot>()));
}
|
||||
|
||||
/// <summary>
/// Resolves the repository, computes the per-node KPIs and wraps them in a
/// successful response. Exceptions are not caught here; the caller maps a
/// faulted task to a failure response. Any scope returned alongside the
/// repository is disposed whether or not the query succeeds.
/// </summary>
private async Task<PerNodeSiteCallKpiResponse> PerNodeKpiAsync(
    string correlationId, DateTime stuckCutoff, DateTime intervalSince)
{
    var (scope, repository) = ResolveRepository();
    try
    {
        var query = repository.ComputePerNodeKpisAsync(stuckCutoff, intervalSince);
        var perNode = await query.ConfigureAwait(false);

        return new PerNodeSiteCallKpiResponse(
            correlationId,
            Success: true,
            ErrorMessage: null,
            perNode);
    }
    finally
    {
        // The scope may be null, in which case there is nothing to release.
        if (scope is not null)
        {
            scope.Dispose();
        }
    }
}
|
||||
|
||||
// ── Task 5: central→site Retry/Discard relay ──
|
||||
|
||||
/// <summary>
|
||||
|
||||
+6
@@ -362,6 +362,9 @@ public class AuditLogIngestActorCombinedTelemetryTests : TestKit, IClassFixture<
|
||||
/// <summary>Forwards the per-site KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
/// <summary>Forwards the per-node KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -399,5 +402,8 @@ public class AuditLogIngestActorCombinedTelemetryTests : TestKit, IClassFixture<
|
||||
/// <summary>Forwards the per-site KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
/// <summary>Forwards the per-node KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Communication;
|
||||
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
||||
using HealthPage = ZB.MOM.WW.ScadaBridge.CentralUI.Components.Pages.Monitoring.Health;
|
||||
@@ -232,13 +233,18 @@ public class HealthPageTests : BunitContext
|
||||
|
||||
/// <summary>
/// Stand-in for the Site Call Audit actor. A <see cref="SiteCallKpiRequest"/>
/// is answered with whatever reply the owning test currently has scripted.
/// A <see cref="PerNodeSiteCallKpiRequest"/> (T6: M5.2) is answered with a
/// successful, empty-nodes response so the Health page can complete
/// initialization without a 30-second Ask timeout.
/// </summary>
private sealed class ScriptedSiteCallAuditActor : ReceiveActor
{
    public ScriptedSiteCallAuditActor(HealthPageTests test)
    {
        Receive<SiteCallKpiRequest>(_ => Sender.Tell(test._siteCallKpiReply));

        Receive<PerNodeSiteCallKpiRequest>(request =>
        {
            // Echo the correlation id; no per-node rows are scripted.
            var emptyReply = new PerNodeSiteCallKpiResponse(
                request.CorrelationId,
                Success: true,
                ErrorMessage: null,
                Nodes: Array.Empty<SiteCallNodeKpiSnapshot>());

            Sender.Tell(emptyReply);
        });
    }
}
|
||||
}
|
||||
|
||||
@@ -153,7 +153,9 @@ public class NotificationKpisPageTests : BunitContext
|
||||
|
||||
/// <summary>
|
||||
/// Stand-in for the notification-outbox actor. Replies to each KPI message
|
||||
/// type with the test's currently-scripted response.
|
||||
/// type with the test's currently-scripted response. Also handles the per-node
|
||||
/// KPI request (T6: M5.2) with an empty-nodes success reply so the page can
|
||||
/// complete initialization without a 30-second Ask timeout.
|
||||
/// </summary>
|
||||
private sealed class ScriptedOutboxActor : ReceiveActor
|
||||
{
|
||||
@@ -161,6 +163,9 @@ public class NotificationKpisPageTests : BunitContext
|
||||
{
|
||||
Receive<NotificationKpiRequest>(_ => Sender.Tell(test._kpiReply));
|
||||
Receive<PerSiteNotificationKpiRequest>(_ => Sender.Tell(test._perSiteReply));
|
||||
Receive<PerNodeNotificationKpiRequest>(req => Sender.Tell(
|
||||
new PerNodeNotificationKpiResponse(req.CorrelationId, Success: true, ErrorMessage: null,
|
||||
Nodes: Array.Empty<NodeNotificationKpiSnapshot>())));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+128
@@ -0,0 +1,128 @@
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Notifications;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
using ZB.MOM.WW.ScadaBridge.ConfigurationDatabase.Repositories;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.ConfigurationDatabase.Tests;
|
||||
|
||||
// Tests for the Notification Outbox repository's per-node KPI aggregation
// (T6: M5.2 per-node stuck-count KPIs). Each test seeds an in-memory context
// and checks the snapshots returned by ComputePerNodeKpisAsync.
public class NotificationOutboxRepositoryPerNodeKpiTests
{
    // Creates a fresh in-memory context via the shared SQLite test helper.
    private static ScadaBridgeDbContext NewContext()
    {
        return SqliteTestHelper.CreateInMemoryContext();
    }

    // Builds an email notification with a random id and fixed list/subject/body,
    // overriding only the fields the KPI tests vary.
    private static Notification NewNotification(
        string sourceSiteId,
        NotificationStatus status,
        DateTimeOffset createdAt,
        DateTimeOffset? deliveredAt = null,
        string? sourceNode = null) =>
        new Notification(
            Guid.NewGuid().ToString(), NotificationType.Email, "Ops List", "Subject", "Body", sourceSiteId)
        {
            Status = status,
            CreatedAt = createdAt,
            DeliveredAt = deliveredAt,
            SourceNode = sourceNode,
        };

    // Adds the given rows to the context one by one and saves them.
    private static async Task SeedAsync(ScadaBridgeDbContext ctx, params Notification[] rows)
    {
        foreach (var row in rows)
        {
            ctx.Notifications.Add(row);
        }

        await ctx.SaveChangesAsync();
    }

    [Fact]
    public async Task ComputePerNodeKpisAsync_AggregatesMetricsPerNode()
    {
        await using var ctx = NewContext();
        var now = DateTimeOffset.UtcNow;

        await SeedAsync(
            ctx,
            // node-a: one pending row older than the stuck cutoff (20m), one parked row.
            NewNotification("plant-a", NotificationStatus.Pending,
                createdAt: now.AddMinutes(-20), sourceNode: "node-a"),
            NewNotification("plant-a", NotificationStatus.Parked,
                createdAt: now.AddMinutes(-5), sourceNode: "node-a"),
            // node-b: one row delivered inside the window, one fresh pending row.
            NewNotification("plant-b", NotificationStatus.Delivered,
                createdAt: now.AddHours(-2), deliveredAt: now.AddMinutes(-2), sourceNode: "node-b"),
            NewNotification("plant-b", NotificationStatus.Pending,
                createdAt: now.AddMinutes(-1), sourceNode: "node-b"),
            // Row without a SourceNode: expected to be left out of the per-node result.
            NewNotification("plant-c", NotificationStatus.Pending,
                createdAt: now.AddMinutes(-5), sourceNode: null));

        var repo = new NotificationOutboxRepository(ctx);
        var snapshots = await repo.ComputePerNodeKpisAsync(
            stuckCutoff: now.AddMinutes(-10), deliveredSince: now.AddMinutes(-30));

        // Exactly node-a and node-b; the null-node row contributes nothing.
        Assert.Equal(2, snapshots.Count);

        var nodeA = snapshots.Single(s => s.SourceNode == "node-a");
        Assert.Equal(1, nodeA.QueueDepth);
        Assert.Equal(1, nodeA.StuckCount);
        Assert.Equal(1, nodeA.ParkedCount);
        Assert.Equal(0, nodeA.DeliveredLastInterval);
        Assert.NotNull(nodeA.OldestPendingAge);

        var nodeB = snapshots.Single(s => s.SourceNode == "node-b");
        Assert.Equal(1, nodeB.QueueDepth);
        Assert.Equal(0, nodeB.StuckCount);
        Assert.Equal(0, nodeB.ParkedCount);
        Assert.Equal(1, nodeB.DeliveredLastInterval);
        Assert.NotNull(nodeB.OldestPendingAge);
    }

    [Fact]
    public async Task ComputePerNodeKpisAsync_ExcludesNullSourceNode()
    {
        await using var ctx = NewContext();
        var now = DateTimeOffset.UtcNow;

        // The only seeded row has no SourceNode, so nothing should be reported.
        await SeedAsync(
            ctx,
            NewNotification("plant-a", NotificationStatus.Pending,
                createdAt: now.AddMinutes(-5), sourceNode: null));

        var repo = new NotificationOutboxRepository(ctx);
        var snapshots = await repo.ComputePerNodeKpisAsync(
            stuckCutoff: now.AddMinutes(-10), deliveredSince: now.AddMinutes(-30));

        Assert.Empty(snapshots);
    }

    [Fact]
    public async Task ComputePerNodeKpisAsync_ReturnsEmpty_WhenNoNotifications()
    {
        await using var ctx = NewContext();
        var repo = new NotificationOutboxRepository(ctx);

        var snapshots = await repo.ComputePerNodeKpisAsync(
            stuckCutoff: DateTimeOffset.UtcNow,
            deliveredSince: DateTimeOffset.UtcNow.AddMinutes(-30));

        Assert.Empty(snapshots);
    }

    [Fact]
    public async Task ComputePerNodeKpisAsync_OldestPendingAge_ReflectsOlderRow()
    {
        await using var ctx = NewContext();
        var now = DateTimeOffset.UtcNow;

        // node-a has a pending row from 90m ago and a retrying row from 40m ago;
        // the reported oldest age must come from the 90m row.
        await SeedAsync(
            ctx,
            NewNotification("plant-a", NotificationStatus.Pending,
                createdAt: now.AddMinutes(-90), sourceNode: "node-a"),
            NewNotification("plant-a", NotificationStatus.Retrying,
                createdAt: now.AddMinutes(-40), sourceNode: "node-a"));

        var repo = new NotificationOutboxRepository(ctx);
        var snapshots = await repo.ComputePerNodeKpisAsync(
            stuckCutoff: now.AddMinutes(-10), deliveredSince: now.AddMinutes(-30));

        var nodeA = snapshots.Single(s => s.SourceNode == "node-a");
        Assert.Equal(2, nodeA.QueueDepth);
        Assert.Equal(2, nodeA.StuckCount);
        Assert.NotNull(nodeA.OldestPendingAge);

        // A 10-minute band around 90m absorbs the time elapsed while the test runs.
        Assert.True(nodeA.OldestPendingAge >= TimeSpan.FromMinutes(85),
            $"expected OldestPendingAge >= 85m, got {nodeA.OldestPendingAge}");
        Assert.True(nodeA.OldestPendingAge < TimeSpan.FromMinutes(95),
            $"expected OldestPendingAge < 95m, got {nodeA.OldestPendingAge}");
    }
}
|
||||
+48
@@ -497,6 +497,54 @@ public class SiteCallAuditRepositoryTests : IClassFixture<MsSqlMigrationFixture>
|
||||
Assert.Null(b.OldestPendingAge);
|
||||
}
|
||||
|
||||
[SkippableFact]
public async Task ComputePerNodeKpisAsync_ScopesCountsToEachNode()
{
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    // Randomised node names keep this test's rows apart from anything other
    // tests write to the shared MsSql fixture.
    var suffix = Guid.NewGuid().ToString("N")[..8];
    var nodeA = $"node-b3-{suffix}";
    var nodeB = $"{nodeA}-b";
    await using var context = CreateContext();
    var repo = new SiteCallAuditRepository(context);

    var now = DateTime.UtcNow;
    var stuckCutoff = now.AddMinutes(-10);
    var intervalSince = now.AddHours(-1);

    // nodeA: two "Attempted" rows (only the 30-minute-old one is past the
    // stuck cutoff) plus one terminal "Parked" row.
    await repo.UpsertAsync(NewRow(
        TrackedOperationId.New(), status: "Attempted",
        createdAtUtc: now.AddMinutes(-30), sourceNode: nodeA));
    await repo.UpsertAsync(NewRow(
        TrackedOperationId.New(), status: "Attempted",
        createdAtUtc: now.AddMinutes(-2), sourceNode: nodeA));
    await repo.UpsertAsync(NewRow(
        TrackedOperationId.New(), status: "Parked",
        createdAtUtc: now.AddMinutes(-5), terminal: true, sourceNode: nodeA));

    // nodeB: a single row delivered inside the interval.
    await repo.UpsertAsync(NewRow(
        TrackedOperationId.New(), status: "Delivered",
        createdAtUtc: now.AddMinutes(-4), updatedAtUtc: now.AddMinutes(-1),
        terminal: true, terminalAtUtc: now.AddMinutes(-1), sourceNode: nodeB));

    // A row without a SourceNode, which must not surface in the per-node result.
    await repo.UpsertAsync(NewRow(
        TrackedOperationId.New(), status: "Attempted",
        createdAtUtc: now.AddMinutes(-3), sourceNode: null));

    var perNode = await repo.ComputePerNodeKpisAsync(stuckCutoff, intervalSince);

    var snapshotA = Assert.Single(perNode, s => s.SourceNode == nodeA);
    Assert.Equal(2, snapshotA.BufferedCount);
    Assert.Equal(1, snapshotA.ParkedCount);
    Assert.Equal(1, snapshotA.StuckCount);
    Assert.NotNull(snapshotA.OldestPendingAge);

    var snapshotB = Assert.Single(perNode, s => s.SourceNode == nodeB);
    Assert.Equal(0, snapshotB.BufferedCount);
    Assert.Equal(1, snapshotB.DeliveredLastInterval);
    Assert.Null(snapshotB.OldestPendingAge);

    // No entry may carry a null SourceNode.
    Assert.DoesNotContain(perNode, s => s.SourceNode is null);
}
|
||||
|
||||
// --- helpers ------------------------------------------------------------
|
||||
|
||||
private ScadaBridgeDbContext CreateContext()
|
||||
|
||||
+46
@@ -495,4 +495,50 @@ public class NotificationOutboxActorQueryTests : TestKit
|
||||
Assert.Contains("db down", response.ErrorMessage);
|
||||
Assert.Empty(response.Sites);
|
||||
}
|
||||
|
||||
// ── Per-node KPI (T6: M5.2 per-node stuck-count KPIs) ──────────────────
|
||||
|
||||
[Fact]
public void PerNodeKpiRequest_RepliesWithPerNodeSnapshots()
{
    // Script the repository to report a single node with one stuck notification.
    var scripted = new List<NodeNotificationKpiSnapshot>
    {
        new("node-a", QueueDepth: 3, StuckCount: 1, ParkedCount: 0,
            DeliveredLastInterval: 5, OldestPendingAge: TimeSpan.FromMinutes(12)),
    };
    _repository
        .ComputePerNodeKpisAsync(
            Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>())
        .Returns(scripted);
    var outbox = CreateActor();

    outbox.Tell(new PerNodeNotificationKpiRequest("corr-pn"), TestActor);

    var reply = ExpectMsg<PerNodeNotificationKpiResponse>();
    Assert.True(reply.Success);
    Assert.Null(reply.ErrorMessage);
    Assert.Equal("corr-pn", reply.CorrelationId);
    var onlyNode = Assert.Single(reply.Nodes);
    Assert.Equal("node-a", onlyNode.SourceNode);
    Assert.Equal(1, onlyNode.StuckCount);

    // The repository must have been queried exactly once.
    _repository.Received(1).ComputePerNodeKpisAsync(
        Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>());
}
|
||||
|
||||
[Fact]
public void PerNodeKpiRequest_RepositoryFault_RepliesUnsuccessful()
{
    // Make the repository query fault so the actor's failure path is exercised.
    var fault = new InvalidOperationException("node-kpi db down");
    _repository
        .ComputePerNodeKpisAsync(
            Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>())
        .ThrowsAsync(fault);
    var outbox = CreateActor();

    outbox.Tell(new PerNodeNotificationKpiRequest("corr-pn"), TestActor);

    var reply = ExpectMsg<PerNodeNotificationKpiResponse>();
    Assert.False(reply.Success);
    Assert.Equal("corr-pn", reply.CorrelationId);
    Assert.NotNull(reply.ErrorMessage);
    Assert.Contains("node-kpi db down", reply.ErrorMessage);
    Assert.Empty(reply.Nodes);
}
|
||||
}
|
||||
|
||||
@@ -594,6 +594,43 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
|
||||
Assert.NotNull(response.OldestPendingAge);
|
||||
}
|
||||
|
||||
// ── Per-node KPI (T6: M5.2 per-node stuck-count KPIs) ──────────────────
|
||||
|
||||
[SkippableFact]
public async Task PerNodeSiteCallKpiRequest_ScopesCountsToEachNode()
{
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    // A randomised node name lets the assertions pick out only this test's rows.
    var suffix = Guid.NewGuid().ToString("N")[..8];
    var nodeId = $"node-{suffix}";
    await using var context = CreateContext();
    var repo = new SiteCallAuditRepository(context);
    var options = new SiteCallAuditOptions
    {
        StuckAgeThreshold = TimeSpan.FromMinutes(10),
        KpiInterval = TimeSpan.FromHours(1),
    };
    var auditActor = CreateActor(repo, options);

    var now = DateTime.UtcNow;
    var siteId = NewSiteId();

    // "Attempted" row created 30 minutes ago: counted as buffered and, being
    // older than the 10-minute threshold, as stuck.
    await repo.UpsertAsync(NewRow(
        TrackedOperationId.New(), siteId, status: "Attempted",
        createdAtUtc: now.AddMinutes(-30), sourceNode: nodeId));

    // Terminal "Parked" row on the same node.
    await repo.UpsertAsync(NewRow(
        TrackedOperationId.New(), siteId, status: "Parked",
        createdAtUtc: now.AddMinutes(-5), terminal: true, sourceNode: nodeId));

    auditActor.Tell(new PerNodeSiteCallKpiRequest("corr-pnk"), TestActor);

    var reply = ExpectMsg<PerNodeSiteCallKpiResponse>(TimeSpan.FromSeconds(10));
    Assert.True(reply.Success);

    var snapshot = Assert.Single(reply.Nodes, s => s.SourceNode == nodeId);
    Assert.Equal(1, snapshot.BufferedCount);
    Assert.Equal(1, snapshot.ParkedCount);
    Assert.Equal(1, snapshot.StuckCount);
    Assert.NotNull(snapshot.OldestPendingAge);
}
|
||||
|
||||
[SkippableFact]
|
||||
public async Task PerSiteSiteCallKpiRequest_ScopesCountsToEachSite()
|
||||
{
|
||||
@@ -745,6 +782,10 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
|
||||
/// <summary>Forwards the per-site KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
|
||||
/// <summary>Forwards the per-node KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -790,5 +831,9 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
|
||||
/// <summary>Forwards the per-site KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
|
||||
/// <summary>Forwards the per-node KPI query to <c>_inner</c> with the arguments unchanged.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,6 +76,10 @@ public class SiteCallAuditPurgeTests : TestKit
|
||||
/// <summary>Stub: ignores its arguments and returns a completed task holding an empty per-site list.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
    return Task.FromResult(none);
}
|
||||
|
||||
/// <summary>Stub: ignores its arguments and returns a completed task holding an empty per-node list.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallNodeKpiSnapshot> none = Array.Empty<SiteCallNodeKpiSnapshot>();
    return Task.FromResult(none);
}
|
||||
}
|
||||
|
||||
/// <summary>Repository whose purge always throws — to prove continue-on-error keeps the singleton alive.</summary>
|
||||
@@ -94,6 +98,7 @@ public class SiteCallAuditPurgeTests : TestKit
|
||||
/// <summary>Stub: ignores the filter and paging and returns a completed task holding an empty list.</summary>
public Task<IReadOnlyList<SiteCall>> QueryAsync(SiteCallQueryFilter f, SiteCallPaging p, CancellationToken ct = default)
{
    IReadOnlyList<SiteCall> none = Array.Empty<SiteCall>();
    return Task.FromResult(none);
}
|
||||
/// <summary>Stub: ignores its arguments and returns a completed task holding a snapshot built from zeros and a null fifth argument.</summary>
public Task<SiteCallKpiSnapshot> ComputeKpisAsync(DateTime a, DateTime b, CancellationToken ct = default)
{
    var zeroed = new SiteCallKpiSnapshot(0, 0, 0, 0, null, 0);
    return Task.FromResult(zeroed);
}
|
||||
/// <summary>Stub: ignores its arguments and returns a completed task holding an empty per-site list.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(DateTime a, DateTime b, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
    return Task.FromResult(none);
}
|
||||
/// <summary>Stub: ignores its arguments and returns a completed task holding an empty per-node list.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(DateTime a, DateTime b, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallNodeKpiSnapshot> none = Array.Empty<SiteCallNodeKpiSnapshot>();
    return Task.FromResult(none);
}
|
||||
}
|
||||
|
||||
private IActorRef CreateActor(ISiteCallAuditRepository repo, SiteCallAuditOptions options) =>
|
||||
|
||||
@@ -142,6 +142,10 @@ public class SiteCallAuditReconciliationTests : TestKit
|
||||
/// <summary>Stub: ignores its arguments and returns a completed task holding an empty per-site list.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
    return Task.FromResult(none);
}
|
||||
|
||||
/// <summary>Stub: ignores its arguments and returns a completed task holding an empty per-node list.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallNodeKpiSnapshot> none = Array.Empty<SiteCallNodeKpiSnapshot>();
    return Task.FromResult(none);
}
|
||||
}
|
||||
|
||||
private IActorRef CreateActor(
|
||||
|
||||
@@ -50,6 +50,10 @@ public class SiteCallRelayTests : TestKit
|
||||
/// <summary>Always throws: the relay tests treat any per-site KPI computation as a failure.</summary>
/// <exception cref="InvalidOperationException">Thrown on every call.</exception>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    throw new InvalidOperationException("relay must not compute per-site KPIs");
}
|
||||
|
||||
/// <summary>Always throws: the relay tests treat any per-node KPI computation as a failure.</summary>
/// <exception cref="InvalidOperationException">Thrown on every call.</exception>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    throw new InvalidOperationException("relay must not compute per-node KPIs");
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
Reference in New Issue
Block a user