feat(audit): M5.2 per-node stuck-count KPIs (T6) — repo per-node aggregation, actor message pair, CentralUI tiles

This commit is contained in:
Joseph Doherty
2026-06-16 21:34:14 -04:00
parent a07ff28f10
commit 209f368cb5
25 changed files with 840 additions and 6 deletions
@@ -362,6 +362,9 @@ public class AuditLogIngestActorCombinedTelemetryTests : TestKit, IClassFixture<
/// <summary>Pass-through: hands the per-site KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
/// <summary>Pass-through: hands the per-node KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
}
/// <summary>
@@ -399,5 +402,8 @@ public class AuditLogIngestActorCombinedTelemetryTests : TestKit, IClassFixture<
/// <summary>Pass-through: hands the per-site KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
/// <summary>Pass-through: hands the per-node KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
}
}
@@ -13,6 +13,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
using ZB.MOM.WW.ScadaBridge.Communication;
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
using HealthPage = ZB.MOM.WW.ScadaBridge.CentralUI.Components.Pages.Monitoring.Health;
@@ -232,13 +233,18 @@ public class HealthPageTests : BunitContext
/// <summary>
/// Test double for the Site Call Audit actor. A <c>SiteCallKpiRequest</c> is
/// answered with whatever reply the owning test currently holds in
/// <c>_siteCallKpiReply</c>. A <c>PerNodeSiteCallKpiRequest</c> (T6: M5.2) is
/// answered with a successful, empty-nodes response so the Health page can
/// complete initialization without a 30-second Ask timeout.
/// </summary>
private sealed class ScriptedSiteCallAuditActor : ReceiveActor
{
    public ScriptedSiteCallAuditActor(HealthPageTests test)
    {
        // Aggregate KPI request: reply is read from the test at message time.
        Receive<SiteCallKpiRequest>(_ => Sender.Tell(test._siteCallKpiReply));

        // Per-node KPI request: echo the correlation id back with no nodes.
        Receive<PerNodeSiteCallKpiRequest>(request =>
        {
            var emptyReply = new PerNodeSiteCallKpiResponse(
                request.CorrelationId,
                Success: true,
                ErrorMessage: null,
                Nodes: Array.Empty<SiteCallNodeKpiSnapshot>());
            Sender.Tell(emptyReply);
        });
    }
}
}
@@ -153,7 +153,9 @@ public class NotificationKpisPageTests : BunitContext
/// <summary>
/// Stand-in for the notification-outbox actor. Replies to each KPI message
/// type with the test's currently-scripted response. Also handles the per-node
/// KPI request (T6: M5.2) with an empty-nodes success reply so the page can
/// complete initialization without a 30-second Ask timeout.
/// </summary>
private sealed class ScriptedOutboxActor : ReceiveActor
{
@@ -161,6 +163,9 @@ public class NotificationKpisPageTests : BunitContext
{
Receive<NotificationKpiRequest>(_ => Sender.Tell(test._kpiReply));
Receive<PerSiteNotificationKpiRequest>(_ => Sender.Tell(test._perSiteReply));
Receive<PerNodeNotificationKpiRequest>(req => Sender.Tell(
new PerNodeNotificationKpiResponse(req.CorrelationId, Success: true, ErrorMessage: null,
Nodes: Array.Empty<NodeNotificationKpiSnapshot>())));
}
}
}
@@ -0,0 +1,128 @@
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Notifications;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
using ZB.MOM.WW.ScadaBridge.ConfigurationDatabase.Repositories;
namespace ZB.MOM.WW.ScadaBridge.ConfigurationDatabase.Tests;
/// <summary>
/// Tests for <c>NotificationOutboxRepository.ComputePerNodeKpisAsync</c>, the per-node
/// KPI aggregation of the Notification Outbox repository
/// (T6: M5.2 per-node stuck-count KPIs).
/// </summary>
public class NotificationOutboxRepositoryPerNodeKpiTests
{
    /// <summary>Obtains a context from <c>SqliteTestHelper.CreateInMemoryContext</c>.</summary>
    private static ScadaBridgeDbContext NewContext()
    {
        return SqliteTestHelper.CreateInMemoryContext();
    }

    /// <summary>
    /// Builds an e-mail notification (fresh GUID string as first constructor
    /// argument, fixed list/subject/body) and applies the supplied status,
    /// timestamps and source node through the object initializer.
    /// </summary>
    private static Notification NewNotification(
        string sourceSiteId,
        NotificationStatus status,
        DateTimeOffset createdAt,
        DateTimeOffset? deliveredAt = null,
        string? sourceNode = null) =>
        new Notification(
            Guid.NewGuid().ToString(), NotificationType.Email, "Ops List", "Subject", "Body", sourceSiteId)
        {
            Status = status,
            CreatedAt = createdAt,
            DeliveredAt = deliveredAt,
            SourceNode = sourceNode,
        };

    /// <summary>
    /// Two nodes with different mixes of rows each get their own snapshot;
    /// a row without a source node contributes to neither.
    /// </summary>
    [Fact]
    public async Task ComputePerNodeKpisAsync_AggregatesMetricsPerNode()
    {
        await using var db = NewContext();
        var utcNow = DateTimeOffset.UtcNow;

        db.Notifications.AddRange(
            // node-a: one pending row old enough to be stuck (20m), one parked row.
            NewNotification("plant-a", NotificationStatus.Pending,
                createdAt: utcNow.AddMinutes(-20), sourceNode: "node-a"),
            NewNotification("plant-a", NotificationStatus.Parked,
                createdAt: utcNow.AddMinutes(-5), sourceNode: "node-a"),
            // node-b: one row delivered inside the window, one fresh pending row.
            NewNotification("plant-b", NotificationStatus.Delivered,
                createdAt: utcNow.AddHours(-2), deliveredAt: utcNow.AddMinutes(-2), sourceNode: "node-b"),
            NewNotification("plant-b", NotificationStatus.Pending,
                createdAt: utcNow.AddMinutes(-1), sourceNode: "node-b"),
            // No SourceNode: this row must not surface in per-node results.
            NewNotification("plant-c", NotificationStatus.Pending,
                createdAt: utcNow.AddMinutes(-5), sourceNode: null));
        await db.SaveChangesAsync();

        var sut = new NotificationOutboxRepository(db);
        var snapshots = await sut.ComputePerNodeKpisAsync(
            stuckCutoff: utcNow.AddMinutes(-10), deliveredSince: utcNow.AddMinutes(-30));

        // Exactly node-a and node-b; the null-node row is left out.
        Assert.Equal(2, snapshots.Count);

        var nodeA = snapshots.Single(s => s.SourceNode == "node-a");
        Assert.Equal(1, nodeA.QueueDepth);
        Assert.Equal(1, nodeA.StuckCount);
        Assert.Equal(1, nodeA.ParkedCount);
        Assert.Equal(0, nodeA.DeliveredLastInterval);
        Assert.NotNull(nodeA.OldestPendingAge);

        var nodeB = snapshots.Single(s => s.SourceNode == "node-b");
        Assert.Equal(1, nodeB.QueueDepth);
        Assert.Equal(0, nodeB.StuckCount);
        Assert.Equal(0, nodeB.ParkedCount);
        Assert.Equal(1, nodeB.DeliveredLastInterval);
        Assert.NotNull(nodeB.OldestPendingAge);
    }

    /// <summary>When every stored row lacks a source node, no snapshot is produced.</summary>
    [Fact]
    public async Task ComputePerNodeKpisAsync_ExcludesNullSourceNode()
    {
        await using var db = NewContext();
        var utcNow = DateTimeOffset.UtcNow;

        // The only row has no node, so the result must be empty.
        var nodeless = NewNotification("plant-a", NotificationStatus.Pending,
            createdAt: utcNow.AddMinutes(-5), sourceNode: null);
        db.Notifications.Add(nodeless);
        await db.SaveChangesAsync();

        var sut = new NotificationOutboxRepository(db);
        var snapshots = await sut.ComputePerNodeKpisAsync(
            stuckCutoff: utcNow.AddMinutes(-10), deliveredSince: utcNow.AddMinutes(-30));

        Assert.Empty(snapshots);
    }

    /// <summary>An empty table yields an empty result.</summary>
    [Fact]
    public async Task ComputePerNodeKpisAsync_ReturnsEmpty_WhenNoNotifications()
    {
        await using var db = NewContext();
        var sut = new NotificationOutboxRepository(db);

        var snapshots = await sut.ComputePerNodeKpisAsync(
            stuckCutoff: DateTimeOffset.UtcNow,
            deliveredSince: DateTimeOffset.UtcNow.AddMinutes(-30));

        Assert.Empty(snapshots);
    }

    /// <summary>
    /// With a 90-minute-old pending row and a 40-minute-old retrying row on the
    /// same node, the reported oldest-pending age tracks the 90-minute row.
    /// </summary>
    [Fact]
    public async Task ComputePerNodeKpisAsync_OldestPendingAge_ReflectsOlderRow()
    {
        await using var db = NewContext();
        var utcNow = DateTimeOffset.UtcNow;

        db.Notifications.AddRange(
            NewNotification("plant-a", NotificationStatus.Pending,
                createdAt: utcNow.AddMinutes(-90), sourceNode: "node-a"),
            NewNotification("plant-a", NotificationStatus.Retrying,
                createdAt: utcNow.AddMinutes(-40), sourceNode: "node-a"));
        await db.SaveChangesAsync();

        var sut = new NotificationOutboxRepository(db);
        var snapshots = await sut.ComputePerNodeKpisAsync(
            stuckCutoff: utcNow.AddMinutes(-10), deliveredSince: utcNow.AddMinutes(-30));

        var nodeA = snapshots.Single(s => s.SourceNode == "node-a");
        Assert.Equal(2, nodeA.QueueDepth);
        Assert.Equal(2, nodeA.StuckCount);
        Assert.NotNull(nodeA.OldestPendingAge);

        // A +/-5 minute band around 90m absorbs test execution time.
        var lowerBound = TimeSpan.FromMinutes(85);
        var upperBound = TimeSpan.FromMinutes(95);
        Assert.True(nodeA.OldestPendingAge >= lowerBound,
            $"expected OldestPendingAge >= 85m, got {nodeA.OldestPendingAge}");
        Assert.True(nodeA.OldestPendingAge < upperBound,
            $"expected OldestPendingAge < 95m, got {nodeA.OldestPendingAge}");
    }
}
@@ -497,6 +497,54 @@ public class SiteCallAuditRepositoryTests : IClassFixture<MsSqlMigrationFixture>
Assert.Null(b.OldestPendingAge);
}
/// <summary>
/// Rows written under two distinct source nodes are aggregated separately,
/// and a row with no source node does not appear in the per-node result.
/// </summary>
[SkippableFact]
public async Task ComputePerNodeKpisAsync_ScopesCountsToEachNode()
{
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    // Randomised node ids keep this test's rows apart from anything else
    // writing to the shared MsSql fixture at the same time.
    var nodeId = "node-b3-" + Guid.NewGuid().ToString("N")[..8];
    var nodeB = $"{nodeId}-b";

    await using var context = CreateContext();
    var repo = new SiteCallAuditRepository(context);

    var now = DateTime.UtcNow;
    var stuckCutoff = now.AddMinutes(-10);
    var intervalSince = now.AddHours(-1);

    var seedRows = new[]
    {
        // nodeId: two buffered rows (the 30m-old one is stuck) and one parked row.
        NewRow(TrackedOperationId.New(), status: "Attempted",
            createdAtUtc: now.AddMinutes(-30), sourceNode: nodeId),
        NewRow(TrackedOperationId.New(), status: "Attempted",
            createdAtUtc: now.AddMinutes(-2), sourceNode: nodeId),
        NewRow(TrackedOperationId.New(), status: "Parked",
            createdAtUtc: now.AddMinutes(-5), terminal: true, sourceNode: nodeId),
        // nodeB: a single row delivered inside the interval.
        NewRow(TrackedOperationId.New(), status: "Delivered",
            createdAtUtc: now.AddMinutes(-4), updatedAtUtc: now.AddMinutes(-1),
            terminal: true, terminalAtUtc: now.AddMinutes(-1), sourceNode: nodeB),
        // No SourceNode: must be absent from the per-node result.
        NewRow(TrackedOperationId.New(), status: "Attempted",
            createdAtUtc: now.AddMinutes(-3), sourceNode: null),
    };
    foreach (var row in seedRows)
    {
        await repo.UpsertAsync(row);
    }

    var perNode = await repo.ComputePerNodeKpisAsync(stuckCutoff, intervalSince);

    var na = Assert.Single(perNode, n => n.SourceNode == nodeId);
    Assert.Equal(2, na.BufferedCount);
    Assert.Equal(1, na.ParkedCount);
    Assert.Equal(1, na.StuckCount);
    Assert.NotNull(na.OldestPendingAge);

    var nb = Assert.Single(perNode, n => n.SourceNode == nodeB);
    Assert.Equal(0, nb.BufferedCount);
    Assert.Equal(1, nb.DeliveredLastInterval);
    Assert.Null(nb.OldestPendingAge);

    // The node-less row never becomes a snapshot.
    Assert.DoesNotContain(perNode, n => n.SourceNode is null);
}
// --- helpers ------------------------------------------------------------
private ScadaBridgeDbContext CreateContext()
@@ -495,4 +495,50 @@ public class NotificationOutboxActorQueryTests : TestKit
Assert.Contains("db down", response.ErrorMessage);
Assert.Empty(response.Sites);
}
// ── Per-node KPI (T6: M5.2 per-node stuck-count KPIs) ──────────────────
/// <summary>
/// A per-node KPI request is answered with the snapshots the repository
/// returned, the request's correlation id, and a success flag.
/// </summary>
[Fact]
public void PerNodeKpiRequest_RepliesWithPerNodeSnapshots()
{
    var scriptedSnapshots = new List<NodeNotificationKpiSnapshot>
    {
        new("node-a", QueueDepth: 3, StuckCount: 1, ParkedCount: 0,
            DeliveredLastInterval: 5, OldestPendingAge: TimeSpan.FromMinutes(12)),
    };
    _repository
        .ComputePerNodeKpisAsync(
            Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>())
        .Returns(scriptedSnapshots);

    var outbox = CreateActor();
    outbox.Tell(new PerNodeNotificationKpiRequest("corr-pn"), TestActor);

    var reply = ExpectMsg<PerNodeNotificationKpiResponse>();
    Assert.True(reply.Success);
    Assert.Null(reply.ErrorMessage);
    Assert.Equal("corr-pn", reply.CorrelationId);
    Assert.Single(reply.Nodes);
    Assert.Equal("node-a", reply.Nodes[0].SourceNode);
    Assert.Equal(1, reply.Nodes[0].StuckCount);

    // The repository is consulted exactly once for the request.
    _repository.Received(1).ComputePerNodeKpisAsync(
        Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>());
}
/// <summary>
/// When the repository call faults, the reply is unsuccessful, keeps the
/// correlation id, carries the exception message, and has no nodes.
/// </summary>
[Fact]
public void PerNodeKpiRequest_RepositoryFault_RepliesUnsuccessful()
{
    var fault = new InvalidOperationException("node-kpi db down");
    _repository
        .ComputePerNodeKpisAsync(
            Arg.Any<DateTimeOffset>(), Arg.Any<DateTimeOffset>(), Arg.Any<CancellationToken>())
        .ThrowsAsync(fault);

    var outbox = CreateActor();
    outbox.Tell(new PerNodeNotificationKpiRequest("corr-pn"), TestActor);

    var reply = ExpectMsg<PerNodeNotificationKpiResponse>();
    Assert.False(reply.Success);
    Assert.Equal("corr-pn", reply.CorrelationId);
    Assert.NotNull(reply.ErrorMessage);
    Assert.Contains("node-kpi db down", reply.ErrorMessage);
    Assert.Empty(reply.Nodes);
}
}
@@ -594,6 +594,43 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
Assert.NotNull(response.OldestPendingAge);
}
// ── Per-node KPI (T6: M5.2 per-node stuck-count KPIs) ──────────────────
/// <summary>
/// Seeds one stuck buffered row and one parked row under a random node id,
/// then checks the actor's per-node KPI reply reports them for that node.
/// </summary>
[SkippableFact]
public async Task PerNodeSiteCallKpiRequest_ScopesCountsToEachNode()
{
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    var nodeId = "node-" + Guid.NewGuid().ToString("N")[..8];

    await using var context = CreateContext();
    var repo = new SiteCallAuditRepository(context);
    var options = new SiteCallAuditOptions
    {
        StuckAgeThreshold = TimeSpan.FromMinutes(10),
        KpiInterval = TimeSpan.FromHours(1),
    };
    var actor = CreateActor(repo, options);

    var now = DateTime.UtcNow;
    var siteId = NewSiteId();

    // Non-terminal "Attempted" row created 30 minutes ago: buffered and stuck.
    var stuckRow = NewRow(TrackedOperationId.New(), siteId, status: "Attempted",
        createdAtUtc: now.AddMinutes(-30), sourceNode: nodeId);
    await repo.UpsertAsync(stuckRow);

    // Terminal "Parked" row.
    var parkedRow = NewRow(TrackedOperationId.New(), siteId, status: "Parked",
        createdAtUtc: now.AddMinutes(-5), terminal: true, sourceNode: nodeId);
    await repo.UpsertAsync(parkedRow);

    actor.Tell(new PerNodeSiteCallKpiRequest("corr-pnk"), TestActor);
    var reply = ExpectMsg<PerNodeSiteCallKpiResponse>(TimeSpan.FromSeconds(10));

    Assert.True(reply.Success);
    var myNode = Assert.Single(reply.Nodes, n => n.SourceNode == nodeId);
    Assert.Equal(1, myNode.BufferedCount);
    Assert.Equal(1, myNode.ParkedCount);
    Assert.Equal(1, myNode.StuckCount);
    Assert.NotNull(myNode.OldestPendingAge);
}
[SkippableFact]
public async Task PerSiteSiteCallKpiRequest_ScopesCountsToEachSite()
{
@@ -745,6 +782,10 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
/// <summary>Pass-through: hands the per-site KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
/// <summary>Pass-through: hands the per-node KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
}
/// <summary>
@@ -790,5 +831,9 @@ public class SiteCallAuditActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
/// <summary>Pass-through: hands the per-site KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerSiteKpisAsync(stuckCutoff, intervalSince, ct);
}
/// <summary>Pass-through: hands the per-node KPI computation to <c>_inner</c> with the same arguments.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    return _inner.ComputePerNodeKpisAsync(stuckCutoff, intervalSince, ct);
}
}
}
@@ -76,6 +76,10 @@ public class SiteCallAuditPurgeTests : TestKit
/// <summary>Stub: returns an already-completed task holding an empty per-site snapshot list.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
    return Task.FromResult(none);
}
/// <summary>Stub: returns an already-completed task holding an empty per-node snapshot list.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallNodeKpiSnapshot> none = Array.Empty<SiteCallNodeKpiSnapshot>();
    return Task.FromResult(none);
}
}
/// <summary>Repository whose purge always throws — to prove continue-on-error keeps the singleton alive.</summary>
@@ -94,6 +98,7 @@ public class SiteCallAuditPurgeTests : TestKit
/// <summary>Stub: returns an already-completed task holding an empty list of site calls.</summary>
public Task<IReadOnlyList<SiteCall>> QueryAsync(SiteCallQueryFilter f, SiteCallPaging p, CancellationToken ct = default)
{
    IReadOnlyList<SiteCall> none = Array.Empty<SiteCall>();
    return Task.FromResult(none);
}
/// <summary>Stub: returns an already-completed task holding a snapshot built from zeros and a null fifth argument.</summary>
public Task<SiteCallKpiSnapshot> ComputeKpisAsync(DateTime a, DateTime b, CancellationToken ct = default)
{
    var zeroed = new SiteCallKpiSnapshot(0, 0, 0, 0, null, 0);
    return Task.FromResult(zeroed);
}
/// <summary>Stub: returns an already-completed task holding an empty per-site snapshot list.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(DateTime a, DateTime b, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
    return Task.FromResult(none);
}
/// <summary>Stub: returns an already-completed task holding an empty per-node snapshot list.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(DateTime a, DateTime b, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallNodeKpiSnapshot> none = Array.Empty<SiteCallNodeKpiSnapshot>();
    return Task.FromResult(none);
}
}
private IActorRef CreateActor(ISiteCallAuditRepository repo, SiteCallAuditOptions options) =>
@@ -142,6 +142,10 @@ public class SiteCallAuditReconciliationTests : TestKit
/// <summary>Stub: returns an already-completed task holding an empty per-site snapshot list.</summary>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallSiteKpiSnapshot> none = Array.Empty<SiteCallSiteKpiSnapshot>();
    return Task.FromResult(none);
}
/// <summary>Stub: returns an already-completed task holding an empty per-node snapshot list.</summary>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    IReadOnlyList<SiteCallNodeKpiSnapshot> none = Array.Empty<SiteCallNodeKpiSnapshot>();
    return Task.FromResult(none);
}
}
private IActorRef CreateActor(
@@ -50,6 +50,10 @@ public class SiteCallRelayTests : TestKit
/// <summary>Guard stub: always throws, because the relay is not expected to compute per-site KPIs.</summary>
/// <exception cref="InvalidOperationException">Thrown synchronously on every call.</exception>
public Task<IReadOnlyList<SiteCallSiteKpiSnapshot>> ComputePerSiteKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    throw new InvalidOperationException("relay must not compute per-site KPIs");
}
/// <summary>Guard stub: always throws, because the relay is not expected to compute per-node KPIs.</summary>
/// <exception cref="InvalidOperationException">Thrown synchronously on every call.</exception>
public Task<IReadOnlyList<SiteCallNodeKpiSnapshot>> ComputePerNodeKpisAsync(
    DateTime stuckCutoff, DateTime intervalSince, CancellationToken ct = default)
{
    throw new InvalidOperationException("relay must not compute per-node KPIs");
}
}
/// <summary>