feat(audit): M5.2 per-node stuck-count KPIs (T6) — repo per-node aggregation, actor message pair, CentralUI tiles

This commit is contained in:
Joseph Doherty
2026-06-16 21:34:14 -04:00
parent a07ff28f10
commit 209f368cb5
25 changed files with 840 additions and 6 deletions
@@ -58,3 +58,31 @@
{
<div class="text-muted small mb-3">Site Call KPIs unavailable: @ErrorMessage</div>
}
@* ── Per-node stuck/parked sub-table (T6: M5.2 per-node stuck-count KPIs) ── *@
@if (HasNodeBreakdown)
{
<div class="mb-3">
<div class="d-flex justify-content-between align-items-center mb-1">
<small class="text-muted">By node</small>
</div>
<table class="table table-sm table-borderless mb-0 site-call-kpi-node-table">
<thead class="table-light">
<tr>
<th class="small py-1">Node</th>
<th class="text-end small py-1">Stuck</th>
<th class="text-end small py-1">Parked</th>
</tr>
</thead>
<tbody>
@foreach (var n in PerNodeSnapshots!)
{
<tr @key="n.SourceNode">
<td class="small py-1"><code>@n.SourceNode</code></td>
<td class="text-end font-monospace small py-1 @(n.StuckCount > 0 ? "text-warning" : "")">@n.StuckCount</td>
<td class="text-end font-monospace small py-1 @(n.ParkedCount > 0 ? "text-danger" : "")">@n.ParkedCount</td>
</tr>
}
</tbody>
</table>
</div>
}
@@ -1,5 +1,6 @@
using Microsoft.AspNetCore.Components;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
namespace ZB.MOM.WW.ScadaBridge.CentralUI.Components.Health;
@@ -59,6 +60,24 @@ public partial class SiteCallKpiTiles
/// </summary>
[Parameter] public string? ErrorMessage { get; set; }
/// <summary>
/// Optional per-node KPI breakdown (T6: M5.2 per-node stuck-count KPIs).
/// A compact node-level stuck/parked sub-table is rendered below the main
/// tiles only when this list is non-null and non-empty AND
/// <see cref="PerNodeAvailable"/> is <c>true</c> (the combined check lives in
/// <see cref="HasNodeBreakdown"/>). <c>null</c> means the parent has not
/// loaded it yet or has opted out — the sub-table is suppressed entirely.
/// </summary>
[Parameter] public IReadOnlyList<SiteCallNodeKpiSnapshot>? PerNodeSnapshots { get; set; }
/// <summary>
/// True when <see cref="PerNodeSnapshots"/> holds the result of a successful query.
/// When <c>false</c>, <see cref="HasNodeBreakdown"/> evaluates to <c>false</c>, so the
/// sub-table is suppressed on a load failure regardless of what
/// <see cref="PerNodeSnapshots"/> currently contains.
/// </summary>
[Parameter] public bool PerNodeAvailable { get; set; }
/// <summary>
/// Gate for the per-node sub-table: <c>true</c> only when the per-node query is
/// flagged available and <see cref="PerNodeSnapshots"/> contains at least one row.
/// </summary>
internal bool HasNodeBreakdown
{
    get
    {
        if (!PerNodeAvailable)
        {
            return false;
        }

        // A null list makes the lifted comparison evaluate to false.
        return PerNodeSnapshots?.Count > 0;
    }
}
// ── Buffered tile ───────────────────────────────────────────────────────
private string BufferedDisplay =>
@@ -9,6 +9,7 @@
@using ZB.MOM.WW.ScadaBridge.HealthMonitoring
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit
@using ZB.MOM.WW.ScadaBridge.Communication
@implements IDisposable
@inject ICentralHealthAggregator HealthAggregator
@@ -65,7 +66,9 @@
(buffered / stuck / parked). Refreshed alongside the site states. *@
<SiteCallKpiTiles Snapshot="@_siteCallKpi"
IsAvailable="@_siteCallKpiAvailable"
ErrorMessage="@_siteCallKpiError" />
ErrorMessage="@_siteCallKpiError"
PerNodeSnapshots="@_siteCallNodeKpis"
PerNodeAvailable="@_siteCallNodeKpiAvailable" />
@* Audit Log (#23) M7 Bundle E — three KPI tiles for the Audit channel
(volume / error rate / backlog). Refreshed alongside the site states. *@
@@ -378,6 +381,12 @@
private bool _siteCallKpiAvailable;
private string? _siteCallKpiError;
// Per-node Site Call KPI breakdown (T6: M5.2 per-node stuck-count KPIs).
// Passed to SiteCallKpiTiles as an optional sub-table.
private IReadOnlyList<SiteCallNodeKpiSnapshot> _siteCallNodeKpis =
Array.Empty<SiteCallNodeKpiSnapshot>();
private bool _siteCallNodeKpiAvailable;
private static bool SiteHasActiveErrors(SiteHealthState state)
{
var report = state.LatestReport;
@@ -415,7 +424,7 @@
{
_siteStates = HealthAggregator.GetAllSiteStates();
await LoadOutboxKpis();
await LoadSiteCallKpis();
await Task.WhenAll(LoadSiteCallKpis(), LoadSiteCallNodeKpis());
await LoadAuditKpis();
}
@@ -474,6 +483,30 @@
}
}
// Loads the per-node site-call KPI breakdown (T6: M5.2). Deliberately
// best-effort: any failure — an unsuccessful response or a thrown exception —
// only clears the availability flag, which hides the per-node sub-table
// instead of degrading the rest of the dashboard.
private async Task LoadSiteCallNodeKpis()
{
    var loaded = false;
    try
    {
        var request = new PerNodeSiteCallKpiRequest(Guid.NewGuid().ToString("N"));
        var reply = await CommunicationService.GetPerNodeSiteCallKpisAsync(request);
        if (reply.Success)
        {
            _siteCallNodeKpis = reply.Nodes;
            loaded = true;
        }
    }
    catch
    {
        // Intentionally swallowed: the sub-table is optional, so a fault is
        // reported to the UI only through the availability flag below.
        loaded = false;
    }

    _siteCallNodeKpiAvailable = loaded;
}
// Tiles show the numeric KPI when available, or an em dash when the outbox
// KPI query failed — matching how the page renders other unavailable data.
private string OutboxTileValue(int value) =>
@@ -69,6 +69,51 @@
</div>
}
@* ── Per-node breakdown (T6: additive) ── *@
<h5 class="mb-2">Per-node breakdown</h5>
@if (_perNodeError != null)
{
<div class="alert alert-warning py-2">Per-node KPIs unavailable: @_perNodeError</div>
}
else if (_perNode.Count == 0)
{
<div class="card mb-3">
<div class="card-body text-center text-muted py-3">
<div class="small">No per-node activity (rows may have a null SourceNode).</div>
</div>
</div>
}
else
{
<div class="table-responsive mb-3">
<table class="table table-sm table-hover align-middle">
<thead class="table-light">
<tr>
<th>Node</th>
<th class="text-end">Queue Depth</th>
<th class="text-end">Stuck</th>
<th class="text-end">Parked</th>
<th class="text-end">Delivered (last interval)</th>
<th class="text-end">Oldest Pending Age</th>
</tr>
</thead>
<tbody>
@foreach (var n in _perNode)
{
<tr @key="n.SourceNode" class="@(n.StuckCount > 0 ? "table-warning" : "")">
<td><code>@n.SourceNode</code></td>
<td class="text-end font-monospace">@n.QueueDepth</td>
<td class="text-end font-monospace @(n.StuckCount > 0 ? "text-warning" : "")">@n.StuckCount</td>
<td class="text-end font-monospace @(n.ParkedCount > 0 ? "text-danger" : "")">@n.ParkedCount</td>
<td class="text-end font-monospace text-success">@n.DeliveredLastInterval</td>
<td class="text-end font-monospace">@FormatAge(n.OldestPendingAge)</td>
</tr>
}
</tbody>
</table>
</div>
}
@* ── Per-site breakdown ── *@
<h5 class="mb-2">Per-site breakdown</h5>
@if (_perSiteError != null)
@@ -124,6 +169,10 @@
private IReadOnlyList<SiteNotificationKpiSnapshot> _perSite = Array.Empty<SiteNotificationKpiSnapshot>();
private string? _perSiteError;
// ── Per-node (T6: M5.2 per-node stuck-count KPIs) ──
private IReadOnlyList<NodeNotificationKpiSnapshot> _perNode = Array.Empty<NodeNotificationKpiSnapshot>();
private string? _perNodeError;
private bool _loading;
protected override async Task OnInitializedAsync()
@@ -144,9 +193,9 @@
// Reloads the global, per-site and per-node KPI sets concurrently, keeping
// _loading set for the duration. Each loader is started exactly once per
// refresh so no KPI query is issued twice.
private async Task RefreshAll()
{
    _loading = true;
    // Race-free despite all tasks mutating component fields: Blazor Server runs
    // every continuation on the circuit's single-threaded synchronization context.
    await Task.WhenAll(LoadGlobalKpis(), LoadPerSiteKpis(), LoadPerNodeKpis());
    _loading = false;
}
@@ -194,6 +243,28 @@
}
}
// Queries the per-node notification KPIs and stores either the rows or an
// error message. On an unsuccessful response or an exception only
// _perNodeError is written; the previously loaded _perNode rows are left
// untouched.
private async Task LoadPerNodeKpis()
{
    try
    {
        var request = new PerNodeNotificationKpiRequest(Guid.NewGuid().ToString("N"));
        var reply = await CommunicationService.GetPerNodeNotificationKpisAsync(request);

        if (!reply.Success)
        {
            // Fall back to a generic message when the response carries none.
            _perNodeError = reply.ErrorMessage ?? "Per-node KPI query failed.";
            return;
        }

        _perNode = reply.Nodes;
        _perNodeError = null;
    }
    catch (Exception failure)
    {
        _perNodeError = $"Per-node KPI query failed: {failure.Message}";
    }
}
// Resolves a site identifier to its name; when no entry in _sites matches
// (or the matching entry has a null Name) the identifier itself is returned.
private string SiteName(string siteId)
{
    var match = _sites.FirstOrDefault(site => site.SiteIdentifier == siteId);
    return match?.Name ?? siteId;
}