refactor(kpi): K13/K15 trend review fixups — per-metric isolation, disable-during-load + logging, loading-flag finally, test coverage

This commit is contained in:
Joseph Doherty
2026-06-17 20:44:34 -04:00
parent 7d7c6cbb05
commit eb4bce3e49
7 changed files with 151 additions and 55 deletions
@@ -31,12 +31,12 @@
<div class="btn-group btn-group-sm" role="group" aria-label="Trend window"> <div class="btn-group btn-group-sm" role="group" aria-label="Trend window">
<button type="button" <button type="button"
class="btn @(_windowHours == 24 ? "btn-secondary" : "btn-outline-secondary")" class="btn @(_windowHours == 24 ? "btn-secondary" : "btn-outline-secondary")"
@onclick="() => SetWindowAsync(24)"> @onclick="() => SetWindowAsync(24)" disabled="@_trendsLoading">
24h 24h
</button> </button>
<button type="button" <button type="button"
class="btn @(_windowHours == 168 ? "btn-secondary" : "btn-outline-secondary")" class="btn @(_windowHours == 168 ? "btn-secondary" : "btn-outline-secondary")"
@onclick="() => SetWindowAsync(168)"> @onclick="() => SetWindowAsync(168)" disabled="@_trendsLoading">
7d 7d
</button> </button>
</div> </div>
@@ -2,6 +2,7 @@ using System.Globalization;
using Microsoft.AspNetCore.Components; using Microsoft.AspNetCore.Components;
using Microsoft.AspNetCore.Components.Routing; using Microsoft.AspNetCore.Components.Routing;
using Microsoft.AspNetCore.WebUtilities; using Microsoft.AspNetCore.WebUtilities;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.ScadaBridge.CentralUI.Services; using ZB.MOM.WW.ScadaBridge.CentralUI.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit; using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums; using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
@@ -56,6 +57,10 @@ public partial class AuditLogPage : IDisposable
/// </summary> /// </summary>
[Inject] private IKpiHistoryQueryService KpiHistory { get; set; } = null!; [Inject] private IKpiHistoryQueryService KpiHistory { get; set; } = null!;
/// <summary>Logger for the best-effort Trends panel — a degraded series fetch
/// is logged at warning level so the silent fallback is still observable.</summary>
[Inject] private ILogger<AuditLogPage> Logger { get; set; } = null!;
private AuditLogQueryFilter? _currentFilter; private AuditLogQueryFilter? _currentFilter;
private AuditEventView? _selectedEvent; private AuditEventView? _selectedEvent;
private bool _drawerOpen; private bool _drawerOpen;
@@ -271,6 +276,13 @@ public partial class AuditLogPage : IDisposable
/// <summary>Whether the Trends panel is expanded (open by default).</summary> /// <summary>Whether the Trends panel is expanded (open by default).</summary>
private bool _trendsOpen = true; private bool _trendsOpen = true;
/// <summary>
/// True while a window's series are being (re)fetched — disables the 24h/7d
/// window toggle buttons so a mid-flight click cannot stack overlapping loads
/// (mirrors the K13/K14 trend pages).
/// </summary>
private bool _trendsLoading;
/// <summary>Active window in hours — 24 (default) or 168 (7 days).</summary> /// <summary>Active window in hours — 24 (default) or 168 (7 days).</summary>
private int _windowHours = 24; private int _windowHours = 24;
@@ -297,25 +309,35 @@ public partial class AuditLogPage : IDisposable
/// </summary> /// </summary>
private async Task LoadTrendsAsync() private async Task LoadTrendsAsync()
{ {
var toUtc = DateTime.UtcNow; _trendsLoading = true;
var fromUtc = toUtc - TimeSpan.FromHours(_windowHours); try
foreach (var (metric, _, _) in TrendMetrics)
{ {
try var toUtc = DateTime.UtcNow;
var fromUtc = toUtc - TimeSpan.FromHours(_windowHours);
foreach (var (metric, _, _) in TrendMetrics)
{ {
var points = await KpiHistory.GetSeriesAsync( try
KpiSources.AuditLog, metric, KpiScopes.Global, scopeKey: null, {
fromUtc, toUtc); var points = await KpiHistory.GetSeriesAsync(
_trendSeries[metric] = new TrendSeries(points, IsAvailable: true, ErrorMessage: null); KpiSources.AuditLog, metric, KpiScopes.Global, scopeKey: null,
} fromUtc, toUtc);
catch (Exception) _trendSeries[metric] = new TrendSeries(points, IsAvailable: true, ErrorMessage: null);
{ }
// Best-effort: degrade this chart only, keep the rest of the page alive. catch (Exception ex)
_trendSeries[metric] = new TrendSeries( {
Points: null, IsAvailable: false, ErrorMessage: "Trend data unavailable."); // Best-effort: degrade this chart only, keep the rest of the page
// alive — but log the failure so the silent fallback is observable.
Logger.LogWarning(ex, "Failed to load Audit Log KPI trend series for metric {Metric}.", metric);
_trendSeries[metric] = new TrendSeries(
Points: null, IsAvailable: false, ErrorMessage: "Trend data unavailable.");
}
} }
} }
finally
{
_trendsLoading = false;
}
} }
/// <summary>Returns the rendered state for a metric, defaulting to available-empty.</summary> /// <summary>Returns the rendered state for a metric, defaulting to available-empty.</summary>
@@ -92,6 +92,7 @@
<select class="form-select form-select-sm" style="width:auto" <select class="form-select form-select-sm" style="width:auto"
data-test="site-health-trends-site" data-test="site-health-trends-site"
value="@_trendSiteId" value="@_trendSiteId"
@* Blazor wraps this async handler in an EventCallback, so the returned Task IS awaited — keep the Async signature; do not change it to a void/fire-and-forget handler. *@
@onchange="OnTrendSiteChangedAsync"> @onchange="OnTrendSiteChangedAsync">
@foreach (var key in _trendSiteKeys) @foreach (var key in _trendSiteKeys)
{ {
@@ -178,20 +178,20 @@
<div class="col-lg-4 col-md-6"> <div class="col-lg-4 col-md-6">
<KpiTrendChart Title="Queue Depth" <KpiTrendChart Title="Queue Depth"
Points="@_queueDepthSeries" Points="@_queueDepthSeries"
IsAvailable="@_trendsAvailable" IsAvailable="@_queueDepthAvailable"
ErrorMessage="@_trendsError" /> ErrorMessage="@_queueDepthError" />
</div> </div>
<div class="col-lg-4 col-md-6"> <div class="col-lg-4 col-md-6">
<KpiTrendChart Title="Parked" <KpiTrendChart Title="Parked"
Points="@_parkedSeries" Points="@_parkedSeries"
IsAvailable="@_trendsAvailable" IsAvailable="@_parkedAvailable"
ErrorMessage="@_trendsError" /> ErrorMessage="@_parkedError" />
</div> </div>
<div class="col-lg-4 col-md-6"> <div class="col-lg-4 col-md-6">
<KpiTrendChart Title="Delivered / interval" <KpiTrendChart Title="Delivered / interval"
Points="@_deliveredSeries" Points="@_deliveredSeries"
IsAvailable="@_trendsAvailable" IsAvailable="@_deliveredAvailable"
ErrorMessage="@_trendsError" /> ErrorMessage="@_deliveredError" />
</div> </div>
</div> </div>
</div> </div>
@@ -213,13 +213,24 @@
// ── Trends (T11: first KPI-history consumer) ── // ── Trends (T11: first KPI-history consumer) ──
// Window in hours: 24h (default) or 168h (7d). Toggling re-queries. // Window in hours: 24h (default) or 168h (7d). Toggling re-queries.
// Per-metric isolation (mirrors the K14 SiteCallsReport pattern): each metric
// carries its own series + availability + error, each loaded via its own
// try/catch so one metric's failure only blanks that one chart and the others
// still render their already-fetched data.
private int _windowHours = 24; private int _windowHours = 24;
private bool _trendsLoading; private bool _trendsLoading;
private bool _trendsAvailable = true;
private string? _trendsError;
private IReadOnlyList<KpiSeriesPoint>? _queueDepthSeries; private IReadOnlyList<KpiSeriesPoint>? _queueDepthSeries;
private bool _queueDepthAvailable = true;
private string? _queueDepthError;
private IReadOnlyList<KpiSeriesPoint>? _parkedSeries; private IReadOnlyList<KpiSeriesPoint>? _parkedSeries;
private bool _parkedAvailable = true;
private string? _parkedError;
private IReadOnlyList<KpiSeriesPoint>? _deliveredSeries; private IReadOnlyList<KpiSeriesPoint>? _deliveredSeries;
private bool _deliveredAvailable = true;
private string? _deliveredError;
protected override async Task OnInitializedAsync() protected override async Task OnInitializedAsync()
{ {
@@ -264,21 +275,15 @@
var toUtc = DateTime.UtcNow; var toUtc = DateTime.UtcNow;
var fromUtc = toUtc - TimeSpan.FromHours(_windowHours); var fromUtc = toUtc - TimeSpan.FromHours(_windowHours);
// Best-effort: one query failure must NOT break the page — on any // Per-metric isolation: each series is loaded via its own try/catch so
// exception the charts fall back to the unavailable placeholder while // one metric's failure only blanks that one chart while the others still
// the KPI tiles above stay rendered. // render their fetched data — and a throw never breaks the KPI tiles above.
_queueDepthSeries = await GetSeries("queueDepth", fromUtc, toUtc); (_queueDepthSeries, _queueDepthAvailable, _queueDepthError) =
_parkedSeries = await GetSeries("parkedCount", fromUtc, toUtc); await LoadSeries("queueDepth", fromUtc, toUtc);
_deliveredSeries = await GetSeries("deliveredLastInterval", fromUtc, toUtc); (_parkedSeries, _parkedAvailable, _parkedError) =
await LoadSeries("parkedCount", fromUtc, toUtc);
_trendsAvailable = true; (_deliveredSeries, _deliveredAvailable, _deliveredError) =
_trendsError = null; await LoadSeries("deliveredLastInterval", fromUtc, toUtc);
}
catch (Exception ex)
{
_trendsAvailable = false;
_trendsError = "Trend data unavailable.";
Logger.LogWarning(ex, "Failed to load notification-outbox KPI trend series.");
} }
finally finally
{ {
@@ -286,9 +291,28 @@
} }
} }
private Task<IReadOnlyList<KpiSeriesPoint>> GetSeries(string metric, DateTime fromUtc, DateTime toUtc) => /// <summary>
KpiHistory.GetSeriesAsync( /// Fetch one NotificationOutbox / Global series, swallowing any failure into the
KpiSources.NotificationOutbox, metric, KpiScopes.Global, scopeKey: null, fromUtc, toUtc); /// chart's unavailable state. Returns the points (null on failure), the
/// availability flag, and a short error message for the chart placeholder.
/// </summary>
private async Task<(IReadOnlyList<KpiSeriesPoint>? Points, bool Available, string? Error)>
LoadSeries(string metric, DateTime fromUtc, DateTime toUtc)
{
try
{
var points = await KpiHistory.GetSeriesAsync(
KpiSources.NotificationOutbox, metric, KpiScopes.Global, scopeKey: null, fromUtc, toUtc);
return (points, true, null);
}
catch (Exception ex)
{
// A KPI-history hiccup must never break the page — the chart degrades to
// its unavailable placeholder while the KPI tiles above stay rendered.
Logger.LogWarning(ex, "Failed to load notification-outbox KPI trend series for metric {Metric}.", metric);
return (null, false, "Trend data unavailable.");
}
}
private async Task LoadGlobalKpis() private async Task LoadGlobalKpis()
{ {
@@ -585,18 +585,26 @@ public partial class SiteCallsReport
private async Task LoadTrendsAsync() private async Task LoadTrendsAsync()
{ {
_trendsLoading = true; _trendsLoading = true;
try
{
var toUtc = DateTime.UtcNow;
var fromUtc = toUtc - TimeSpan.FromHours(_windowHours);
var toUtc = DateTime.UtcNow; (_bufferedSeries, _bufferedAvailable, _bufferedError) =
var fromUtc = toUtc - TimeSpan.FromHours(_windowHours); await LoadSeriesAsync("buffered", fromUtc, toUtc);
(_parkedSeries, _parkedAvailable, _parkedError) =
(_bufferedSeries, _bufferedAvailable, _bufferedError) = await LoadSeriesAsync("parked", fromUtc, toUtc);
await LoadSeriesAsync("buffered", fromUtc, toUtc); (_failedSeries, _failedAvailable, _failedError) =
(_parkedSeries, _parkedAvailable, _parkedError) = await LoadSeriesAsync("failedLastInterval", fromUtc, toUtc);
await LoadSeriesAsync("parked", fromUtc, toUtc); }
(_failedSeries, _failedAvailable, _failedError) = finally
await LoadSeriesAsync("failedLastInterval", fromUtc, toUtc); {
// try/finally so an unexpected throw before this line can't lock the
_trendsLoading = false; // spinner/disabled state on forever. The per-metric LoadSeriesAsync
// try/catches already swallow GetSeriesAsync failures, so this guards
// only the truly unexpected.
_trendsLoading = false;
}
} }
/// <summary> /// <summary>
@@ -237,10 +237,13 @@ public class HealthPageTests : BunitContext
Assert.Contains("data-test=\"site-health-trends\"", cut.Markup); Assert.Contains("data-test=\"site-health-trends\"", cut.Markup);
Assert.Contains("data-test=\"site-health-trends-site\"", cut.Markup); Assert.Contains("data-test=\"site-health-trends-site\"", cut.Markup);
// The four metric charts render (the shared KpiTrendChart slug hook), // The four metric charts render (the shared KpiTrendChart slug hook),
// and the seeded non-empty series draws a polyline. // and the seeded non-empty series draws a polyline. The "S&F Buffer
// Depth" title slugifies to "s-f-buffer-depth" (the & and the spaces
// each collapse to a dash) — see KpiTrendChart.Slugify.
Assert.Contains("kpi-trend-connections-down", cut.Markup); Assert.Contains("kpi-trend-connections-down", cut.Markup);
Assert.Contains("kpi-trend-dead-letters", cut.Markup); Assert.Contains("kpi-trend-dead-letters", cut.Markup);
Assert.Contains("kpi-trend-script-errors", cut.Markup); Assert.Contains("kpi-trend-script-errors", cut.Markup);
Assert.Contains("kpi-trend-s-f-buffer-depth", cut.Markup);
Assert.Contains("<polyline", cut.Markup); Assert.Contains("<polyline", cut.Markup);
}); });
} }
@@ -178,6 +178,44 @@ public class NotificationKpisPageTests : BunitContext
}); });
} }
[Fact]
public void PartialTrendFailure_IsolatesToTheFailingMetric()
{
// Per-metric isolation (K13 fixup): the substitute returns a known series
// for two metrics but THROWS for "parkedCount". The two surviving charts
// must still draw their polylines while only the failing chart degrades to
// the unavailable placeholder — one metric's failure no longer blanks all
// three (and does not discard the already-fetched siblings).
_kpiHistory.GetSeriesAsync(
Arg.Any<string>(), Arg.Is<string>(m => m == "parkedCount"), Arg.Any<string>(),
Arg.Any<string?>(), Arg.Any<DateTime>(), Arg.Any<DateTime>(),
Arg.Any<int?>(), Arg.Any<CancellationToken>())
.Returns<Task<IReadOnlyList<KpiSeriesPoint>>>(_ => throw new InvalidOperationException("parked metric down"));
var cut = Render<NotificationKpisPage>();
cut.WaitForAssertion(() =>
{
// The page is still alive — KPI tiles render and the trends section
// never went fully blank.
Assert.Contains("Queue Depth", cut.Markup);
Assert.NotNull(cut.Find("[data-test=\"notification-trends\"]"));
// The two surviving metrics (queueDepth, deliveredLastInterval) drew
// their polylines …
var queueChart = cut.Find("[data-test=\"kpi-trend-queue-depth\"]");
Assert.Contains("<polyline", queueChart.InnerHtml);
var deliveredChart = cut.Find("[data-test=\"kpi-trend-delivered-interval\"]");
Assert.Contains("<polyline", deliveredChart.InnerHtml);
// … while only the failing "Parked" chart degraded to the unavailable
// placeholder (no polyline, the short error message instead).
var parkedChart = cut.Find("[data-test=\"kpi-trend-parked\"]");
Assert.DoesNotContain("<polyline", parkedChart.InnerHtml);
Assert.Contains("Trend data unavailable.", parkedChart.InnerHtml);
});
}
[Fact] [Fact]
public void TrendQueryFailure_DoesNotBreakPage() public void TrendQueryFailure_DoesNotBreakPage()
{ {