feat(kpi): K16 — Health dashboard per-site trend panel
This commit is contained in:
@@ -10,12 +10,14 @@
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Kpi
|
||||
@using ZB.MOM.WW.ScadaBridge.Communication
|
||||
@implements IDisposable
|
||||
@inject ICentralHealthAggregator HealthAggregator
|
||||
@inject ISiteRepository SiteRepository
|
||||
@inject CommunicationService CommunicationService
|
||||
@inject IAuditLogQueryService AuditLogQueryService
|
||||
@inject IKpiHistoryQueryService KpiHistory
|
||||
|
||||
<div class="container-fluid mt-3">
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
@@ -76,6 +78,74 @@
|
||||
IsAvailable="@_auditKpiAvailable"
|
||||
ErrorMessage="@_auditKpiError" />
|
||||
|
||||
@* Site Health Trends (M6 K16) — per-site Site Health KPI history. Loads on a
|
||||
separate path from the 10s tile-refresh timer so a trend-query fault can
|
||||
never disturb the live dashboard or its polling loop. The site selector
|
||||
reuses the site keys already loaded into _siteStates; the window toggle
|
||||
drives the time range. Both re-query independently. *@
|
||||
<div class="card mb-3" data-test="site-health-trends">
|
||||
<div class="card-header d-flex justify-content-between align-items-center py-2">
|
||||
<div class="d-flex align-items-center">
|
||||
<h6 class="text-muted mb-0 me-3">Site Health Trends</h6>
|
||||
@if (_trendSiteKeys.Count > 0)
|
||||
{
|
||||
<select class="form-select form-select-sm" style="width:auto"
|
||||
data-test="site-health-trends-site"
|
||||
value="@_trendSiteId"
|
||||
@onchange="OnTrendSiteChangedAsync">
|
||||
@foreach (var key in _trendSiteKeys)
|
||||
{
|
||||
<option value="@key">@TrendSiteLabel(key)</option>
|
||||
}
|
||||
</select>
|
||||
}
|
||||
</div>
|
||||
<div class="btn-group btn-group-sm" role="group" aria-label="Trend window">
|
||||
<button type="button"
|
||||
class="btn @(_trendWindowHours == 24 ? "btn-primary" : "btn-outline-secondary")"
|
||||
@onclick="() => SetTrendWindowAsync(24)" disabled="@_trendsLoading">24h</button>
|
||||
<button type="button"
|
||||
class="btn @(_trendWindowHours == 168 ? "btn-primary" : "btn-outline-secondary")"
|
||||
@onclick="() => SetTrendWindowAsync(168)" disabled="@_trendsLoading">7d</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body p-3">
|
||||
@if (_trendSiteKeys.Count == 0)
|
||||
{
|
||||
<span class="text-muted small">No sites available for trends yet.</span>
|
||||
}
|
||||
else
|
||||
{
|
||||
<div class="row g-3">
|
||||
<div class="col-lg-3 col-md-6">
|
||||
<KpiTrendChart Title="Connections Down"
|
||||
Points="@_connectionsDownSeries"
|
||||
IsAvailable="@_connectionsDownAvailable"
|
||||
ErrorMessage="@_connectionsDownError" />
|
||||
</div>
|
||||
<div class="col-lg-3 col-md-6">
|
||||
<KpiTrendChart Title="Dead Letters"
|
||||
Points="@_deadLettersSeries"
|
||||
IsAvailable="@_deadLettersAvailable"
|
||||
ErrorMessage="@_deadLettersError" />
|
||||
</div>
|
||||
<div class="col-lg-3 col-md-6">
|
||||
<KpiTrendChart Title="Script Errors"
|
||||
Points="@_scriptErrorsSeries"
|
||||
IsAvailable="@_scriptErrorsAvailable"
|
||||
ErrorMessage="@_scriptErrorsError" />
|
||||
</div>
|
||||
<div class="col-lg-3 col-md-6">
|
||||
<KpiTrendChart Title="S&F Buffer Depth"
|
||||
Points="@_sfBufferDepthSeries"
|
||||
IsAvailable="@_sfBufferDepthAvailable"
|
||||
ErrorMessage="@_sfBufferDepthError" />
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@if (_siteStates.Count == 0)
|
||||
{
|
||||
<div class="alert alert-info">No site health reports received yet.</div>
|
||||
@@ -387,6 +457,36 @@
|
||||
Array.Empty<SiteCallNodeKpiSnapshot>();
|
||||
private bool _siteCallNodeKpiAvailable;
|
||||
|
||||
// ── Site Health Trends (M6 K16) ───────────────────────────────────────────
|
||||
// Per-site Site Health KPI history, loaded on a path entirely separate from
|
||||
// the 10s tile-refresh timer (LoadSiteHealthTrendsAsync, never called from
|
||||
// the timer tick). The site keys are a snapshot of the dashboard's site set,
|
||||
// captured each time trends load so the selector mirrors the live cards.
|
||||
// Window in hours: 24h (default) or 168h (7d). Changing the selected site OR
|
||||
// the window re-queries. Each metric chart carries its own availability +
|
||||
// error so one failed GetSeriesAsync degrades a single chart, never the
|
||||
// dashboard.
|
||||
private IReadOnlyList<string> _trendSiteKeys = Array.Empty<string>();
|
||||
private string? _trendSiteId;
|
||||
private int _trendWindowHours = 24;
|
||||
private bool _trendsLoading;
|
||||
|
||||
private IReadOnlyList<KpiSeriesPoint>? _connectionsDownSeries;
|
||||
private bool _connectionsDownAvailable = true;
|
||||
private string? _connectionsDownError;
|
||||
|
||||
private IReadOnlyList<KpiSeriesPoint>? _deadLettersSeries;
|
||||
private bool _deadLettersAvailable = true;
|
||||
private string? _deadLettersError;
|
||||
|
||||
private IReadOnlyList<KpiSeriesPoint>? _scriptErrorsSeries;
|
||||
private bool _scriptErrorsAvailable = true;
|
||||
private string? _scriptErrorsError;
|
||||
|
||||
private IReadOnlyList<KpiSeriesPoint>? _sfBufferDepthSeries;
|
||||
private bool _sfBufferDepthAvailable = true;
|
||||
private string? _sfBufferDepthError;
|
||||
|
||||
private static bool SiteHasActiveErrors(SiteHealthState state)
|
||||
{
|
||||
var report = state.LatestReport;
|
||||
@@ -410,6 +510,13 @@
|
||||
}
|
||||
|
||||
await RefreshNow();
|
||||
|
||||
// Site Health Trends (M6 K16) load on their own path — never from the
|
||||
// timer tick below — so a trend-query fault can't disturb the live tile
|
||||
// refresh. Seed the selector from the sites just loaded into _siteStates
|
||||
// and query the default site.
|
||||
await LoadSiteHealthTrendsAsync();
|
||||
|
||||
_refreshTimer = new Timer(_ =>
|
||||
{
|
||||
InvokeAsync(async () =>
|
||||
@@ -420,6 +527,109 @@
|
||||
}, null, TimeSpan.FromSeconds(_autoRefreshSeconds), TimeSpan.FromSeconds(_autoRefreshSeconds));
|
||||
}
|
||||
|
||||
// Change handler for the trend site selector. Ignores an empty value or a
// re-selection of the site already shown; otherwise records the new site and
// reloads the trend series without re-snapshotting the selector's site keys.
private async Task OnTrendSiteChangedAsync(ChangeEventArgs e)
{
    // `{ Length: > 0 }` matches only a non-null, non-empty string.
    if (e.Value?.ToString() is { Length: > 0 } requestedSite && requestedSite != _trendSiteId)
    {
        _trendSiteId = requestedSite;
        await LoadSiteHealthTrendsAsync(refreshSiteKeys: false);
    }
}
|
||||
|
||||
// Click handler for the window toggle (the markup passes 24 or 168 hours).
// Only a value different from the current window triggers a re-query; the
// selector's site keys are left as they are.
private async Task SetTrendWindowAsync(int windowHours)
{
    if (_trendWindowHours != windowHours)
    {
        _trendWindowHours = windowHours;
        await LoadSiteHealthTrendsAsync(refreshSiteKeys: false);
    }
}
|
||||
|
||||
// Loads the four Site Health trend series for the selected site over the
// selected window. Deliberately decoupled from RefreshNow / the tile-refresh
// timer: LoadTrendSeriesAsync converts any query fault into an "unavailable"
// result for that one chart, so nothing here propagates to the refresh loop.
//
// refreshSiteKeys re-snapshots the dashboard's site set into the selector
// (true on init); the site-change / window-toggle paths pass false so a
// mid-interaction site addition/removal can't yank the operator's choice.
//
// Overlapping loads: the operator can change the selection while an earlier
// load is still awaiting its queries. Each load therefore snapshots the site
// and window it was started for, fetches into locals, and commits the four
// charts together only if that snapshot still matches the current selection.
// A superseded load discards its results instead of overwriting newer ones.
private async Task LoadSiteHealthTrendsAsync(bool refreshSiteKeys = true)
{
    if (refreshSiteKeys)
    {
        // Mirror the dashboard ordering: central cluster pinned first, then
        // the remaining site keys in ascending order.
        _trendSiteKeys = _siteStates.Keys
            .OrderBy(k => k == CentralHealthReportLoop.CentralSiteId ? 0 : 1)
            .ThenBy(k => k)
            .ToList();

        // Default to the first site (or keep a still-valid prior selection).
        if (_trendSiteId == null || !_trendSiteKeys.Contains(_trendSiteId))
        {
            _trendSiteId = _trendSiteKeys.FirstOrDefault();
        }
    }

    if (string.IsNullOrEmpty(_trendSiteId))
    {
        // No site to query (e.g. no sites loaded yet) — leave the charts as-is.
        return;
    }

    // Snapshot what this load is for; the fields may change during the awaits.
    var siteId = _trendSiteId;
    var windowHours = _trendWindowHours;

    _trendsLoading = true;
    try
    {
        var toUtc = DateTime.UtcNow;
        var fromUtc = toUtc - TimeSpan.FromHours(windowHours);

        // Queries stay sequential, exactly as before; only the point at which
        // the results are published to the chart fields has changed.
        var connectionsDown = await LoadTrendSeriesAsync("connectionsDown", siteId, fromUtc, toUtc);
        var deadLetters = await LoadTrendSeriesAsync("deadLetters", siteId, fromUtc, toUtc);
        var scriptErrors = await LoadTrendSeriesAsync("scriptErrors", siteId, fromUtc, toUtc);
        var sfBufferDepth = await LoadTrendSeriesAsync("sfBufferDepth", siteId, fromUtc, toUtc);

        var superseded =
            !string.Equals(siteId, _trendSiteId, StringComparison.Ordinal)
            || windowHours != _trendWindowHours;
        if (superseded)
        {
            // A newer selection owns the charts now; drop these stale results.
            return;
        }

        (_connectionsDownSeries, _connectionsDownAvailable, _connectionsDownError) = connectionsDown;
        (_deadLettersSeries, _deadLettersAvailable, _deadLettersError) = deadLetters;
        (_scriptErrorsSeries, _scriptErrorsAvailable, _scriptErrorsError) = scriptErrors;
        (_sfBufferDepthSeries, _sfBufferDepthAvailable, _sfBufferDepthError) = sfBufferDepth;
    }
    finally
    {
        // Always cleared, so the window buttons can never stay disabled.
        _trendsLoading = false;
    }
}
|
||||
|
||||
// Best-effort fetch of one Site Health series. The query is issued with
// source = KpiSources.SiteHealth, scope = KpiScopes.Site and the selected site
// id as the scope key. Returns (series, true, null) on success; any exception
// from the query is swallowed and reported as (null, false, message) so the
// caller can show the chart's unavailable placeholder instead of failing.
private async Task<(IReadOnlyList<KpiSeriesPoint>?, bool, string?)> LoadTrendSeriesAsync(
    string metric, string siteId, DateTime fromUtc, DateTime toUtc)
{
    IReadOnlyList<KpiSeriesPoint>? points;
    try
    {
        points = await KpiHistory.GetSeriesAsync(
            KpiSources.SiteHealth, metric, KpiScopes.Site, siteId, fromUtc, toUtc);
    }
    catch
    {
        // Deliberate catch-all: a trend fault must never escape this method.
        return (null, false, "Trend data unavailable.");
    }

    return (points, true, null);
}
|
||||
|
||||
// Display text for one option in the trend site selector: the central site id
// gets a fixed label, every other key is shown as "<site name> (<key>)".
private string TrendSiteLabel(string siteKey)
{
    if (siteKey == CentralHealthReportLoop.CentralSiteId)
    {
        return "Central Cluster";
    }

    var siteName = GetSiteName(siteKey);
    return $"{siteName} ({siteKey})";
}
|
||||
|
||||
private async Task RefreshNow()
|
||||
{
|
||||
_siteStates = HealthAggregator.GetAllSiteStates();
|
||||
|
||||
@@ -14,6 +14,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Kpi;
|
||||
using ZB.MOM.WW.ScadaBridge.Communication;
|
||||
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
||||
using HealthPage = ZB.MOM.WW.ScadaBridge.CentralUI.Components.Pages.Monitoring.Health;
|
||||
@@ -65,6 +66,17 @@ public class HealthPageTests : BunitContext
|
||||
.Returns(new Dictionary<string, SiteHealthState>());
|
||||
Services.AddSingleton(aggregator);
|
||||
|
||||
// M6 K16 — the Health page now injects IKpiHistoryQueryService to feed the
|
||||
// per-site Site Health Trends panel. Stub it with a known non-empty series
|
||||
// so the page resolves the dependency and the trend charts have data; the
|
||||
// dedicated trend tests below seed sites / override behaviour.
|
||||
var kpiHistory = Substitute.For<IKpiHistoryQueryService>();
|
||||
kpiHistory.GetSeriesAsync(
|
||||
Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string?>(),
|
||||
Arg.Any<DateTime>(), Arg.Any<DateTime>(), Arg.Any<int?>(), Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<IReadOnlyList<KpiSeriesPoint>>(SampleSeries()));
|
||||
Services.AddSingleton(kpiHistory);
|
||||
|
||||
var siteRepo = Substitute.For<ISiteRepository>();
|
||||
siteRepo.GetAllSitesAsync(Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<IReadOnlyList<Site>>(new List<Site>()));
|
||||
@@ -210,6 +222,89 @@ public class HealthPageTests : BunitContext
|
||||
});
|
||||
}
|
||||
|
||||
[Fact]
public void Renders_SiteHealthTrends_PanelAndChart_ForSelectedSite()
{
    // One seeded site gives the selector an option and lets the default-site
    // load populate the charts.
    SeedSites("site-a");

    // Checked in order: the panel and selector hooks, three of the metric
    // chart slugs, and a polyline drawn from the stubbed non-empty series.
    string[] expectedFragments =
    {
        "data-test=\"site-health-trends\"",
        "data-test=\"site-health-trends-site\"",
        "kpi-trend-connections-down",
        "kpi-trend-dead-letters",
        "kpi-trend-script-errors",
        "<polyline",
    };

    var page = Render<HealthPage>();

    page.WaitForAssertion(() =>
    {
        foreach (var fragment in expectedFragments)
        {
            Assert.Contains(fragment, page.Markup);
        }
    });
}
|
||||
|
||||
[Fact]
public void SiteHealthTrendsFailure_DoesNotBreakDashboard()
{
    SeedSites("site-a");

    // Register a KPI-history substitute whose every GetSeriesAsync call throws.
    // The trend load is best-effort, so the page must still render.
    var brokenHistory = Substitute.For<IKpiHistoryQueryService>();
    brokenHistory
        .GetSeriesAsync(
            Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string?>(),
            Arg.Any<DateTime>(), Arg.Any<DateTime>(), Arg.Any<int?>(), Arg.Any<CancellationToken>())
        .Returns<Task<IReadOnlyList<KpiSeriesPoint>>>(_ =>
            throw new InvalidOperationException("kpi history unavailable"));
    Services.AddSingleton(brokenHistory);

    var page = Render<HealthPage>();

    page.WaitForAssertion(() =>
    {
        var markup = page.Markup;

        // The core tiles are still present, the trend panel is rendered, and
        // the charts show the per-chart unavailable message.
        Assert.Contains("Notification Outbox", markup);
        Assert.Contains("data-test=\"site-health-trends\"", markup);
        Assert.Contains("Trend data unavailable.", markup);
    });
}
|
||||
|
||||
// Registers a fresh aggregator substitute whose GetAllSiteStates() returns one
// online SiteHealthState per supplied site id, so the trend panel's selector
// has options. No report is set on the states.
private void SeedSites(params string[] siteIds)
{
    static SiteHealthState OnlineState(string siteId) => new SiteHealthState
    {
        SiteId = siteId,
        IsOnline = true,
        LastHeartbeatAt = DateTimeOffset.UtcNow,
    };

    var statesById = siteIds.ToDictionary(id => id, id => OnlineState(id));

    var seededAggregator = Substitute.For<ICentralHealthAggregator>();
    seededAggregator.GetAllSiteStates()
        .Returns(new Dictionary<string, SiteHealthState>(statesById));
    Services.AddSingleton(seededAggregator);
}
|
||||
|
||||
// Four-point sample series starting 24 hours before "now" (UTC) and spaced six
// hours apart. More than one point is supplied so the chart can draw a
// polyline instead of a placeholder.
private static IReadOnlyList<KpiSeriesPoint> SampleSeries()
{
    var windowStart = DateTime.UtcNow.AddHours(-24);

    var points = new List<KpiSeriesPoint>();
    points.Add(new KpiSeriesPoint(windowStart, 1));
    points.Add(new KpiSeriesPoint(windowStart.AddHours(6), 3));
    points.Add(new KpiSeriesPoint(windowStart.AddHours(12), 2));
    points.Add(new KpiSeriesPoint(windowStart.AddHours(18), 5));
    return points;
}
|
||||
|
||||
protected override void Dispose(bool disposing)
|
||||
{
|
||||
if (disposing)
|
||||
|
||||
Reference in New Issue
Block a user