feat(kpi): K16 — Health dashboard per-site trend panel
This commit is contained in:
@@ -10,12 +10,14 @@
|
|||||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification
|
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification
|
||||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit
|
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit
|
||||||
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit
|
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit
|
||||||
|
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Kpi
|
||||||
@using ZB.MOM.WW.ScadaBridge.Communication
|
@using ZB.MOM.WW.ScadaBridge.Communication
|
||||||
@implements IDisposable
|
@implements IDisposable
|
||||||
@inject ICentralHealthAggregator HealthAggregator
|
@inject ICentralHealthAggregator HealthAggregator
|
||||||
@inject ISiteRepository SiteRepository
|
@inject ISiteRepository SiteRepository
|
||||||
@inject CommunicationService CommunicationService
|
@inject CommunicationService CommunicationService
|
||||||
@inject IAuditLogQueryService AuditLogQueryService
|
@inject IAuditLogQueryService AuditLogQueryService
|
||||||
|
@inject IKpiHistoryQueryService KpiHistory
|
||||||
|
|
||||||
<div class="container-fluid mt-3">
|
<div class="container-fluid mt-3">
|
||||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||||
@@ -76,6 +78,74 @@
|
|||||||
IsAvailable="@_auditKpiAvailable"
|
IsAvailable="@_auditKpiAvailable"
|
||||||
ErrorMessage="@_auditKpiError" />
|
ErrorMessage="@_auditKpiError" />
|
||||||
|
|
||||||
|
@* Site Health Trends (M6 K16) — per-site Site Health KPI history. Loads on a
   separate path from the 10s tile-refresh timer so a trend-query fault can
   never disturb the live dashboard or its polling loop. The site selector
   reuses the site keys already loaded into _siteStates; the window toggle
   drives the time range. Both re-query independently, and both controls are
   disabled while a query is in flight (_trendsLoading) so a second load
   cannot be started from the UI before the current one has finished. *@
<div class="card mb-3" data-test="site-health-trends">
    <div class="card-header d-flex justify-content-between align-items-center py-2">
        <div class="d-flex align-items-center">
            <h6 class="text-muted mb-0 me-3">Site Health Trends</h6>
            @if (_trendSiteKeys.Count > 0)
            {
                @* Disabled during a load, matching the window buttons below. *@
                <select class="form-select form-select-sm" style="width:auto"
                        data-test="site-health-trends-site"
                        value="@_trendSiteId"
                        @onchange="OnTrendSiteChangedAsync"
                        disabled="@_trendsLoading">
                    @foreach (var key in _trendSiteKeys)
                    {
                        <option value="@key">@TrendSiteLabel(key)</option>
                    }
                </select>
            }
        </div>
        <div class="btn-group btn-group-sm" role="group" aria-label="Trend window">
            <button type="button"
                    class="btn @(_trendWindowHours == 24 ? "btn-primary" : "btn-outline-secondary")"
                    @onclick="() => SetTrendWindowAsync(24)" disabled="@_trendsLoading">24h</button>
            <button type="button"
                    class="btn @(_trendWindowHours == 168 ? "btn-primary" : "btn-outline-secondary")"
                    @onclick="() => SetTrendWindowAsync(168)" disabled="@_trendsLoading">7d</button>
        </div>
    </div>
    <div class="card-body p-3">
        @if (_trendSiteKeys.Count == 0)
        {
            <span class="text-muted small">No sites available for trends yet.</span>
        }
        else
        {
            @* One chart per Site Health metric; each carries its own
               availability flag and error text so a single failed query
               degrades only that chart. *@
            <div class="row g-3">
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="Connections Down"
                                   Points="@_connectionsDownSeries"
                                   IsAvailable="@_connectionsDownAvailable"
                                   ErrorMessage="@_connectionsDownError" />
                </div>
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="Dead Letters"
                                   Points="@_deadLettersSeries"
                                   IsAvailable="@_deadLettersAvailable"
                                   ErrorMessage="@_deadLettersError" />
                </div>
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="Script Errors"
                                   Points="@_scriptErrorsSeries"
                                   IsAvailable="@_scriptErrorsAvailable"
                                   ErrorMessage="@_scriptErrorsError" />
                </div>
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="S&F Buffer Depth"
                                   Points="@_sfBufferDepthSeries"
                                   IsAvailable="@_sfBufferDepthAvailable"
                                   ErrorMessage="@_sfBufferDepthError" />
                </div>
            </div>
        }
    </div>
</div>
|
||||||
|
|
||||||
@if (_siteStates.Count == 0)
|
@if (_siteStates.Count == 0)
|
||||||
{
|
{
|
||||||
<div class="alert alert-info">No site health reports received yet.</div>
|
<div class="alert alert-info">No site health reports received yet.</div>
|
||||||
@@ -387,6 +457,36 @@
|
|||||||
Array.Empty<SiteCallNodeKpiSnapshot>();
|
Array.Empty<SiteCallNodeKpiSnapshot>();
|
||||||
private bool _siteCallNodeKpiAvailable;
|
private bool _siteCallNodeKpiAvailable;
|
||||||
|
|
||||||
|
// ── Site Health Trends (M6 K16) ───────────────────────────────────────────
|
||||||
|
// Per-site Site Health KPI history, loaded on a path entirely separate from
|
||||||
|
// the 10s tile-refresh timer (LoadSiteHealthTrendsAsync, never called from
|
||||||
|
// the timer tick). The site keys are a snapshot of the dashboard's site set,
|
||||||
|
// captured when trends load with refreshSiteKeys: true (the init path); site/window re-queries reuse that snapshot.
|
||||||
|
// Window in hours: 24h (default) or 168h (7d). Changing the selected site OR
|
||||||
|
// the window re-queries. Each metric chart carries its own availability +
|
||||||
|
// error so one failed GetSeriesAsync degrades a single chart, never the
|
||||||
|
// dashboard.
|
||||||
|
private IReadOnlyList<string> _trendSiteKeys = Array.Empty<string>();
|
||||||
|
private string? _trendSiteId;
|
||||||
|
private int _trendWindowHours = 24;
|
||||||
|
private bool _trendsLoading;
|
||||||
|
|
||||||
|
private IReadOnlyList<KpiSeriesPoint>? _connectionsDownSeries;
|
||||||
|
private bool _connectionsDownAvailable = true;
|
||||||
|
private string? _connectionsDownError;
|
||||||
|
|
||||||
|
private IReadOnlyList<KpiSeriesPoint>? _deadLettersSeries;
|
||||||
|
private bool _deadLettersAvailable = true;
|
||||||
|
private string? _deadLettersError;
|
||||||
|
|
||||||
|
private IReadOnlyList<KpiSeriesPoint>? _scriptErrorsSeries;
|
||||||
|
private bool _scriptErrorsAvailable = true;
|
||||||
|
private string? _scriptErrorsError;
|
||||||
|
|
||||||
|
private IReadOnlyList<KpiSeriesPoint>? _sfBufferDepthSeries;
|
||||||
|
private bool _sfBufferDepthAvailable = true;
|
||||||
|
private string? _sfBufferDepthError;
|
||||||
|
|
||||||
private static bool SiteHasActiveErrors(SiteHealthState state)
|
private static bool SiteHasActiveErrors(SiteHealthState state)
|
||||||
{
|
{
|
||||||
var report = state.LatestReport;
|
var report = state.LatestReport;
|
||||||
@@ -410,6 +510,13 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
await RefreshNow();
|
await RefreshNow();
|
||||||
|
|
||||||
|
// Site Health Trends (M6 K16) load on their own path — never from the
|
||||||
|
// timer tick below — so a trend-query fault can't disturb the live tile
|
||||||
|
// refresh. Seed the selector from the sites just loaded into _siteStates
|
||||||
|
// and query the default site.
|
||||||
|
await LoadSiteHealthTrendsAsync();
|
||||||
|
|
||||||
_refreshTimer = new Timer(_ =>
|
_refreshTimer = new Timer(_ =>
|
||||||
{
|
{
|
||||||
InvokeAsync(async () =>
|
InvokeAsync(async () =>
|
||||||
@@ -420,6 +527,109 @@
|
|||||||
}, null, TimeSpan.FromSeconds(_autoRefreshSeconds), TimeSpan.FromSeconds(_autoRefreshSeconds));
|
}, null, TimeSpan.FromSeconds(_autoRefreshSeconds), TimeSpan.FromSeconds(_autoRefreshSeconds));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Re-queries the trend charts when the operator picks a different site.
//
// The incoming value is ignored when it is empty, equal to the current
// selection, or not one of the keys currently offered by the selector. The
// event payload originates in the browser, so it is checked against
// _trendSiteKeys before it is used as the scope key of a KPI query.
// Best-effort: the load itself swallows faults per-chart.
private async Task OnTrendSiteChangedAsync(ChangeEventArgs e)
{
    var selected = e.Value?.ToString();
    if (string.IsNullOrEmpty(selected) || selected == _trendSiteId)
    {
        return;
    }

    // Only accept ids that were actually rendered as <option> values.
    if (!_trendSiteKeys.Contains(selected))
    {
        return;
    }

    _trendSiteId = selected;

    // refreshSiteKeys: false keeps the option list stable during the interaction.
    await LoadSiteHealthTrendsAsync(refreshSiteKeys: false);
}
|
||||||
|
|
||||||
|
// Switches the trend window (in hours; the UI offers 24 and 168) and
// re-queries. Selecting the window that is already active does nothing.
private async Task SetTrendWindowAsync(int windowHours)
{
    if (_trendWindowHours != windowHours)
    {
        _trendWindowHours = windowHours;

        // Keep the current site list; only the time range changed.
        await LoadSiteHealthTrendsAsync(refreshSiteKeys: false);
    }
}
|
||||||
|
|
||||||
|
// Identifies the most recently started trend load. Incremented once per load
// that actually issues queries; a load whose token no longer matches has been
// superseded and must not publish its results or clear the loading flag.
private int _trendLoadGeneration;

// Loads the four Site Health trend series for the selected site over the
// selected window. Deliberately decoupled from RefreshNow / the 10s timer:
// a fault here degrades the affected chart(s) only and never propagates to
// the tile-refresh loop.
//
// refreshSiteKeys re-snapshots the dashboard's site set into the selector
// (true on init); the site-change / window-toggle paths pass false so a
// mid-interaction site addition/removal can't yank the operator's choice.
//
// Loads can overlap: each await yields, so a second load may start (for
// example after a site change) while an earlier one is still querying. The
// four results are therefore collected first and published together, and
// only by the load that is still the latest one. This prevents an older
// load from overwriting charts with data for a previously selected
// site/window, or from clearing _trendsLoading while a newer load is still
// running.
private async Task LoadSiteHealthTrendsAsync(bool refreshSiteKeys = true)
{
    if (refreshSiteKeys)
    {
        // Mirror the dashboard ordering: central cluster pinned first, then
        // sites alphabetically — the same comparer the detail cards use.
        _trendSiteKeys = _siteStates.Keys
            .OrderBy(k => k == CentralHealthReportLoop.CentralSiteId ? 0 : 1)
            .ThenBy(k => k)
            .ToList();

        // Default to the first site (or keep a still-valid prior selection).
        if (_trendSiteId == null || !_trendSiteKeys.Contains(_trendSiteId))
        {
            _trendSiteId = _trendSiteKeys.FirstOrDefault();
        }
    }

    // Nothing to query until at least one site is known.
    if (string.IsNullOrEmpty(_trendSiteId))
    {
        return;
    }

    // Taken only after the early-return above, so every token handed out
    // corresponds to a load that reaches the try/finally below.
    var generation = ++_trendLoadGeneration;
    _trendsLoading = true;
    try
    {
        // Capture the query inputs up front so all four series share the
        // same site and time range even if the fields change mid-load.
        var toUtc = DateTime.UtcNow;
        var fromUtc = toUtc - TimeSpan.FromHours(_trendWindowHours);
        var siteId = _trendSiteId;

        var connectionsDown = await LoadTrendSeriesAsync("connectionsDown", siteId, fromUtc, toUtc);
        var deadLetters = await LoadTrendSeriesAsync("deadLetters", siteId, fromUtc, toUtc);
        var scriptErrors = await LoadTrendSeriesAsync("scriptErrors", siteId, fromUtc, toUtc);
        var sfBufferDepth = await LoadTrendSeriesAsync("sfBufferDepth", siteId, fromUtc, toUtc);

        if (generation != _trendLoadGeneration)
        {
            // A newer load started while this one was querying; its results win.
            return;
        }

        (_connectionsDownSeries, _connectionsDownAvailable, _connectionsDownError) = connectionsDown;
        (_deadLettersSeries, _deadLettersAvailable, _deadLettersError) = deadLetters;
        (_scriptErrorsSeries, _scriptErrorsAvailable, _scriptErrorsError) = scriptErrors;
        (_sfBufferDepthSeries, _sfBufferDepthAvailable, _sfBufferDepthError) = sfBufferDepth;
    }
    finally
    {
        // Only the latest load may clear the flag; a superseded load leaves
        // it set for the newer load that is still in flight.
        if (generation == _trendLoadGeneration)
        {
            _trendsLoading = false;
        }
    }
}
|
||||||
|
|
||||||
|
// Fetches one Site Health series on a best-effort basis. The query is issued
// with source KpiSources.SiteHealth, scope KpiScopes.Site and the selected
// site id as the scope key.
//
// Returns (series, true, null) on success. If the query throws, returns
// (null, false, "Trend data unavailable.") so the chart shows its
// unavailable placeholder — a failure here must NEVER break the dashboard.
private async Task<(IReadOnlyList<KpiSeriesPoint>?, bool, string?)> LoadTrendSeriesAsync(
    string metric, string siteId, DateTime fromUtc, DateTime toUtc)
{
    IReadOnlyList<KpiSeriesPoint>? points;
    try
    {
        points = await KpiHistory.GetSeriesAsync(
            KpiSources.SiteHealth, metric, KpiScopes.Site, siteId, fromUtc, toUtc);
    }
    catch
    {
        // Deliberate catch-all: any fault degrades this one chart only.
        return (null, false, "Trend data unavailable.");
    }

    return (points, true, null);
}
|
||||||
|
|
||||||
|
// Display text for one selector option: the central cluster's key gets the
// fixed label "Central Cluster"; any other key is shown as
// "<site name> (<site key>)".
private string TrendSiteLabel(string siteKey)
{
    if (siteKey == CentralHealthReportLoop.CentralSiteId)
    {
        return "Central Cluster";
    }

    var siteName = GetSiteName(siteKey);
    return $"{siteName} ({siteKey})";
}
|
||||||
|
|
||||||
private async Task RefreshNow()
|
private async Task RefreshNow()
|
||||||
{
|
{
|
||||||
_siteStates = HealthAggregator.GetAllSiteStates();
|
_siteStates = HealthAggregator.GetAllSiteStates();
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit;
|
|||||||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification;
|
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification;
|
||||||
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
using ZB.MOM.WW.ScadaBridge.Commons.Types;
|
||||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit;
|
||||||
|
using ZB.MOM.WW.ScadaBridge.Commons.Types.Kpi;
|
||||||
using ZB.MOM.WW.ScadaBridge.Communication;
|
using ZB.MOM.WW.ScadaBridge.Communication;
|
||||||
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
||||||
using HealthPage = ZB.MOM.WW.ScadaBridge.CentralUI.Components.Pages.Monitoring.Health;
|
using HealthPage = ZB.MOM.WW.ScadaBridge.CentralUI.Components.Pages.Monitoring.Health;
|
||||||
@@ -65,6 +66,17 @@ public class HealthPageTests : BunitContext
|
|||||||
.Returns(new Dictionary<string, SiteHealthState>());
|
.Returns(new Dictionary<string, SiteHealthState>());
|
||||||
Services.AddSingleton(aggregator);
|
Services.AddSingleton(aggregator);
|
||||||
|
|
||||||
|
// M6 K16 — the Health page now injects IKpiHistoryQueryService to feed the
|
||||||
|
// per-site Site Health Trends panel. Stub it with a known non-empty series
|
||||||
|
// so the page resolves the dependency and the trend charts have data; the
|
||||||
|
// dedicated trend tests below seed sites / override behaviour.
|
||||||
|
var kpiHistory = Substitute.For<IKpiHistoryQueryService>();
|
||||||
|
kpiHistory.GetSeriesAsync(
|
||||||
|
Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string?>(),
|
||||||
|
Arg.Any<DateTime>(), Arg.Any<DateTime>(), Arg.Any<int?>(), Arg.Any<CancellationToken>())
|
||||||
|
.Returns(Task.FromResult<IReadOnlyList<KpiSeriesPoint>>(SampleSeries()));
|
||||||
|
Services.AddSingleton(kpiHistory);
|
||||||
|
|
||||||
var siteRepo = Substitute.For<ISiteRepository>();
|
var siteRepo = Substitute.For<ISiteRepository>();
|
||||||
siteRepo.GetAllSitesAsync(Arg.Any<CancellationToken>())
|
siteRepo.GetAllSitesAsync(Arg.Any<CancellationToken>())
|
||||||
.Returns(Task.FromResult<IReadOnlyList<Site>>(new List<Site>()));
|
.Returns(Task.FromResult<IReadOnlyList<Site>>(new List<Site>()));
|
||||||
@@ -210,6 +222,89 @@ public class HealthPageTests : BunitContext
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void Renders_SiteHealthTrends_PanelAndChart_ForSelectedSite()
{
    // A single seeded site gives the selector an option, so the
    // default-site load runs and produces charts.
    SeedSites("site-a");

    var cut = Render<HealthPage>();

    // Expected in the rendered markup, in order: the panel hook, the site
    // selector hook, three of the metric-chart slug hooks (the S&F Buffer
    // Depth chart's slug is not asserted here), and a polyline drawn from
    // the stubbed non-empty series.
    var expectedFragments = new[]
    {
        "data-test=\"site-health-trends\"",
        "data-test=\"site-health-trends-site\"",
        "kpi-trend-connections-down",
        "kpi-trend-dead-letters",
        "kpi-trend-script-errors",
        "<polyline",
    };

    cut.WaitForAssertion(() =>
    {
        var markup = cut.Markup;
        foreach (var fragment in expectedFragments)
        {
            Assert.Contains(fragment, markup);
        }
    });
}
|
||||||
|
|
||||||
|
[Fact]
public void SiteHealthTrendsFailure_DoesNotBreakDashboard()
{
    SeedSites("site-a");

    // Register a KPI-history substitute whose GetSeriesAsync throws for
    // every argument combination. The trend load is best-effort, so the
    // page is still expected to render.
    var throwingHistory = Substitute.For<IKpiHistoryQueryService>();
    throwingHistory
        .GetSeriesAsync(
            Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string?>(),
            Arg.Any<DateTime>(), Arg.Any<DateTime>(), Arg.Any<int?>(), Arg.Any<CancellationToken>())
        .Returns<Task<IReadOnlyList<KpiSeriesPoint>>>(_ =>
            throw new InvalidOperationException("kpi history unavailable"));
    Services.AddSingleton(throwingHistory);

    var cut = Render<HealthPage>();

    // No unhandled exception: a core dashboard tile and the trends panel
    // are both present, and the panel shows the per-chart unavailable text.
    var expectedFragments = new[]
    {
        "Notification Outbox",
        "data-test=\"site-health-trends\"",
        "Trend data unavailable.",
    };

    cut.WaitForAssertion(() =>
    {
        var markup = cut.Markup;
        foreach (var fragment in expectedFragments)
        {
            Assert.Contains(fragment, markup);
        }
    });
}
|
||||||
|
|
||||||
|
// Registers a fresh aggregator substitute whose GetAllSiteStates() returns
// one online SiteHealthState per given site id, so the trend panel's site
// selector has options. No report is set on the states — the trend panel
// keys off the site ids, not the report.
private void SeedSites(params string[] siteIds)
{
    var statesById = new Dictionary<string, SiteHealthState>();
    foreach (var siteId in siteIds)
    {
        // Add (not the indexer) so a duplicate id still throws.
        statesById.Add(siteId, new SiteHealthState
        {
            SiteId = siteId,
            IsOnline = true,
            LastHeartbeatAt = DateTimeOffset.UtcNow,
        });
    }

    var seededAggregator = Substitute.For<ICentralHealthAggregator>();
    seededAggregator.GetAllSiteStates()
        .Returns(new Dictionary<string, SiteHealthState>(statesById));
    Services.AddSingleton(seededAggregator);
}
|
||||||
|
|
||||||
|
// Builds a four-point sample series starting 24 hours before now, with
// points at +0h, +6h, +12h and +18h and values 1, 3, 2, 5. Having at least
// two points lets KpiTrendChart draw a polyline rather than the
// single-sample / unavailable placeholder.
private static IReadOnlyList<KpiSeriesPoint> SampleSeries()
{
    var start = DateTime.UtcNow.AddHours(-24);

    var points = new List<KpiSeriesPoint>();
    points.Add(new KpiSeriesPoint(start, 1));
    points.Add(new KpiSeriesPoint(start.AddHours(6), 3));
    points.Add(new KpiSeriesPoint(start.AddHours(12), 2));
    points.Add(new KpiSeriesPoint(start.AddHours(18), 5));
    return points;
}
|
||||||
|
|
||||||
protected override void Dispose(bool disposing)
|
protected override void Dispose(bool disposing)
|
||||||
{
|
{
|
||||||
if (disposing)
|
if (disposing)
|
||||||
|
|||||||
Reference in New Issue
Block a user