From 7d7c6cbb05c06e25680786ac1ddf18e9f7073936 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Wed, 17 Jun 2026 20:36:09 -0400 Subject: [PATCH] =?UTF-8?q?feat(kpi):=20K16=20=E2=80=94=20Health=20dashboa?= =?UTF-8?q?rd=20per-site=20trend=20panel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Components/Pages/Monitoring/Health.razor | 210 ++++++++++++++++++ .../Pages/HealthPageTests.cs | 95 ++++++++ 2 files changed, 305 insertions(+) diff --git a/src/ZB.MOM.WW.ScadaBridge.CentralUI/Components/Pages/Monitoring/Health.razor b/src/ZB.MOM.WW.ScadaBridge.CentralUI/Components/Pages/Monitoring/Health.razor index f53d09a7..9ecc5ebd 100644 --- a/src/ZB.MOM.WW.ScadaBridge.CentralUI/Components/Pages/Monitoring/Health.razor +++ b/src/ZB.MOM.WW.ScadaBridge.CentralUI/Components/Pages/Monitoring/Health.razor @@ -10,12 +10,14 @@ @using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification @using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit @using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit +@using ZB.MOM.WW.ScadaBridge.Commons.Types.Kpi @using ZB.MOM.WW.ScadaBridge.Communication @implements IDisposable @inject ICentralHealthAggregator HealthAggregator @inject ISiteRepository SiteRepository @inject CommunicationService CommunicationService @inject IAuditLogQueryService AuditLogQueryService +@inject IKpiHistoryQueryService KpiHistory
@@ -76,6 +78,74 @@ IsAvailable="@_auditKpiAvailable" ErrorMessage="@_auditKpiError" /> + @* Site Health Trends (M6 K16) — per-site Site Health KPI history. Loads on a + separate path from the 10s tile-refresh timer so a trend-query fault can + never disturb the live dashboard or its polling loop. The site selector + reuses the site keys already loaded into _siteStates; the window toggle + drives the time range. Both re-query independently. *@ +
+
+
+
Site Health Trends
+ @if (_trendSiteKeys.Count > 0) + { + + } +
+
+ + +
+
+
+ @if (_trendSiteKeys.Count == 0) + { + No sites available for trends yet. + } + else + { +
+
+ +
+
+ +
+
+ +
+
+ +
+
+ } +
+
+ @if (_siteStates.Count == 0) {
No site health reports received yet.
@@ -387,6 +457,36 @@ Array.Empty(); private bool _siteCallNodeKpiAvailable; + // ── Site Health Trends (M6 K16) ─────────────────────────────────────────── + // Per-site Site Health KPI history, loaded on a path entirely separate from + // the 10s tile-refresh timer (LoadSiteHealthTrendsAsync, never called from + // the timer tick). The site keys are a snapshot of the dashboard's site set, + // captured only when trends load with refreshSiteKeys: true (the init load). + // Window in hours: 24h (default) or 168h (7d). Changing the selected site OR + // the window re-queries. Each metric chart carries its own availability + + // error so one failed GetSeriesAsync degrades a single chart, never the + // dashboard. + private IReadOnlyList _trendSiteKeys = Array.Empty(); + private string? _trendSiteId; + private int _trendWindowHours = 24; + private bool _trendsLoading; + + private IReadOnlyList? _connectionsDownSeries; + private bool _connectionsDownAvailable = true; + private string? _connectionsDownError; + + private IReadOnlyList? _deadLettersSeries; + private bool _deadLettersAvailable = true; + private string? _deadLettersError; + + private IReadOnlyList? _scriptErrorsSeries; + private bool _scriptErrorsAvailable = true; + private string? _scriptErrorsError; + + private IReadOnlyList? _sfBufferDepthSeries; + private bool _sfBufferDepthAvailable = true; + private string? _sfBufferDepthError; + private static bool SiteHasActiveErrors(SiteHealthState state) { var report = state.LatestReport; @@ -410,6 +510,13 @@ } await RefreshNow(); + + // Site Health Trends (M6 K16) load on their own path — never from the + // timer tick below — so a trend-query fault can't disturb the live tile + // refresh. Seed the selector from the sites just loaded into _siteStates + // and query the default site. 
+ await LoadSiteHealthTrendsAsync(); + _refreshTimer = new Timer(_ => { InvokeAsync(async () => @@ -420,6 +527,109 @@ }, null, TimeSpan.FromSeconds(_autoRefreshSeconds), TimeSpan.FromSeconds(_autoRefreshSeconds)); } + // Re-query when the operator picks a different site. Best-effort: the load + // itself swallows faults per-chart. + private async Task OnTrendSiteChangedAsync(ChangeEventArgs e) + { + var selected = e.Value?.ToString(); + if (string.IsNullOrEmpty(selected) || selected == _trendSiteId) + { + return; + } + + _trendSiteId = selected; + await LoadSiteHealthTrendsAsync(refreshSiteKeys: false); + } + + // Re-query when the window toggle changes (24h ↔ 7d). + private async Task SetTrendWindowAsync(int windowHours) + { + if (_trendWindowHours == windowHours) + { + return; + } + + _trendWindowHours = windowHours; + await LoadSiteHealthTrendsAsync(refreshSiteKeys: false); + } + + // Loads the four Site Health trend series for the selected site over the + // selected window. Deliberately decoupled from RefreshNow / the 10s timer: + // a fault here degrades the affected chart(s) only and never propagates to + // the tile-refresh loop. + // + // refreshSiteKeys re-snapshots the dashboard's site set into the selector + // (true on init); the site-change / window-toggle paths pass false so a + // mid-interaction site addition/removal can't yank the operator's choice. + private async Task LoadSiteHealthTrendsAsync(bool refreshSiteKeys = true) + { + if (refreshSiteKeys) + { + // Mirror the dashboard ordering: central cluster pinned first, then + // sites alphabetically — the same comparer the detail cards use. + _trendSiteKeys = _siteStates.Keys + .OrderBy(k => k == CentralHealthReportLoop.CentralSiteId ? 0 : 1) + .ThenBy(k => k) + .ToList(); + + // Default to the first site (or keep a still-valid prior selection). 
+ if (_trendSiteId == null || !_trendSiteKeys.Contains(_trendSiteId)) + { + _trendSiteId = _trendSiteKeys.FirstOrDefault(); + } + } + + if (string.IsNullOrEmpty(_trendSiteId)) + { + return; + } + + _trendsLoading = true; + try + { + var toUtc = DateTime.UtcNow; + var fromUtc = toUtc - TimeSpan.FromHours(_trendWindowHours); + var siteId = _trendSiteId; + + (_connectionsDownSeries, _connectionsDownAvailable, _connectionsDownError) = + await LoadTrendSeriesAsync("connectionsDown", siteId, fromUtc, toUtc); + (_deadLettersSeries, _deadLettersAvailable, _deadLettersError) = + await LoadTrendSeriesAsync("deadLetters", siteId, fromUtc, toUtc); + (_scriptErrorsSeries, _scriptErrorsAvailable, _scriptErrorsError) = + await LoadTrendSeriesAsync("scriptErrors", siteId, fromUtc, toUtc); + (_sfBufferDepthSeries, _sfBufferDepthAvailable, _sfBufferDepthError) = + await LoadTrendSeriesAsync("sfBufferDepth", siteId, fromUtc, toUtc); + } + finally + { + _trendsLoading = false; + } + } + + // Single best-effort series fetch. Site Health metrics are Site-scoped, so + // scope = KpiScopes.Site and scopeKey = the selected site id. On any fault + // the chart falls back to the unavailable placeholder — a failure here must + // NEVER break the dashboard. + private async Task<(IReadOnlyList?, bool, string?)> LoadTrendSeriesAsync( + string metric, string siteId, DateTime fromUtc, DateTime toUtc) + { + try + { + var series = await KpiHistory.GetSeriesAsync( + KpiSources.SiteHealth, metric, KpiScopes.Site, siteId, fromUtc, toUtc); + return (series, true, null); + } + catch + { + return (null, false, "Trend data unavailable."); + } + } + + private string TrendSiteLabel(string siteKey) => + siteKey == CentralHealthReportLoop.CentralSiteId + ? 
"Central Cluster" + : $"{GetSiteName(siteKey)} ({siteKey})"; + private async Task RefreshNow() { _siteStates = HealthAggregator.GetAllSiteStates(); diff --git a/tests/ZB.MOM.WW.ScadaBridge.CentralUI.Tests/Pages/HealthPageTests.cs b/tests/ZB.MOM.WW.ScadaBridge.CentralUI.Tests/Pages/HealthPageTests.cs index 66aff075..8a83fa75 100644 --- a/tests/ZB.MOM.WW.ScadaBridge.CentralUI.Tests/Pages/HealthPageTests.cs +++ b/tests/ZB.MOM.WW.ScadaBridge.CentralUI.Tests/Pages/HealthPageTests.cs @@ -14,6 +14,7 @@ using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit; using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification; using ZB.MOM.WW.ScadaBridge.Commons.Types; using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit; +using ZB.MOM.WW.ScadaBridge.Commons.Types.Kpi; using ZB.MOM.WW.ScadaBridge.Communication; using ZB.MOM.WW.ScadaBridge.HealthMonitoring; using HealthPage = ZB.MOM.WW.ScadaBridge.CentralUI.Components.Pages.Monitoring.Health; @@ -65,6 +66,17 @@ public class HealthPageTests : BunitContext .Returns(new Dictionary()); Services.AddSingleton(aggregator); + // M6 K16 — the Health page now injects IKpiHistoryQueryService to feed the + // per-site Site Health Trends panel. Stub it with a known non-empty series + // so the page resolves the dependency and the trend charts have data; the + // dedicated trend tests below seed sites / override behaviour. + var kpiHistory = Substitute.For(); + kpiHistory.GetSeriesAsync( + Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any(), + Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()) + .Returns(Task.FromResult>(SampleSeries())); + Services.AddSingleton(kpiHistory); + var siteRepo = Substitute.For(); siteRepo.GetAllSitesAsync(Arg.Any()) .Returns(Task.FromResult>(new List())); @@ -210,6 +222,89 @@ public class HealthPageTests : BunitContext }); } + [Fact] + public void Renders_SiteHealthTrends_PanelAndChart_ForSelectedSite() + { + // Seed one site so the trend panel's selector has an option and the + // default-site load produces charts. 
+ SeedSites("site-a"); + + var cut = Render(); + + cut.WaitForAssertion(() => + { + // The panel + its site selector render at the documented hooks. + Assert.Contains("data-test=\"site-health-trends\"", cut.Markup); + Assert.Contains("data-test=\"site-health-trends-site\"", cut.Markup); + // The four metric charts render (the shared KpiTrendChart slug hook), + // and the seeded non-empty series draws a polyline. + Assert.Contains("kpi-trend-connections-down", cut.Markup); + Assert.Contains("kpi-trend-dead-letters", cut.Markup); + Assert.Contains("kpi-trend-script-errors", cut.Markup); + Assert.Contains("(); + faulting.GetSeriesAsync( + Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any(), + Arg.Any(), Arg.Any(), Arg.Any(), Arg.Any()) + .Returns>>(_ => + throw new InvalidOperationException("kpi history unavailable")); + Services.AddSingleton(faulting); + + var cut = Render(); + + cut.WaitForAssertion(() => + { + // No unhandled exception: the core dashboard tiles still render, and + // the panel falls back to the per-chart unavailable placeholder. + Assert.Contains("Notification Outbox", cut.Markup); + Assert.Contains("data-test=\"site-health-trends\"", cut.Markup); + Assert.Contains("Trend data unavailable.", cut.Markup); + }); + } + + // Re-seeds the aggregator substitute so the trend panel's site selector has + // options. Each site id maps to a minimal online SiteHealthState (a null + // report is fine — the trend panel keys off the site ids, not the report). + private void SeedSites(params string[] siteIds) + { + var aggregator = Substitute.For(); + var states = siteIds.ToDictionary( + id => id, + id => new SiteHealthState + { + SiteId = id, + IsOnline = true, + LastHeartbeatAt = DateTimeOffset.UtcNow, + }); + aggregator.GetAllSiteStates() + .Returns(new Dictionary(states)); + Services.AddSingleton(aggregator); + } + + // A known non-empty (≥2-point) series so KpiTrendChart renders a polyline + // rather than the single-sample / unavailable placeholder. 
+ private static IReadOnlyList SampleSeries() + { + var baseUtc = DateTime.UtcNow.AddHours(-24); + return new List + { + new(baseUtc, 1), + new(baseUtc.AddHours(6), 3), + new(baseUtc.AddHours(12), 2), + new(baseUtc.AddHours(18), 5), + }; + } + protected override void Dispose(bool disposing) { if (disposing)