feat(kpi): K16 — Health dashboard per-site trend panel
This commit is contained in:
@@ -10,12 +10,14 @@
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Notification
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Messages.Audit
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Audit
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Kpi
|
||||
@using ZB.MOM.WW.ScadaBridge.Communication
|
||||
@implements IDisposable
|
||||
@inject ICentralHealthAggregator HealthAggregator
|
||||
@inject ISiteRepository SiteRepository
|
||||
@inject CommunicationService CommunicationService
|
||||
@inject IAuditLogQueryService AuditLogQueryService
|
||||
@inject IKpiHistoryQueryService KpiHistory
|
||||
|
||||
<div class="container-fluid mt-3">
|
||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||
@@ -76,6 +78,74 @@
|
||||
IsAvailable="@_auditKpiAvailable"
|
||||
ErrorMessage="@_auditKpiError" />
|
||||
|
||||
@* Site Health Trends (M6 K16) — per-site Site Health KPI history. Loads on a
|
||||
separate path from the 10s tile-refresh timer so a trend-query fault can
|
||||
never disturb the live dashboard or its polling loop. The site selector
|
||||
reuses the site keys already loaded into _siteStates; the window toggle
|
||||
drives the time range. Both re-query independently. *@
|
||||
<div class="card mb-3" data-test="site-health-trends">
    <div class="card-header d-flex justify-content-between align-items-center py-2">
        <div class="d-flex align-items-center">
            <h6 class="text-muted mb-0 me-3">Site Health Trends</h6>
            @if (_trendSiteKeys.Count > 0)
            {
                @* The selector is disabled while a trend load is in flight, exactly
                   like the window buttons, so the operator cannot start a second
                   overlapping load whose results could arrive out of order. *@
                <select class="form-select form-select-sm" style="width:auto"
                        data-test="site-health-trends-site"
                        aria-label="Trend site"
                        value="@_trendSiteId"
                        disabled="@_trendsLoading"
                        @onchange="OnTrendSiteChangedAsync">
                    @foreach (var key in _trendSiteKeys)
                    {
                        <option value="@key">@TrendSiteLabel(key)</option>
                    }
                </select>
            }
        </div>
        @* Window toggle: the active window gets the primary style; both buttons
           are disabled while a load is running. *@
        <div class="btn-group btn-group-sm" role="group" aria-label="Trend window">
            <button type="button"
                    class="btn @(_trendWindowHours == 24 ? "btn-primary" : "btn-outline-secondary")"
                    @onclick="() => SetTrendWindowAsync(24)" disabled="@_trendsLoading">24h</button>
            <button type="button"
                    class="btn @(_trendWindowHours == 168 ? "btn-primary" : "btn-outline-secondary")"
                    @onclick="() => SetTrendWindowAsync(168)" disabled="@_trendsLoading">7d</button>
        </div>
    </div>
    <div class="card-body p-3">
        @if (_trendSiteKeys.Count == 0)
        {
            <span class="text-muted small">No sites available for trends yet.</span>
        }
        else
        {
            @* One chart per metric; each carries its own availability flag and
               error message so a failed series only affects its own chart. *@
            <div class="row g-3">
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="Connections Down"
                                   Points="@_connectionsDownSeries"
                                   IsAvailable="@_connectionsDownAvailable"
                                   ErrorMessage="@_connectionsDownError" />
                </div>
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="Dead Letters"
                                   Points="@_deadLettersSeries"
                                   IsAvailable="@_deadLettersAvailable"
                                   ErrorMessage="@_deadLettersError" />
                </div>
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="Script Errors"
                                   Points="@_scriptErrorsSeries"
                                   IsAvailable="@_scriptErrorsAvailable"
                                   ErrorMessage="@_scriptErrorsError" />
                </div>
                <div class="col-lg-3 col-md-6">
                    <KpiTrendChart Title="S&F Buffer Depth"
                                   Points="@_sfBufferDepthSeries"
                                   IsAvailable="@_sfBufferDepthAvailable"
                                   ErrorMessage="@_sfBufferDepthError" />
                </div>
            </div>
        }
    </div>
</div>
|
||||
|
||||
@if (_siteStates.Count == 0)
|
||||
{
|
||||
<div class="alert alert-info">No site health reports received yet.</div>
|
||||
@@ -387,6 +457,36 @@
|
||||
Array.Empty<SiteCallNodeKpiSnapshot>();
|
||||
private bool _siteCallNodeKpiAvailable;
|
||||
|
||||
// ── Site Health Trends (M6 K16) ───────────────────────────────────────────
|
||||
// Per-site Site Health KPI history, loaded on a path entirely separate from
|
||||
// the 10s tile-refresh timer (LoadSiteHealthTrendsAsync, never called from
|
||||
// the timer tick). The site keys are a snapshot of the dashboard's site set,
|
||||
// captured when trends load with refreshSiteKeys set (the initial load; site and window changes reuse it), so the selector mirrors the cards shown at that time.
|
||||
// Window in hours: 24h (default) or 168h (7d). Changing the selected site OR
|
||||
// the window re-queries. Each metric chart carries its own availability +
|
||||
// error so one failed GetSeriesAsync degrades a single chart, never the
|
||||
// dashboard.
|
||||
// Site keys offered by the trend site selector; rebuilt from _siteStates.Keys
// by LoadSiteHealthTrendsAsync when it is asked to refresh the site keys.
private IReadOnlyList<string> _trendSiteKeys = Array.Empty<string>();
|
||||
// Site key currently selected for trends; null until a load picks a default.
private string? _trendSiteId;
|
||||
// Trend window length in hours. The UI toggles between 24 and 168 (7d).
private int _trendWindowHours = 24;
|
||||
// True while LoadSiteHealthTrendsAsync is fetching; disables the window buttons.
private bool _trendsLoading;
|
||||
|
||||
// "Connections Down" chart: points, availability flag and error message.
private IReadOnlyList<KpiSeriesPoint>? _connectionsDownSeries;
|
||||
private bool _connectionsDownAvailable = true;
|
||||
private string? _connectionsDownError;
|
||||
|
||||
// "Dead Letters" chart: points, availability flag and error message.
private IReadOnlyList<KpiSeriesPoint>? _deadLettersSeries;
|
||||
private bool _deadLettersAvailable = true;
|
||||
private string? _deadLettersError;
|
||||
|
||||
// "Script Errors" chart: points, availability flag and error message.
private IReadOnlyList<KpiSeriesPoint>? _scriptErrorsSeries;
|
||||
private bool _scriptErrorsAvailable = true;
|
||||
private string? _scriptErrorsError;
|
||||
|
||||
// "S&F Buffer Depth" chart: points, availability flag and error message.
private IReadOnlyList<KpiSeriesPoint>? _sfBufferDepthSeries;
|
||||
private bool _sfBufferDepthAvailable = true;
|
||||
private string? _sfBufferDepthError;
|
||||
|
||||
private static bool SiteHasActiveErrors(SiteHealthState state)
|
||||
{
|
||||
var report = state.LatestReport;
|
||||
@@ -410,6 +510,13 @@
|
||||
}
|
||||
|
||||
await RefreshNow();
|
||||
|
||||
// Site Health Trends (M6 K16) load on their own path — never from the
|
||||
// timer tick below — so a trend-query fault can't disturb the live tile
|
||||
// refresh. Seed the selector from the sites just loaded into _siteStates
|
||||
// and query the default site.
|
||||
await LoadSiteHealthTrendsAsync();
|
||||
|
||||
_refreshTimer = new Timer(_ =>
|
||||
{
|
||||
InvokeAsync(async () =>
|
||||
@@ -420,6 +527,109 @@
|
||||
}, null, TimeSpan.FromSeconds(_autoRefreshSeconds), TimeSpan.FromSeconds(_autoRefreshSeconds));
|
||||
}
|
||||
|
||||
// Change handler for the trend site selector. Ignores empty values and
// re-selection of the current site; otherwise records the new site and
// reloads the trend series without re-snapshotting the site keys.
private async Task OnTrendSiteChangedAsync(ChangeEventArgs e)
{
    var pickedSite = e.Value?.ToString();
    var isNewSelection = !string.IsNullOrEmpty(pickedSite) && pickedSite != _trendSiteId;

    if (isNewSelection)
    {
        _trendSiteId = pickedSite;
        await LoadSiteHealthTrendsAsync(refreshSiteKeys: false);
    }
}
|
||||
|
||||
// Click handler for the window toggle (24h / 7d). A click on the window
// that is already active is a no-op; any other value is stored and the
// trend series are reloaded without re-snapshotting the site keys.
private async Task SetTrendWindowAsync(int windowHours)
{
    if (_trendWindowHours != windowHours)
    {
        _trendWindowHours = windowHours;
        await LoadSiteHealthTrendsAsync(refreshSiteKeys: false);
    }
}
|
||||
|
||||
// Generation number of the most recently started trend load. Each load
// captures its own value; a load whose value no longer matches has been
// superseded by a newer one and must not publish results or clear the
// loading flag.
private int _trendLoadGeneration;

// Loads the four Site Health trend series (connectionsDown, deadLetters,
// scriptErrors, sfBufferDepth) for the selected site over the selected
// window, ending at the current UTC time.
//
// refreshSiteKeys: when true (the default), the selector's site list is
// rebuilt from _siteStates and the selection falls back to the first entry
// if the previous choice is missing. The site-change and window-toggle
// handlers pass false so the operator's current choice is left untouched.
//
// Returns without querying when no site is selected. Per-series faults are
// handled inside LoadTrendSeriesAsync, so each chart degrades on its own.
//
// Overlap handling: the site selector can fire again while a load is still
// awaiting. Results are therefore collected into locals and only published
// if this load is still the newest one; otherwise an older, slower query
// could overwrite the charts with data for a site that is no longer
// selected and clear _trendsLoading while the newer load is still running.
private async Task LoadSiteHealthTrendsAsync(bool refreshSiteKeys = true)
{
    if (refreshSiteKeys)
    {
        // Central cluster pinned first, then the remaining keys in ascending
        // order (intended to mirror the dashboard card ordering).
        _trendSiteKeys = _siteStates.Keys
            .OrderBy(k => k == CentralHealthReportLoop.CentralSiteId ? 0 : 1)
            .ThenBy(k => k)
            .ToList();

        // Default to the first site (or keep a still-valid prior selection).
        if (_trendSiteId == null || !_trendSiteKeys.Contains(_trendSiteId))
        {
            _trendSiteId = _trendSiteKeys.FirstOrDefault();
        }
    }

    if (string.IsNullOrEmpty(_trendSiteId))
    {
        return;
    }

    var generation = ++_trendLoadGeneration;
    _trendsLoading = true;
    try
    {
        var toUtc = DateTime.UtcNow;
        var fromUtc = toUtc - TimeSpan.FromHours(_trendWindowHours);
        var siteId = _trendSiteId;

        // Fetched one after another, as before, so the history service is
        // never asked to run these queries concurrently.
        var connectionsDown = await LoadTrendSeriesAsync("connectionsDown", siteId, fromUtc, toUtc);
        var deadLetters = await LoadTrendSeriesAsync("deadLetters", siteId, fromUtc, toUtc);
        var scriptErrors = await LoadTrendSeriesAsync("scriptErrors", siteId, fromUtc, toUtc);
        var sfBufferDepth = await LoadTrendSeriesAsync("sfBufferDepth", siteId, fromUtc, toUtc);

        if (generation != _trendLoadGeneration)
        {
            // A newer load started while this one was awaiting; its results
            // win, so drop these.
            return;
        }

        (_connectionsDownSeries, _connectionsDownAvailable, _connectionsDownError) = connectionsDown;
        (_deadLettersSeries, _deadLettersAvailable, _deadLettersError) = deadLetters;
        (_scriptErrorsSeries, _scriptErrorsAvailable, _scriptErrorsError) = scriptErrors;
        (_sfBufferDepthSeries, _sfBufferDepthAvailable, _sfBufferDepthError) = sfBufferDepth;
    }
    finally
    {
        // Only the newest load may clear the flag; a superseded load leaves
        // it set for the load that replaced it.
        if (generation == _trendLoadGeneration)
        {
            _trendsLoading = false;
        }
    }
}
|
||||
|
||||
// Fetches one Site Health series for a site and time range, best-effort.
// Queries KpiHistory with source KpiSources.SiteHealth, the given metric,
// scope KpiScopes.Site and the site id as scope key.
//
// Returns (series, true, null) on success. Any exception is swallowed on
// purpose and reported as (null, false, "Trend data unavailable.") so the
// caller can show the unavailable placeholder instead of failing.
private async Task<(IReadOnlyList<KpiSeriesPoint>?, bool, string?)> LoadTrendSeriesAsync(
    string metric, string siteId, DateTime fromUtc, DateTime toUtc)
{
    (IReadOnlyList<KpiSeriesPoint>?, bool, string?) outcome;

    try
    {
        var points = await KpiHistory.GetSeriesAsync(
            KpiSources.SiteHealth, metric, KpiScopes.Site, siteId, fromUtc, toUtc);
        outcome = (points, true, null);
    }
    catch
    {
        // Deliberate catch-all: a trend fault must never reach the dashboard.
        outcome = (null, false, "Trend data unavailable.");
    }

    return outcome;
}
|
||||
|
||||
// Display text for one entry of the trend site selector: a fixed label for
// the central cluster key, otherwise "<site name> (<site key>)".
private string TrendSiteLabel(string siteKey)
{
    if (siteKey == CentralHealthReportLoop.CentralSiteId)
    {
        return "Central Cluster";
    }

    var siteName = GetSiteName(siteKey);
    return $"{siteName} ({siteKey})";
}
|
||||
|
||||
private async Task RefreshNow()
|
||||
{
|
||||
_siteStates = HealthAggregator.GetAllSiteStates();
|
||||
|
||||
Reference in New Issue
Block a user