perf(ui): topology page — staleness off the live loop + parallelized one-shot
This commit is contained in:
@@ -9,6 +9,8 @@
|
||||
@using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums
|
||||
@using ZB.MOM.WW.ScadaBridge.DeploymentManager
|
||||
@using ZB.MOM.WW.ScadaBridge.TemplateEngine.Services
|
||||
@using System.Collections.Concurrent
|
||||
@using Microsoft.Extensions.DependencyInjection
|
||||
@attribute [Authorize(Policy = AuthorizationPolicies.RequireDeployment)]
|
||||
@inject ITemplateEngineRepository TemplateEngineRepository
|
||||
@inject ISiteRepository SiteRepository
|
||||
@@ -21,6 +23,7 @@
|
||||
@inject NavigationManager NavigationManager
|
||||
@inject IJSRuntime JSRuntime
|
||||
@inject IDialogService Dialog
|
||||
@inject IServiceScopeFactory ScopeFactory
|
||||
@implements IDisposable
|
||||
|
||||
<div class="container-fluid mt-3">
|
||||
@@ -77,6 +80,19 @@
|
||||
<button class="btn btn-outline-secondary" aria-label="Expand all areas" @onclick="() => _tree?.ExpandAll()">Expand</button>
|
||||
<button class="btn btn-outline-secondary" aria-label="Collapse all areas" @onclick="() => _tree?.CollapseAll()">Collapse</button>
|
||||
</div>
|
||||
<button class="btn btn-outline-secondary btn-sm" aria-label="Re-check staleness"
|
||||
title="Recompute the Stale/Current badge for every deployed instance"
|
||||
@onclick="RecheckStalenessAsync" disabled="@_recheckingStaleness">
|
||||
@if (_recheckingStaleness)
|
||||
{
|
||||
<span class="spinner-border spinner-border-sm me-1" role="status" aria-hidden="true"></span>
|
||||
<span>Checking…</span>
|
||||
}
|
||||
else
|
||||
{
|
||||
<span><i class="bi bi-arrow-repeat me-1"></i>Re-check staleness</span>
|
||||
}
|
||||
</button>
|
||||
<div class="form-check form-switch ms-2 mb-0">
|
||||
<input type="checkbox" class="form-check-input" id="live-updates"
|
||||
checked="@_liveUpdates" @onchange="OnLiveUpdatesToggled" />
|
||||
@@ -121,9 +137,18 @@
|
||||
private List<Area> _allAreas = new();
|
||||
private Dictionary<int, bool> _stalenessMap = new();
|
||||
|
||||
/// <summary>
|
||||
/// Max concurrent per-instance staleness comparisons. Each comparison
|
||||
/// re-flattens the instance's config, so the fan-out is capped to avoid a
|
||||
/// thundering herd of flattens on a large topology (mirrors the
|
||||
/// AlarmSummaryService DebugViewSnapshot fan-out cap).
|
||||
/// </summary>
|
||||
private const int MaxConcurrentStalenessChecks = 8;
|
||||
|
||||
private bool _loading = true;
|
||||
private string? _errorMessage;
|
||||
private bool _actionInProgress;
|
||||
private bool _recheckingStaleness;
|
||||
|
||||
private string _searchText = string.Empty;
|
||||
|
||||
@@ -178,7 +203,11 @@
|
||||
InvokeAsync(async () =>
|
||||
{
|
||||
if (!_liveUpdates) return;
|
||||
await LoadDataAsync();
|
||||
// Staleness is deliberately OFF the live loop: it only changes when a
|
||||
// template/instance is edited or redeployed, and recomputing it (a
|
||||
// full re-flatten per deployed instance) every 15s is the page's
|
||||
// dominant cost. The poll refreshes only the cheap deployed state.
|
||||
await LoadDeployedStateAsync();
|
||||
StateHasChanged();
|
||||
});
|
||||
}, null, LiveUpdatesInterval, LiveUpdatesInterval);
|
||||
@@ -220,7 +249,25 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Full reload: the cheap deployed state plus a one-shot staleness recompute.
|
||||
// Used on initial load, the manual "Refresh" button, and after any mutating
|
||||
// action (deploy/enable/disable/delete/move/rename) — all of which can change
|
||||
// staleness. The live-update timer deliberately does NOT call this; it calls
|
||||
// LoadDeployedStateAsync alone (staleness is off the 15s poll).
|
||||
private async Task LoadDataAsync()
|
||||
{
|
||||
await LoadDeployedStateAsync();
|
||||
await ComputeStalenessAsync();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Fast path — loads the cheap deployed state (sites, instances, areas →
|
||||
/// hierarchy + State badge) WITHOUT recomputing per-instance staleness. These
|
||||
/// are all single, indexed central-DB reads. This is exactly what the
|
||||
/// live-update timer polls every 15s. Internal so the live-update regression
|
||||
/// test can drive the timer's code path directly.
|
||||
/// </summary>
|
||||
internal async Task LoadDeployedStateAsync()
|
||||
{
|
||||
_loading = true;
|
||||
_errorMessage = null;
|
||||
@@ -242,20 +289,6 @@
|
||||
_allAreas.AddRange(areas);
|
||||
}
|
||||
|
||||
_stalenessMap.Clear();
|
||||
foreach (var inst in _allInstances.Where(i => i.State != InstanceState.NotDeployed))
|
||||
{
|
||||
try
|
||||
{
|
||||
var comparison = await DeploymentService.GetDeploymentComparisonAsync(inst.Id);
|
||||
_stalenessMap[inst.Id] = comparison.IsSuccess && comparison.Value.IsStale;
|
||||
}
|
||||
catch
|
||||
{
|
||||
_stalenessMap[inst.Id] = false;
|
||||
}
|
||||
}
|
||||
|
||||
BuildTree();
|
||||
}
|
||||
catch (Exception ex)
|
||||
@@ -265,6 +298,81 @@
|
||||
_loading = false;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Expensive path — recomputes the Stale/Current badge for every deployed
|
||||
/// instance via <c>DeploymentService.GetDeploymentComparisonAsync</c>, which
|
||||
/// re-flattens each instance's current config and compares it to the deployed
|
||||
/// snapshot. This is the page's dominant cost, so it runs only on initial
|
||||
/// load and on demand (the "Re-check staleness" button) — never on the 15s
|
||||
/// live-update loop. The per-instance comparisons fan out with bounded
|
||||
/// concurrency (<see cref="MaxConcurrentStalenessChecks"/>), each on its own
|
||||
/// DI scope so the parallel flattens never share the circuit-scoped DbContext
|
||||
/// (mirrors KpiHistoryQueryService's per-query scope).
|
||||
/// </summary>
|
||||
private async Task ComputeStalenessAsync()
|
||||
{
|
||||
var deployedIds = _allInstances
|
||||
.Where(i => i.State != InstanceState.NotDeployed)
|
||||
.Select(i => i.Id)
|
||||
.ToList();
|
||||
|
||||
if (deployedIds.Count == 0)
|
||||
{
|
||||
_stalenessMap = new Dictionary<int, bool>();
|
||||
BuildTree();
|
||||
return;
|
||||
}
|
||||
|
||||
var results = new ConcurrentDictionary<int, bool>();
|
||||
using var gate = new SemaphoreSlim(MaxConcurrentStalenessChecks, MaxConcurrentStalenessChecks);
|
||||
|
||||
await Task.WhenAll(deployedIds.Select(id => ComputeOneStalenessAsync(id, gate, results)));
|
||||
|
||||
_stalenessMap = new Dictionary<int, bool>(results);
|
||||
BuildTree();
|
||||
}
|
||||
|
||||
private async Task ComputeOneStalenessAsync(
|
||||
int instanceId, SemaphoreSlim gate, ConcurrentDictionary<int, bool> results)
|
||||
{
|
||||
await gate.WaitAsync();
|
||||
try
|
||||
{
|
||||
// Fresh DI scope per comparison: GetDeploymentComparisonAsync re-flattens
|
||||
// through the scoped DbContext, which must not be shared across the
|
||||
// concurrent fan-out. Each scope gets its own DbContext.
|
||||
await using var scope = ScopeFactory.CreateAsyncScope();
|
||||
var deploymentService = scope.ServiceProvider.GetRequiredService<DeploymentService>();
|
||||
var comparison = await deploymentService.GetDeploymentComparisonAsync(instanceId);
|
||||
results[instanceId] = comparison.IsSuccess && comparison.Value.IsStale;
|
||||
}
|
||||
catch
|
||||
{
|
||||
results[instanceId] = false;
|
||||
}
|
||||
finally
|
||||
{
|
||||
gate.Release();
|
||||
}
|
||||
}
|
||||
|
||||
// Manual "Re-check staleness" toolbar action. Shows a spinner while the
|
||||
// bounded-parallel one-shot runs, then refreshes the Stale/Current badges.
|
||||
private async Task RecheckStalenessAsync()
|
||||
{
|
||||
if (_recheckingStaleness) return;
|
||||
_recheckingStaleness = true;
|
||||
StateHasChanged();
|
||||
try
|
||||
{
|
||||
await ComputeStalenessAsync();
|
||||
}
|
||||
finally
|
||||
{
|
||||
_recheckingStaleness = false;
|
||||
}
|
||||
}
|
||||
|
||||
private void OnSearchChanged()
|
||||
{
|
||||
BuildTree();
|
||||
|
||||
@@ -387,6 +387,93 @@ public class TopologyPageTests : BunitContext
|
||||
Assert.Contains("Changed", markup);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Seeds one site with one deployed (Enabled) instance whose deployment
|
||||
/// comparison resolves: a deployed snapshot plus a current flatten. Used by
|
||||
/// the staleness-lifecycle tests below, which assert on how OFTEN the
|
||||
/// comparison runs (via the snapshot repository call count), not on the
|
||||
/// resulting Stale/Current value.
|
||||
/// </summary>
|
||||
private void SeedOneDeployedInstanceWithComparison()
|
||||
{
|
||||
SeedRepos(
|
||||
sites: new[] { new Site("Plant-A", "plant-a") { Id = 1 } },
|
||||
instances: new[]
|
||||
{
|
||||
new Instance("Pump-001") { Id = 100, SiteId = 1, State = InstanceState.Enabled }
|
||||
});
|
||||
|
||||
var deployedConfig = new FlattenedConfiguration { InstanceUniqueName = "Pump-001" };
|
||||
_deployRepo.GetDeployedSnapshotByInstanceIdAsync(100, Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult<DeployedConfigSnapshot?>(
|
||||
new DeployedConfigSnapshot("dep-1", "hash-old",
|
||||
JsonSerializer.Serialize(deployedConfig))));
|
||||
|
||||
var currentConfig = new FlattenedConfiguration { InstanceUniqueName = "Pump-001" };
|
||||
_pipeline.FlattenAndValidateAsync(100, Arg.Any<CancellationToken>())
|
||||
.Returns(Task.FromResult(Result<FlatteningPipelineResult>.Success(
|
||||
new FlatteningPipelineResult(currentConfig, "hash-new", ValidationResult.Success()))));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task LiveUpdate_DoesNotRecomputeStaleness()
|
||||
{
|
||||
// Performance regression guard: staleness (the expensive per-instance
|
||||
// re-flatten via DeploymentService.GetDeploymentComparisonAsync) must NOT
|
||||
// run on the 15s live-update poll. The live-update path refreshes only the
|
||||
// cheap deployed state (LoadDeployedStateAsync), which the timer invokes
|
||||
// directly. The comparison reaches into the snapshot repository, so its
|
||||
// call count is the proxy for "did staleness recompute".
|
||||
SeedOneDeployedInstanceWithComparison();
|
||||
|
||||
var cut = Render<TopologyPage>();
|
||||
|
||||
// Initial load computed staleness exactly once.
|
||||
await _deployRepo.Received(1)
|
||||
.GetDeployedSnapshotByInstanceIdAsync(100, Arg.Any<CancellationToken>());
|
||||
|
||||
// Drive the EXACT code path the live-update timer runs.
|
||||
await cut.InvokeAsync(() => cut.Instance.LoadDeployedStateAsync());
|
||||
|
||||
// Staleness was NOT recomputed by the live-update refresh — still once.
|
||||
await _deployRepo.Received(1)
|
||||
.GetDeployedSnapshotByInstanceIdAsync(100, Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void DeployedState_Renders_AfterSplit()
|
||||
{
|
||||
// After splitting the load into fast-state vs expensive-staleness, the
|
||||
// cheap deployed state (hierarchy + State badge) must still render.
|
||||
SeedOneDeployedInstanceWithComparison();
|
||||
|
||||
var cut = Render<TopologyPage>();
|
||||
FindToggleForLabel(cut, "Plant-A")!.Click();
|
||||
|
||||
Assert.Contains("Pump-001", cut.Markup);
|
||||
Assert.Contains("Enabled", cut.Markup);
|
||||
// The deployed instance shows a staleness badge (Stale or Current).
|
||||
Assert.Matches("Stale|Current", cut.Markup);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ManualRecheck_RecomputesStaleness()
|
||||
{
|
||||
// The "Re-check staleness" button lets operators refresh staleness on
|
||||
// demand (since it's off the live loop). Clicking it runs the comparison
|
||||
// again — a second snapshot-repository call.
|
||||
SeedOneDeployedInstanceWithComparison();
|
||||
|
||||
var cut = Render<TopologyPage>();
|
||||
_deployRepo.Received(1)
|
||||
.GetDeployedSnapshotByInstanceIdAsync(100, Arg.Any<CancellationToken>());
|
||||
|
||||
cut.Find("button[aria-label='Re-check staleness']").Click();
|
||||
|
||||
_deployRepo.Received(2)
|
||||
.GetDeployedSnapshotByInstanceIdAsync(100, Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void LegacyInstancesRoute_IsDeclaredOnTopologyPage()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user