From 9ae6bce0c808a63cee4eb374861ea325e143c428 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 25 Jun 2026 13:42:39 -0400 Subject: [PATCH] fix(dashboard): copy Galaxy summary volatile fields fresh; memoize only O(N) breakdown Resolves Server-059, Tests-041, Server-060 (2026-06-25 re-review). DashboardSnapshotService memoized the whole Galaxy summary keyed on the cache Sequence, but the shared library bumps Sequence only on a heavy refresh: the steady-state tick, the refresh-failure path, and the age-based status getter all replace the entry via 'previous with { ... }' at the SAME Sequence. The dashboard therefore froze LastQueriedAt and, during a Galaxy SQL outage, kept showing Healthy with no error for the whole outage. Split DashboardGalaxySummaryProjector into ComputeBreakdown (the O(N) template/ category work, the only sequence-bound part) and BuildSummary (cheap volatile fields copied fresh). ResolveGalaxySummary now memoizes only the breakdown by Sequence and rebuilds the summary from the current entry each tick. Removed the redundant DashboardGalaxyProjector wrapper. Tests: same-sequence status/error/timestamp now reflected (the regression); memoization-hit and sequence-invalidation guards; GatewayApplicationTests asserts the DI container resolves IGalaxyBrowseScopeProvider to GatewayBrowseScopeProvider (pins the registration-order invariant over the library's no-op default). --- .../Dashboard/DashboardGalaxyProjector.cs | 14 -- .../Dashboard/DashboardGalaxySummary.cs | 21 ++- .../DashboardGalaxySummaryProjector.cs | 101 +++++++++----- .../Dashboard/DashboardSnapshotService.cs | 36 ++--- .../DashboardSnapshotServiceTests.cs | 123 +++++++++++++++++- .../Gateway/GatewayApplicationTests.cs | 19 +++ 6 files changed, 246 insertions(+), 68 deletions(-) delete mode 100644 src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxyProjector.cs diff --git a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxyProjector.cs b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxyProjector.cs deleted file mode 100644 index b4b3f2e..0000000 --- a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxyProjector.cs +++ /dev/null @@ -1,14 +0,0 @@ -using ZB.MOM.WW.GalaxyRepository; - -namespace ZB.MOM.WW.MxGateway.Server.Dashboard; - -/// Projects the shared-library Galaxy cache entry into a dashboard Galaxy summary. -internal static class DashboardGalaxyProjector -{ - /// Projects the cache entry to a dashboard Galaxy summary. - /// The Galaxy hierarchy cache entry. - public static DashboardGalaxySummary Project(GalaxyHierarchyCacheEntry entry) - { - return DashboardGalaxySummaryProjector.Project(entry); - } -} diff --git a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummary.cs b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummary.cs index 247d028..1013ab2 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummary.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummary.cs @@ -2,8 +2,10 @@ namespace ZB.MOM.WW.MxGateway.Server.Dashboard; /// /// Snapshot of the Galaxy Repository (ZB) browse state surfaced on the dashboard. -/// Populated by on a background refresh cadence so -/// the dashboard never blocks on SQL. +/// Built by from the shared-library Galaxy +/// hierarchy cache on each dashboard snapshot; the projector memoizes only the O(N) +/// on the cache sequence while copying the volatile +/// status/timestamp fields fresh, so the dashboard never blocks on SQL. /// public sealed record DashboardGalaxySummary( DashboardGalaxyStatus Status, @@ -46,3 +48,18 @@ public enum DashboardGalaxyStatus public sealed record DashboardGalaxyTemplateUsage(string TemplateName, int InstanceCount); public sealed record DashboardGalaxyCategoryCount(int CategoryId, string CategoryName, int ObjectCount); + +/// +/// The O(N)-computed template-usage and category breakdown of a Galaxy hierarchy cache entry — +/// the only part of that changes with the cache sequence +/// and is therefore safe to memoize on it. +/// +public sealed record GalaxyObjectBreakdown( + IReadOnlyList TopTemplates, + IReadOnlyList ObjectCategories) +{ + /// Gets the empty breakdown (no templates, no categories). + public static GalaxyObjectBreakdown Empty { get; } = new( + Array.Empty(), + Array.Empty()); +} diff --git a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummaryProjector.cs b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummaryProjector.cs index 3815d58..7102198 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummaryProjector.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardGalaxySummaryProjector.cs @@ -27,53 +27,82 @@ public static class DashboardGalaxySummaryProjector public static DashboardGalaxySummary Project(GalaxyHierarchyCacheEntry entry) { ArgumentNullException.ThrowIfNull(entry); + return BuildSummary(entry, ComputeBreakdown(entry)); + } - IReadOnlyList topTemplates; - IReadOnlyList objectCategories; + /// + /// Computes the O(N) template-usage and category breakdown from the entry's objects. + /// This is the only part of the summary that changes with the cache + /// — the shared library replaces the object + /// set only on a heavy refresh that bumps the sequence — so callers may memoize the result + /// on the sequence. The cheap, per-tick-volatile fields are re-read fresh via + /// and must never be memoized. + /// + /// The shared-library cache entry to project. + /// The template/category breakdown of 's objects. + public static GalaxyObjectBreakdown ComputeBreakdown(GalaxyHierarchyCacheEntry entry) + { + ArgumentNullException.ThrowIfNull(entry); if (entry.Objects.Count == 0) { - topTemplates = Array.Empty(); - objectCategories = Array.Empty(); + return GalaxyObjectBreakdown.Empty; } - else + + Dictionary objectsByCategory = new(); + Dictionary templateUsage = new(StringComparer.OrdinalIgnoreCase); + + foreach (GalaxyObject obj in entry.Objects) { - Dictionary objectsByCategory = new(); - Dictionary templateUsage = new(StringComparer.OrdinalIgnoreCase); + objectsByCategory.TryGetValue(obj.CategoryId, out int categoryCount); + objectsByCategory[obj.CategoryId] = categoryCount + 1; - foreach (GalaxyObject obj in entry.Objects) + if (obj.TemplateChain.Count > 0) { - objectsByCategory.TryGetValue(obj.CategoryId, out int categoryCount); - objectsByCategory[obj.CategoryId] = categoryCount + 1; - - if (obj.TemplateChain.Count > 0) + string immediate = obj.TemplateChain[0]; + if (!string.IsNullOrWhiteSpace(immediate)) { - string immediate = obj.TemplateChain[0]; - if (!string.IsNullOrWhiteSpace(immediate)) - { - templateUsage.TryGetValue(immediate, out int templateCount); - templateUsage[immediate] = templateCount + 1; - } + templateUsage.TryGetValue(immediate, out int templateCount); + templateUsage[immediate] = templateCount + 1; } } - - topTemplates = templateUsage - .OrderByDescending(usage => usage.Value) - .ThenBy(usage => usage.Key, StringComparer.OrdinalIgnoreCase) - .Take(MaxTopTemplates) - .Select(usage => new DashboardGalaxyTemplateUsage(usage.Key, usage.Value)) - .ToArray(); - - objectCategories = objectsByCategory - .OrderByDescending(category => category.Value) - .ThenBy(category => category.Key) - .Select(category => new DashboardGalaxyCategoryCount( - category.Key, - ResolveCategoryName(category.Key), - category.Value)) - .ToArray(); } + IReadOnlyList topTemplates = templateUsage + .OrderByDescending(usage => usage.Value) + .ThenBy(usage => usage.Key, StringComparer.OrdinalIgnoreCase) + .Take(MaxTopTemplates) + .Select(usage => new DashboardGalaxyTemplateUsage(usage.Key, usage.Value)) + .ToArray(); + + IReadOnlyList objectCategories = objectsByCategory + .OrderByDescending(category => category.Value) + .ThenBy(category => category.Key) + .Select(category => new DashboardGalaxyCategoryCount( + category.Key, + ResolveCategoryName(category.Key), + category.Value)) + .ToArray(); + + return new GalaxyObjectBreakdown(topTemplates, objectCategories); + } + + /// + /// Assembles a from an entry's cheap, per-tick-volatile + /// fields (status, timestamps, last error, counts) and a precomputed + /// . The volatile fields are copied fresh on every call so a + /// memoized breakdown never freezes the dashboard's health or timestamps between heavy + /// refreshes — the library mutates those fields in place at the same sequence on steady-state + /// ticks and on refresh failure. + /// + /// The cache entry whose volatile fields to copy. + /// The precomputed template/category breakdown. + /// The assembled dashboard summary. + public static DashboardGalaxySummary BuildSummary(GalaxyHierarchyCacheEntry entry, GalaxyObjectBreakdown breakdown) + { + ArgumentNullException.ThrowIfNull(entry); + ArgumentNullException.ThrowIfNull(breakdown); + return new DashboardGalaxySummary( Status: MapDashboardStatus(entry.Status), LastQueriedAt: entry.LastQueriedAt, @@ -85,8 +114,8 @@ public static class DashboardGalaxySummaryProjector AttributeCount: entry.AttributeCount, HistorizedAttributeCount: entry.HistorizedAttributeCount, AlarmAttributeCount: entry.AlarmAttributeCount, - TopTemplates: topTemplates, - ObjectCategories: objectCategories); + TopTemplates: breakdown.TopTemplates, + ObjectCategories: breakdown.ObjectCategories); } private static DashboardGalaxyStatus MapDashboardStatus(GalaxyCacheStatus status) => status switch diff --git a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardSnapshotService.cs b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardSnapshotService.cs index 0489bfd..371bca2 100644 --- a/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardSnapshotService.cs +++ b/src/ZB.MOM.WW.MxGateway.Server/Dashboard/DashboardSnapshotService.cs @@ -30,12 +30,13 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService private readonly ILogger _logger; private readonly SemaphoreSlim _apiKeySummaryRefreshGate = new(1, 1); private IReadOnlyList _apiKeySummaries = Array.Empty(); - // Memoizes the projected Galaxy summary against the immutable cache sequence. The shared - // library bumps Sequence only on a heavy refresh, so an unchanged sequence means the entry - // (and therefore its summary) is unchanged and the O(N) projection can be reused. This keeps - // the ~1s snapshot tick O(1) for Galaxy, restoring the pre-adoption behavior where the - // summary was computed once per refresh rather than once per tick. - private GalaxySummaryCache? _galaxySummaryCache; + // Memoizes ONLY the O(N) template/category breakdown against the cache sequence. The shared + // library bumps Sequence only on a heavy refresh that replaces the object set, so an unchanged + // sequence means the breakdown is unchanged and can be reused — keeping the ~1s snapshot tick + // O(1) for Galaxy. The summary's cheap volatile fields (status, timestamps, last error, counts) + // are NOT memoized: the library mutates them in place at the same sequence on steady-state ticks + // and on refresh failure, so they are copied fresh on every snapshot (see Server-059). + private GalaxyBreakdownCache? _galaxyBreakdownCache; /// Initializes a new instance of the DashboardSnapshotService class. /// Registry of active gateway sessions. @@ -111,21 +112,26 @@ public sealed class DashboardSnapshotService : IDashboardSnapshotService GalaxyHierarchyCacheEntry entry = _galaxyHierarchyCache.Current; long sequence = entry.Sequence; - // Lock-free reuse: a matching sequence means the entry is unchanged, so the previously - // projected summary is still correct. A racing recompute for a new sequence is harmless — - // the projection is pure, so any winner stores identical content for that sequence. - GalaxySummaryCache? cached = Volatile.Read(ref _galaxySummaryCache); + // Lock-free reuse of the O(N) breakdown only: a matching sequence means the object set is + // unchanged, so the previously computed breakdown is still correct. A racing recompute for a + // new sequence is harmless — the computation is pure, so any winner stores identical content + // for that sequence. The cheap volatile fields are always taken from the current entry below. + GalaxyBreakdownCache? cached = Volatile.Read(ref _galaxyBreakdownCache); + GalaxyObjectBreakdown breakdown; if (cached is not null && cached.Sequence == sequence) { - return cached.Summary; + breakdown = cached.Breakdown; + } + else + { + breakdown = DashboardGalaxySummaryProjector.ComputeBreakdown(entry); + Volatile.Write(ref _galaxyBreakdownCache, new GalaxyBreakdownCache(sequence, breakdown)); } - DashboardGalaxySummary summary = DashboardGalaxyProjector.Project(entry); - Volatile.Write(ref _galaxySummaryCache, new GalaxySummaryCache(sequence, summary)); - return summary; + return DashboardGalaxySummaryProjector.BuildSummary(entry, breakdown); } - private sealed record GalaxySummaryCache(long Sequence, DashboardGalaxySummary Summary); + private sealed record GalaxyBreakdownCache(long Sequence, GalaxyObjectBreakdown Breakdown); /// /// Watches dashboard snapshots at regular intervals asynchronously. diff --git a/src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardSnapshotServiceTests.cs b/src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardSnapshotServiceTests.cs index 6c9e3e6..4df7ce3 100644 --- a/src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardSnapshotServiceTests.cs +++ b/src/ZB.MOM.WW.MxGateway.Tests/Gateway/Dashboard/DashboardSnapshotServiceTests.cs @@ -218,7 +218,7 @@ public sealed class DashboardSnapshotServiceTests { // The shared-library cache entry no longer carries a precomputed dashboard summary; // DashboardSnapshotService derives templates/categories from the entry's objects via - // DashboardGalaxyProjector. Seed objects that yield $Pump x2 / $Area x1 templates and + // DashboardGalaxySummaryProjector. Seed objects that yield $Pump x2 / $Area x1 templates and // categories UserDefined(10) x2 / Area(13) x1, matching the asserted summary. GalaxyObject[] objects = [ @@ -280,6 +280,111 @@ public sealed class DashboardSnapshotServiceTests Assert.Contains(snapshot.Galaxy.ObjectCategories, c => c.CategoryName == "Area" && c.ObjectCount == 1); } + /// + /// Regression for Server-059: the shared library replaces the cache entry via + /// previous with { ... } at the same Sequence on a steady-state tick and on a + /// refresh failure (Status → Unavailable, LastError set). The dashboard summary must reflect + /// those per-tick-volatile fields, not a summary frozen at the last heavy-refresh sequence — + /// otherwise the Galaxy health indicator keeps showing Healthy throughout a SQL outage. + /// + [Fact] + public void GetSnapshot_WhenGalaxyEntryChangesAtSameSequence_ReflectsVolatileStatusAndError() + { + GalaxyHierarchyCacheEntry healthy = GalaxyHierarchyCacheEntry.Empty with + { + Status = GalaxyCacheStatus.Healthy, + Sequence = 5, + LastQueriedAt = DateTimeOffset.Parse("2026-04-28T11:00:00Z", CultureInfo.InvariantCulture), + LastSuccessAt = DateTimeOffset.Parse("2026-04-28T11:00:00Z", CultureInfo.InvariantCulture), + }; + MutableGalaxyHierarchyCache cache = new(healthy); + using GatewayMetrics metrics = new(); + DashboardSnapshotService service = CreateService(new SessionRegistry(), metrics, galaxyHierarchyCache: cache); + + Assert.Equal(DashboardGalaxyStatus.Healthy, service.GetSnapshot().Galaxy.Status); + + // SQL outage: no heavy refresh can succeed, so Sequence cannot advance; the library + // mutates the entry in place to Unavailable with an error at the same Sequence. + cache.Current = healthy with + { + Status = GalaxyCacheStatus.Unavailable, + LastError = "SQL unreachable", + LastQueriedAt = DateTimeOffset.Parse("2026-04-28T11:00:05Z", CultureInfo.InvariantCulture), + }; + + DashboardGalaxySummary summary = service.GetSnapshot().Galaxy; + Assert.Equal(DashboardGalaxyStatus.Unavailable, summary.Status); + Assert.Equal("SQL unreachable", summary.LastError); + Assert.Equal( + DateTimeOffset.Parse("2026-04-28T11:00:05Z", CultureInfo.InvariantCulture), + summary.LastQueriedAt); + } + + /// + /// Verifies the O(N) template/category breakdown is memoized on the cache Sequence: in + /// production the object set only changes when the library bumps Sequence on a heavy refresh, + /// so the breakdown is reused at an unchanged Sequence. Proves the memo keys on Sequence by + /// swapping Objects at the same Sequence and asserting the breakdown does not recompute. + /// + [Fact] + public void GetSnapshot_WhenSequenceUnchanged_ReusesExpensiveTemplateBreakdown() + { + GalaxyObject[] first = + [new GalaxyObject { GobjectId = 1, BrowseName = "Pump01", CategoryId = 10, TemplateChain = { "$Pump" } }]; + GalaxyHierarchyCacheEntry entry = GalaxyHierarchyCacheEntry.Empty with + { + Status = GalaxyCacheStatus.Healthy, + Sequence = 9, + Objects = first, + Index = GalaxyHierarchyIndex.Build(first), + ObjectCount = 1, + }; + MutableGalaxyHierarchyCache cache = new(entry); + using GatewayMetrics metrics = new(); + DashboardSnapshotService service = CreateService(new SessionRegistry(), metrics, galaxyHierarchyCache: cache); + + Assert.Equal("$Pump", Assert.Single(service.GetSnapshot().Galaxy.TopTemplates).TemplateName); + + GalaxyObject[] second = + [new GalaxyObject { GobjectId = 2, BrowseName = "Valve01", CategoryId = 10, TemplateChain = { "$Valve" } }]; + cache.Current = entry with { Objects = second, Index = GalaxyHierarchyIndex.Build(second) }; + + // Same Sequence (9) → breakdown reused → still "$Pump", not "$Valve". + Assert.Equal("$Pump", Assert.Single(service.GetSnapshot().Galaxy.TopTemplates).TemplateName); + } + + /// + /// Verifies that a changed cache Sequence invalidates the memoized template breakdown and the + /// next snapshot reflects the new object set. Guards against an inverted sequence check that + /// would freeze the Galaxy section after its first load (Tests-041). + /// + [Fact] + public void GetSnapshot_WhenSequenceChanges_RecomputesTemplateBreakdown() + { + GalaxyObject[] first = + [new GalaxyObject { GobjectId = 1, BrowseName = "Pump01", CategoryId = 10, TemplateChain = { "$Pump" } }]; + GalaxyHierarchyCacheEntry entry = GalaxyHierarchyCacheEntry.Empty with + { + Status = GalaxyCacheStatus.Healthy, + Sequence = 9, + Objects = first, + Index = GalaxyHierarchyIndex.Build(first), + ObjectCount = 1, + }; + MutableGalaxyHierarchyCache cache = new(entry); + using GatewayMetrics metrics = new(); + DashboardSnapshotService service = CreateService(new SessionRegistry(), metrics, galaxyHierarchyCache: cache); + + Assert.Equal("$Pump", Assert.Single(service.GetSnapshot().Galaxy.TopTemplates).TemplateName); + + GalaxyObject[] second = + [new GalaxyObject { GobjectId = 2, BrowseName = "Valve01", CategoryId = 10, TemplateChain = { "$Valve" } }]; + cache.Current = entry with { Sequence = 10, Objects = second, Index = GalaxyHierarchyIndex.Build(second) }; + + // New Sequence (10) → breakdown recomputed → now "$Valve". + Assert.Equal("$Valve", Assert.Single(service.GetSnapshot().Galaxy.TopTemplates).TemplateName); + } + /// /// Verifies snapshot watcher cancels cleanly when subscriber cancels. /// @@ -452,6 +557,22 @@ public sealed class DashboardSnapshotServiceTests public Task WaitForFirstLoadAsync(CancellationToken cancellationToken) => Task.CompletedTask; } + private sealed class MutableGalaxyHierarchyCache(GalaxyHierarchyCacheEntry current) : IGalaxyHierarchyCache + { + /// Gets or sets the current Galaxy hierarchy cache entry, swappable between snapshots. + public GalaxyHierarchyCacheEntry Current { get; set; } = current; + + /// Refreshes the cache asynchronously. + /// Cancellation token. + /// Completed task. + public Task RefreshAsync(CancellationToken cancellationToken) => Task.CompletedTask; + + /// Waits for the first cache load asynchronously. + /// Cancellation token. + /// Completed task. + public Task WaitForFirstLoadAsync(CancellationToken cancellationToken) => Task.CompletedTask; + } + private class FakeApiKeyAdminStore : IApiKeyAdminStore { /// diff --git a/src/ZB.MOM.WW.MxGateway.Tests/Gateway/GatewayApplicationTests.cs b/src/ZB.MOM.WW.MxGateway.Tests/Gateway/GatewayApplicationTests.cs index 2eb1667..5ae6cab 100644 --- a/src/ZB.MOM.WW.MxGateway.Tests/Gateway/GatewayApplicationTests.cs +++ b/src/ZB.MOM.WW.MxGateway.Tests/Gateway/GatewayApplicationTests.cs @@ -41,6 +41,25 @@ public sealed class GatewayApplicationTests Assert.Equal("SerilogLoggerFactory", factory.GetType().Name); } + /// + /// Verifies that Build registers the gateway's browse-scope provider so it wins over the + /// shared library's no-op default (Server-060). The per-API-key browse-subtree scoping + /// depends on AddSingleton<IGalaxyBrowseScopeProvider, GatewayBrowseScopeProvider> + /// running before AddZbGalaxyRepository, whose TryAddSingleton default + /// (NullGalaxyBrowseScopeProvider → full hierarchy) must lose. If this registration order ever + /// regressed, every metadata-scoped key would silently see the entire Galaxy hierarchy. + /// + [Fact] + public async Task Build_RegistersGatewayBrowseScopeProviderOverLibraryDefault() + { + await using WebApplication app = GatewayApplication.Build([]); + + ZB.MOM.WW.GalaxyRepository.Grpc.IGalaxyBrowseScopeProvider scopeProvider = + app.Services.GetRequiredService(); + + Assert.IsType(scopeProvider); + } + /// Verifies that Build registers the gateway metrics service. [Fact] public async Task Build_RegistersGatewayMetrics()