diff --git a/docs/GalaxyRepository.md b/docs/GalaxyRepository.md index 7353c53..8b784bf 100644 --- a/docs/GalaxyRepository.md +++ b/docs/GalaxyRepository.md @@ -89,6 +89,36 @@ load to complete before returning. If the first load fails or times out, the client gets `Unavailable` with a short reason. Once any load completes (success or failure), this wait is skipped on subsequent calls. +### On-disk snapshot + +The gateway may lose connectivity to the Galaxy database — and the database is +often unreachable right when the gateway itself restarts. To keep browse +working across that gap, the cache persists its dataset to disk: + +- After every successful **heavy** refresh (a deploy change), the raw + hierarchy and attribute rowsets are written to + `MxGateway:Galaxy:SnapshotCachePath` + (default `C:\ProgramData\MxGateway\galaxy-snapshot.json`). The write is + atomic — a temp file plus rename — so a crash mid-write cannot corrupt the + snapshot. Cheap no-change ticks write nothing; the file is already current. +- On the **first** refresh after startup, before any SQL runs, the cache + reloads that file. The restored data is served with `Stale` status — + it is last-known data, not live — so clients can browse immediately even + when the Galaxy database is unreachable. +- The first live query then reconciles: if it observes the **same** + `time_of_last_deploy` the snapshot was saved at, the entry is promoted to + `Healthy` with no heavy re-query (the snapshot is provably current); if it + observes a newer deploy, the heavy queries run and replace the snapshot; if + the database is still unreachable, the entry stays `Stale`. + +`is_alarm` / `is_historized` filters, paging, and the dashboard summary all +work against a restored snapshot exactly as against a live pull — the restore +path runs the same materialization. Persistence is disabled by setting +`MxGateway:Galaxy:PersistSnapshot` to `false`; the snapshot file is then +neither written nor read, and a cold start with an unreachable database comes +up `Unavailable` as before. The on-disk file is a cache, not a system of +record: deleting it only forces the next cold start to wait for live SQL. + ## Deploy Notifications `WatchDeployEvents` is a server-streaming RPC backed by @@ -291,6 +321,8 @@ Bound to `MxGateway:Galaxy` via `GalaxyRepositoryOptions`. |--------|---------|-------------| | `MxGateway:Galaxy:ConnectionString` | `Server=localhost;Database=ZB;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;` | SQL Server connection string for the Galaxy Repository. Integrated Security against `localhost` is the dev default; production deployments should override this through the standard double-underscore environment variable form, e.g. `MxGateway__Galaxy__ConnectionString`. | | `MxGateway:Galaxy:CommandTimeoutSeconds` | `60` | Per-command SQL timeout. Applies to all three RPCs. | +| `MxGateway:Galaxy:PersistSnapshot` | `true` | Persists each successful browse dataset to disk and reloads it at startup. See [On-disk snapshot](#on-disk-snapshot). | +| `MxGateway:Galaxy:SnapshotCachePath` | `C:\ProgramData\MxGateway\galaxy-snapshot.json` | File path for the persisted browse snapshot. Ignored when `PersistSnapshot` is `false`. | The connection string is not treated as a secret in dev (`Integrated Security`), but production deployments that use SQL authentication should set diff --git a/docs/GatewayConfiguration.md b/docs/GatewayConfiguration.md index 1602d18..d38a09c 100644 --- a/docs/GatewayConfiguration.md +++ b/docs/GatewayConfiguration.md @@ -60,7 +60,9 @@ paths, timeouts, queue sizes, enum values, or protocol values are invalid. "Galaxy": { "ConnectionString": "Server=localhost;Database=ZB;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;", "CommandTimeoutSeconds": 60, - "DashboardRefreshIntervalSeconds": 30 + "DashboardRefreshIntervalSeconds": 30, + "PersistSnapshot": true, + "SnapshotCachePath": "C:\\ProgramData\\MxGateway\\galaxy-snapshot.json" }, "Alarms": { "Enabled": false, @@ -170,6 +172,8 @@ at startup. | `MxGateway:Galaxy:ConnectionString` | `Server=localhost;Database=ZB;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;` | SQL Server connection string for the Galaxy Repository (`ZB`) used by the `GalaxyRepository` browse RPCs. Override in production via `MxGateway__Galaxy__ConnectionString`. | | `MxGateway:Galaxy:CommandTimeoutSeconds` | `60` | Per-command SQL timeout for all Galaxy browse RPCs. | | `MxGateway:Galaxy:DashboardRefreshIntervalSeconds` | `30` | Interval between background refreshes of the dashboard Galaxy summary cache. SQL is hit at most once per interval regardless of dashboard render rate. | +| `MxGateway:Galaxy:PersistSnapshot` | `true` | Persists the latest successful Galaxy browse dataset to disk. When `true`, the cache reloads that snapshot at startup so clients can still browse last-known data while the Galaxy database is unreachable. The restored data is served with `Stale` status until a live query confirms it. | +| `MxGateway:Galaxy:SnapshotCachePath` | `C:\ProgramData\MxGateway\galaxy-snapshot.json` | File path for the persisted Galaxy browse snapshot. Ignored when `PersistSnapshot` is `false`. The snapshot is written atomically (temp file plus rename). | See [Galaxy Repository Browse](./GalaxyRepository.md) for the RPC surface and behavior. diff --git a/src/MxGateway.Server/Galaxy/GalaxyHierarchyCache.cs b/src/MxGateway.Server/Galaxy/GalaxyHierarchyCache.cs index d2bfb3f..4ebadcb 100644 --- a/src/MxGateway.Server/Galaxy/GalaxyHierarchyCache.cs +++ b/src/MxGateway.Server/Galaxy/GalaxyHierarchyCache.cs @@ -12,6 +12,10 @@ namespace MxGateway.Server.Galaxy; /// refresh and reused across requests. Refreshes are deploy-time gated: every tick /// queries galaxy.time_of_last_deploy (cheap), and the heavy hierarchy + /// attributes rowsets are pulled only when that timestamp has advanced. +/// Each successful heavy refresh is persisted to disk through +/// ; the first refresh restores that +/// snapshot (as ) so clients can browse +/// last-known data when the Galaxy database is unreachable on a cold start. /// public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache { @@ -19,27 +23,35 @@ public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache private readonly IGalaxyRepository _repository; private readonly IGalaxyDeployNotifier _notifier; + private readonly IGalaxyHierarchySnapshotStore? _snapshotStore; private readonly TimeProvider _timeProvider; private readonly ILogger? _logger; private readonly TaskCompletionSource _firstLoad = new(TaskCreationOptions.RunContinuationsAsynchronously); private readonly SemaphoreSlim _refreshGate = new(1, 1); private GalaxyHierarchyCacheEntry _current = GalaxyHierarchyCacheEntry.Empty; + private bool _restoreAttempted; /// Initializes a new instance of the class. /// Galaxy Repository client for SQL queries. /// Galaxy deploy event notifier. /// Provider for current time; defaults to system time. /// Optional logger for diagnostic output. + /// + /// Optional on-disk snapshot store. When supplied, the cache persists each + /// successful refresh and restores the last snapshot on first load. + /// public GalaxyHierarchyCache( IGalaxyRepository repository, IGalaxyDeployNotifier notifier, TimeProvider? timeProvider = null, - ILogger? logger = null) + ILogger? logger = null, + IGalaxyHierarchySnapshotStore? snapshotStore = null) { _repository = repository; _notifier = notifier; _timeProvider = timeProvider ?? TimeProvider.System; _logger = logger; + _snapshotStore = snapshotStore; } /// Gets the current Galaxy hierarchy cache entry with projected status. @@ -88,6 +100,15 @@ public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache private async Task RefreshCoreAsync(CancellationToken cancellationToken) { + // First refresh only: seed the cache from the on-disk snapshot before + // querying SQL, so a cold start with an unreachable Galaxy database can + // still serve last-known browse data. Runs under the refresh gate. + if (!_restoreAttempted) + { + _restoreAttempted = true; + await TryRestoreFromDiskAsync(cancellationToken).ConfigureAwait(false); + } + GalaxyHierarchyCacheEntry previous = Volatile.Read(ref _current); DateTimeOffset queriedAt = _timeProvider.GetUtcNow(); @@ -130,41 +151,17 @@ public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache List hierarchy = hierarchyTask.Result; List attributes = attributesTask.Result; - IReadOnlyList objects = BuildObjects(hierarchy, attributes); - GalaxyHierarchyIndex index = GalaxyHierarchyIndex.Build(objects); - int areaCount = hierarchy.Count(row => row.IsArea); - int historized = attributes.Count(row => row.IsHistorized); - int alarms = attributes.Count(row => row.IsAlarm); - DashboardGalaxySummary dashboardSummary = BuildDashboardSummary( + long nextSequence = previous.Sequence + 1; + GalaxyHierarchyCacheEntry next = BuildEntry( status: GalaxyCacheStatus.Healthy, + sequence: nextSequence, lastQueriedAt: queriedAt, lastSuccessAt: queriedAt, lastDeployTime: deployTime, lastError: null, hierarchy: hierarchy, - objectCount: hierarchy.Count, - areaCount: areaCount, - attributeCount: attributes.Count, - historizedAttributeCount: historized, - alarmAttributeCount: alarms); - - long nextSequence = previous.Sequence + 1; - GalaxyHierarchyCacheEntry next = new( - Status: GalaxyCacheStatus.Healthy, - Sequence: nextSequence, - LastQueriedAt: queriedAt, - LastSuccessAt: queriedAt, - LastDeployTime: deployTime, - LastError: null, - Objects: objects, - Index: index, - DashboardSummary: dashboardSummary, - ObjectCount: hierarchy.Count, - AreaCount: areaCount, - AttributeCount: attributes.Count, - HistorizedAttributeCount: historized, - AlarmAttributeCount: alarms); + attributes: attributes); Volatile.Write(ref _current, next); _firstLoad.TrySetResult(); @@ -175,6 +172,8 @@ public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache TimeOfLastDeploy: deployTime, ObjectCount: hierarchy.Count, AttributeCount: attributes.Count)); + + await PersistSnapshotAsync(deployTime, queriedAt, hierarchy, attributes, cancellationToken).ConfigureAwait(false); } catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { @@ -205,6 +204,150 @@ public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache } } + /// + /// Materializes a complete from raw + /// hierarchy and attribute rowsets. Shared by the live refresh path and the + /// on-disk restore path so both produce an identical object list, index, and + /// dashboard summary. + /// + private static GalaxyHierarchyCacheEntry BuildEntry( + GalaxyCacheStatus status, + long sequence, + DateTimeOffset? lastQueriedAt, + DateTimeOffset? lastSuccessAt, + DateTimeOffset? lastDeployTime, + string? lastError, + IReadOnlyList hierarchy, + IReadOnlyList attributes) + { + IReadOnlyList objects = BuildObjects(hierarchy, attributes); + GalaxyHierarchyIndex index = GalaxyHierarchyIndex.Build(objects); + + int areaCount = hierarchy.Count(row => row.IsArea); + int historized = attributes.Count(row => row.IsHistorized); + int alarms = attributes.Count(row => row.IsAlarm); + DashboardGalaxySummary dashboardSummary = BuildDashboardSummary( + status: status, + lastQueriedAt: lastQueriedAt, + lastSuccessAt: lastSuccessAt, + lastDeployTime: lastDeployTime, + lastError: lastError, + hierarchy: hierarchy, + objectCount: hierarchy.Count, + areaCount: areaCount, + attributeCount: attributes.Count, + historizedAttributeCount: historized, + alarmAttributeCount: alarms); + + return new GalaxyHierarchyCacheEntry( + Status: status, + Sequence: sequence, + LastQueriedAt: lastQueriedAt, + LastSuccessAt: lastSuccessAt, + LastDeployTime: lastDeployTime, + LastError: lastError, + Objects: objects, + Index: index, + DashboardSummary: dashboardSummary, + ObjectCount: hierarchy.Count, + AreaCount: areaCount, + AttributeCount: attributes.Count, + HistorizedAttributeCount: historized, + AlarmAttributeCount: alarms); + } + + /// + /// Seeds the cache from the on-disk snapshot when no live data has loaded yet. + /// The restored entry is marked — it is + /// last-known data, not live. A later refresh that observes the same deploy + /// time promotes it to healthy; one that observes a newer deploy replaces it. + /// + private async Task TryRestoreFromDiskAsync(CancellationToken cancellationToken) + { + if (_snapshotStore is null) + { + return; + } + + if (Volatile.Read(ref _current).HasData) + { + return; + } + + GalaxyHierarchySnapshot? snapshot; + try + { + snapshot = await _snapshotStore.TryLoadAsync(cancellationToken).ConfigureAwait(false); + } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + throw; + } + catch (Exception exception) + { + _logger?.LogWarning(exception, "Failed to restore the Galaxy hierarchy from the on-disk snapshot."); + return; + } + + if (snapshot is null) + { + return; + } + + long sequence = Volatile.Read(ref _current).Sequence + 1; + GalaxyHierarchyCacheEntry restored = BuildEntry( + status: GalaxyCacheStatus.Stale, + sequence: sequence, + lastQueriedAt: snapshot.SavedAt, + lastSuccessAt: snapshot.SavedAt, + lastDeployTime: snapshot.LastDeployTime, + lastError: null, + hierarchy: snapshot.Hierarchy, + attributes: snapshot.Attributes); + Volatile.Write(ref _current, restored); + + _notifier.Publish(new GalaxyDeployEventInfo( + Sequence: sequence, + ObservedAt: _timeProvider.GetUtcNow(), + TimeOfLastDeploy: snapshot.LastDeployTime, + ObjectCount: snapshot.Hierarchy.Count, + AttributeCount: snapshot.Attributes.Count)); + + _logger?.LogInformation( + "Restored Galaxy hierarchy from on-disk snapshot saved {SavedAt:o}: {ObjectCount} objects, {AttributeCount} attributes (status Stale until the Galaxy database confirms).", + snapshot.SavedAt, + snapshot.Hierarchy.Count, + snapshot.Attributes.Count); + } + + /// + /// Persists a successful refresh to disk. Persistence failures are logged and + /// swallowed — a cache that cannot write its backup is still fully usable. + /// + private async Task PersistSnapshotAsync( + DateTimeOffset? deployTime, + DateTimeOffset savedAt, + IReadOnlyList hierarchy, + IReadOnlyList attributes, + CancellationToken cancellationToken) + { + if (_snapshotStore is null) + { + return; + } + + try + { + await _snapshotStore.SaveAsync( + new GalaxyHierarchySnapshot(deployTime, savedAt, hierarchy, attributes), + cancellationToken).ConfigureAwait(false); + } + catch (Exception exception) + { + _logger?.LogWarning(exception, "Failed to persist the Galaxy hierarchy snapshot to disk."); + } + } + private static IReadOnlyList BuildObjects( IReadOnlyList hierarchy, IReadOnlyList attributes) diff --git a/src/MxGateway.Server/Galaxy/GalaxyHierarchySnapshot.cs b/src/MxGateway.Server/Galaxy/GalaxyHierarchySnapshot.cs new file mode 100644 index 0000000..184545d --- /dev/null +++ b/src/MxGateway.Server/Galaxy/GalaxyHierarchySnapshot.cs @@ -0,0 +1,24 @@ +namespace MxGateway.Server.Galaxy; + +/// +/// A serializable point-in-time copy of the Galaxy Repository browse data. +/// Holds the raw hierarchy and attribute rowsets — not the materialized +/// protobuf objects — so the restore path runs the exact same +/// materialization as a live refresh. Persisted by +/// after a successful refresh +/// and reloaded at startup when the Galaxy database is unreachable. +/// +/// +/// The galaxy.time_of_last_deploy the rowsets were pulled at, or +/// when the Galaxy table reported no deploy. A later +/// live refresh that observes this same timestamp can promote the restored +/// entry to healthy without re-running the heavy queries. +/// +/// UTC wall-clock when the snapshot was written to disk. +/// The persisted object-hierarchy rowset. +/// The persisted attribute rowset. +public sealed record GalaxyHierarchySnapshot( + DateTimeOffset? LastDeployTime, + DateTimeOffset SavedAt, + IReadOnlyList Hierarchy, + IReadOnlyList Attributes); diff --git a/src/MxGateway.Server/Galaxy/GalaxyHierarchySnapshotStore.cs b/src/MxGateway.Server/Galaxy/GalaxyHierarchySnapshotStore.cs new file mode 100644 index 0000000..7960f4a --- /dev/null +++ b/src/MxGateway.Server/Galaxy/GalaxyHierarchySnapshotStore.cs @@ -0,0 +1,122 @@ +using System.Text.Json; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; + +namespace MxGateway.Server.Galaxy; + +/// +/// JSON-file implementation of . +/// Writes the on-disk snapshot atomically (temp file + rename) so a crash +/// mid-write can never leave a torn file, and ignores files whose schema +/// version it does not recognize. When +/// is +/// both operations are no-ops. +/// +public sealed class GalaxyHierarchySnapshotStore : IGalaxyHierarchySnapshotStore +{ + /// + /// On-disk format version. Bump this whenever the persisted shape changes + /// in a way an older or newer gateway cannot read; a mismatched file is + /// ignored rather than misparsed. + /// + private const int CurrentSchemaVersion = 1; + + private static readonly JsonSerializerOptions SerializerOptions = new() + { + WriteIndented = false, + }; + + private readonly string? _path; + private readonly ILogger? _logger; + private readonly SemaphoreSlim _ioGate = new(1, 1); + + /// Initializes a new instance of the class. + /// Galaxy repository options carrying the snapshot path and enable flag. + /// Optional logger for diagnostic output. + public GalaxyHierarchySnapshotStore( + IOptions options, + ILogger? logger = null) + { + GalaxyRepositoryOptions value = options.Value; + _path = value.PersistSnapshot && !string.IsNullOrWhiteSpace(value.SnapshotCachePath) + ? value.SnapshotCachePath + : null; + _logger = logger; + } + + /// + public async Task SaveAsync(GalaxyHierarchySnapshot snapshot, CancellationToken cancellationToken) + { + ArgumentNullException.ThrowIfNull(snapshot); + if (_path is null) + { + return; + } + + PersistedFile file = new(CurrentSchemaVersion, snapshot); + + await _ioGate.WaitAsync(cancellationToken).ConfigureAwait(false); + try + { + string? directory = Path.GetDirectoryName(_path); + if (!string.IsNullOrEmpty(directory)) + { + Directory.CreateDirectory(directory); + } + + string tempPath = _path + ".tmp"; + await using (FileStream stream = new(tempPath, FileMode.Create, FileAccess.Write, FileShare.None)) + { + await JsonSerializer.SerializeAsync(stream, file, SerializerOptions, cancellationToken).ConfigureAwait(false); + } + + File.Move(tempPath, _path, overwrite: true); + _logger?.LogDebug( + "Persisted Galaxy hierarchy snapshot to {Path} ({ObjectCount} objects, {AttributeCount} attributes).", + _path, + snapshot.Hierarchy.Count, + snapshot.Attributes.Count); + } + finally + { + _ioGate.Release(); + } + } + + /// + public async Task TryLoadAsync(CancellationToken cancellationToken) + { + if (_path is null || !File.Exists(_path)) + { + return null; + } + + await _ioGate.WaitAsync(cancellationToken).ConfigureAwait(false); + try + { + PersistedFile? file; + await using (FileStream stream = new(_path, FileMode.Open, FileAccess.Read, FileShare.Read)) + { + file = await JsonSerializer.DeserializeAsync( + stream, SerializerOptions, cancellationToken).ConfigureAwait(false); + } + + if (file is null || file.SchemaVersion != CurrentSchemaVersion || file.Snapshot is null) + { + _logger?.LogWarning( + "Ignoring Galaxy hierarchy snapshot at {Path}: unrecognized or empty schema version.", + _path); + return null; + } + + return file.Snapshot; + } + finally + { + _ioGate.Release(); + } + } + + /// On-disk envelope: a schema version plus the snapshot payload. + private sealed record PersistedFile(int SchemaVersion, GalaxyHierarchySnapshot? Snapshot); +} diff --git a/src/MxGateway.Server/Galaxy/GalaxyRepositoryOptions.cs b/src/MxGateway.Server/Galaxy/GalaxyRepositoryOptions.cs index 7007c0c..3e2d016 100644 --- a/src/MxGateway.Server/Galaxy/GalaxyRepositoryOptions.cs +++ b/src/MxGateway.Server/Galaxy/GalaxyRepositoryOptions.cs @@ -27,4 +27,21 @@ public sealed class GalaxyRepositoryOptions /// cache. SQL is hit at most once per interval regardless of dashboard render rate. /// public int DashboardRefreshIntervalSeconds { get; init; } = 30; + + /// Default on-disk path for the persisted Galaxy browse snapshot. + public const string DefaultSnapshotCachePath = + @"C:\ProgramData\MxGateway\galaxy-snapshot.json"; + + /// + /// Whether the gateway persists the latest successful Galaxy browse dataset to + /// disk. When enabled, the cache reloads that snapshot at startup so clients can + /// still browse last-known data while the Galaxy database is unreachable. + /// + public bool PersistSnapshot { get; init; } = true; + + /// + /// File path for the persisted Galaxy browse snapshot. Ignored when + /// is . + /// + public string SnapshotCachePath { get; init; } = DefaultSnapshotCachePath; } diff --git a/src/MxGateway.Server/Galaxy/GalaxyRepositoryServiceCollectionExtensions.cs b/src/MxGateway.Server/Galaxy/GalaxyRepositoryServiceCollectionExtensions.cs index 070470d..d7a9e26 100644 --- a/src/MxGateway.Server/Galaxy/GalaxyRepositoryServiceCollectionExtensions.cs +++ b/src/MxGateway.Server/Galaxy/GalaxyRepositoryServiceCollectionExtensions.cs @@ -19,6 +19,7 @@ public static class GalaxyRepositoryServiceCollectionExtensions services.AddSingleton(sp => sp.GetRequiredService()); services.AddSingleton(); + services.AddSingleton(); services.AddSingleton(); services.AddHostedService(); diff --git a/src/MxGateway.Server/Galaxy/IGalaxyHierarchySnapshotStore.cs b/src/MxGateway.Server/Galaxy/IGalaxyHierarchySnapshotStore.cs new file mode 100644 index 0000000..cc3b9bf --- /dev/null +++ b/src/MxGateway.Server/Galaxy/IGalaxyHierarchySnapshotStore.cs @@ -0,0 +1,28 @@ +namespace MxGateway.Server.Galaxy; + +/// +/// Persists the latest Galaxy Repository browse dataset to disk and reloads +/// it at startup. Lets serve last-known +/// browse data when the Galaxy database is unreachable on a cold start. +/// +public interface IGalaxyHierarchySnapshotStore +{ + /// + /// Writes to disk, replacing any previous + /// snapshot atomically. A no-op when snapshot persistence is disabled. + /// + /// The browse dataset to persist. + /// Token to cancel the asynchronous operation. + Task SaveAsync(GalaxyHierarchySnapshot snapshot, CancellationToken cancellationToken); + + /// + /// Reads the persisted Galaxy browse dataset. + /// + /// Token to cancel the asynchronous operation. + /// + /// The persisted snapshot, or when none exists, + /// persistence is disabled, or the on-disk file uses an unrecognized + /// schema version. + /// + Task TryLoadAsync(CancellationToken cancellationToken); +} diff --git a/src/MxGateway.Server/appsettings.json b/src/MxGateway.Server/appsettings.json index 4d4da73..718be37 100644 --- a/src/MxGateway.Server/appsettings.json +++ b/src/MxGateway.Server/appsettings.json @@ -65,7 +65,9 @@ "Galaxy": { "ConnectionString": "Server=localhost;Database=ZB;Integrated Security=True;TrustServerCertificate=True;Encrypt=False;", "CommandTimeoutSeconds": 60, - "DashboardRefreshIntervalSeconds": 30 + "DashboardRefreshIntervalSeconds": 30, + "PersistSnapshot": true, + "SnapshotCachePath": "C:\\ProgramData\\MxGateway\\galaxy-snapshot.json" }, "Alarms": { "Enabled": true, diff --git a/src/MxGateway.Tests/Galaxy/GalaxyHierarchyCacheTests.cs b/src/MxGateway.Tests/Galaxy/GalaxyHierarchyCacheTests.cs index 9ba783e..a5202d5 100644 --- a/src/MxGateway.Tests/Galaxy/GalaxyHierarchyCacheTests.cs +++ b/src/MxGateway.Tests/Galaxy/GalaxyHierarchyCacheTests.cs @@ -1,11 +1,14 @@ +using Microsoft.Extensions.Options; using MxGateway.Server.Galaxy; using MxGateway.Contracts.Proto.Galaxy; using MxGateway.Tests.TestSupport; namespace MxGateway.Tests.Galaxy; -public sealed class GalaxyHierarchyCacheTests +public sealed class GalaxyHierarchyCacheTests : IDisposable { + private readonly List _tempPaths = []; + /// /// Verifies cache returns empty entry before any refresh occurs. /// @@ -121,6 +124,170 @@ public sealed class GalaxyHierarchyCacheTests Assert.Same(root, index.ObjectViewsById[1].Object); } + /// + /// Verifies a successful refresh writes the browse dataset to the on-disk + /// snapshot store so a later cold start can restore it. + /// + [Fact] + public async Task RefreshAsync_WhenSuccessful_PersistsSnapshotToDisk() + { + GalaxyDeployNotifier notifier = new(); + StubGalaxyRepository repository = new( + deployTime: new DateTime(2026, 5, 20, 9, 0, 0, DateTimeKind.Utc), + hierarchy: [SampleHierarchyRow()], + attributes: [SampleAttributeRow()]); + GalaxyHierarchySnapshotStore store = CreateStore(); + GalaxyHierarchyCache cache = new(repository, notifier, new ManualTimeProvider(), snapshotStore: store); + + await cache.RefreshAsync(CancellationToken.None); + + Assert.Equal(GalaxyCacheStatus.Healthy, cache.Current.Status); + GalaxyHierarchySnapshot? persisted = await store.TryLoadAsync(CancellationToken.None); + Assert.NotNull(persisted); + Assert.Equal(99, Assert.Single(persisted.Hierarchy).GobjectId); + Assert.Equal("PV", Assert.Single(persisted.Attributes).AttributeName); + } + + /// + /// Verifies that when the Galaxy database is unreachable on first refresh but a + /// snapshot exists on disk, the cache serves that data with Stale status + /// rather than coming up empty. + /// + [Fact] + public async Task RefreshAsync_WhenDatabaseUnreachableButSnapshotOnDisk_RestoresStaleData() + { + GalaxyHierarchySnapshotStore store = CreateStore(); + await store.SaveAsync( + new GalaxyHierarchySnapshot( + LastDeployTime: new DateTimeOffset(2026, 5, 20, 9, 0, 0, TimeSpan.Zero), + SavedAt: new DateTimeOffset(2026, 5, 20, 9, 1, 0, TimeSpan.Zero), + Hierarchy: [SampleHierarchyRow()], + Attributes: [SampleAttributeRow()]), + CancellationToken.None); + + GalaxyDeployNotifier notifier = new(); + ThrowingGalaxyRepository repository = new(new InvalidOperationException("Galaxy repository unreachable")); + GalaxyHierarchyCache cache = new(repository, notifier, new ManualTimeProvider(), snapshotStore: store); + + await cache.RefreshAsync(CancellationToken.None); + + Assert.Equal(GalaxyCacheStatus.Stale, cache.Current.Status); + Assert.True(cache.Current.HasData); + Assert.Equal(1, cache.Current.ObjectCount); + Assert.Equal(1, cache.Current.AttributeCount); + Assert.NotNull(notifier.Latest); + } + + /// + /// Verifies that when the disk snapshot's deploy time still matches the live + /// Galaxy database, the cache promotes the restored data to Healthy + /// without re-running the heavy hierarchy and attribute queries. + /// + [Fact] + public async Task RefreshAsync_WhenSnapshotDeployMatchesLive_PromotesToHealthyWithoutHeavyQuery() + { + DateTime deployTime = new(2026, 5, 20, 9, 0, 0, DateTimeKind.Utc); + GalaxyHierarchySnapshotStore store = CreateStore(); + await store.SaveAsync( + new GalaxyHierarchySnapshot( + LastDeployTime: new DateTimeOffset(deployTime, TimeSpan.Zero), + SavedAt: new DateTimeOffset(2026, 5, 20, 9, 1, 0, TimeSpan.Zero), + Hierarchy: [SampleHierarchyRow()], + Attributes: [SampleAttributeRow()]), + CancellationToken.None); + + GalaxyDeployNotifier notifier = new(); + StubGalaxyRepository repository = new(deployTime); + GalaxyHierarchyCache cache = new(repository, notifier, new ManualTimeProvider(), snapshotStore: store); + + await cache.RefreshAsync(CancellationToken.None); + + Assert.Equal(GalaxyCacheStatus.Healthy, cache.Current.Status); + Assert.Equal(1, cache.Current.ObjectCount); + Assert.Equal(0, repository.GetHierarchyCount); + Assert.Equal(0, repository.GetAttributesCount); + } + + private static GalaxyHierarchyRow SampleHierarchyRow() => new() + { + GobjectId = 99, + TagName = "Pump_001", + ContainedName = "Pump", + BrowseName = "Pump", + CategoryId = 10, + TemplateChain = ["AppPump"], + }; + + private static GalaxyAttributeRow SampleAttributeRow() => new() + { + GobjectId = 99, + TagName = "Pump_001", + AttributeName = "PV", + FullTagReference = "Pump_001.PV", + MxDataType = 5, + DataTypeName = "Float", + }; + + private GalaxyHierarchySnapshotStore CreateStore() + { + string path = Path.Combine( + Path.GetTempPath(), + $"mxgw-galaxy-cache-test-{Guid.NewGuid():N}.json"); + _tempPaths.Add(path); + GalaxyRepositoryOptions options = new() + { + PersistSnapshot = true, + SnapshotCachePath = path, + }; + return new GalaxyHierarchySnapshotStore(Options.Create(options)); + } + + /// In-memory that returns fixed rowsets. + private sealed class StubGalaxyRepository( + DateTime? deployTime, + List? hierarchy = null, + List? attributes = null) : IGalaxyRepository + { + private readonly List _hierarchy = hierarchy ?? []; + private readonly List _attributes = attributes ?? []; + + public int GetHierarchyCount { get; private set; } + + public int GetAttributesCount { get; private set; } + + public Task TestConnectionAsync(CancellationToken ct = default) => Task.FromResult(true); + + public Task GetLastDeployTimeAsync(CancellationToken ct = default) => Task.FromResult(deployTime); + + public Task> GetHierarchyAsync(CancellationToken ct = default) + { + GetHierarchyCount++; + return Task.FromResult(_hierarchy); + } + + public Task> GetAttributesAsync(CancellationToken ct = default) + { + GetAttributesCount++; + return Task.FromResult(_attributes); + } + } + + public void Dispose() + { + foreach (string path in _tempPaths) + { + try + { + File.Delete(path); + File.Delete(path + ".tmp"); + } + catch (IOException) + { + // Best-effort cleanup of test scratch files. + } + } + } + private sealed class ThrowingGalaxyRepository(Exception toThrow) : IGalaxyRepository { /// Gets the number of times was called. diff --git a/src/MxGateway.Tests/Galaxy/GalaxyHierarchySnapshotStoreTests.cs b/src/MxGateway.Tests/Galaxy/GalaxyHierarchySnapshotStoreTests.cs new file mode 100644 index 0000000..c28546f --- /dev/null +++ b/src/MxGateway.Tests/Galaxy/GalaxyHierarchySnapshotStoreTests.cs @@ -0,0 +1,167 @@ +using Microsoft.Extensions.Options; +using MxGateway.Server.Galaxy; + +namespace MxGateway.Tests.Galaxy; + +/// +/// Covers : the on-disk persistence +/// that lets the Galaxy browse cache survive a cold start while the Galaxy +/// database is unreachable. +/// +public sealed class GalaxyHierarchySnapshotStoreTests : IDisposable +{ + private readonly List _tempPaths = []; + + [Fact] + public async Task SaveAsync_ThenTryLoadAsync_RoundTripsRows() + { + string path = CreateTempPath(); + GalaxyHierarchySnapshotStore store = CreateStore(path); + GalaxyHierarchySnapshot snapshot = SampleSnapshot(); + + await store.SaveAsync(snapshot, CancellationToken.None); + GalaxyHierarchySnapshot? loaded = await store.TryLoadAsync(CancellationToken.None); + + Assert.NotNull(loaded); + Assert.Equal(snapshot.LastDeployTime, loaded.LastDeployTime); + Assert.Equal(snapshot.SavedAt, loaded.SavedAt); + + GalaxyHierarchyRow row = Assert.Single(loaded.Hierarchy); + Assert.Equal(7, row.GobjectId); + Assert.Equal("Pump_001", row.TagName); + Assert.Equal(["AppPump", "Pump"], row.TemplateChain); + + Assert.Equal(2, loaded.Attributes.Count); + GalaxyAttributeRow withDimension = loaded.Attributes[0]; + Assert.Equal("PV", withDimension.AttributeName); + Assert.Equal(8, withDimension.ArrayDimension); + Assert.True(withDimension.IsAlarm); + Assert.Null(loaded.Attributes[1].ArrayDimension); + } + + [Fact] + public async Task TryLoadAsync_WhenNoFileExists_ReturnsNull() + { + GalaxyHierarchySnapshotStore store = CreateStore(CreateTempPath()); + + Assert.Null(await store.TryLoadAsync(CancellationToken.None)); + } + + [Fact] + public async Task SaveAsync_WhenPersistenceDisabled_WritesNothing() + { + string path = CreateTempPath(); + GalaxyHierarchySnapshotStore store = CreateStore(path, persist: false); + + await store.SaveAsync(SampleSnapshot(), CancellationToken.None); + + Assert.False(File.Exists(path)); + Assert.Null(await store.TryLoadAsync(CancellationToken.None)); + } + + [Fact] + public async Task TryLoadAsync_WhenSchemaVersionUnrecognized_ReturnsNull() + { + string path = CreateTempPath(); + await File.WriteAllTextAsync(path, """{"SchemaVersion":999,"Snapshot":null}"""); + GalaxyHierarchySnapshotStore store = CreateStore(path); + + Assert.Null(await store.TryLoadAsync(CancellationToken.None)); + } + + [Fact] + public async Task SaveAsync_OverwritesAnEarlierSnapshot() + { + string path = CreateTempPath(); + GalaxyHierarchySnapshotStore store = CreateStore(path); + + await store.SaveAsync(SampleSnapshot(), CancellationToken.None); + GalaxyHierarchySnapshot second = SampleSnapshot() with + { + Hierarchy = [], + Attributes = [], + }; + await store.SaveAsync(second, CancellationToken.None); + + GalaxyHierarchySnapshot? loaded = await store.TryLoadAsync(CancellationToken.None); + Assert.NotNull(loaded); + Assert.Empty(loaded.Hierarchy); + Assert.Empty(loaded.Attributes); + } + + private static GalaxyHierarchySnapshot SampleSnapshot() => new( + LastDeployTime: new DateTimeOffset(2026, 5, 20, 9, 30, 0, TimeSpan.Zero), + SavedAt: new DateTimeOffset(2026, 5, 20, 9, 31, 0, TimeSpan.Zero), + Hierarchy: + [ + new GalaxyHierarchyRow + { + GobjectId = 7, + TagName = "Pump_001", + ContainedName = "Pump", + BrowseName = "Pump", + CategoryId = 10, + TemplateChain = ["AppPump", "Pump"], + }, + ], + Attributes: + [ + new GalaxyAttributeRow + { + GobjectId = 7, + TagName = "Pump_001", + AttributeName = "PV", + FullTagReference = "Pump_001.PV[]", + MxDataType = 5, + DataTypeName = "Float", + IsArray = true, + ArrayDimension = 8, + IsAlarm = true, + }, + new GalaxyAttributeRow + { + GobjectId = 7, + TagName = "Pump_001", + AttributeName = "Mode", + FullTagReference = "Pump_001.Mode", + MxDataType = 3, + DataTypeName = "Integer", + ArrayDimension = null, + }, + ]); + + private static GalaxyHierarchySnapshotStore CreateStore(string path, bool persist = true) + { + GalaxyRepositoryOptions options = new() + { + PersistSnapshot = persist, + SnapshotCachePath = path, + }; + return new GalaxyHierarchySnapshotStore(Options.Create(options)); + } + + private string CreateTempPath() + { + string path = Path.Combine( + Path.GetTempPath(), + $"mxgw-galaxy-snapshot-{Guid.NewGuid():N}.json"); + _tempPaths.Add(path); + return path; + } + + public void Dispose() + { + foreach (string path in _tempPaths) + { + try + { + File.Delete(path); + File.Delete(path + ".tmp"); + } + catch (IOException) + { + // Best-effort cleanup of test scratch files. + } + } + } +}