Resolve code-review findings Server-033..037
Server-033 (Medium): TryRestoreFromDiskAsync now completes the _firstLoad gate once the restored snapshot is published, so a browse call racing the first refresh is served immediately instead of waiting out the 5s bootstrap budget while an unreachable-database query runs. Server-034 (Low): GalaxyHierarchySnapshotStore.TryLoadAsync catches JsonException / IOException / UnauthorizedAccessException and returns null, honoring the Try contract for a corrupt or unreadable snapshot file. Server-035 (Low): SaveAsync bounds the write with a linked CancellationToken (CommandTimeoutSeconds budget) so a stuck disk cannot pin the refresh loop. Server-036 (Low): PersistSnapshotAsync no longer logs a save cancelled by gateway shutdown as a persistence failure. Server-037 (Low): added cache tests for the corrupt-snapshot restore path and for PersistSnapshot=false, plus a store test for corrupt JSON. All 100 Galaxy tests pass; gateway builds clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -306,6 +306,12 @@ public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache
|
|||||||
attributes: snapshot.Attributes);
|
attributes: snapshot.Attributes);
|
||||||
Volatile.Write(ref _current, restored);
|
Volatile.Write(ref _current, restored);
|
||||||
|
|
||||||
|
// Restored data is a valid completed first load: unblock callers waiting on
|
||||||
|
// the bootstrap gate immediately, rather than making them wait out the full
|
||||||
|
// wait budget for a live query that — when the database is unreachable, the
|
||||||
|
// scenario this restore exists for — may not return for seconds.
|
||||||
|
_firstLoad.TrySetResult();
|
||||||
|
|
||||||
_notifier.Publish(new GalaxyDeployEventInfo(
|
_notifier.Publish(new GalaxyDeployEventInfo(
|
||||||
Sequence: sequence,
|
Sequence: sequence,
|
||||||
ObservedAt: _timeProvider.GetUtcNow(),
|
ObservedAt: _timeProvider.GetUtcNow(),
|
||||||
@@ -342,6 +348,11 @@ public sealed class GalaxyHierarchyCache : IGalaxyHierarchyCache
|
|||||||
new GalaxyHierarchySnapshot(deployTime, savedAt, hierarchy, attributes),
|
new GalaxyHierarchySnapshot(deployTime, savedAt, hierarchy, attributes),
|
||||||
cancellationToken).ConfigureAwait(false);
|
cancellationToken).ConfigureAwait(false);
|
||||||
}
|
}
|
||||||
|
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||||
|
{
|
||||||
|
// The refresh was cancelled (gateway shutdown) before the write finished.
|
||||||
|
// That is not a persistence failure — do not log it as a warning.
|
||||||
|
}
|
||||||
catch (Exception exception)
|
catch (Exception exception)
|
||||||
{
|
{
|
||||||
_logger?.LogWarning(exception, "Failed to persist the Galaxy hierarchy snapshot to disk.");
|
_logger?.LogWarning(exception, "Failed to persist the Galaxy hierarchy snapshot to disk.");
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ public sealed class GalaxyHierarchySnapshotStore : IGalaxyHierarchySnapshotStore
|
|||||||
};
|
};
|
||||||
|
|
||||||
private readonly string? _path;
|
private readonly string? _path;
|
||||||
|
private readonly TimeSpan _writeTimeout;
|
||||||
private readonly ILogger<GalaxyHierarchySnapshotStore>? _logger;
|
private readonly ILogger<GalaxyHierarchySnapshotStore>? _logger;
|
||||||
private readonly SemaphoreSlim _ioGate = new(1, 1);
|
private readonly SemaphoreSlim _ioGate = new(1, 1);
|
||||||
|
|
||||||
@@ -41,6 +42,7 @@ public sealed class GalaxyHierarchySnapshotStore : IGalaxyHierarchySnapshotStore
|
|||||||
_path = value.PersistSnapshot && !string.IsNullOrWhiteSpace(value.SnapshotCachePath)
|
_path = value.PersistSnapshot && !string.IsNullOrWhiteSpace(value.SnapshotCachePath)
|
||||||
? value.SnapshotCachePath
|
? value.SnapshotCachePath
|
||||||
: null;
|
: null;
|
||||||
|
_writeTimeout = TimeSpan.FromSeconds(Math.Max(1, value.CommandTimeoutSeconds));
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,6 +60,12 @@ public sealed class GalaxyHierarchySnapshotStore : IGalaxyHierarchySnapshotStore
|
|||||||
await _ioGate.WaitAsync(cancellationToken).ConfigureAwait(false);
|
await _ioGate.WaitAsync(cancellationToken).ConfigureAwait(false);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
// Bound the write so a stuck disk — e.g. a SnapshotCachePath on an
|
||||||
|
// unresponsive network share — cannot stall the caller. On the cache
|
||||||
|
// refresh path that would otherwise pin the whole refresh loop.
|
||||||
|
using CancellationTokenSource writeCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
|
||||||
|
writeCts.CancelAfter(_writeTimeout);
|
||||||
|
|
||||||
string? directory = Path.GetDirectoryName(_path);
|
string? directory = Path.GetDirectoryName(_path);
|
||||||
if (!string.IsNullOrEmpty(directory))
|
if (!string.IsNullOrEmpty(directory))
|
||||||
{
|
{
|
||||||
@@ -67,7 +75,7 @@ public sealed class GalaxyHierarchySnapshotStore : IGalaxyHierarchySnapshotStore
|
|||||||
string tempPath = _path + ".tmp";
|
string tempPath = _path + ".tmp";
|
||||||
await using (FileStream stream = new(tempPath, FileMode.Create, FileAccess.Write, FileShare.None))
|
await using (FileStream stream = new(tempPath, FileMode.Create, FileAccess.Write, FileShare.None))
|
||||||
{
|
{
|
||||||
await JsonSerializer.SerializeAsync(stream, file, SerializerOptions, cancellationToken).ConfigureAwait(false);
|
await JsonSerializer.SerializeAsync(stream, file, SerializerOptions, writeCts.Token).ConfigureAwait(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
File.Move(tempPath, _path, overwrite: true);
|
File.Move(tempPath, _path, overwrite: true);
|
||||||
@@ -111,6 +119,17 @@ public sealed class GalaxyHierarchySnapshotStore : IGalaxyHierarchySnapshotStore
|
|||||||
|
|
||||||
return file.Snapshot;
|
return file.Snapshot;
|
||||||
}
|
}
|
||||||
|
catch (Exception exception) when (exception is JsonException or IOException or UnauthorizedAccessException)
|
||||||
|
{
|
||||||
|
// A corrupt, truncated, locked, or access-denied snapshot file is an
|
||||||
|
// expected failure mode for a disk cache — honor the Try contract and
|
||||||
|
// return null rather than throwing.
|
||||||
|
_logger?.LogWarning(
|
||||||
|
exception,
|
||||||
|
"Ignoring Galaxy hierarchy snapshot at {Path}: the file is unreadable or not valid JSON.",
|
||||||
|
_path);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
finally
|
finally
|
||||||
{
|
{
|
||||||
_ioGate.Release();
|
_ioGate.Release();
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
using Microsoft.Extensions.Logging;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
using MxGateway.Server.Galaxy;
|
using MxGateway.Server.Galaxy;
|
||||||
using MxGateway.Contracts.Proto.Galaxy;
|
using MxGateway.Contracts.Proto.Galaxy;
|
||||||
@@ -208,6 +209,108 @@ public sealed class GalaxyHierarchyCacheTests : IDisposable
|
|||||||
Assert.Equal(0, repository.GetAttributesCount);
|
Assert.Equal(0, repository.GetAttributesCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Verifies that a restored on-disk snapshot completes the first-load gate
|
||||||
|
/// immediately, so a browse call racing the first refresh is not blocked for
|
||||||
|
/// the full bootstrap budget while the live Galaxy query is still running.
|
||||||
|
/// Regression test for Server-033.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task RefreshAsync_RestoredSnapshotCompletesFirstLoadBeforeLiveQueryReturns()
|
||||||
|
{
|
||||||
|
GalaxyHierarchySnapshotStore store = CreateStore();
|
||||||
|
await store.SaveAsync(
|
||||||
|
new GalaxyHierarchySnapshot(
|
||||||
|
LastDeployTime: new DateTimeOffset(2026, 5, 20, 9, 0, 0, TimeSpan.Zero),
|
||||||
|
SavedAt: new DateTimeOffset(2026, 5, 20, 9, 1, 0, TimeSpan.Zero),
|
||||||
|
Hierarchy: [SampleHierarchyRow()],
|
||||||
|
Attributes: [SampleAttributeRow()]),
|
||||||
|
CancellationToken.None);
|
||||||
|
|
||||||
|
GalaxyDeployNotifier notifier = new();
|
||||||
|
BlockingGalaxyRepository repository = new();
|
||||||
|
GalaxyHierarchyCache cache = new(repository, notifier, new ManualTimeProvider(), snapshotStore: store);
|
||||||
|
|
||||||
|
Task refresh = cache.RefreshAsync(CancellationToken.None);
|
||||||
|
|
||||||
|
// The live query is blocked inside the repository; first-load must still
|
||||||
|
// complete — from the restored snapshot — well within the wait budget.
|
||||||
|
await cache.WaitForFirstLoadAsync(CancellationToken.None).WaitAsync(TimeSpan.FromSeconds(5));
|
||||||
|
Assert.True(cache.Current.HasData);
|
||||||
|
Assert.Equal(GalaxyCacheStatus.Stale, cache.Current.Status);
|
||||||
|
|
||||||
|
repository.Release();
|
||||||
|
await refresh.WaitAsync(TimeSpan.FromSeconds(5));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Verifies a corrupt on-disk snapshot does not crash startup: the cache
|
||||||
|
/// ignores the unreadable file and comes up Unavailable when the database is
|
||||||
|
/// also unreachable. Regression test for Server-037.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task RefreshAsync_WhenSnapshotFileCorrupt_ComesUpUnavailableWithoutThrowing()
|
||||||
|
{
|
||||||
|
string path = CreateTempPath();
|
||||||
|
await File.WriteAllTextAsync(path, "{ this is not valid json");
|
||||||
|
GalaxyHierarchySnapshotStore store = CreateStore(path);
|
||||||
|
|
||||||
|
GalaxyDeployNotifier notifier = new();
|
||||||
|
ThrowingGalaxyRepository repository = new(new InvalidOperationException("Galaxy repository unreachable"));
|
||||||
|
GalaxyHierarchyCache cache = new(repository, notifier, new ManualTimeProvider(), snapshotStore: store);
|
||||||
|
|
||||||
|
await cache.RefreshAsync(CancellationToken.None);
|
||||||
|
|
||||||
|
Assert.Equal(GalaxyCacheStatus.Unavailable, cache.Current.Status);
|
||||||
|
Assert.False(cache.Current.HasData);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Verifies that with snapshot persistence disabled the cache does not
|
||||||
|
/// restore from disk — an unreachable database leaves it Unavailable.
|
||||||
|
/// Regression test for Server-037.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task RefreshAsync_WhenPersistDisabled_DoesNotRestoreFromDisk()
|
||||||
|
{
|
||||||
|
GalaxyHierarchySnapshotStore store = CreateStore(CreateTempPath(), persist: false);
|
||||||
|
|
||||||
|
GalaxyDeployNotifier notifier = new();
|
||||||
|
ThrowingGalaxyRepository repository = new(new InvalidOperationException("Galaxy repository unreachable"));
|
||||||
|
GalaxyHierarchyCache cache = new(repository, notifier, new ManualTimeProvider(), snapshotStore: store);
|
||||||
|
|
||||||
|
await cache.RefreshAsync(CancellationToken.None);
|
||||||
|
|
||||||
|
Assert.Equal(GalaxyCacheStatus.Unavailable, cache.Current.Status);
|
||||||
|
Assert.False(cache.Current.HasData);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Verifies that a snapshot save aborted because the gateway is shutting down
|
||||||
|
/// (the refresh token is cancelled) is not logged as a persistence failure.
|
||||||
|
/// Regression test for Server-036.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public async Task RefreshAsync_WhenSnapshotSaveCancelledAtShutdown_DoesNotLogPersistFailure()
|
||||||
|
{
|
||||||
|
using CancellationTokenSource cts = new();
|
||||||
|
GalaxyDeployNotifier notifier = new();
|
||||||
|
StubGalaxyRepository repository = new(
|
||||||
|
deployTime: new DateTime(2026, 5, 20, 9, 0, 0, DateTimeKind.Utc),
|
||||||
|
hierarchy: [SampleHierarchyRow()],
|
||||||
|
attributes: [SampleAttributeRow()]);
|
||||||
|
CancellingSaveStore store = new(cts);
|
||||||
|
RecordingLogger<GalaxyHierarchyCache> logger = new();
|
||||||
|
GalaxyHierarchyCache cache = new(repository, notifier, new ManualTimeProvider(), logger, store);
|
||||||
|
|
||||||
|
await cache.RefreshAsync(cts.Token);
|
||||||
|
|
||||||
|
Assert.DoesNotContain(
|
||||||
|
logger.Entries,
|
||||||
|
entry => entry.Level == LogLevel.Warning
|
||||||
|
&& entry.Message.Contains("persist", StringComparison.OrdinalIgnoreCase));
|
||||||
|
}
|
||||||
|
|
||||||
private static GalaxyHierarchyRow SampleHierarchyRow() => new()
|
private static GalaxyHierarchyRow SampleHierarchyRow() => new()
|
||||||
{
|
{
|
||||||
GobjectId = 99,
|
GobjectId = 99,
|
||||||
@@ -228,20 +331,93 @@ public sealed class GalaxyHierarchyCacheTests : IDisposable
|
|||||||
DataTypeName = "Float",
|
DataTypeName = "Float",
|
||||||
};
|
};
|
||||||
|
|
||||||
private GalaxyHierarchySnapshotStore CreateStore()
|
private string CreateTempPath()
|
||||||
{
|
{
|
||||||
string path = Path.Combine(
|
string path = Path.Combine(
|
||||||
Path.GetTempPath(),
|
Path.GetTempPath(),
|
||||||
$"mxgw-galaxy-cache-test-{Guid.NewGuid():N}.json");
|
$"mxgw-galaxy-cache-test-{Guid.NewGuid():N}.json");
|
||||||
_tempPaths.Add(path);
|
_tempPaths.Add(path);
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
private GalaxyHierarchySnapshotStore CreateStore() => CreateStore(CreateTempPath());
|
||||||
|
|
||||||
|
private static GalaxyHierarchySnapshotStore CreateStore(string path, bool persist = true)
|
||||||
|
{
|
||||||
GalaxyRepositoryOptions options = new()
|
GalaxyRepositoryOptions options = new()
|
||||||
{
|
{
|
||||||
PersistSnapshot = true,
|
PersistSnapshot = persist,
|
||||||
SnapshotCachePath = path,
|
SnapshotCachePath = path,
|
||||||
};
|
};
|
||||||
return new GalaxyHierarchySnapshotStore(Options.Create(options));
|
return new GalaxyHierarchySnapshotStore(Options.Create(options));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary><see cref="IGalaxyRepository"/> whose deploy-time query blocks until released.</summary>
|
||||||
|
private sealed class BlockingGalaxyRepository : IGalaxyRepository
|
||||||
|
{
|
||||||
|
private readonly TaskCompletionSource _release = new(TaskCreationOptions.RunContinuationsAsynchronously);
|
||||||
|
|
||||||
|
public void Release() => _release.TrySetResult();
|
||||||
|
|
||||||
|
public Task<bool> TestConnectionAsync(CancellationToken ct = default) => Task.FromResult(false);
|
||||||
|
|
||||||
|
public async Task<DateTime?> GetLastDeployTimeAsync(CancellationToken ct = default)
|
||||||
|
{
|
||||||
|
await _release.Task.WaitAsync(ct).ConfigureAwait(false);
|
||||||
|
throw new InvalidOperationException("Galaxy repository unreachable");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Task<List<GalaxyHierarchyRow>> GetHierarchyAsync(CancellationToken ct = default)
|
||||||
|
=> throw new InvalidOperationException("GetHierarchyAsync should not be reached");
|
||||||
|
|
||||||
|
public Task<List<GalaxyAttributeRow>> GetAttributesAsync(CancellationToken ct = default)
|
||||||
|
=> throw new InvalidOperationException("GetAttributesAsync should not be reached");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>Snapshot store whose <see cref="SaveAsync"/> cancels the token mid-save.</summary>
|
||||||
|
private sealed class CancellingSaveStore(CancellationTokenSource cts) : IGalaxyHierarchySnapshotStore
|
||||||
|
{
|
||||||
|
public Task<GalaxyHierarchySnapshot?> TryLoadAsync(CancellationToken cancellationToken)
|
||||||
|
=> Task.FromResult<GalaxyHierarchySnapshot?>(null);
|
||||||
|
|
||||||
|
public Task SaveAsync(GalaxyHierarchySnapshot snapshot, CancellationToken cancellationToken)
|
||||||
|
{
|
||||||
|
cts.Cancel();
|
||||||
|
cancellationToken.ThrowIfCancellationRequested();
|
||||||
|
return Task.CompletedTask;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>Minimal <see cref="ILogger{T}"/> that records every emitted log entry.</summary>
|
||||||
|
private sealed class RecordingLogger<T> : ILogger<T>
|
||||||
|
{
|
||||||
|
public List<(LogLevel Level, string Message)> Entries { get; } = [];
|
||||||
|
|
||||||
|
public IDisposable BeginScope<TState>(TState state)
|
||||||
|
where TState : notnull => NullScope.Instance;
|
||||||
|
|
||||||
|
public bool IsEnabled(LogLevel logLevel) => true;
|
||||||
|
|
||||||
|
public void Log<TState>(
|
||||||
|
LogLevel logLevel,
|
||||||
|
EventId eventId,
|
||||||
|
TState state,
|
||||||
|
Exception? exception,
|
||||||
|
Func<TState, Exception?, string> formatter)
|
||||||
|
{
|
||||||
|
Entries.Add((logLevel, formatter(state, exception)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private sealed class NullScope : IDisposable
|
||||||
|
{
|
||||||
|
public static readonly NullScope Instance = new();
|
||||||
|
|
||||||
|
public void Dispose()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>In-memory <see cref="IGalaxyRepository"/> that returns fixed rowsets.</summary>
|
/// <summary>In-memory <see cref="IGalaxyRepository"/> that returns fixed rowsets.</summary>
|
||||||
private sealed class StubGalaxyRepository(
|
private sealed class StubGalaxyRepository(
|
||||||
DateTime? deployTime,
|
DateTime? deployTime,
|
||||||
|
|||||||
@@ -59,6 +59,16 @@ public sealed class GalaxyHierarchySnapshotStoreTests : IDisposable
|
|||||||
Assert.Null(await store.TryLoadAsync(CancellationToken.None));
|
Assert.Null(await store.TryLoadAsync(CancellationToken.None));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
|
||||||
|
public async Task TryLoadAsync_WhenFileIsCorruptJson_ReturnsNull()
|
||||||
|
{
|
||||||
|
string path = CreateTempPath();
|
||||||
|
await File.WriteAllTextAsync(path, "{ this is not valid json");
|
||||||
|
GalaxyHierarchySnapshotStore store = CreateStore(path);
|
||||||
|
|
||||||
|
Assert.Null(await store.TryLoadAsync(CancellationToken.None));
|
||||||
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public async Task TryLoadAsync_WhenSchemaVersionUnrecognized_ReturnsNull()
|
public async Task TryLoadAsync_WhenSchemaVersionUnrecognized_ReturnsNull()
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user