fix(server): propagate watch-list cancellation; doc + test gaps (Server-051..053)

This commit is contained in:
Joseph Doherty
2026-06-15 02:39:11 -04:00
parent 410acc92eb
commit 258e09e0de
6 changed files with 283 additions and 6 deletions
@@ -66,11 +66,13 @@ public sealed class AlarmWatchListResolver : IAlarmWatchListResolver
{
rows = await _repository.GetAlarmAttributesAsync(cancellationToken).ConfigureAwait(false);
}
catch (Exception ex)
catch (Exception ex) when (ex is not OperationCanceledException)
{
// Discovery being unavailable must not crash the resolver: log and
// continue with an empty discovery set. The caller decides what to
// do with the (possibly config-only) result.
// do with the (possibly config-only) result. Cancellation is the one
// exception — an OperationCanceledException propagates per the
// IAlarmWatchListResolver contract so the caller unwinds promptly.
_logger.LogWarning(
ex,
"Galaxy Repository alarm-attribute discovery failed; continuing with configuration-only watch-list.");
@@ -86,7 +86,10 @@ public sealed class AlarmDiscoveryOptions
public string[] IncludeAttributes { get; init; } = Array.Empty<string>();
/// <summary>
/// Attribute paths to exclude from the Repository-derived poll list.
/// Attribute paths to remove from the merged poll list (case-insensitive).
/// The exclude runs after the Repository-derived rows and the explicit
/// <see cref="IncludeAttributes"/> entries are combined, so an exclude that
/// matches an explicit include suppresses it too — excludes win.
/// Ignored when <see cref="UseGalaxyRepository"/> is <c>false</c>.
/// Default empty.
/// </summary>
@@ -306,6 +306,56 @@ public sealed class AlarmWatchListResolverTests
Assert.Contains("Tank01", target.ActiveSubtag, StringComparison.Ordinal);
}
/// <summary>
/// Server-051: when the caller's token is cancelled while the resolver is awaiting
/// Galaxy Repository discovery, the resulting <see cref="OperationCanceledException"/>
/// must reach the caller instead of being absorbed into a configuration-only
/// watch-list, as the <see cref="IAlarmWatchListResolver"/> contract requires.
/// </summary>
[Fact]
public async Task ResolveAsync_RepositoryCancelled_PropagatesOperationCanceled()
{
    using CancellationTokenSource cancellation = new();

    // The stub cancels this source from inside the alarm-attribute query and then
    // throws through the token it was handed, so the exception is bound to the
    // very token the resolver received from the caller.
    AlarmWatchListResolver resolver = CreateResolver(new CancellingGalaxyRepository(cancellation));
    CancellationToken token = cancellation.Token;

    await Assert.ThrowsAnyAsync<OperationCanceledException>(
        () => resolver.ResolveAsync(Options(include: ["Tank01.Level.HiHi"]), token));
}
/// <summary>
/// Server-052 item 2 / Server-053: excludes take precedence over explicit includes.
/// An attribute listed in both <c>IncludeAttributes</c> and <c>ExcludeAttributes</c>
/// is dropped from the resolved watch-list, while unrelated Repository-derived rows
/// are kept.
/// </summary>
[Fact]
public async Task ResolveAsync_ExcludeAlsoSuppressesMatchingExplicitInclude()
{
    GalaxyAlarmAttributeRow tankRow = new()
    {
        FullTagReference = "Tank01.Level.HiHi",
        SourceObjectReference = "Tank01",
        Area = "TestArea",
    };
    AlarmWatchListResolver resolver = CreateResolver(new StubGalaxyRepository([tankRow]));

    // The include and the exclude name the same attribute but differ in casing,
    // so a match here also demonstrates that the exclude is case-insensitive.
    IReadOnlyList<AlarmSubtagTarget> targets = await resolver.ResolveAsync(
        Options(
            useGalaxyRepository: true,
            include: ["Pump01.Fault"],
            exclude: ["pump01.fault"]));

    // Exactly one target remains, and it is the Repository row for Tank01.
    AlarmSubtagTarget survivor = Assert.Single(targets);
    Assert.Contains("Tank01", survivor.ActiveSubtag, StringComparison.Ordinal);
    Assert.DoesNotContain(
        targets,
        candidate => candidate.ActiveSubtag.Contains("Pump01", StringComparison.OrdinalIgnoreCase));
}
/// <summary>In-memory <see cref="IGalaxyRepository"/> returning a fixed alarm rowset.</summary>
private sealed class StubGalaxyRepository(List<GalaxyAlarmAttributeRow> rows) : IGalaxyRepository
{
@@ -357,4 +407,35 @@ public sealed class AlarmWatchListResolverTests
public Task<List<GalaxyAlarmAttributeRow>> GetAlarmAttributesAsync(CancellationToken ct = default) =>
Task.FromException<List<GalaxyAlarmAttributeRow>>(toThrow);
}
/// <summary>
/// <see cref="IGalaxyRepository"/> test double whose alarm-attribute query first
/// cancels the <see cref="CancellationTokenSource"/> it was constructed with and
/// then observes the caller's token, throwing a token-bound
/// <see cref="OperationCanceledException"/> when that token belongs to the source.
/// It stands in for a discovery query that is cancelled while in flight. All other
/// members complete immediately with benign results.
/// </summary>
private sealed class CancellingGalaxyRepository : IGalaxyRepository
{
    private readonly CancellationTokenSource _source;

    /// <summary>Creates the stub around the source it cancels during discovery.</summary>
    /// <param name="source">Source cancelled by <see cref="GetAlarmAttributesAsync"/>.</param>
    public CancellingGalaxyRepository(CancellationTokenSource source)
    {
        _source = source;
    }

    /// <inheritdoc />
    public Task<bool> TestConnectionAsync(CancellationToken ct = default)
    {
        return Task.FromResult(true);
    }

    /// <inheritdoc />
    public Task<DateTime?> GetLastDeployTimeAsync(CancellationToken ct = default)
    {
        return Task.FromResult(default(DateTime?));
    }

    /// <inheritdoc />
    public Task<List<GalaxyHierarchyRow>> GetHierarchyAsync(CancellationToken ct = default)
    {
        return Task.FromResult<List<GalaxyHierarchyRow>>([]);
    }

    /// <inheritdoc />
    public Task<List<GalaxyAttributeRow>> GetAttributesAsync(CancellationToken ct = default)
    {
        return Task.FromResult<List<GalaxyAttributeRow>>([]);
    }

    /// <inheritdoc />
    public Task<List<GalaxyAlarmAttributeRow>> GetAlarmAttributesAsync(CancellationToken ct = default)
    {
        // Cancel first, then observe the token: when ct comes from _source this
        // throws synchronously; otherwise the call falls through to an empty rowset.
        _source.Cancel();
        ct.ThrowIfCancellationRequested();
        return Task.FromResult<List<GalaxyAlarmAttributeRow>>([]);
    }
}
}
@@ -120,6 +120,104 @@ public sealed class GatewayAlarmMonitorProviderModeTests
await monitor.StopAsync(CancellationToken.None);
}
/// <summary>
/// Server-053: pins the semantics that every <c>OnAlarmProviderModeChanged</c> event
/// is counted as a provider switch, even when the reported mode is the same as the
/// previous one. Two consecutive subtag-mode events must therefore add exactly two
/// measurements to <c>provider_switches</c>; there is no from==to suppression on the
/// gateway side, because the worker is treated as the authority on mode changes.
/// </summary>
[Fact]
public async Task ProviderModeChange_RepeatedSameMode_RecordsASwitchForEachEvent()
{
    const string SwitchCounterName = "mxgateway.alarms.provider_switches";

    using GatewayMetrics metrics = new();
    long observedSwitches = 0;

    // True only for the provider-switch counter owned by this test's metrics
    // instance, so measurements from any other meter are ignored.
    bool IsSwitchCounter(Instrument instrument) =>
        ReferenceEquals(instrument.Meter, metrics.Meter) && instrument.Name == SwitchCounterName;

    // Builds a worker-reported "mode is Subtag" event stamped with the current time.
    static MxEvent SubtagModeEvent(string reason) => new()
    {
        OnAlarmProviderModeChanged = new OnAlarmProviderModeChangedEvent
        {
            Mode = AlarmProviderMode.Subtag,
            Reason = reason,
            At = Timestamp.FromDateTimeOffset(DateTimeOffset.UtcNow),
        },
    };

    using MeterListener switchListener = new();
    switchListener.InstrumentPublished = (instrument, publishedTo) =>
    {
        if (IsSwitchCounter(instrument))
        {
            publishedTo.EnableMeasurementEvents(instrument);
        }
    };
    switchListener.SetMeasurementEventCallback<long>((instrument, measurement, _, _) =>
    {
        if (IsSwitchCounter(instrument))
        {
            Interlocked.Add(ref observedSwitches, measurement);
        }
    });
    switchListener.Start();

    FakeSessionManager sessions = new();
    using GatewayAlarmMonitor monitor = CreateMonitor(sessions, metrics);
    using CancellationTokenSource cts = new();
    await monitor.StartAsync(cts.Token);
    await sessions.WaitForSubscribeAsync(WaitTimeout);

    List<AlarmFeedMessage> received = [];
    TaskCompletionSource baselineReceived = new(TaskCreationOptions.RunContinuationsAsynchronously);
    using CancellationTokenSource streamCts = new();

    // Drains the alarm feed in the background and signals once the first message
    // has arrived, so events are only emitted after the subscriber is attached.
    async Task PumpStreamAsync()
    {
        try
        {
            await foreach (AlarmFeedMessage message in monitor.StreamAsync(null, streamCts.Token))
            {
                lock (received)
                {
                    received.Add(message);
                    if (received.Count == 1)
                    {
                        baselineReceived.TrySetResult();
                    }
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Expected when the test cancels the stream.
        }
    }

    Task reader = Task.Run(() => PumpStreamAsync());
    await baselineReceived.Task.WaitAsync(WaitTimeout);

    // First subtag-mode event.
    sessions.EmitEvent(SubtagModeEvent("alarmmgr failed"));
    await WaitUntilAsync(() => Interlocked.Read(ref observedSwitches) >= 1, WaitTimeout);

    // Second subtag-mode event: same mode, but still counted as a worker-reported switch.
    sessions.EmitEvent(SubtagModeEvent("still degraded"));
    await WaitUntilAsync(() => Interlocked.Read(ref observedSwitches) >= 2, WaitTimeout);

    Assert.Equal(2, Interlocked.Read(ref observedSwitches));

    // Tear down in order: stop the reader first, then the monitor.
    await streamCts.CancelAsync();
    await reader;
    await cts.CancelAsync();
    await monitor.StopAsync(CancellationToken.None);
}
[Fact]
public async Task NewSubscriber_ReceivesProviderStatusAsFirstMessage()
{