fix: gateway reflects configured forced provider mode into gauge/feed (#2)

This commit is contained in:
Joseph Doherty
2026-06-15 01:10:04 -04:00
parent c6f17557f6
commit 9208225f9c
2 changed files with 200 additions and 6 deletions
@@ -157,19 +157,48 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic
private async Task RunMonitorAsync(string subscription, CancellationToken stoppingToken)
{
_state = GatewayAlarmMonitorState.Starting;
// Derive the lifecycle baseline from the configured forced mode so a
// ForceSubtag / ForceAlarmManager start advertises the correct mode even
// though no OnAlarmProviderModeChanged event is raised in those modes
// (only Auto/failover produces that event). ForceSubtag starts degraded.
AlarmProviderMode initialMode;
bool initialDegraded;
string initialReason;
switch (MapForcedMode(_options.Fallback.Mode))
{
case AlarmProviderMode.Subtag:
initialMode = AlarmProviderMode.Subtag;
initialDegraded = true;
initialReason = "Forced subtag mode (configuration)";
break;
case AlarmProviderMode.Alarmmgr:
initialMode = AlarmProviderMode.Alarmmgr;
initialDegraded = false;
initialReason = string.Empty;
break;
default:
// Unspecified (Auto): the failover consumer starts on the
// alarm-manager primary and only degrades to subtag on failure.
initialMode = AlarmProviderMode.Alarmmgr;
initialDegraded = false;
initialReason = string.Empty;
break;
}
lock (_sync)
{
// Re-baseline the provider status for this lifecycle so a restarted
// monitor advertises alarm-manager/not-degraded until told otherwise.
_providerMode = AlarmProviderMode.Alarmmgr;
_providerDegraded = false;
_providerReason = string.Empty;
// monitor advertises the configured mode until told otherwise.
_providerMode = initialMode;
_providerDegraded = initialDegraded;
_providerReason = initialReason;
_providerSince = DateTimeOffset.UtcNow;
}
// Align the observable gauge with the Alarmmgr baseline without recording
// Align the observable gauge with the lifecycle baseline without recording
// a switch — the gauge was 0 (unknown) from construction until now.
_metrics.SetAlarmProviderMode(ModeToInt(AlarmProviderMode.Alarmmgr));
_metrics.SetAlarmProviderMode(ModeToInt(initialMode));
GatewaySession session = await _sessionManager.OpenSessionAsync(
new SessionOpenRequest(BackendName, MonitorClientName, Guid.NewGuid().ToString("N"), CommandTimeout: null),
@@ -256,6 +285,10 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic
AlarmProviderMode forcedMode = MapForcedMode(_options.Fallback.Mode);
_logger.LogInformation(
"Alarm subscribe: forcedMode={ForcedMode} configMode={ConfigMode} watchList={WatchListCount}.",
forcedMode, _options.Fallback.Mode, watchList.Count);
// When the forced mode is Unspecified (the "Auto" case) and the resolved
// watch-list is empty — the common alarmmgr-only deployment — the command
// is identical-in-effect to the historical SubscribeAlarms (wnwrap only):
@@ -156,6 +156,151 @@ public sealed class GatewayAlarmMonitorProviderModeTests
await monitor.StopAsync(CancellationToken.None);
}
/// <summary>
/// With the fallback mode configured as "ForceSubtag", the first message on the alarm
/// feed must be a provider status reporting subtag mode and degraded, the provider-mode
/// gauge must read 2, and no provider-switch measurement may have been recorded.
/// </summary>
[Fact]
public async Task ForceSubtagConfig_BaselinesProviderStatusToSubtagDegraded_WithoutSwitch()
{
    const string SwitchCounterName = "mxgateway.alarms.provider_switches";
    const string ModeGaugeName = "mxgateway.alarms.provider_mode";

    using var metrics = new GatewayMetrics();
    long recordedSwitches = 0L;
    int observedMode = -1;

    // Subscribe only to the two instruments of interest on this test's own meter.
    using var meterListener = new MeterListener();
    meterListener.InstrumentPublished = (published, owner) =>
    {
        if (!ReferenceEquals(published.Meter, metrics.Meter))
        {
            return;
        }

        if (published.Name is SwitchCounterName or ModeGaugeName)
        {
            owner.EnableMeasurementEvents(published);
        }
    };

    // Sum every value reported by the provider-switch instrument.
    meterListener.SetMeasurementEventCallback<long>((source, value, _, _) =>
    {
        bool isSwitchCounter = ReferenceEquals(source.Meter, metrics.Meter)
            && source.Name == SwitchCounterName;
        if (isSwitchCounter)
        {
            Interlocked.Add(ref recordedSwitches, value);
        }
    });

    // Keep the most recent value reported by the provider-mode gauge.
    meterListener.SetMeasurementEventCallback<int>((source, value, _, _) =>
    {
        bool isModeGauge = ReferenceEquals(source.Meter, metrics.Meter)
            && source.Name == ModeGaugeName;
        if (isModeGauge)
        {
            Interlocked.Exchange(ref observedMode, value);
        }
    });
    meterListener.Start();

    var sessionManager = new FakeSessionManager();
    using GatewayAlarmMonitor alarmMonitor = CreateMonitor(sessionManager, metrics, "ForceSubtag");
    using var lifetimeCts = new CancellationTokenSource();
    await alarmMonitor.StartAsync(lifetimeCts.Token);
    await sessionManager.WaitForSubscribeAsync(WaitTimeout);

    // Capture just the first message the feed delivers, then stop reading.
    using var feedCts = new CancellationTokenSource();
    AlarmFeedMessage? firstMessage = null;
    Task feedReader = Task.Run(async () =>
    {
        await foreach (AlarmFeedMessage item in alarmMonitor.StreamAsync(null, feedCts.Token))
        {
            firstMessage = item;
            break;
        }
    });
    await WaitUntilAsync(() => firstMessage is not null, WaitTimeout);

    Assert.NotNull(firstMessage);
    AlarmFeedMessage status = firstMessage!;
    Assert.Equal(AlarmFeedMessage.PayloadOneofCase.ProviderStatus, status.PayloadCase);
    Assert.Equal(AlarmProviderMode.Subtag, status.ProviderStatus.Mode);
    Assert.True(status.ProviderStatus.Degraded);

    // Poll the observable instruments so the gauge callback runs; subtag is reported as 2.
    meterListener.RecordObservableInstruments();
    Assert.Equal(2, Volatile.Read(ref observedMode));

    // Setting the initial mode must not have been counted as a provider switch.
    Assert.Equal(0, Interlocked.Read(ref recordedSwitches));

    await feedCts.CancelAsync();
    await feedReader;
    await lifetimeCts.CancelAsync();
    await alarmMonitor.StopAsync(CancellationToken.None);
}
/// <summary>
/// With the fallback mode configured as "ForceAlarmManager", the first message on the
/// alarm feed must be a provider status reporting alarm-manager mode and not degraded,
/// the provider-mode gauge must read 1, and no provider-switch measurement may have
/// been recorded.
/// </summary>
[Fact]
public async Task ForceAlarmManagerConfig_BaselinesProviderStatusToAlarmmgr_WithoutSwitch()
{
    const string SwitchCounterName = "mxgateway.alarms.provider_switches";
    const string ModeGaugeName = "mxgateway.alarms.provider_mode";

    using var metrics = new GatewayMetrics();
    long recordedSwitches = 0L;
    int observedMode = -1;

    // Subscribe only to the two instruments of interest on this test's own meter.
    using var meterListener = new MeterListener();
    meterListener.InstrumentPublished = (published, owner) =>
    {
        if (!ReferenceEquals(published.Meter, metrics.Meter))
        {
            return;
        }

        if (published.Name is SwitchCounterName or ModeGaugeName)
        {
            owner.EnableMeasurementEvents(published);
        }
    };

    // Sum every value reported by the provider-switch instrument.
    meterListener.SetMeasurementEventCallback<long>((source, value, _, _) =>
    {
        bool isSwitchCounter = ReferenceEquals(source.Meter, metrics.Meter)
            && source.Name == SwitchCounterName;
        if (isSwitchCounter)
        {
            Interlocked.Add(ref recordedSwitches, value);
        }
    });

    // Keep the most recent value reported by the provider-mode gauge.
    meterListener.SetMeasurementEventCallback<int>((source, value, _, _) =>
    {
        bool isModeGauge = ReferenceEquals(source.Meter, metrics.Meter)
            && source.Name == ModeGaugeName;
        if (isModeGauge)
        {
            Interlocked.Exchange(ref observedMode, value);
        }
    });
    meterListener.Start();

    var sessionManager = new FakeSessionManager();
    using GatewayAlarmMonitor alarmMonitor = CreateMonitor(sessionManager, metrics, "ForceAlarmManager");
    using var lifetimeCts = new CancellationTokenSource();
    await alarmMonitor.StartAsync(lifetimeCts.Token);
    await sessionManager.WaitForSubscribeAsync(WaitTimeout);

    // Capture just the first message the feed delivers, then stop reading.
    using var feedCts = new CancellationTokenSource();
    AlarmFeedMessage? firstMessage = null;
    Task feedReader = Task.Run(async () =>
    {
        await foreach (AlarmFeedMessage item in alarmMonitor.StreamAsync(null, feedCts.Token))
        {
            firstMessage = item;
            break;
        }
    });
    await WaitUntilAsync(() => firstMessage is not null, WaitTimeout);

    Assert.NotNull(firstMessage);
    AlarmFeedMessage status = firstMessage!;
    Assert.Equal(AlarmFeedMessage.PayloadOneofCase.ProviderStatus, status.PayloadCase);
    Assert.Equal(AlarmProviderMode.Alarmmgr, status.ProviderStatus.Mode);
    Assert.False(status.ProviderStatus.Degraded);

    // Poll the observable instruments so the gauge callback runs; alarm-manager is reported as 1.
    meterListener.RecordObservableInstruments();
    Assert.Equal(1, Volatile.Read(ref observedMode));

    // Setting the initial mode must not have been counted as a provider switch.
    Assert.Equal(0, Interlocked.Read(ref recordedSwitches));

    await feedCts.CancelAsync();
    await feedReader;
    await lifetimeCts.CancelAsync();
    await alarmMonitor.StopAsync(CancellationToken.None);
}
[Fact]
public async Task SubscribeAlarms_SendsForcedModeAndWatchList_FromConfiguration()
{
@@ -259,6 +404,22 @@ public sealed class GatewayAlarmMonitorProviderModeTests
NullLogger<GatewayAlarmMonitor>.Instance);
}
/// <summary>
/// Builds a <see cref="GatewayAlarmMonitor"/> with alarms enabled, a fixed subscription
/// expression, an empty watch-list resolver, and the given fallback mode string.
/// </summary>
/// <param name="sessions">Fake session manager handed to the monitor.</param>
/// <param name="metrics">Metrics instance handed to the monitor.</param>
/// <param name="mode">Value assigned to <c>Alarms.Fallback.Mode</c> (e.g. "ForceSubtag").</param>
/// <returns>The newly constructed monitor; the caller owns it.</returns>
private static GatewayAlarmMonitor CreateMonitor(FakeSessionManager sessions, GatewayMetrics metrics, string mode)
{
    var fallback = new AlarmFallbackOptions { Mode = mode };
    var alarms = new AlarmsOptions
    {
        Enabled = true,
        SubscriptionExpression = @"\\NODE\Galaxy!Area",
        Fallback = fallback,
    };
    var gatewayOptions = new GatewayOptions { Alarms = alarms };

    // Fully qualified: "Options" alone would be ambiguous next to the options types used here.
    var wrappedOptions = Microsoft.Extensions.Options.Options.Create(gatewayOptions);

    return new GatewayAlarmMonitor(
        sessions,
        new StubWatchListResolver([]),
        metrics,
        wrappedOptions,
        NullLogger<GatewayAlarmMonitor>.Instance);
}
private static async Task<AlarmFeedMessage> WaitForAsync(
List<AlarmFeedMessage> received,
Func<AlarmFeedMessage, bool> predicate,