diff --git a/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs b/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs
index a353d9c..d4e6253 100644
--- a/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs
+++ b/src/ZB.MOM.WW.MxGateway.Server/Alarms/GatewayAlarmMonitor.cs
@@ -157,19 +157,48 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic
     private async Task RunMonitorAsync(string subscription, CancellationToken stoppingToken)
     {
         _state = GatewayAlarmMonitorState.Starting;
+
+        // Derive the lifecycle baseline from the configured forced mode so a
+        // ForceSubtag / ForceAlarmManager start advertises the correct mode even
+        // though no OnAlarmProviderModeChanged event is raised in those modes
+        // (only Auto/failover produces that event). ForceSubtag starts degraded.
+        AlarmProviderMode initialMode;
+        bool initialDegraded;
+        string initialReason;
+        switch (MapForcedMode(_options.Fallback.Mode))
+        {
+            case AlarmProviderMode.Subtag:
+                initialMode = AlarmProviderMode.Subtag;
+                initialDegraded = true;
+                initialReason = "Forced subtag mode (configuration)";
+                break;
+            case AlarmProviderMode.Alarmmgr:
+                initialMode = AlarmProviderMode.Alarmmgr;
+                initialDegraded = false;
+                initialReason = string.Empty;
+                break;
+            default:
+                // Unspecified (Auto): the failover consumer starts on the
+                // alarm-manager primary and only degrades to subtag on failure.
+                initialMode = AlarmProviderMode.Alarmmgr;
+                initialDegraded = false;
+                initialReason = string.Empty;
+                break;
+        }
+
         lock (_sync)
         {
             // Re-baseline the provider status for this lifecycle so a restarted
-            // monitor advertises alarm-manager/not-degraded until told otherwise.
-            _providerMode = AlarmProviderMode.Alarmmgr;
-            _providerDegraded = false;
-            _providerReason = string.Empty;
+            // monitor advertises the configured mode until told otherwise.
+            _providerMode = initialMode;
+            _providerDegraded = initialDegraded;
+            _providerReason = initialReason;
             _providerSince = DateTimeOffset.UtcNow;
         }
 
-        // Align the observable gauge with the Alarmmgr baseline without recording
+        // Align the observable gauge with the lifecycle baseline without recording
         // a switch — the gauge was 0 (unknown) from construction until now.
-        _metrics.SetAlarmProviderMode(ModeToInt(AlarmProviderMode.Alarmmgr));
+        _metrics.SetAlarmProviderMode(ModeToInt(initialMode));
 
         GatewaySession session = await _sessionManager.OpenSessionAsync(
             new SessionOpenRequest(BackendName, MonitorClientName, Guid.NewGuid().ToString("N"), CommandTimeout: null),
@@ -256,6 +285,10 @@ public sealed class GatewayAlarmMonitor : BackgroundService, IGatewayAlarmServic
 
         AlarmProviderMode forcedMode = MapForcedMode(_options.Fallback.Mode);
 
+        _logger.LogInformation(
+            "Alarm subscribe: forcedMode={ForcedMode} configMode={ConfigMode} watchList={WatchListCount}.",
+            forcedMode, _options.Fallback.Mode, watchList.Count);
+
         // When the forced mode is Unspecified (the "Auto" case) and the resolved
         // watch-list is empty — the common alarmmgr-only deployment — the command
         // is identical-in-effect to the historical SubscribeAlarms (wnwrap only):
diff --git a/src/ZB.MOM.WW.MxGateway.Tests/Alarms/GatewayAlarmMonitorProviderModeTests.cs b/src/ZB.MOM.WW.MxGateway.Tests/Alarms/GatewayAlarmMonitorProviderModeTests.cs
index abeb856..e36aa87 100644
--- a/src/ZB.MOM.WW.MxGateway.Tests/Alarms/GatewayAlarmMonitorProviderModeTests.cs
+++ b/src/ZB.MOM.WW.MxGateway.Tests/Alarms/GatewayAlarmMonitorProviderModeTests.cs
@@ -156,6 +156,157 @@ public sealed class GatewayAlarmMonitorProviderModeTests
         await monitor.StopAsync(CancellationToken.None);
     }
 
+    [Fact]
+    public async Task ForceSubtagConfig_BaselinesProviderStatusToSubtagDegraded_WithoutSwitch()
+    {
+        using GatewayMetrics metrics = new();
+        long switchCount = 0;
+        int gaugeValue = -1;
+        using MeterListener listener = new();
+        listener.InstrumentPublished = (instrument, meterListener) =>
+        {
+            if (ReferenceEquals(instrument.Meter, metrics.Meter)
+                && (instrument.Name == "mxgateway.alarms.provider_switches"
+                    || instrument.Name == "mxgateway.alarms.provider_mode"))
+            {
+                meterListener.EnableMeasurementEvents(instrument);
+            }
+        };
+        // Switch-counter callback. NOTE(review): <long> assumes provider_switches is a Counter<long> — confirm.
+        listener.SetMeasurementEventCallback<long>(
+            (instrument, measurement, _, _) =>
+            {
+                if (ReferenceEquals(instrument.Meter, metrics.Meter)
+                    && instrument.Name == "mxgateway.alarms.provider_switches")
+                {
+                    Interlocked.Add(ref switchCount, measurement);
+                }
+            });
+        // Mode-gauge callback. NOTE(review): <int> assumes provider_mode is an int gauge — confirm.
+        listener.SetMeasurementEventCallback<int>(
+            (instrument, measurement, _, _) =>
+            {
+                if (ReferenceEquals(instrument.Meter, metrics.Meter)
+                    && instrument.Name == "mxgateway.alarms.provider_mode")
+                {
+                    Interlocked.Exchange(ref gaugeValue, measurement);
+                }
+            });
+        listener.Start();
+
+        FakeSessionManager sessions = new();
+        using GatewayAlarmMonitor monitor = CreateMonitor(sessions, metrics, "ForceSubtag");
+
+        using CancellationTokenSource cts = new();
+        await monitor.StartAsync(cts.Token);
+        await sessions.WaitForSubscribeAsync(WaitTimeout);
+
+        using CancellationTokenSource streamCts = new();
+        AlarmFeedMessage? first = null;
+        Task reader = Task.Run(async () =>
+        {
+            await foreach (AlarmFeedMessage message in monitor.StreamAsync(null, streamCts.Token))
+            {
+                first = message;
+                break;
+            }
+        });
+
+        await WaitUntilAsync(() => first is not null, WaitTimeout);
+
+        Assert.NotNull(first);
+        Assert.Equal(AlarmFeedMessage.PayloadOneofCase.ProviderStatus, first!.PayloadCase);
+        Assert.Equal(AlarmProviderMode.Subtag, first.ProviderStatus.Mode);
+        Assert.True(first.ProviderStatus.Degraded);
+
+        // The observable gauge must read subtag (2) after start.
+        listener.RecordObservableInstruments();
+        Assert.Equal(2, Volatile.Read(ref gaugeValue));
+
+        // The initial set must not record a provider switch.
+        Assert.Equal(0, Interlocked.Read(ref switchCount));
+
+        await streamCts.CancelAsync();
+        await reader;
+        await cts.CancelAsync();
+        await monitor.StopAsync(CancellationToken.None);
+    }
+
+    [Fact]
+    public async Task ForceAlarmManagerConfig_BaselinesProviderStatusToAlarmmgr_WithoutSwitch()
+    {
+        using GatewayMetrics metrics = new();
+        long switchCount = 0;
+        int gaugeValue = -1;
+        using MeterListener listener = new();
+        listener.InstrumentPublished = (instrument, meterListener) =>
+        {
+            if (ReferenceEquals(instrument.Meter, metrics.Meter)
+                && (instrument.Name == "mxgateway.alarms.provider_switches"
+                    || instrument.Name == "mxgateway.alarms.provider_mode"))
+            {
+                meterListener.EnableMeasurementEvents(instrument);
+            }
+        };
+        // Switch-counter callback. NOTE(review): <long> assumes provider_switches is a Counter<long> — confirm.
+        listener.SetMeasurementEventCallback<long>(
+            (instrument, measurement, _, _) =>
+            {
+                if (ReferenceEquals(instrument.Meter, metrics.Meter)
+                    && instrument.Name == "mxgateway.alarms.provider_switches")
+                {
+                    Interlocked.Add(ref switchCount, measurement);
+                }
+            });
+        // Mode-gauge callback. NOTE(review): <int> assumes provider_mode is an int gauge — confirm.
+        listener.SetMeasurementEventCallback<int>(
+            (instrument, measurement, _, _) =>
+            {
+                if (ReferenceEquals(instrument.Meter, metrics.Meter)
+                    && instrument.Name == "mxgateway.alarms.provider_mode")
+                {
+                    Interlocked.Exchange(ref gaugeValue, measurement);
+                }
+            });
+        listener.Start();
+
+        FakeSessionManager sessions = new();
+        using GatewayAlarmMonitor monitor = CreateMonitor(sessions, metrics, "ForceAlarmManager");
+
+        using CancellationTokenSource cts = new();
+        await monitor.StartAsync(cts.Token);
+        await sessions.WaitForSubscribeAsync(WaitTimeout);
+
+        using CancellationTokenSource streamCts = new();
+        AlarmFeedMessage? first = null;
+        Task reader = Task.Run(async () =>
+        {
+            await foreach (AlarmFeedMessage message in monitor.StreamAsync(null, streamCts.Token))
+            {
+                first = message;
+                break;
+            }
+        });
+
+        await WaitUntilAsync(() => first is not null, WaitTimeout);
+
+        Assert.NotNull(first);
+        Assert.Equal(AlarmFeedMessage.PayloadOneofCase.ProviderStatus, first!.PayloadCase);
+        Assert.Equal(AlarmProviderMode.Alarmmgr, first.ProviderStatus.Mode);
+        Assert.False(first.ProviderStatus.Degraded);
+
+        // The observable gauge must read alarm-manager (1) after start.
+        listener.RecordObservableInstruments();
+        Assert.Equal(1, Volatile.Read(ref gaugeValue));
+        // The initial set must not record a provider switch.
+        Assert.Equal(0, Interlocked.Read(ref switchCount));
+
+        await streamCts.CancelAsync();
+        await reader;
+        await cts.CancelAsync();
+        await monitor.StopAsync(CancellationToken.None);
+    }
+
     [Fact]
     public async Task SubscribeAlarms_SendsForcedModeAndWatchList_FromConfiguration()
     {
@@ -259,6 +410,23 @@ public sealed class GatewayAlarmMonitorProviderModeTests
             NullLogger.Instance);
     }
 
+    /// <summary>Creates a monitor with alarms enabled, a stub watch-list resolver built from an empty collection, and the given fallback <paramref name="mode"/>.</summary>
+    private static GatewayAlarmMonitor CreateMonitor(FakeSessionManager sessions, GatewayMetrics metrics, string mode)
+    {
+        AlarmsOptions options = new()
+        {
+            Enabled = true,
+            SubscriptionExpression = @"\\NODE\Galaxy!Area",
+            Fallback = new AlarmFallbackOptions { Mode = mode },
+        };
+        return new GatewayAlarmMonitor(
+            sessions,
+            new StubWatchListResolver([]),
+            metrics,
+            Microsoft.Extensions.Options.Options.Create(new GatewayOptions { Alarms = options }),
+            NullLogger.Instance);
+    }
+
     private static async Task WaitForAsync(
         List received,
         Func predicate,