diff --git a/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs b/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs index da293cc..07060fb 100644 --- a/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs +++ b/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs @@ -159,7 +159,7 @@ public sealed class FailoverAlarmConsumerTests sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (mode change 1) Assert.Single(changes); - Assert.Equal(AlarmProviderMode.Subtag, changes[^1].Mode); + Assert.Equal(AlarmProviderMode.Subtag, changes[changes.Count - 1].Mode); // Primary heals: PollOnce stops throwing. ProbeOnce should call only // PollOnce (not Subscribe) to detect recovery. @@ -172,9 +172,9 @@ public sealed class FailoverAlarmConsumerTests sut.ProbeOnce(); // cleanProbes=2 → failback to Alarmmgr (mode change 2) Assert.Equal(2, changes.Count); - Assert.Equal(AlarmProviderMode.Alarmmgr, changes[^1].Mode); + Assert.Equal(AlarmProviderMode.Alarmmgr, changes[changes.Count - 1].Mode); Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode); - Assert.Equal(0, changes[^1].HResult); + Assert.Equal(0, changes[changes.Count - 1].HResult); // ProbeOnce must not have called Subscribe at all during probing. Assert.Equal(subscribeCountAfterFailover, primary.SubscribeCount); @@ -263,4 +263,32 @@ public sealed class FailoverAlarmConsumerTests Assert.Equal(22, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full")); Assert.Equal(22, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full")); } + + /// + /// Proves that an intermittent failure during failback probing resets the + /// clean-probe counter to zero, requiring a fresh unbroken run of + /// stableProbes clean probes before failing back.
+ /// + [Fact] + public void FailbackProbe_IntermittentFailure_ResetsCleanCount() + { + var primary = new FlakyPrimary { ThrowOnPoll = true }; + var standby = new StubStandby(); + using var sut = new FailoverAlarmConsumer(primary, standby, new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 3)); + sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → switch to Subtag + Assert.Equal(AlarmProviderMode.Subtag, sut.Mode); + + primary.ThrowOnPoll = false; + sut.ProbeOnce(); // clean 1 + sut.ProbeOnce(); // clean 2 + primary.ThrowOnPoll = true; + sut.ProbeOnce(); // fails → reset to 0 + Assert.Equal(AlarmProviderMode.Subtag, sut.Mode); + + primary.ThrowOnPoll = false; + sut.ProbeOnce(); // clean 1 + sut.ProbeOnce(); // clean 2 + sut.ProbeOnce(); // clean 3 → failback + Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode); + } } diff --git a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs index 35cd513..b40c614 100644 --- a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs +++ b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs @@ -63,6 +63,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer private int consecutiveFailures; private int cleanProbes; private bool disposed; + private DateTime lastProbeAtUtc = DateTime.MinValue; /// /// The subscription expression passed to . @@ -177,6 +178,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer /// drive it directly. /// /// + /// Probe throttle. When + /// ProbeIntervalSeconds is greater than + /// zero, successive calls to this method are throttled: a probe is + /// skipped unless at least that many seconds have elapsed since the + /// last probe that was actually executed. When + /// ProbeIntervalSeconds is zero, the + /// throttle is disabled and every call probes immediately (the default + /// used by unit tests). + /// + /// /// Why PollOnce only — no re-Subscribe.
/// Failover does NOT tear down the primary's subscription; /// is single-subscribe and would @@ -202,6 +213,22 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer)); if (active != Active.Standby) return; + // Throttle probes to the configured cadence. When ProbeIntervalSeconds + // is 0 the throttle is disabled and every call probes immediately. + // The clock is read once so the comparison and the stored timestamp + // agree. A negative elapsed time means the wall clock was stepped + // backwards; treat that as expired so failback probing cannot stall. + DateTime nowUtc = DateTime.UtcNow; + double secondsSinceLastProbe = (nowUtc - lastProbeAtUtc).TotalSeconds; + if (settings.ProbeIntervalSeconds > 0 + && secondsSinceLastProbe >= 0 + && secondsSinceLastProbe < settings.ProbeIntervalSeconds) + { + return; + } + + lastProbeAtUtc = nowUtc; + try { // Re-poll the still-subscribed primary. Do NOT call Subscribe — @@ -278,7 +305,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer { action(); } - catch (Exception ex) + catch (Exception ex) when (ex is not OutOfMemoryException) { consecutiveFailures++; int hresult = ex is COMException ? ex.HResult : 0;