diff --git a/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs b/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs
index da293cc..07060fb 100644
--- a/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs
+++ b/src/ZB.MOM.WW.MxGateway.Worker.Tests/MxAccess/FailoverAlarmConsumerTests.cs
@@ -159,7 +159,7 @@ public sealed class FailoverAlarmConsumerTests
sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (mode change 1)
Assert.Single(changes);
- Assert.Equal(AlarmProviderMode.Subtag, changes[^1].Mode);
+ Assert.Equal(AlarmProviderMode.Subtag, changes[changes.Count - 1].Mode);
// Primary heals: PollOnce stops throwing. ProbeOnce should call only
// PollOnce (not Subscribe) to detect recovery.
@@ -172,9 +172,9 @@ public sealed class FailoverAlarmConsumerTests
sut.ProbeOnce(); // cleanProbes=2 → failback to Alarmmgr (mode change 2)
Assert.Equal(2, changes.Count);
- Assert.Equal(AlarmProviderMode.Alarmmgr, changes[^1].Mode);
+ Assert.Equal(AlarmProviderMode.Alarmmgr, changes[changes.Count - 1].Mode);
Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
- Assert.Equal(0, changes[^1].HResult);
+ Assert.Equal(0, changes[changes.Count - 1].HResult);
// ProbeOnce must not have called Subscribe at all during probing.
Assert.Equal(subscribeCountAfterFailover, primary.SubscribeCount);
@@ -263,4 +263,32 @@ public sealed class FailoverAlarmConsumerTests
Assert.Equal(22, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full"));
Assert.Equal(22, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full"));
}
+
+ /// <summary>
+ /// Proves that an intermittent failure during failback probing resets the
+ /// clean-probe counter to zero, requiring a fresh unbroken run of
+ /// <c>stableProbes</c> clean probes before failing back.
+ /// </summary>
+ [Fact]
+ public void FailbackProbe_IntermittentFailure_ResetsCleanCount()
+ {
+ var primary = new FlakyPrimary { ThrowOnPoll = true };
+ var standby = new StubStandby();
+ using var sut = new FailoverAlarmConsumer(primary, standby, new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 3));
+ sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → switch to Subtag
+ Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
+
+ primary.ThrowOnPoll = false;
+ sut.ProbeOnce(); // clean 1
+ sut.ProbeOnce(); // clean 2
+ primary.ThrowOnPoll = true;
+ sut.ProbeOnce(); // fails → reset to 0
+ Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
+ primary.ThrowOnPoll = false;
+ sut.ProbeOnce(); // clean 1
+ sut.ProbeOnce(); // clean 2
+ Assert.Equal(AlarmProviderMode.Subtag, sut.Mode); // a stale count of 2 would have failed back at the first clean probe
+ sut.ProbeOnce(); // clean 3 → failback
+ Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
+ }
}
diff --git a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs
index 35cd513..b40c614 100644
--- a/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs
+++ b/src/ZB.MOM.WW.MxGateway.Worker/MxAccess/FailoverAlarmConsumer.cs
@@ -63,6 +63,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
private int consecutiveFailures;
private int cleanProbes;
private bool disposed;
+ private DateTime lastProbeAtUtc = DateTime.MinValue; // UTC time of the last probe that actually ran; MinValue so the first probe is not throttled
///
/// The subscription expression passed to .
@@ -177,6 +178,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
/// drive it directly.
///
///
+ /// Probe throttle. When
+ /// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is greater than
+ /// zero, successive calls to this method are throttled: a probe is
+ /// skipped unless at least that many seconds have elapsed since the
+ /// last probe that was actually executed. When
+ /// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is zero, the
+ /// throttle is disabled and every call probes immediately (the default
+ /// used by unit tests).
+ ///
+ ///
/// Why PollOnce only — no re-Subscribe.
/// Failover does NOT tear down the primary's subscription;
/// is single-subscribe and would
@@ -202,6 +213,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer));
if (active != Active.Standby) return;
+ // Throttle probes to the configured cadence. When ProbeIntervalSeconds
+ // is 0 the throttle is disabled and every call probes immediately. The
+ // clock is read once; a negative elapsed time (wall clock stepped back)
+ // is treated as due so a clock correction cannot stall failback probing.
+ DateTime nowUtc = DateTime.UtcNow;
+ double sinceLastProbe = (nowUtc - lastProbeAtUtc).TotalSeconds;
+ if (settings.ProbeIntervalSeconds > 0 && sinceLastProbe >= 0 && sinceLastProbe < settings.ProbeIntervalSeconds) return;
+
+ lastProbeAtUtc = nowUtc;
+
try
{
// Re-poll the still-subscribed primary. Do NOT call Subscribe —
@@ -278,7 +299,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
{
action();
}
- catch (Exception ex)
+ catch (Exception ex) when (ex is not OutOfMemoryException)
{
consecutiveFailures++;
int hresult = ex is COMException ? ex.HResult : 0;