worker(alarms): net48 index fix; enforce ProbeIntervalSeconds; OOM-safe catch; reset-on-failure test
This commit is contained in:
@@ -159,7 +159,7 @@ public sealed class FailoverAlarmConsumerTests
|
||||
|
||||
sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (mode change 1)
|
||||
Assert.Single(changes);
|
||||
Assert.Equal(AlarmProviderMode.Subtag, changes[^1].Mode);
|
||||
Assert.Equal(AlarmProviderMode.Subtag, changes[changes.Count - 1].Mode);
|
||||
|
||||
// Primary heals: PollOnce stops throwing. ProbeOnce should call only
|
||||
// PollOnce (not Subscribe) to detect recovery.
|
||||
@@ -172,9 +172,9 @@ public sealed class FailoverAlarmConsumerTests
|
||||
sut.ProbeOnce(); // cleanProbes=2 → failback to Alarmmgr (mode change 2)
|
||||
|
||||
Assert.Equal(2, changes.Count);
|
||||
Assert.Equal(AlarmProviderMode.Alarmmgr, changes[^1].Mode);
|
||||
Assert.Equal(AlarmProviderMode.Alarmmgr, changes[changes.Count - 1].Mode);
|
||||
Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
|
||||
Assert.Equal(0, changes[^1].HResult);
|
||||
Assert.Equal(0, changes[changes.Count - 1].HResult);
|
||||
|
||||
// ProbeOnce must not have called Subscribe at all during probing.
|
||||
Assert.Equal(subscribeCountAfterFailover, primary.SubscribeCount);
|
||||
@@ -263,4 +263,32 @@ public sealed class FailoverAlarmConsumerTests
|
||||
Assert.Equal(22, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full"));
|
||||
Assert.Equal(22, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full"));
|
||||
}
|
||||
|
||||
/// <summary>
/// Proves that an intermittent failure during failback probing resets the
/// clean-probe counter to zero, requiring a fresh unbroken run of
/// <see cref="FailoverSettings.StableProbes"/> before failing back.
/// </summary>
/// <remarks>
/// The mode is asserted after every clean probe that follows the failure.
/// Checking only the final mode is not enough: if the counter were NOT
/// reset (it would remain at 2), the very first clean probe afterwards
/// would already fail back, later <c>ProbeOnce</c> calls would return
/// early because the consumer is no longer on the standby, and a single
/// end-of-test assertion would still pass.
/// </remarks>
[Fact]
public void FailbackProbe_IntermittentFailure_ResetsCleanCount()
{
    var primary = new FlakyPrimary { ThrowOnPoll = true };
    var standby = new StubStandby();
    using var sut = new FailoverAlarmConsumer(primary, standby, new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 3));
    sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → switch to Subtag
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    // Two clean probes, one short of the stableProbes=3 needed to fail back.
    primary.ThrowOnPoll = false;
    sut.ProbeOnce(); // clean 1
    sut.ProbeOnce(); // clean 2

    // The primary flakes again before the run completes.
    primary.ThrowOnPoll = true;
    sut.ProbeOnce(); // fails → reset to 0
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    // A fresh, unbroken run of three clean probes is now required. The
    // intermediate assertions are what actually detect a missing reset.
    primary.ThrowOnPoll = false;
    sut.ProbeOnce(); // clean 1
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
    sut.ProbeOnce(); // clean 2
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
    sut.ProbeOnce(); // clean 3 → failback
    Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
}
|
||||
}
|
||||
|
||||
@@ -63,6 +63,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
private int consecutiveFailures;
|
||||
private int cleanProbes;
|
||||
private bool disposed;
|
||||
private DateTime lastProbeAtUtc = DateTime.MinValue;
|
||||
|
||||
/// <summary>
|
||||
/// The subscription expression passed to <see cref="Subscribe"/>.
|
||||
@@ -177,6 +178,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
/// drive it directly.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <strong>Probe throttle.</strong> When
|
||||
/// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is greater than
|
||||
/// zero, successive calls to this method are throttled: a probe is
|
||||
/// skipped unless at least that many seconds have elapsed since the
|
||||
/// last probe that was actually executed. When
|
||||
/// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is zero, the
|
||||
/// throttle is disabled and every call probes immediately (the default
|
||||
/// used by unit tests).
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <strong>Why PollOnce only — no re-Subscribe.</strong>
|
||||
/// Failover does NOT tear down the primary's subscription;
|
||||
/// <see cref="WnWrapAlarmConsumer"/> is single-subscribe and would
|
||||
@@ -202,6 +213,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer));
|
||||
if (active != Active.Standby) return;
|
||||
|
||||
// Throttle probes to the configured cadence. When ProbeIntervalSeconds
|
||||
// is 0 the throttle is disabled and every call probes immediately.
|
||||
if (settings.ProbeIntervalSeconds > 0
|
||||
&& (DateTime.UtcNow - lastProbeAtUtc).TotalSeconds < settings.ProbeIntervalSeconds)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
lastProbeAtUtc = DateTime.UtcNow;
|
||||
|
||||
try
|
||||
{
|
||||
// Re-poll the still-subscribed primary. Do NOT call Subscribe —
|
||||
@@ -278,7 +299,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
||||
{
|
||||
action();
|
||||
}
|
||||
catch (Exception ex)
|
||||
catch (Exception ex) when (ex is not OutOfMemoryException)
|
||||
{
|
||||
consecutiveFailures++;
|
||||
int hresult = ex is COMException ? ex.HResult : 0;
|
||||
|
||||
Reference in New Issue
Block a user