worker(alarms): net48 index fix; enforce ProbeIntervalSeconds; OOM-safe catch; reset-on-failure test
This commit is contained in:
@@ -159,7 +159,7 @@ public sealed class FailoverAlarmConsumerTests
|
|||||||
|
|
||||||
sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (mode change 1)
|
sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (mode change 1)
|
||||||
Assert.Single(changes);
|
Assert.Single(changes);
|
||||||
Assert.Equal(AlarmProviderMode.Subtag, changes[^1].Mode);
|
Assert.Equal(AlarmProviderMode.Subtag, changes[changes.Count - 1].Mode);
|
||||||
|
|
||||||
// Primary heals: PollOnce stops throwing. ProbeOnce should call only
|
// Primary heals: PollOnce stops throwing. ProbeOnce should call only
|
||||||
// PollOnce (not Subscribe) to detect recovery.
|
// PollOnce (not Subscribe) to detect recovery.
|
||||||
@@ -172,9 +172,9 @@ public sealed class FailoverAlarmConsumerTests
|
|||||||
sut.ProbeOnce(); // cleanProbes=2 → failback to Alarmmgr (mode change 2)
|
sut.ProbeOnce(); // cleanProbes=2 → failback to Alarmmgr (mode change 2)
|
||||||
|
|
||||||
Assert.Equal(2, changes.Count);
|
Assert.Equal(2, changes.Count);
|
||||||
Assert.Equal(AlarmProviderMode.Alarmmgr, changes[^1].Mode);
|
Assert.Equal(AlarmProviderMode.Alarmmgr, changes[changes.Count - 1].Mode);
|
||||||
Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
|
Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
|
||||||
Assert.Equal(0, changes[^1].HResult);
|
Assert.Equal(0, changes[changes.Count - 1].HResult);
|
||||||
|
|
||||||
// ProbeOnce must not have called Subscribe at all during probing.
|
// ProbeOnce must not have called Subscribe at all during probing.
|
||||||
Assert.Equal(subscribeCountAfterFailover, primary.SubscribeCount);
|
Assert.Equal(subscribeCountAfterFailover, primary.SubscribeCount);
|
||||||
@@ -263,4 +263,32 @@ public sealed class FailoverAlarmConsumerTests
|
|||||||
Assert.Equal(22, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full"));
|
Assert.Equal(22, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full"));
|
||||||
Assert.Equal(22, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full"));
|
Assert.Equal(22, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Proves that an intermittent failure during failback probing resets the
/// clean-probe counter to zero, requiring a fresh unbroken run of
/// <see cref="FailoverSettings.StableProbes"/> before failing back.
/// </summary>
/// <remarks>
/// The mode is asserted after every probe that follows the injected failure.
/// Without those intermediate checks, an implementation that kept the stale
/// count of two would fail back on the very next clean probe and still
/// satisfy the final assertion, so the reset itself would go unverified.
/// </remarks>
[Fact]
public void FailbackProbe_IntermittentFailure_ResetsCleanCount()
{
    var primary = new FlakyPrimary { ThrowOnPoll = true };
    var standby = new StubStandby();

    // probeIntervalSeconds: 0 disables the probe throttle, so every
    // back-to-back ProbeOnce call below actually executes a probe.
    using var sut = new FailoverAlarmConsumer(primary, standby, new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 3));
    sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → switch to Subtag
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    primary.ThrowOnPoll = false;
    sut.ProbeOnce(); // clean 1
    sut.ProbeOnce(); // clean 2
    // Two clean probes are one short of stableProbes=3: no failback yet.
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    primary.ThrowOnPoll = true;
    sut.ProbeOnce(); // fails → reset to 0
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    primary.ThrowOnPoll = false;
    sut.ProbeOnce(); // clean 1
    // A stale count (2 + 1) would already have failed back here.
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
    sut.ProbeOnce(); // clean 2
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
    sut.ProbeOnce(); // clean 3 → failback
    Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
|||||||
private int consecutiveFailures;
|
private int consecutiveFailures;
|
||||||
private int cleanProbes;
|
private int cleanProbes;
|
||||||
private bool disposed;
|
private bool disposed;
|
||||||
|
private DateTime lastProbeAtUtc = DateTime.MinValue;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The subscription expression passed to <see cref="Subscribe"/>.
|
/// The subscription expression passed to <see cref="Subscribe"/>.
|
||||||
@@ -177,6 +178,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
|||||||
/// drive it directly.
|
/// drive it directly.
|
||||||
/// </para>
|
/// </para>
|
||||||
/// <para>
|
/// <para>
|
||||||
|
/// <strong>Probe throttle.</strong> When
|
||||||
|
/// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is greater than
|
||||||
|
/// zero, successive calls to this method are throttled: a probe is
|
||||||
|
/// skipped unless at least that many seconds have elapsed since the
|
||||||
|
/// last probe that was actually executed. When
|
||||||
|
/// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is zero, the
|
||||||
|
/// throttle is disabled and every call probes immediately (the default
|
||||||
|
/// used by unit tests).
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
/// <strong>Why PollOnce only — no re-Subscribe.</strong>
|
/// <strong>Why PollOnce only — no re-Subscribe.</strong>
|
||||||
/// Failover does NOT tear down the primary's subscription;
|
/// Failover does NOT tear down the primary's subscription;
|
||||||
/// <see cref="WnWrapAlarmConsumer"/> is single-subscribe and would
|
/// <see cref="WnWrapAlarmConsumer"/> is single-subscribe and would
|
||||||
@@ -202,6 +213,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
|||||||
if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer));
|
if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer));
|
||||||
if (active != Active.Standby) return;
|
if (active != Active.Standby) return;
|
||||||
|
|
||||||
|
// Throttle probes to the configured cadence. When ProbeIntervalSeconds
|
||||||
|
// is 0 the throttle is disabled and every call probes immediately.
|
||||||
|
if (settings.ProbeIntervalSeconds > 0
|
||||||
|
&& (DateTime.UtcNow - lastProbeAtUtc).TotalSeconds < settings.ProbeIntervalSeconds)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
lastProbeAtUtc = DateTime.UtcNow;
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// Re-poll the still-subscribed primary. Do NOT call Subscribe —
|
// Re-poll the still-subscribed primary. Do NOT call Subscribe —
|
||||||
@@ -278,7 +299,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
|
|||||||
{
|
{
|
||||||
action();
|
action();
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex) when (ex is not OutOfMemoryException)
|
||||||
{
|
{
|
||||||
consecutiveFailures++;
|
consecutiveFailures++;
|
||||||
int hresult = ex is COMException ? ex.HResult : 0;
|
int hresult = ex is COMException ? ex.HResult : 0;
|
||||||
|
|||||||
Reference in New Issue
Block a user