worker(alarms): net48 index fix; enforce ProbeIntervalSeconds; OOM-safe catch; reset-on-failure test

This commit is contained in:
Joseph Doherty
2026-06-13 09:55:07 -04:00
parent d6c0bb41ca
commit 7241a4fb9c
2 changed files with 53 additions and 4 deletions
@@ -159,7 +159,7 @@ public sealed class FailoverAlarmConsumerTests
sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (mode change 1)
Assert.Single(changes);
Assert.Equal(AlarmProviderMode.Subtag, changes[^1].Mode);
Assert.Equal(AlarmProviderMode.Subtag, changes[changes.Count - 1].Mode);
// Primary heals: PollOnce stops throwing. ProbeOnce should call only
// PollOnce (not Subscribe) to detect recovery.
@@ -172,9 +172,9 @@ public sealed class FailoverAlarmConsumerTests
sut.ProbeOnce(); // cleanProbes=2 → failback to Alarmmgr (mode change 2)
Assert.Equal(2, changes.Count);
Assert.Equal(AlarmProviderMode.Alarmmgr, changes[^1].Mode);
Assert.Equal(AlarmProviderMode.Alarmmgr, changes[changes.Count - 1].Mode);
Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
Assert.Equal(0, changes[^1].HResult);
Assert.Equal(0, changes[changes.Count - 1].HResult);
// ProbeOnce must not have called Subscribe at all during probing.
Assert.Equal(subscribeCountAfterFailover, primary.SubscribeCount);
@@ -263,4 +263,32 @@ public sealed class FailoverAlarmConsumerTests
Assert.Equal(22, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full"));
Assert.Equal(22, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full"));
}
/// <summary>
/// Proves that an intermittent failure during failback probing resets the
/// clean-probe counter to zero, requiring a fresh unbroken run of
/// <see cref="FailoverSettings.StableProbes"/> before failing back.
/// </summary>
/// <remarks>
/// The mode is asserted after each partial run of clean probes, not only at
/// the end. Without the intermediate checks, a counter that was never reset
/// would reach the stable-probe target on the first clean probe after the
/// fault, fail back early, and still satisfy the final assertion.
/// </remarks>
[Fact]
public void FailbackProbe_IntermittentFailure_ResetsCleanCount()
{
    // Arrange: primary fails on poll; probeIntervalSeconds: 0 disables the
    // probe throttle so back-to-back ProbeOnce calls all execute.
    var primary = new FlakyPrimary { ThrowOnPoll = true };
    var standby = new StubStandby();
    using var sut = new FailoverAlarmConsumer(primary, standby, new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 3));

    sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → switch to Subtag
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    // Two clean probes: one short of the three required, so no failback yet.
    primary.ThrowOnPoll = false;
    sut.ProbeOnce(); // clean 1
    sut.ProbeOnce(); // clean 2
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    // A single failed probe must discard the partial clean run.
    primary.ThrowOnPoll = true;
    sut.ProbeOnce(); // fails → reset to 0
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    // A fresh, unbroken run of three clean probes is required. The first two
    // must NOT fail back; if they do, the counter was not reset.
    primary.ThrowOnPoll = false;
    sut.ProbeOnce(); // clean 1
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
    sut.ProbeOnce(); // clean 2
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
    sut.ProbeOnce(); // clean 3 → failback
    Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
}
}
@@ -63,6 +63,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
private int consecutiveFailures;
private int cleanProbes;
private bool disposed;
private DateTime lastProbeAtUtc = DateTime.MinValue;
/// <summary>
/// The subscription expression passed to <see cref="Subscribe"/>.
@@ -177,6 +178,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
/// drive it directly.
/// </para>
/// <para>
/// <strong>Probe throttle.</strong> When
/// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is greater than
/// zero, successive calls to this method are throttled: a probe is
/// skipped unless at least that many seconds have elapsed since the
/// last probe that was actually executed. When
/// <see cref="FailoverSettings.ProbeIntervalSeconds"/> is zero, the
/// throttle is disabled and every call probes immediately (the default
/// used by unit tests).
/// </para>
/// <para>
/// <strong>Why PollOnce only — no re-Subscribe.</strong>
/// Failover does NOT tear down the primary's subscription;
/// <see cref="WnWrapAlarmConsumer"/> is single-subscribe and would
@@ -202,6 +213,16 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
if (disposed) throw new ObjectDisposedException(nameof(FailoverAlarmConsumer));
if (active != Active.Standby) return;
// Throttle probes to the configured cadence. When ProbeIntervalSeconds
// is 0 the throttle is disabled and every call probes immediately.
if (settings.ProbeIntervalSeconds > 0
&& (DateTime.UtcNow - lastProbeAtUtc).TotalSeconds < settings.ProbeIntervalSeconds)
{
return;
}
lastProbeAtUtc = DateTime.UtcNow;
try
{
// Re-poll the still-subscribed primary. Do NOT call Subscribe —
@@ -278,7 +299,7 @@ public sealed class FailoverAlarmConsumer : IMxAccessAlarmConsumer
{
action();
}
catch (Exception ex)
catch (Exception ex) when (ex is not OutOfMemoryException)
{
consecutiveFailures++;
int hresult = ex is COMException ? ex.HResult : 0;