worker(alarms): failback probe re-polls the still-subscribed primary (no re-Subscribe)

This commit is contained in:
Joseph Doherty
2026-06-13 09:49:38 -04:00
parent 0a54c0bc4b
commit d6c0bb41ca
2 changed files with 104 additions and 13 deletions
@@ -29,8 +29,16 @@ public sealed class FailoverAlarmConsumerTests
/// <summary>
/// Fault switch for this fake, on by default. While <c>true</c>,
/// <see cref="Subscribe"/> throws a
/// <see cref="System.Runtime.InteropServices.COMException"/>.
/// NOTE(review): the name suggests the poll path honours the same flag;
/// that code is not visible here — confirm.
/// </summary>
public bool ThrowOnPoll = true;
/// <summary>
/// NOTE(review): presumably the number of poll calls made on this fake;
/// the code that updates it is not visible here — confirm.
/// </summary>
public int Polls;
/// <summary>
/// Number of times <see cref="Subscribe"/> has been called.
/// Incremented at entry, before any throw, so every attempt is
/// counted regardless of whether <see cref="ThrowOnPoll"/> is set.
/// </summary>
public int SubscribeCount;
public void Subscribe(string s)
{
SubscribeCount++;
if (ThrowOnPoll)
{
throw new System.Runtime.InteropServices.COMException("boom", unchecked((int)0x80004005));
@@ -137,6 +145,10 @@ public sealed class FailoverAlarmConsumerTests
[Fact]
public void WhileDegraded_PrimaryHeals_FailsBackAfterStableProbes()
{
// threshold=1 so the initial Subscribe failure (PollOnce path) immediately
// switches to Subtag. stableProbes=2 means two consecutive clean PollOnce
// calls are needed before failback. ProbeOnce must NOT call Subscribe —
// WnWrapAlarmConsumer is single-subscribe; re-calling would always throw.
FlakyPrimary primary = new FlakyPrimary { ThrowOnPoll = true };
StubStandby standby = new StubStandby();
FailoverSettings settings = new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 2);
@@ -145,21 +157,27 @@ public sealed class FailoverAlarmConsumerTests
List<AlarmProviderModeChange> changes = new List<AlarmProviderModeChange>();
sut.ProviderModeChanged += (_, e) => changes.Add(e);
sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (change 1)
sut.Subscribe(@"\\HOST\Galaxy!Area"); // threshold=1 → Subtag (mode change 1)
Assert.Single(changes);
Assert.Equal(AlarmProviderMode.Subtag, changes[^1].Mode);
primary.ThrowOnPoll = false; // primary heals
// Primary heals: PollOnce stops throwing. ProbeOnce should call only
// PollOnce (not Subscribe) to detect recovery.
primary.ThrowOnPoll = false;
int subscribeCountAfterFailover = primary.SubscribeCount;
sut.ProbeOnce(); // clean 1 (no failback yet)
sut.ProbeOnce(); // cleanProbes=1 — not yet at stableProbes=2
Assert.Single(changes);
sut.ProbeOnce(); // clean 2 → failback
sut.ProbeOnce(); // cleanProbes=2 → failback to Alarmmgr (mode change 2)
Assert.Equal(2, changes.Count);
Assert.Equal(AlarmProviderMode.Alarmmgr, changes[^1].Mode);
Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
Assert.Equal(0, changes[^1].HResult);
// ProbeOnce must not have called Subscribe at all during probing.
Assert.Equal(subscribeCountAfterFailover, primary.SubscribeCount);
}
[Fact]
@@ -185,6 +203,43 @@ public sealed class FailoverAlarmConsumerTests
Assert.Single(forwarded);
}
/// <summary>
/// Guards the failback probe against re-subscribing: while the consumer is
/// degraded, <see cref="FailoverAlarmConsumer.ProbeOnce"/> must leave the
/// primary's <c>Subscribe</c> untouched. The scenario modelled here is that
/// the production primary (<see cref="WnWrapAlarmConsumer"/>) is
/// single-subscribe, so a repeated <c>Subscribe</c> would always throw and
/// failback could never complete. Recovery therefore has to be detected by
/// re-polling the still-subscribed primary through <c>PollOnce</c> alone.
/// </summary>
[Fact]
public void ProbeOnce_DoesNotCallPrimarySubscribe()
{
    // Arrange: a primary that fails, with threshold=1 so a single failed
    // Subscribe is enough to drop the consumer into Subtag mode.
    FlakyPrimary flaky = new FlakyPrimary { ThrowOnPoll = true };
    StubStandby fallback = new StubStandby();
    FailoverSettings failoverSettings = new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 3);
    using FailoverAlarmConsumer consumer = new FailoverAlarmConsumer(flaky, fallback, failoverSettings);

    // The one and only Subscribe attempt: it throws and triggers failover.
    consumer.Subscribe(@"\\HOST\Galaxy!Area");

    // Baseline: setup must have hit the primary's Subscribe exactly once.
    int baselineSubscribes = flaky.SubscribeCount;
    Assert.Equal(1, baselineSubscribes);
    Assert.Equal(AlarmProviderMode.Subtag, consumer.Mode);

    // Heal the primary so each probe completes cleanly.
    flaky.ThrowOnPoll = false;

    // Act: run as many probes as stableProbes (3) requires; the last one
    // is expected to trigger failback.
    const int probesToRun = 3;
    for (int probe = 0; probe < probesToRun; probe++)
    {
        consumer.ProbeOnce();
    }

    // Assert: we are back on the primary and Subscribe was never re-invoked.
    Assert.Equal(AlarmProviderMode.Alarmmgr, consumer.Mode);
    Assert.Equal(baselineSubscribes, flaky.SubscribeCount);
}
[Fact]
public void Acknowledge_DelegatesToActiveChild()
{