worker(alarms): FailoverAlarmConsumer auto-failover/failback state machine

This commit is contained in:
Joseph Doherty
2026-06-13 09:46:47 -04:00
parent fd64b9260c
commit 0a54c0bc4b
4 changed files with 640 additions and 0 deletions
@@ -0,0 +1,211 @@
using System;
using System.Collections.Generic;
using ZB.MOM.WW.MxGateway.Contracts.Proto;
using ZB.MOM.WW.MxGateway.Worker.MxAccess;
using Xunit;
namespace ZB.MOM.WW.MxGateway.Worker.Tests.MxAccess;
/// <summary>
/// Unit tests for <see cref="FailoverAlarmConsumer"/>: prove the
/// auto-failover (consecutive primary COM failures → standby) and
/// auto-failback (consecutive clean probes → primary) state machine,
/// active-child transition forwarding, and active-child delegation of
/// acknowledgments. Fakes stand in for both children so this needs no
/// AVEVA install.
/// </summary>
public sealed class FailoverAlarmConsumerTests
{
/// <summary>
/// Fake primary consumer, modeling a wnwrap consumer that surfaces COM
/// HRESULT failures. While <see cref="ThrowOnPoll"/> is <c>true</c>, both
/// Subscribe and PollOnce throw a COMException carrying HRESULT
/// 0x80004005; once the flag is cleared both calls succeed. Acknowledge
/// calls always return the sentinel 11, and <see cref="Raise"/> fires
/// <see cref="AlarmTransitionEmitted"/> on demand so forwarding from the
/// primary can be exercised.
/// </summary>
private sealed class FlakyPrimary : IMxAccessAlarmConsumer
{
    /// <summary>When set, Subscribe and PollOnce fail with a COMException.</summary>
    public bool ThrowOnPoll = true;

    /// <summary>Count of PollOnce calls; incremented before any throw.</summary>
    public int Polls;

    public event EventHandler<MxAlarmTransitionEvent>? AlarmTransitionEmitted;

    public void Subscribe(string s) => FailWhileFlaky();

    public void PollOnce()
    {
        Polls += 1;
        FailWhileFlaky();
    }

    public int AcknowledgeByGuid(Guid g, string c, string a, string b, string d, string e)
    {
        return 11;
    }

    public int AcknowledgeByName(string n, string p, string gr, string c, string a, string b, string d, string e)
    {
        return 11;
    }

    public IReadOnlyList<MxAlarmSnapshotRecord> SnapshotActiveAlarms()
    {
        return Array.Empty<MxAlarmSnapshotRecord>();
    }

    public void Dispose()
    {
        // Nothing to release: the fake owns no resources.
    }

    /// <summary>Fires <see cref="AlarmTransitionEmitted"/> with this fake as sender.</summary>
    public void Raise(MxAlarmTransitionEvent e)
    {
        var handler = AlarmTransitionEmitted;
        if (handler != null)
        {
            handler(this, e);
        }
    }

    /// <summary>Shared failure path for Subscribe and PollOnce.</summary>
    private void FailWhileFlaky()
    {
        if (!ThrowOnPoll)
        {
            return;
        }

        throw new System.Runtime.InteropServices.COMException("boom", unchecked((int)0x80004005));
    }
}
/// <summary>
/// Fake standby consumer (stand-in for the subtag provider). None of its
/// members throw. Subscribe records that the standby was armed via
/// <see cref="Subscribed"/>, acknowledge calls always return the sentinel
/// 22, and <see cref="Raise"/> fires <see cref="AlarmTransitionEmitted"/>
/// on demand.
/// </summary>
private sealed class StubStandby : IMxAccessAlarmConsumer
{
    /// <summary>Becomes <c>true</c> once Subscribe has been called.</summary>
    public bool Subscribed;

    public event EventHandler<MxAlarmTransitionEvent>? AlarmTransitionEmitted;

    public void Subscribe(string s)
    {
        Subscribed = true;
    }

    public void PollOnce()
    {
        // Deliberately a no-op: the standby never fails in these tests.
    }

    public int AcknowledgeByGuid(Guid g, string c, string a, string b, string d, string e)
    {
        return 22;
    }

    public int AcknowledgeByName(string n, string p, string gr, string c, string a, string b, string d, string e)
    {
        return 22;
    }

    public IReadOnlyList<MxAlarmSnapshotRecord> SnapshotActiveAlarms()
    {
        return Array.Empty<MxAlarmSnapshotRecord>();
    }

    public void Dispose()
    {
        // Nothing to release: the fake owns no resources.
    }

    /// <summary>Fires <see cref="AlarmTransitionEmitted"/> with this fake as sender.</summary>
    public void Raise(MxAlarmTransitionEvent e)
    {
        var handler = AlarmTransitionEmitted;
        if (handler != null)
        {
            handler(this, e);
        }
    }
}
/// <summary>
/// Builds a fresh transition event for forwarding tests: a record with a
/// newly generated alarm GUID and an unspecified previous state. Every
/// call returns a distinct instance, so tests can compare by reference.
/// </summary>
private static MxAlarmTransitionEvent SampleTransition()
{
    var sample = new MxAlarmTransitionEvent
    {
        Record = new MxAlarmSnapshotRecord { AlarmGuid = Guid.NewGuid() },
        PreviousState = MxAlarmStateKind.Unspecified,
    };
    return sample;
}
/// <summary>
/// With a failure threshold of 3 and a primary that always throws, no
/// mode change is reported after the first two failures; the third
/// failure produces exactly one change to Subtag carrying the primary's
/// HRESULT.
/// </summary>
[Fact]
public void Primary_FailsThresholdTimes_SwitchesToSubtag()
{
    const int ExpectedHResult = unchecked((int)0x80004005);

    var flaky = new FlakyPrimary { ThrowOnPoll = true };
    var backup = new StubStandby();
    var config = new FailoverSettings(threshold: 3, probeIntervalSeconds: 0, stableProbes: 1);
    using var sut = new FailoverAlarmConsumer(flaky, backup, config);
    var modeChanges = new List<AlarmProviderModeChange>();
    sut.ProviderModeChanged += (_, change) => modeChanges.Add(change);

    // Failure #1 comes from the primary's Subscribe; the standby is armed anyway.
    sut.Subscribe(@"\\HOST\Galaxy!Area");
    Assert.True(backup.Subscribed);
    Assert.Empty(modeChanges);

    // Failure #2: still below the threshold.
    sut.PollOnce();
    Assert.Empty(modeChanges);

    // Failure #3 reaches the threshold and triggers the switch.
    sut.PollOnce();
    AlarmProviderModeChange switched = Assert.Single(modeChanges);
    Assert.Equal(AlarmProviderMode.Subtag, switched.Mode);
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);
    Assert.Equal(ExpectedHResult, switched.HResult);
}
/// <summary>
/// Once the consumer has failed over to Subtag, a transition raised by
/// the standby is re-emitted by the consumer as the same event instance.
/// </summary>
[Fact]
public void AfterSwitch_StandbyTransitionsAreForwarded()
{
    var flaky = new FlakyPrimary { ThrowOnPoll = true };
    var backup = new StubStandby();
    var config = new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 1);
    using var sut = new FailoverAlarmConsumer(flaky, backup, config);
    MxAlarmTransitionEvent? lastSeen = null;
    sut.AlarmTransitionEmitted += (_, evt) =>
    {
        lastSeen = evt;
    };

    // Threshold is 1, so the failing Subscribe alone flips the mode to Subtag.
    sut.Subscribe(@"\\HOST\Galaxy!Area");
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    MxAlarmTransitionEvent fromStandby = SampleTransition();
    backup.Raise(fromStandby);

    Assert.Same(fromStandby, lastSeen);
}
/// <summary>
/// After failing over with threshold 1, the primary stops throwing. With
/// stableProbes set to 2, the first clean probe reports nothing new and
/// the second reports a change back to Alarmmgr with HRESULT 0.
/// </summary>
[Fact]
public void WhileDegraded_PrimaryHeals_FailsBackAfterStableProbes()
{
    var flaky = new FlakyPrimary { ThrowOnPoll = true };
    var backup = new StubStandby();
    var config = new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 2);
    using var sut = new FailoverAlarmConsumer(flaky, backup, config);
    var modeChanges = new List<AlarmProviderModeChange>();
    sut.ProviderModeChanged += (_, change) => modeChanges.Add(change);

    // Threshold is 1: the failing Subscribe yields the first change (to Subtag).
    sut.Subscribe(@"\\HOST\Galaxy!Area");
    AlarmProviderModeChange failover = Assert.Single(modeChanges);
    Assert.Equal(AlarmProviderMode.Subtag, failover.Mode);

    // Primary heals from here on.
    flaky.ThrowOnPoll = false;

    // Clean probe #1: not enough for failback yet.
    sut.ProbeOnce();
    Assert.Single(modeChanges);

    // Clean probe #2: failback is reported as the second change.
    sut.ProbeOnce();
    Assert.Equal(2, modeChanges.Count);
    AlarmProviderModeChange failback = modeChanges[modeChanges.Count - 1];
    Assert.Equal(AlarmProviderMode.Alarmmgr, failback.Mode);
    Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);
    Assert.Equal(0, failback.HResult);
}
/// <summary>
/// While the healthy primary is active (mode Alarmmgr), its transitions
/// are re-emitted as the same instance, and transitions raised by the
/// inactive standby are not forwarded.
/// </summary>
[Fact]
public void BeforeFailover_PrimaryTransitionsAreForwarded()
{
    // The primary does not throw here, so it stays active and can Raise.
    var healthy = new FlakyPrimary { ThrowOnPoll = false };
    var backup = new StubStandby();
    var config = new FailoverSettings(threshold: 3, probeIntervalSeconds: 0, stableProbes: 1);
    using var sut = new FailoverAlarmConsumer(healthy, backup, config);
    var seen = new List<MxAlarmTransitionEvent>();
    sut.AlarmTransitionEmitted += (_, evt) => seen.Add(evt);

    sut.Subscribe(@"\\HOST\Galaxy!Area");
    Assert.Equal(AlarmProviderMode.Alarmmgr, sut.Mode);

    // Active child is the primary, so its event is forwarded.
    MxAlarmTransitionEvent fromPrimary = SampleTransition();
    healthy.Raise(fromPrimary);
    MxAlarmTransitionEvent only = Assert.Single(seen);
    Assert.Same(fromPrimary, only);

    // The standby is not active, so its event is suppressed and the count stays at one.
    backup.Raise(SampleTransition());
    Assert.Single(seen);
}
/// <summary>
/// Acknowledge calls are routed to whichever child is active: the
/// primary's sentinel (11) is returned before failover and the standby's
/// sentinel (22) after it.
/// </summary>
[Fact]
public void Acknowledge_DelegatesToActiveChild()
{
    const int PrimarySentinel = 11;
    const int StandbySentinel = 22;

    var flaky = new FlakyPrimary { ThrowOnPoll = false };
    var backup = new StubStandby();
    var config = new FailoverSettings(threshold: 1, probeIntervalSeconds: 0, stableProbes: 1);
    using var sut = new FailoverAlarmConsumer(flaky, backup, config);
    sut.Subscribe(@"\\HOST\Galaxy!Area");

    // Primary is the active child: both acknowledge paths return its sentinel.
    Assert.Equal(PrimarySentinel, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full"));
    Assert.Equal(PrimarySentinel, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full"));

    // Make the primary fail; with threshold 1 a single failing poll forces the failover.
    flaky.ThrowOnPoll = true;
    sut.PollOnce();
    Assert.Equal(AlarmProviderMode.Subtag, sut.Mode);

    // Standby is now the active child: both acknowledge paths return its sentinel.
    Assert.Equal(StandbySentinel, sut.AcknowledgeByGuid(Guid.NewGuid(), "c", "n", "node", "dom", "full"));
    Assert.Equal(StandbySentinel, sut.AcknowledgeByName("a", "p", "g", "c", "n", "node", "dom", "full"));
}
}