Close all four stability-review 2026-04-13 findings so a failed runtime probe subscription can no longer leave a phantom entry that Tick() flips to Stopped and fans out false BadOutOfService quality across a host's subtree, a silently-failed dashboard bind no longer lets the service advertise a successful start while an operator-visible endpoint is dead, the seven sync-over-async sites in LmxNodeManager (rebuild probe sync, Read, Write, four HistoryRead overrides) can no longer park the OPC UA stack thread indefinitely on a hung backend, and alarm auto-subscribe + transferred-subscription restore no longer race shutdown as untracked fire-and-forget tasks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-04-14 00:48:07 -04:00
parent 731092595f
commit c76ab8fdee
21 changed files with 869 additions and 53 deletions

View File

@@ -402,6 +402,73 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.MxAccess
sut.IsHostStopped(20).ShouldBeFalse();
}
// ---------- Subscribe failure rollback (stability review 2026-04-13 Finding 1) ----------
[Fact]
public async Task Sync_SubscribeThrows_DoesNotLeavePhantomEntry()
{
    // Arrange: the fake client is primed with an exception for its subscribe path.
    var failingClient = new FakeMxAccessClient();
    failingClient.SubscribeException = new InvalidOperationException("advise failed");
    var stoppedHosts = new List<int>();
    var runningHosts = new List<int>();
    using var monitor = Sut(failingClient, 15, stoppedHosts, runningHosts);

    // Act: sync a single engine whose probe subscription fails.
    var engines = new[] { Engine(20, "DevAppEngine") };
    await monitor.SyncAsync(engines);

    // Assert: the failed subscribe must be rolled back completely. A leftover
    // (phantom) entry is exactly what Tick() could later move from Unknown to Stopped.
    monitor.ActiveProbeCount.ShouldBe(0);
    monitor.GetSnapshot().ShouldBeEmpty();
    monitor.IsHostStopped(20).ShouldBeFalse();
}
[Fact]
public async Task Sync_SubscribeThrows_TickDoesNotFireStopCallback()
{
    // Arrange: failing subscribe path plus a controllable clock so time can be advanced.
    var failingClient = new FakeMxAccessClient();
    failingClient.SubscribeException = new InvalidOperationException("advise failed");
    var fakeClock = new Clock();
    var stoppedHosts = new List<int>();
    var runningHosts = new List<int>();
    using var monitor = Sut(failingClient, 15, stoppedHosts, runningHosts, fakeClock);

    // Act: the sync fails to subscribe, then time moves beyond the unknown timeout
    // before Tick() runs. With an incomplete rollback, Tick() would flip the phantom
    // entry to Stopped and broadcast a false host-down signal.
    var engines = new[] { Engine(20, "DevAppEngine") };
    await monitor.SyncAsync(engines);
    var afterTimeout = fakeClock.Now.AddSeconds(30);
    fakeClock.Now = afterTimeout;
    monitor.Tick();

    // Assert: neither callback spy recorded anything and no probe is tracked.
    stoppedHosts.ShouldBeEmpty();
    runningHosts.ShouldBeEmpty();
    monitor.ActiveProbeCount.ShouldBe(0);
}
[Fact]
public async Task Sync_SubscribeSucceedsAfterRetry_AppearsInSnapshot()
{
    // Scenario: a subscribe failure that rolled back cleanly must not poison a later,
    // successful SyncAsync for the very same host.
    var flakyClient = new FakeMxAccessClient();
    flakyClient.SubscribeException = new InvalidOperationException("first attempt fails");
    var stoppedHosts = new List<int>();
    var runningHosts = new List<int>();
    using var monitor = Sut(flakyClient, 15, stoppedHosts, runningHosts);

    // First pass: subscribe throws, so nothing may be tracked afterwards.
    var firstAttempt = new[] { Engine(20, "DevAppEngine") };
    await monitor.SyncAsync(firstAttempt);
    monitor.ActiveProbeCount.ShouldBe(0);

    // Second pass: with the fault removed, the host has to show up in Unknown state.
    flakyClient.SubscribeException = null;
    var secondAttempt = new[] { Engine(20, "DevAppEngine") };
    await monitor.SyncAsync(secondAttempt);
    monitor.ActiveProbeCount.ShouldBe(1);
    var onlyEntry = monitor.GetSnapshot().Single();
    onlyEntry.State.ShouldBe(GalaxyRuntimeState.Unknown);
}
// ---------- Callback exception safety ----------
[Fact]