fix(otopcua): resume discovery on actor context + bound/harden re-discovery

This commit is contained in:
Joseph Doherty
2026-06-26 08:19:12 -04:00
parent b9b8d3d389
commit 21298ec1b2
2 changed files with 166 additions and 22 deletions
@@ -118,6 +118,59 @@ public sealed class DriverInstanceActorDiscoveryTests : RuntimeActorTestBase
driver.DiscoverCount.ShouldBeGreaterThan(passesBeforeReconnect);
}
/// <summary>
/// Regression guard for the Critical: when a driver's <c>DiscoverAsync</c> finishes ASYNCHRONOUSLY
/// (away from the actor thread), <see cref="DriverInstanceActor.DiscoveredNodesReady"/> must still be
/// delivered to the parent. After the discovery await the handler uses <c>Context.Parent</c> and
/// <c>Timers</c>; had it awaited with <c>ConfigureAwait(false)</c>, the continuation would run without
/// an actor context, those calls would throw <c>NotSupportedException("no active ActorContext")</c>,
/// the handler would fault, and nothing would be published. Stubs that return
/// <c>Task.CompletedTask</c> complete synchronously and hide the problem, so this test uses a driver
/// that forces a real off-context resume (modelled on <c>SubscribableStubDriver.UnsubscribeYields</c>).
/// </summary>
[Fact]
public void Async_completing_discovery_resumes_on_actor_context_and_publishes()
{
    // Arrange: an actor under a probe parent, driven by a stub that resumes off the actor thread.
    var yieldingDriver = new YieldingDiscoverableStubDriver();
    var parentProbe = CreateTestProbe();
    var actorProps = DriverInstanceActor.Props(
        yieldingDriver,
        rediscoverInterval: TimeSpan.FromMilliseconds(20));
    var instanceActor = parentProbe.ChildActorOf(actorProps);

    // Act: kick off initialization, which drives the discovery pass.
    instanceActor.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Assert: when the handler resumes on the actor context the publish goes through and the parent
    // receives a non-empty set; otherwise the handler faults at Context.Parent.Tell and this wait
    // times out.
    var ready = parentProbe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
    ready.Nodes.Count.ShouldBe(3);
    ready.DriverInstanceId.ShouldBe(yieldingDriver.DriverInstanceId);
}
/// <summary>
/// Verifies that <c>rediscoverMaxAttempts</c> is what stops re-discovery when the discovered set never
/// settles: the driver used here returns a GROWING set (1,2,3,…), so its signature never repeats.
/// With the cap set to 3 the parent must see exactly 3 published passes and then silence.
/// </summary>
[Fact]
public void Never_stabilising_discovery_is_bounded_by_the_attempt_cap()
{
    const int attemptCap = 3;

    // Arrange: an actor under a probe parent, capped at attemptCap re-discovery passes.
    var growingDriver = new GrowingDiscoverableStubDriver();
    var parentProbe = CreateTestProbe();
    var actorProps = DriverInstanceActor.Props(
        growingDriver,
        rediscoverInterval: TimeSpan.FromMilliseconds(20),
        rediscoverMaxAttempts: attemptCap);
    var instanceActor = parentProbe.ChildActorOf(actorProps);

    // Act
    instanceActor.Tell(new DriverInstanceActor.InitializeRequested("{}"));

    // Collect one publication per permitted pass.
    var publications = new List<DriverInstanceActor.DiscoveredNodesReady>();
    while (publications.Count < attemptCap)
    {
        var ready = parentProbe.ExpectMsg<DriverInstanceActor.DiscoveredNodesReady>(TimeSpan.FromSeconds(2));
        publications.Add(ready);
    }

    // The cap has been hit, so a fourth pass must not appear even though the set is still changing.
    parentProbe.ExpectNoMsg(TimeSpan.FromMilliseconds(300));

    // Each capped pass really did carry a larger set than the one before (1, 2, then 3 nodes).
    publications.Select(p => p.Nodes.Count).ShouldBe(new[] { 1, 2, 3 });
    growingDriver.DiscoverCount.ShouldBe(attemptCap);
}
/// <summary>
/// A <see cref="StubDriver"/> that also exposes <see cref="ITagDiscovery"/>. Each <c>DiscoverAsync</c>
/// pass is counted; passes 1–2 yield nothing (cache warming), passes 3+ yield a stable 3-node set —
@@ -149,4 +202,63 @@ public sealed class DriverInstanceActorDiscoveryTests : RuntimeActorTestBase
return Task.CompletedTask;
}
}
/// <summary>
/// A discoverable driver whose <c>DiscoverAsync</c> genuinely SUSPENDS and resumes on a fresh
/// thread-pool thread that carries NO Akka actor cell — modelled on
/// <c>SubscribableStubDriver.UnsubscribeYields</c>. This forces the actor's <c>await DiscoverAsync(...)</c>
/// continuation to resume off-context unless the handler omits <c>ConfigureAwait(false)</c>, so it is a
/// deterministic repro of the no-ActorContext race. Returns a stable 3-node set on every pass.
/// </summary>
private sealed class YieldingDiscoverableStubDriver : StubDriver, ITagDiscovery
{
    /// <summary>Suspends on a TCS completed from a background thread, then streams 3 nodes.</summary>
    /// <param name="builder">Address-space builder that receives the <c>FixedTree</c> folder and its variables.</param>
    /// <param name="cancellationToken">Observed once the method has resumed; never gates the resume itself.</param>
    /// <returns>A task that completes after the three variables have been added to <paramref name="builder"/>.</returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="cancellationToken"/> was cancelled by the time the method resumed.
    /// </exception>
    public async Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
    {
        var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);

        // The completer is deliberately NOT tied to cancellationToken: Task.Run(action, token) drops
        // the work item if the token is cancelled before it starts, which would leave the TCS
        // uncompleted and this method awaiting forever.
        _ = Task.Run(() => tcs.SetResult());
        await tcs.Task.ConfigureAwait(false); // resume on a clean thread-pool thread (no actor cell)

        // Honour cancellation after the resume instead of hanging on it.
        cancellationToken.ThrowIfCancellationRequested();

        var fixedTree = builder.Folder("FixedTree", "FixedTree");
        for (var i = 0; i < 3; i++)
        {
            fixedTree.Variable($"v{i}", $"v{i}", new DriverAttributeInfo(
                FullName: $"m.fixed.v{i}",
                DriverDataType: DriverDataType.Float64,
                IsArray: false,
                ArrayDim: null,
                SecurityClass: SecurityClassification.ViewOnly,
                IsHistorized: false));
        }
    }
}
/// <summary>
/// A discoverable driver that never settles on a fixed set: the Nth pass produces N nodes (1,2,3,…).
/// Because every pass differs from the previous one, the full-reference signature changes each time
/// and only the attempt cap can end the re-discovery loop.
/// </summary>
private sealed class GrowingDiscoverableStubDriver : StubDriver, ITagDiscovery
{
    private int _discoverPasses;

    /// <summary>How many <see cref="DiscoverAsync"/> passes have been driven so far.</summary>
    public int DiscoverCount => Volatile.Read(ref _discoverPasses);

    /// <summary>Adds one more node than the previous pass did (pass N → N nodes).</summary>
    /// <param name="builder">Address-space builder that receives the <c>FixedTree</c> folder and its variables.</param>
    /// <param name="cancellationToken">Not consulted; the pass completes synchronously.</param>
    /// <returns>An already-completed task.</returns>
    public Task DiscoverAsync(IAddressSpaceBuilder builder, CancellationToken cancellationToken)
    {
        // The incremented counter is the 1-based pass number, which doubles as this pass's node count.
        var nodesThisPass = Interlocked.Increment(ref _discoverPasses);
        var folder = builder.Folder("FixedTree", "FixedTree");

        var index = 0;
        while (index < nodesThisPass)
        {
            var attributes = new DriverAttributeInfo(
                FullName: $"m.fixed.v{index}",
                DriverDataType: DriverDataType.Float64,
                IsArray: false,
                ArrayDim: null,
                SecurityClass: SecurityClassification.ViewOnly,
                IsHistorized: false);
            folder.Variable($"v{index}", $"v{index}", attributes);
            index++;
        }

        return Task.CompletedTask;
    }
}
}