fix(data-connection-layer): resolve DataConnectionLayer-002/003/004/005 — Resume supervision, concurrent dicts, subscribe-failure classification, write timeout

This commit is contained in:
Joseph Doherty
2026-05-16 19:40:40 -04:00
parent d7630d80fe
commit fccd3274d3
7 changed files with 350 additions and 25 deletions

View File

@@ -512,6 +512,106 @@ public class DataConnectionActorTests : TestKit
Assert.Equal(instances * tagsPerInstance, report.ResolvedTags);
}
// ── DataConnectionLayer-004: subscribe-time failure classification ──
[Fact]
public async Task DCL004_GenuineTagResolutionFailure_PushesBadQualityToSubscriber()
{
    // DataConnectionLayer-004 regression. Per the design doc (Tag Path Resolution,
    // step 2) a tag that genuinely cannot be resolved at subscribe time must be
    // marked quality `bad`. Before the fix the failure was only logged and the tag
    // was added to _unresolvedTags, so the Instance Actor never received a signal.
    // With the fix a bad-quality TagValueUpdate is pushed to the subscriber.
    const string connectionName = "dcl004-bad-quality";
    const string unresolvableTag = "missing/tag";
    var receiveTimeout = TimeSpan.FromSeconds(5);

    // Arrange: the adapter connects fine and reports itself healthy ...
    _mockAdapter
        .ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);

    // ... but subscribing to the tag fails with a non-connection exception, which
    // stands in for a genuine node-not-found.
    var nodeNotFound = Task.FromException<string>(new KeyNotFoundException("node not found"));
    _mockAdapter
        .SubscribeAsync(unresolvableTag, Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(nodeNotFound);
    _mockAdapter
        .ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    var connectionActor = CreateConnectionActor(connectionName);

    // NOTE(review): fixed delay, presumably to let the actor finish connecting
    // before the subscribe request is sent.
    await Task.Delay(300);

    // Act
    var subscribe = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, [unresolvableTag], DateTimeOffset.UtcNow);
    connectionActor.Tell(subscribe);

    // Assert: two messages are expected, in this order - first the bad-quality
    // update for the unresolvable tag, then the subscribe acknowledgement.
    var badUpdate = ExpectMsg<TagValueUpdate>(receiveTimeout);
    Assert.Equal(unresolvableTag, badUpdate.TagPath);
    Assert.Equal(QualityCode.Bad, badUpdate.Quality);

    var subscribeAck = ExpectMsg<SubscribeTagsResponse>(receiveTimeout);
    Assert.True(subscribeAck.Success);
}
[Fact]
public async Task DCL004_ConnectionLevelSubscribeFailure_TriggersReconnect_NotTagRetry()
{
    // DataConnectionLayer-004 regression. When a subscribe fails because the adapter
    // is not connected (InvalidOperationException from EnsureConnected) the problem
    // is the connection, not the tag path. Before the fix this was misclassified as
    // an unresolved tag and retried on the 10s tag-resolution timer; with the fix it
    // drives the reconnection state machine instead.
    const string connectionName = "dcl004-conn-level";

    // Arrange: connecting succeeds, but every subscribe reports "not connected".
    _mockAdapter
        .ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);

    var notConnected = Task.FromException<string>(
        new InvalidOperationException("OPC UA client is not connected."));
    _mockAdapter
        .SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(notConnected);
    _mockAdapter
        .ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    var connectionActor = CreateConnectionActor(connectionName);

    // NOTE(review): fixed delay, presumably to let the actor finish connecting
    // before the subscribe request is sent.
    await Task.Delay(300);

    // Act
    var subscribe = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, ["some/tag"], DateTimeOffset.UtcNow);
    connectionActor.Tell(subscribe);

    // Assert: a connection-level failure must push the actor into Reconnecting,
    // which calls ConnectAsync again. Pre-fix the actor stayed Connected and only
    // armed the tag-resolution timer, so ConnectAsync was called exactly once and
    // this condition would never be met.
    bool ReconnectAttempted()
    {
        var connectCalls = _mockAdapter
            .ReceivedCalls()
            .Count(call => call.GetMethodInfo().Name == "ConnectAsync");
        return connectCalls >= 2;
    }

    AwaitCondition(ReconnectAttempted, TimeSpan.FromSeconds(5));
}
// ── DataConnectionLayer-005: WriteTimeout must bound a hung write ──
[Fact]
public async Task DCL005_Write_ThatHangs_TimesOutAndReturnsFailureSynchronously()
{
    // Regression test for DataConnectionLayer-005. HandleWrite called WriteAsync
    // with no CancellationToken and no timeout, so a hung device write never
    // produced a WriteTagResponse. The calling script would block until its own
    // Ask-timeout with no DCL-level error. After the fix, _options.WriteTimeout
    // bounds the write and a timeout is surfaced as a failed WriteTagResponse.
    _options.WriteTimeout = TimeSpan.FromMilliseconds(300);

    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);

    // Simulate a hung device: the returned task never completes on its own and is
    // only cancelled when the token handed to WriteAsync fires.
    _mockAdapter.WriteAsync("tag1", 42, Arg.Any<CancellationToken>())
        .Returns(ci =>
        {
            var ct = ci.Arg<CancellationToken>();

            // RunContinuationsAsynchronously keeps the code awaiting this task from
            // running inline inside the cancellation callback, i.e. on whichever
            // thread happens to cancel the token.
            var tcs = new TaskCompletionSource<WriteResult>(
                TaskCreationOptions.RunContinuationsAsynchronously);
            ct.Register(() => tcs.TrySetCanceled(ct));
            return tcs.Task;
        });

    var actor = CreateConnectionActor("dcl005-write-timeout");
    await Task.Delay(300); // reach Connected state

    actor.Tell(new WriteTagRequest("corr1", "dcl005-write-timeout", "tag1", 42, DateTimeOffset.UtcNow));

    // The response window (3s) is far larger than WriteTimeout (300ms), so a reply
    // arriving here means the DCL itself bounded the write.
    var response = ExpectMsg<WriteTagResponse>(TimeSpan.FromSeconds(3));
    Assert.False(response.Success);
    Assert.Contains("timeout", response.ErrorMessage, StringComparison.OrdinalIgnoreCase);
}
[Fact]
public async Task DCL001_SubscribeWithFailedTags_CountsResolvedAndUnresolvedSeparately()
{
@@ -533,7 +633,11 @@ public class DataConnectionActorTests : TestKit
actor.Tell(new SubscribeTagsRequest(
"c1", "inst1", "dcl001-failed-tags",
["good/a", "good/b", "good/c", "bad/x", "bad/y"], DateTimeOffset.UtcNow));
ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));
// Two genuine resolution failures now also push a bad-quality TagValueUpdate
// to the subscriber (DataConnectionLayer-004); skip past those to the ack.
var ack = FishForMessage<SubscribeTagsResponse>(_ => true, TimeSpan.FromSeconds(5));
Assert.True(ack.Success);
actor.Tell(new DataConnectionActor.GetHealthReport());
var report = ExpectMsg<DataConnectionHealthReport>(TimeSpan.FromSeconds(3));