fix(data-connection-layer): resolve DataConnectionLayer-002/003/004/005 — Resume supervision, concurrent dicts, subscribe-failure classification, write timeout
This commit is contained in:
@@ -512,6 +512,106 @@ public class DataConnectionActorTests : TestKit
|
||||
Assert.Equal(instances * tagsPerInstance, report.ResolvedTags);
|
||||
}
|
||||
|
||||
// ── DataConnectionLayer-004: subscribe-time failure classification ──
|
||||
|
||||
[Fact]
public async Task DCL004_GenuineTagResolutionFailure_PushesBadQualityToSubscriber()
{
    // DataConnectionLayer-004 regression guard. Per the design doc (Tag Path
    // Resolution, step 2), a tag that genuinely cannot be resolved at subscribe
    // time must have its attribute marked quality `bad`. Before the fix the actor
    // merely logged the failure and parked the tag in _unresolvedTags, so the
    // Instance Actor was never told. The fix pushes a bad-quality TagValueUpdate.
    const string connectionName = "dcl004-bad-quality";
    const string unresolvableTag = "missing/tag";
    var receiveTimeout = TimeSpan.FromSeconds(5);

    _mockAdapter.Status.Returns(ConnectionHealth.Connected);
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    // A non-connection exception stands in for a genuine "node not found".
    var nodeNotFound = new KeyNotFoundException("node not found");
    _mockAdapter.SubscribeAsync(unresolvableTag, Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(Task.FromException<string>(nodeNotFound));

    var connectionActor = CreateConnectionActor(connectionName);
    await Task.Delay(300);

    var request = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, [unresolvableTag], DateTimeOffset.UtcNow);
    connectionActor.Tell(request);

    // Expected order: first the bad-quality update for the tag, then the subscribe ack.
    var badUpdate = ExpectMsg<TagValueUpdate>(receiveTimeout);
    Assert.Equal(unresolvableTag, badUpdate.TagPath);
    Assert.Equal(QualityCode.Bad, badUpdate.Quality);

    var subscribeAck = ExpectMsg<SubscribeTagsResponse>(receiveTimeout);
    Assert.True(subscribeAck.Success);
}
|
||||
|
||||
[Fact]
public async Task DCL004_ConnectionLevelSubscribeFailure_TriggersReconnect_NotTagRetry()
{
    // DataConnectionLayer-004 regression guard. When a subscribe fails because the
    // adapter is not connected (InvalidOperationException from EnsureConnected),
    // the problem is the connection, not the tag path. Before the fix this was
    // misclassified as an unresolved tag and retried on the 10s tag-resolution
    // timer. The fix routes it into the reconnection state machine.
    const string connectionName = "dcl004-conn-level";
    var notConnected = new InvalidOperationException("OPC UA client is not connected.");

    _mockAdapter.Status.Returns(ConnectionHealth.Connected);
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));
    _mockAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(Task.FromException<string>(notConnected));

    var connectionActor = CreateConnectionActor(connectionName);
    await Task.Delay(300);

    var request = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, ["some/tag"], DateTimeOffset.UtcNow);
    connectionActor.Tell(request);

    // Entering Reconnecting re-attempts ConnectAsync, so a second call is the
    // observable signal. Pre-fix the actor stayed Connected and only armed the
    // tag-resolution timer, leaving ConnectAsync at exactly one call.
    AwaitCondition(
        () =>
        {
            var connectCalls = _mockAdapter.ReceivedCalls()
                .Where(call => call.GetMethodInfo().Name == "ConnectAsync");
            return connectCalls.Count() >= 2;
        },
        TimeSpan.FromSeconds(5));
}
|
||||
|
||||
// ── DataConnectionLayer-005: WriteTimeout must bound a hung write ──
|
||||
|
||||
[Fact]
public async Task DCL005_Write_ThatHangs_TimesOutAndReturnsFailureSynchronously()
{
    // Regression test for DataConnectionLayer-005. HandleWrite called WriteAsync
    // with no CancellationToken and no timeout, so a hung device write never
    // produced a WriteTagResponse. The calling script would block until its own
    // Ask-timeout with no DCL-level error. After the fix, _options.WriteTimeout
    // bounds the write and a timeout is surfaced as a failed WriteTagResponse.
    _options.WriteTimeout = TimeSpan.FromMilliseconds(300);

    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);

    // WriteAsync never completes unless its cancellation token fires.
    _mockAdapter.WriteAsync("tag1", 42, Arg.Any<CancellationToken>())
        .Returns(ci =>
        {
            var ct = ci.Arg<CancellationToken>();

            // RunContinuationsAsynchronously keeps the awaiting code from running
            // inline inside the cancellation callback (i.e. on whichever thread
            // cancels the token) when TrySetCanceled completes the task.
            var tcs = new TaskCompletionSource<WriteResult>(
                TaskCreationOptions.RunContinuationsAsynchronously);
            ct.Register(() => tcs.TrySetCanceled(ct));
            return tcs.Task;
        });

    var actor = CreateConnectionActor("dcl005-write-timeout");
    await Task.Delay(300); // reach Connected state

    actor.Tell(new WriteTagRequest("corr1", "dcl005-write-timeout", "tag1", 42, DateTimeOffset.UtcNow));

    // The 3s receive window comfortably exceeds the 300ms WriteTimeout set above.
    var response = ExpectMsg<WriteTagResponse>(TimeSpan.FromSeconds(3));
    Assert.False(response.Success);
    Assert.Contains("timeout", response.ErrorMessage, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
[Fact]
|
||||
public async Task DCL001_SubscribeWithFailedTags_CountsResolvedAndUnresolvedSeparately()
|
||||
{
|
||||
@@ -533,7 +633,11 @@ public class DataConnectionActorTests : TestKit
|
||||
actor.Tell(new SubscribeTagsRequest(
|
||||
"c1", "inst1", "dcl001-failed-tags",
|
||||
["good/a", "good/b", "good/c", "bad/x", "bad/y"], DateTimeOffset.UtcNow));
|
||||
ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));
|
||||
|
||||
// Two genuine resolution failures now also push a bad-quality TagValueUpdate
|
||||
// to the subscriber (DataConnectionLayer-004); skip past those to the ack.
|
||||
var ack = FishForMessage<SubscribeTagsResponse>(_ => true, TimeSpan.FromSeconds(5));
|
||||
Assert.True(ack.Success);
|
||||
|
||||
actor.Tell(new DataConnectionActor.GetHealthReport());
|
||||
var report = ExpectMsg<DataConnectionHealthReport>(TimeSpan.FromSeconds(3));
|
||||
|
||||
@@ -3,6 +3,7 @@ using Akka.TestKit.Xunit2;
|
||||
using NSubstitute;
|
||||
using ScadaLink.Commons.Interfaces.Protocol;
|
||||
using ScadaLink.Commons.Messages.DataConnection;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.DataConnectionLayer.Actors;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
|
||||
@@ -57,6 +58,52 @@ public class DataConnectionManagerActorTests : TestKit
|
||||
Assert.Contains("Unknown connection", response.ErrorMessage);
|
||||
}
|
||||
|
||||
[Fact]
public async Task DCL002_ConnectionActorCrash_PreservesSubscriptionState()
{
    // DataConnectionLayer-002 regression guard. With Directive.Restart the
    // supervisor threw away the connection actor's in-memory subscription
    // registry, which broke the design doc's "transparent re-subscribe"
    // guarantee: subscribers were never re-subscribed and stayed at stale
    // quality forever. The fix switches to Resume, so the same actor instance
    // and its state survive a transient exception.
    const string connectionName = "conn1";
    const string protocol = "OpcUa";

    var crashingAdapter = Substitute.For<IDataConnection>();
    crashingAdapter.Status.Returns(ConnectionHealth.Connected);
    crashingAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    crashingAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns("sub-001");
    crashingAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    // The write throws synchronously, so the exception escapes the message
    // handler and crashes the connection actor, which is what puts the
    // supervisor strategy under test.
    crashingAdapter.WriteAsync(Arg.Any<string>(), Arg.Any<object?>(), Arg.Any<CancellationToken>())
        .Returns<Task<WriteResult>>(_ => throw new InvalidOperationException("boom"));

    _mockFactory.Create(protocol, Arg.Any<IDictionary<string, string>>()).Returns(crashingAdapter);

    var managerProps = Props.Create(() =>
        new DataConnectionManagerActor(_mockFactory, _options, _mockHealthCollector));
    var manager = Sys.ActorOf(managerProps);

    manager.Tell(new CreateConnectionCommand(connectionName, protocol, new Dictionary<string, string>(), null, 3));
    await Task.Delay(300); // connection actor reaches Connected

    // Step 1: register one subscription and wait for its ack.
    manager.Tell(new SubscribeTagsRequest("c1", "inst1", connectionName, ["tag1"], DateTimeOffset.UtcNow));
    ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(3));

    // Step 2: trigger the crash through the synchronously-throwing write.
    manager.Tell(new WriteTagRequest("c2", connectionName, "tag1", 42, DateTimeOffset.UtcNow));
    await Task.Delay(300); // supervisor handles the failure

    // Step 3: the subscription must still be there. The health report keeps
    // showing the subscribed/resolved tag; under Restart both counts would be 0.
    manager.Tell(new GetAllHealthReports());
    var healthReport = ExpectMsg<DataConnectionHealthReport>(TimeSpan.FromSeconds(3));
    Assert.Equal(1, healthReport.TotalSubscribedTags);
    Assert.Equal(1, healthReport.ResolvedTags);
}
|
||||
|
||||
[Fact]
|
||||
public void CreateConnection_UsesFactory()
|
||||
{
|
||||
|
||||
@@ -6,6 +6,37 @@ using ScadaLink.DataConnectionLayer.Adapters;
|
||||
|
||||
namespace ScadaLink.DataConnectionLayer.Tests;
|
||||
|
||||
/// <summary>
/// Structural regression guard for DataConnectionLayer-003. The monitored-item and
/// callback maps inside RealOpcUaClient are read on the OPC UA SDK's publish
/// threads while other threads mutate them during subscribe/disconnect, so they
/// have to be concurrent collections rather than plain Dictionary instances.
/// The check is structural (reflection on the field types) because RealOpcUaClient
/// wraps concrete OPC Foundation SDK types and cannot be driven without a live
/// OPC UA server.
/// </summary>
public class RealOpcUaClientThreadSafetyTests
{
    [Theory]
    [InlineData("_callbacks")]
    [InlineData("_monitoredItems")]
    public void DCL003_SharedDictionaryFields_AreConcurrentCollections(string fieldName)
    {
        // The maps are private instance fields, so they are looked up by name.
        const System.Reflection.BindingFlags privateInstance =
            System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic;

        var field = typeof(RealOpcUaClient).GetField(fieldName, privateInstance);
        Assert.NotNull(field);

        var fieldType = field!.FieldType;

        // Compare the open generic definition so any key/value type arguments pass.
        var isConcurrentDictionary =
            fieldType.IsGenericType &&
            fieldType.GetGenericTypeDefinition() == typeof(System.Collections.Concurrent.ConcurrentDictionary<,>);

        Assert.True(
            isConcurrentDictionary,
            $"RealOpcUaClient.{fieldName} must be a ConcurrentDictionary<,> for thread safety, " +
            $"but was {fieldType.Name}.");
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// WP-7: Tests for OPC UA adapter.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user