fix(data-connection-layer): resolve DataConnectionLayer-014..017 — real logger for OPC UA client, initial-connect failover, accurate subscribe response, per-tag write-batch results

This commit is contained in:
Joseph Doherty
2026-05-17 03:18:24 -04:00
parent 3d3f43229f
commit 14ba5495d1
7 changed files with 408 additions and 66 deletions

View File

@@ -803,6 +803,132 @@ public class DataConnectionActorTests : TestKit
ExpectNoMsg(TimeSpan.FromSeconds(1));
}
// ── DataConnectionLayer-015: initial-connect failures must trigger failover ──
[Fact]
public Task DCL015_PrimaryDownAtStartup_FailsOverToBackup()
{
    // Regression test for DataConnectionLayer-015. HandleConnectResult — the handler
    // for the INITIAL connection attempt in the Connecting state — only logged and
    // re-armed the reconnect timer. It never incremented _consecutiveFailures and
    // never switched endpoint, so a primary that is unreachable when the actor first
    // starts (a fresh deployment, a site restart, a primary simply down) retried the
    // primary forever and never tried the configured backup. After the fix the
    // initial connect participates in the failover counter like HandleReconnectResult.
    //
    // The test is fully synchronous (AwaitCondition blocks), so it is not marked
    // async; it still returns Task so its signature is unchanged.
    var backupEndpoint = "opc.tcp://backup:4840";
    var primaryConfig = new Dictionary<string, string> { ["Endpoint"] = "opc.tcp://primary:4840" };
    var backupConfig = new Dictionary<string, string> { ["Endpoint"] = backupEndpoint };
    var primaryAdapter = Substitute.For<IDataConnection>();
    var backupAdapter = Substitute.For<IDataConnection>();

    // Primary is down from the very first attempt — it never connects.
    primaryAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.FromException(new Exception("Connection refused")));

    // Factory returns the backup adapter when called with the backup config.
    // Arg.Is takes an expression tree, which cannot declare an out variable, so the
    // indexer lookup is kept for this matcher.
    _mockFactory.Create("OpcUa", Arg.Is<IDictionary<string, string>>(d => d["Endpoint"] == backupEndpoint))
        .Returns(backupAdapter);
    backupAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);

    // The actor reference is not needed: the assertion observes the factory mock.
    _ = CreateFailoverActor(primaryAdapter, "dcl015-startup-failover",
        primaryConfig, backupConfig, failoverRetryCount: 2);

    // After failoverRetryCount initial-connect failures on the primary, the actor
    // must build the backup adapter. Pre-fix the factory was never called.
    // TryGetValue keeps the polling predicate from throwing KeyNotFoundException if
    // the factory is ever invoked with a config that has no "Endpoint" entry; such a
    // call is simply treated as "not the backup".
    AwaitCondition(() =>
        _mockFactory.ReceivedCalls().Any(c =>
            c.GetMethodInfo().Name == "Create" &&
            c.GetArguments()[1] is IDictionary<string, string> d &&
            d.TryGetValue("Endpoint", out var endpoint) &&
            endpoint == backupEndpoint),
        TimeSpan.FromSeconds(5));

    return Task.CompletedTask;
}
[Fact]
public Task DCL015_SingleEndpointDownAtStartup_RetriesIndefinitely_NoFailover()
{
    // Companion guard: a single-endpoint connection (no backup) whose primary is
    // unreachable at startup must keep retrying the same endpoint indefinitely — the
    // initial-connect failover counter must not synthesise a non-existent backup.
    //
    // The test is fully synchronous (AwaitCondition blocks), so it is not marked
    // async; it still returns Task so its signature is unchanged.
    var primaryConfig = new Dictionary<string, string> { ["Endpoint"] = "opc.tcp://primary:4840" };
    var primaryAdapter = Substitute.For<IDataConnection>();

    // Incremented from the adapter callback and polled from the test thread, so
    // writes use Interlocked and the read below uses Volatile.Read.
    var connectCount = 0;
    primaryAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(_ =>
        {
            Interlocked.Increment(ref connectCount);
            return Task.FromException(new Exception("Connection refused"));
        });

    // The actor reference is not needed: the assertions observe the mocks only.
    _ = CreateFailoverActor(primaryAdapter, "dcl015-no-backup",
        primaryConfig, backupConfig: null, failoverRetryCount: 2);

    // Many retries occur (well past the failover threshold) but no adapter is ever
    // created via the factory — there is nothing to fail over to.
    AwaitCondition(() => Volatile.Read(ref connectCount) >= 6, TimeSpan.FromSeconds(10));
    _mockFactory.DidNotReceive().Create(Arg.Any<string>(), Arg.Any<IDictionary<string, string>>());

    return Task.CompletedTask;
}
// ── DataConnectionLayer-016: subscribe response must reflect a connection-level failure ──
[Fact]
public async Task DCL016_ConnectionLevelSubscribeFailure_RepliesWithUnsuccessfulResponse()
{
    // Regression test for DataConnectionLayer-016.
    //
    // Scenario: the adapter connects and reports Connected, but every subscribe
    // attempt fails with InvalidOperationException — the connection-level failure
    // that drives the actor into Reconnecting. Before the fix the caller was still
    // told the subscribe succeeded even though no tag was subscribed at the adapter.
    // The reply must now agree with the actor's own assessment: unsuccessful, with
    // an error message attached.
    var connectionName = "dcl016-conn-fail";
    var notConnected = new InvalidOperationException("OPC UA client is not connected.");

    // Adapter looks healthy: connect succeeds and the status reads Connected.
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);

    // ...but subscribing faults at connection level, and reads come back unsuccessful.
    _mockAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(Task.FromException<string>(notConnected));
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    var connectionActor = CreateConnectionActor(connectionName);

    // Fixed pause between creating the actor and sending the request.
    await Task.Delay(TimeSpan.FromMilliseconds(300));

    var request = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, ["some/tag"], DateTimeOffset.UtcNow);
    connectionActor.Tell(request);

    // The very next message must be the subscribe reply, and it must not claim success.
    var reply = ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));
    Assert.False(reply.Success);
    Assert.NotNull(reply.ErrorMessage);
}
[Fact]
public async Task DCL016_GenuineResolutionFailure_StillRepliesSuccess()
{
    // Companion guard for the DataConnectionLayer-016 fix.
    //
    // A tag that genuinely cannot be resolved (the node does not exist) is a runtime
    // quality concern rather than a connection-level fault: the design tracks it via
    // _unresolvedTags and a Bad-quality TagValueUpdate. The overall subscribe reply
    // therefore has to stay successful with no error message.
    var connectionName = "dcl016-genuine";
    var unresolvedTag = "missing/tag";

    // Adapter is connected and healthy.
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);

    // Only the unresolved tag fails to subscribe, with a "not found" style exception.
    var nodeNotFound = new KeyNotFoundException("node not found");
    _mockAdapter.SubscribeAsync(unresolvedTag, Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(Task.FromException<string>(nodeNotFound));
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    var connectionActor = CreateConnectionActor(connectionName);

    // Fixed pause between creating the actor and sending the request.
    await Task.Delay(TimeSpan.FromMilliseconds(300));

    var request = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, [unresolvedTag], DateTimeOffset.UtcNow);
    connectionActor.Tell(request);

    // Fish rather than expect: other message types may reach the test actor before
    // the subscribe reply, and they are irrelevant to this assertion.
    var reply = FishForMessage<SubscribeTagsResponse>(_ => true, TimeSpan.FromSeconds(5));
    Assert.True(reply.Success);
    Assert.Null(reply.ErrorMessage);
}
[Fact]
public async Task DCL001_SubscribeWithFailedTags_CountsResolvedAndUnresolvedSeparately()
{