fix(data-connection): resolve DataConnectionLayer-006..012 — quality-counter reconciliation, per-tag batch reads, configurable failover threshold, dedup retry, stale-callback guard, secure cert default

This commit is contained in:
Joseph Doherty
2026-05-16 21:11:24 -04:00
parent 0c82ffcbe6
commit c9b236e507
8 changed files with 515 additions and 34 deletions

View File

@@ -612,6 +612,197 @@ public class DataConnectionActorTests : TestKit
Assert.Contains("timeout", response.ErrorMessage, StringComparison.OrdinalIgnoreCase);
}
// ── DataConnectionLayer-006: quality counters must not drift after unsubscribe/reconnect ──
[Fact]
public async Task DCL006_DisconnectAfterUnsubscribe_BadQualityCountMatchesRemainingTags()
{
    // Regression test for DataConnectionLayer-006. _lastTagQuality and the three
    // quality counters were never cleaned up on unsubscribe, so a tag removed via
    // HandleUnsubscribe lingered in _lastTagQuality. PushBadQualityForAllTags then
    // set _tagsBadQuality = _lastTagQuality.Count, counting the dropped tag and
    // drifting the bad-quality count above the number of currently subscribed tags.
    var callbacks = new System.Collections.Concurrent.ConcurrentDictionary<string, SubscriptionCallback>();
    var connectCount = 0;

    // RunContinuationsAsynchronously keeps whatever awaits the gated connect from
    // running inline on the thread that eventually releases the gate.
    var reconnectGate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);

    // First connect succeeds; the reconnect after the disconnect hangs so the actor
    // stays in Reconnecting and ReSubscribeAll does not run before the assertion.
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(_ => Interlocked.Increment(ref connectCount) == 1
            ? Task.CompletedTask
            : reconnectGate.Task);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);
    _mockAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(ci =>
        {
            // Capture the per-tag callback so the test can push values itself.
            callbacks[(string)ci[0]] = (SubscriptionCallback)ci[1];
            return Task.FromResult("sub-" + (string)ci[0]);
        });
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    try
    {
        var actor = CreateConnectionActor("dcl006-drift");
        await Task.Delay(300);

        // Two instances, one tag each.
        actor.Tell(new SubscribeTagsRequest("c1", "instA", "dcl006-drift", ["tagA"], DateTimeOffset.UtcNow));
        ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));
        actor.Tell(new SubscribeTagsRequest("c2", "instB", "dcl006-drift", ["tagB"], DateTimeOffset.UtcNow));
        ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));

        // Push a Good value for each tag so both land in _lastTagQuality.
        AwaitCondition(() => callbacks.ContainsKey("tagA") && callbacks.ContainsKey("tagB"),
            TimeSpan.FromSeconds(3));
        callbacks["tagA"]("tagA", new TagValue(1, QualityCode.Good, DateTimeOffset.UtcNow));
        callbacks["tagB"]("tagB", new TagValue(2, QualityCode.Good, DateTimeOffset.UtcNow));
        await Task.Delay(200);

        // Unsubscribe instance B — tagB is no longer subscribed by anyone.
        actor.Tell(new UnsubscribeTagsRequest("c3", "instB", "dcl006-drift", DateTimeOffset.UtcNow));
        await Task.Delay(200);

        // Only health-collector calls made from the disconnect onwards matter.
        _mockHealthCollector.ClearReceivedCalls();

        // Disconnect — PushBadQualityForAllTags runs (the reconnect hangs on the gate,
        // so the actor stays in Reconnecting and ReSubscribeAll does not run).
        RaiseDisconnected(_mockAdapter);
        await Task.Delay(300);

        // PushBadQualityForAllTags must report exactly 1 bad tag (only tagA is still
        // subscribed). Pre-fix tagB lingered in _lastTagQuality and bad was reported as 2.
        var qualityCall = _mockHealthCollector.ReceivedCalls()
            .FirstOrDefault(c => c.GetMethodInfo().Name == "UpdateTagQuality");
        Assert.NotNull(qualityCall);

        // The bad-quality count is read from the third argument of UpdateTagQuality.
        var args = qualityCall!.GetArguments();
        var bad = (int)args[2]!;
        Assert.Equal(1, bad);
    }
    finally
    {
        // Release the hanging reconnect even when an assertion above throws, so the
        // gated ConnectAsync task never stays pending past the end of the test.
        reconnectGate.TrySetCanceled();
    }
}
// ── DataConnectionLayer-010: tag-resolution retry must not double-dispatch ──
[Fact]
public async Task DCL010_TagResolutionRetry_DoesNotIssueDuplicateConcurrentSubscribes()
{
    // Regression test for DataConnectionLayer-010. HandleRetryTagResolution fired a
    // SubscribeAsync for every unresolved tag without removing it from _unresolvedTags
    // first. A slow SubscribeAsync overlapping the next retry tick produced duplicate
    // concurrent subscribe attempts for the same tag, leaking the first monitored
    // item / subscription id. After the fix a tag in flight is excluded from the
    // next retry until its attempt completes.
    _options.TagResolutionRetryInterval = TimeSpan.FromMilliseconds(100);

    // RunContinuationsAsynchronously keeps whatever awaits the blocked subscribe from
    // running inline on the thread that eventually releases the gate.
    var subscribeGate = new TaskCompletionSource<string>(TaskCreationOptions.RunContinuationsAsynchronously);
    var subscribeCalls = 0;

    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);
    _mockAdapter.SubscribeAsync("slow/tag", Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(ci =>
        {
            var n = Interlocked.Increment(ref subscribeCalls);
            // First call (initial subscribe) fails genuinely → unresolved.
            if (n == 1)
            {
                return Task.FromException<string>(new KeyNotFoundException("not found yet"));
            }

            // Subsequent calls are retry attempts — block on the gate so they stay
            // in flight across multiple retry ticks.
            return subscribeGate.Task;
        });
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    try
    {
        var actor = CreateConnectionActor("dcl010-retry");
        await Task.Delay(300);

        actor.Tell(new SubscribeTagsRequest("c1", "inst1", "dcl010-retry", ["slow/tag"], DateTimeOffset.UtcNow));

        // Initial subscribe fails → bad-quality push then ack.
        ExpectMsg<TagValueUpdate>(TimeSpan.FromSeconds(5));
        ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));

        // Let several retry ticks (100ms each) elapse while the first retry is blocked.
        await Task.Delay(600);

        // Exactly one retry attempt should be in flight: 1 initial + 1 retry = 2 total.
        // Pre-fix, every 100ms tick dispatched another → far more than 2.
        Assert.Equal(2, Volatile.Read(ref subscribeCalls));
    }
    finally
    {
        // Release the blocked retry even when an expectation above throws, so the
        // gated SubscribeAsync task never stays pending past the end of the test.
        subscribeGate.TrySetCanceled();
    }
}
// ── DataConnectionLayer-011: stale callbacks from a disposed adapter must be dropped ──
[Fact]
public async Task DCL011_StaleTagValueFromOldAdapter_IsNotForwardedAfterFailover()
{
    // Regression test for DataConnectionLayer-011.
    //
    // Scenario: failover disposes the old adapter and creates a new one, yet the
    // old adapter's subscription callbacks captured Self and can still Tell
    // TagValueReceived. Without a per-adapter generation tag, a value raised by the
    // disposed adapter after the actor is Connected on the new endpoint would be
    // forwarded to the Instance Actor, mixing pre-failover device data with data
    // from the active endpoint.
    const string connectionName = "dcl011-stale";
    const string tagPath = "sensor/temp";

    Dictionary<string, string> primaryEndpointConfig = new() { ["Endpoint"] = "opc.tcp://primary:4840" };
    Dictionary<string, string> backupEndpointConfig = new() { ["Endpoint"] = "opc.tcp://backup:4840" };

    var primaryAdapter = Substitute.For<IDataConnection>();
    var backupAdapter = Substitute.For<IDataConnection>();

    // ── Primary adapter: connects once, then every later connect attempt fails ──
    SubscriptionCallback? primaryCallback = null;
    var primaryConnectCount = 0;
    primaryAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(_ =>
        {
            if (Interlocked.Increment(ref primaryConnectCount) == 1)
            {
                return Task.CompletedTask;
            }

            return Task.FromException(new Exception("Primary down"));
        });
    primaryAdapter.Status.Returns(ConnectionHealth.Connected);
    primaryAdapter.SubscribeAsync(tagPath, Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(call =>
        {
            // Keep the primary's callback so it can be fired after failover.
            primaryCallback = (SubscriptionCallback)call[1];
            return Task.FromResult("sub-primary");
        });
    primaryAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    // ── Backup adapter: handed out by the factory for the backup endpoint ──
    backupAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    backupAdapter.Status.Returns(ConnectionHealth.Connected);
    backupAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns("sub-backup");
    backupAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));
    _mockFactory.Create("OpcUa", Arg.Is<IDictionary<string, string>>(d => d["Endpoint"] == "opc.tcp://backup:4840"))
        .Returns(backupAdapter);

    // True once the backup adapter has seen at least one SubscribeAsync call.
    bool BackupHasSubscribed() =>
        backupAdapter.ReceivedCalls().Any(c => c.GetMethodInfo().Name == "SubscribeAsync");

    var threeSeconds = TimeSpan.FromSeconds(3);

    var actor = CreateFailoverActor(
        primaryAdapter,
        connectionName,
        primaryEndpointConfig,
        backupEndpointConfig,
        failoverRetryCount: 1);
    AwaitCondition(() => primaryConnectCount >= 1, TimeSpan.FromSeconds(2));
    await Task.Delay(200);

    // Subscribe with the test actor's name as the instance so updates land here.
    actor.Tell(new SubscribeTagsRequest("c1", TestActor.Path.Name, connectionName, [tagPath], DateTimeOffset.UtcNow));
    ExpectMsg<SubscribeTagsResponse>(threeSeconds);
    AwaitCondition(() => primaryCallback != null, threeSeconds);

    // Trigger the failover to the backup endpoint.
    RaiseDisconnected(primaryAdapter);

    // The disconnect pushes a bad-quality ConnectionQualityChanged to the subscriber.
    ExpectMsg<ConnectionQualityChanged>(threeSeconds);
    AwaitCondition(BackupHasSubscribed, TimeSpan.FromSeconds(5));
    await Task.Delay(300); // actor is Connected on backup

    // Quiet-period check before injecting the stale value: ExpectNoMsg asserts
    // silence (it does not discard messages), so the re-subscribe path must not
    // have delivered anything to the test actor here.
    ExpectNoMsg(TimeSpan.FromMilliseconds(300));

    // The disposed primary adapter's callback fires a stale value...
    primaryCallback!(tagPath, new TagValue(999, QualityCode.Good, DateTimeOffset.UtcNow));

    // ...which must NOT reach the subscriber.
    ExpectNoMsg(TimeSpan.FromSeconds(1));
}
[Fact]
public async Task DCL001_SubscribeWithFailedTags_CountsResolvedAndUnresolvedSeparately()
{