fix(data-connection): resolve DataConnectionLayer-001 — off-thread actor state mutation
HandleSubscribe spawned a Task.Run that mutated DataConnectionActor private state (_subscriptionIds, _subscriptionsByInstance, _totalSubscribed, _resolvedTags, _unresolvedTags) from a thread-pool thread, racing the actor's own message loop — a data race on non-thread-safe Dictionary/HashSet and non-atomic counters. Restructured HandleSubscribe to follow the actor's existing PipeTo(Self) pattern: the background task now performs only adapter I/O and pipes a SubscribeCompleted message to Self; all subscription-state mutation happens in the new HandleSubscribeCompleted handler on the actor thread (wired into the Connected, Connecting and Reconnecting states). Adds DCL001_ConcurrentSubscribes_DoNotCorruptSubscriptionCounters (30 concurrent subscribe requests of 30 distinct tags each, 900 tags in total), which fails against the pre-fix code and passes after. Also adds DCL001_SubscribeWithFailedTags_CountsResolvedAndUnresolvedSeparately, a behavioural guard that failed tags still count toward TotalSubscribedTags but not toward ResolvedTags.
This commit is contained in:
@@ -458,4 +458,87 @@ public class DataConnectionActorTests : TestKit
|
||||
await backupAdapter.Received().SubscribeAsync(
|
||||
"sensor/temp", Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>());
|
||||
}
|
||||
|
||||
// ── DataConnectionLayer-001: subscribe must not mutate actor state off-thread ──
|
||||
|
||||
/// <summary>
/// Stand-in for an adapter subscribe call: waits very briefly, then produces a
/// freshly generated subscription id of the form <c>sub-</c> followed by a GUID
/// in "N" format (32 hex digits, no dashes).
/// </summary>
/// <returns>A new subscription id; every call yields a different value.</returns>
private static async Task<string> DelayedSubscribeAsync()
{
    // The tiny pause lets many in-flight subscribe background tasks overlap, so
    // that the state mutations following the await would collide under the
    // pre-fix implementation.
    await Task.Delay(1);

    return $"sub-{Guid.NewGuid():N}";
}
|
||||
|
||||
[Fact]
public async Task DCL001_ConcurrentSubscribes_DoNotCorruptSubscriptionCounters()
{
    // Regression guard for DataConnectionLayer-001.
    //
    // Before the fix, HandleSubscribe updated actor state (_subscriptionIds,
    // _totalSubscribed, _resolvedTags and the per-instance HashSet) from inside a
    // Task.Run continuation. With many subscribes in flight those updates raced on
    // non-thread-safe Dictionary/HashSet instances and on non-atomic int++, which
    // either dropped increments or threw. Since the fix, each update is delivered to
    // the actor as a SubscribeCompleted message and applied on the actor thread, so
    // the totals reported at the end must be exact.
    const string connectionName = "dcl001-concurrent";
    const int instanceCount = 30;
    const int tagsPerInstance = 30;
    const int expectedTagCount = instanceCount * tagsPerInstance;

    // Arrange: an adapter that connects at once, reports a healthy status, and whose
    // subscribe call completes asynchronously after a short delay.
    _mockAdapter
        .ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);
    _mockAdapter
        .SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(_ => DelayedSubscribeAsync());
    _mockAdapter
        .ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    var connection = CreateConnectionActor(connectionName);

    // Give the actor time to reach the Connected state before subscribing.
    await Task.Delay(300);

    // Act: fire one subscribe request per instance without waiting in between, each
    // carrying its own set of tag paths that no other request uses.
    foreach (var instance in Enumerable.Range(0, instanceCount))
    {
        var tagPaths = new string[tagsPerInstance];
        for (var slot = 0; slot < tagsPerInstance; slot++)
        {
            tagPaths[slot] = $"inst{instance}/tag{slot}";
        }

        var request = new SubscribeTagsRequest(
            $"corr{instance}", $"inst{instance}", connectionName, tagPaths, DateTimeOffset.UtcNow);
        connection.Tell(request);
    }

    // Assert: one acknowledgement must arrive for every request sent.
    var ackTimeout = TimeSpan.FromSeconds(15);
    for (var ack = 0; ack < instanceCount; ack++)
    {
        ExpectMsg<SubscribeTagsResponse>(ackTimeout);
    }

    connection.Tell(new DataConnectionActor.GetHealthReport());
    var health = ExpectMsg<DataConnectionHealthReport>(TimeSpan.FromSeconds(5));

    // No tag path repeats, so each one is a fresh subscription that resolved.
    Assert.Equal(expectedTagCount, health.TotalSubscribedTags);
    Assert.Equal(expectedTagCount, health.ResolvedTags);
}
|
||||
|
||||
[Fact]
public async Task DCL001_SubscribeWithFailedTags_CountsResolvedAndUnresolvedSeparately()
{
    // Behavioural guard: the restructured subscribe must preserve the original
    // accounting — failed tags count toward TotalSubscribed but not ResolvedTags.

    // Arrange: the adapter connects immediately and reports a healthy status.
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.Status.Returns(ConnectionHealth.Connected);

    // Subscribing to any tag path that begins with "bad" yields a faulted task; every
    // other path succeeds. The prefix is an identifier, not linguistic text, so it is
    // matched ordinally rather than with the culture-sensitive default overload.
    _mockAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(ci => ((string)ci[0]).StartsWith("bad", StringComparison.Ordinal)
            ? Task.FromException<string>(new Exception("tag not found"))
            : Task.FromResult("sub-ok"));
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, null));

    var actor = CreateConnectionActor("dcl001-failed-tags");

    // Give the actor time to reach the Connected state before subscribing.
    await Task.Delay(300);

    // Act: one request mixing three tags that subscribe successfully with two that fail.
    actor.Tell(new SubscribeTagsRequest(
        "c1", "inst1", "dcl001-failed-tags",
        ["good/a", "good/b", "good/c", "bad/x", "bad/y"], DateTimeOffset.UtcNow));
    ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));

    actor.Tell(new DataConnectionActor.GetHealthReport());
    var report = ExpectMsg<DataConnectionHealthReport>(TimeSpan.FromSeconds(3));

    // Assert
    Assert.Equal(5, report.TotalSubscribedTags);   // all 5 tags tracked
    Assert.Equal(3, report.ResolvedTags);          // only the 3 good ones resolved
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user