fix(lmxproxy): resolve subscribe/unsubscribe race condition on client reconnect
Three fixes for the SubscriptionManager/MxAccessClient subscription pipeline: 1. Serialize Subscribe and UnsubscribeClient with a SemaphoreSlim gate to prevent race where old-session unsubscribe removes new-session COM subscriptions. CreateMxAccessSubscriptionsAsync is now awaited instead of fire-and-forget. 2. Fix dual VTQ delivery in MxAccessClient.OnDataChange — each update was delivered twice (once via stored callback, once via OnTagValueChanged property). Now uses stored callback as the single delivery path. 3. Store pending tag addresses when CreateMxAccessSubscriptionsAsync fails (MxAccess down) and retry them on reconnect via NotifyReconnection/RetryPendingSubscriptionsAsync. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Channels;
|
||||
using System.Threading.Tasks;
|
||||
@@ -32,11 +33,34 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
Task.FromResult((false, 0));
|
||||
public Task<ProbeResult> ProbeConnectionAsync(string testTagAddress, int timeoutMs, CancellationToken ct = default) =>
|
||||
Task.FromResult(ProbeResult.Healthy(Quality.Good, DateTime.UtcNow));
|
||||
public Task UnsubscribeByAddressAsync(IEnumerable<string> addresses) => Task.CompletedTask;
|
||||
public Task<IAsyncDisposable> SubscribeAsync(IEnumerable<string> addresses, Action<string, Vtq> callback, CancellationToken ct = default) =>
|
||||
Task.FromResult<IAsyncDisposable>(new FakeSubscriptionHandle());
|
||||
public ValueTask DisposeAsync() => default;
|
||||
|
||||
// Call logs that the tests assert against. SubscribeAsync appends the requested
// address list to SubscribeCalls and the supplied callback to StoredCallbacks;
// UnsubscribeByAddressAsync appends its address list to UnsubscribeCalls.
|
||||
public List<List<string>> SubscribeCalls { get; } = new List<List<string>>();
|
||||
public List<List<string>> UnsubscribeCalls { get; } = new List<List<string>>();
|
||||
public List<Action<string, Vtq>> StoredCallbacks { get; } = new List<Action<string, Vtq>>();
|
||||
|
||||
// When true, SubscribeAsync fails with InvalidOperationException to simulate
// MxAccess being down. The attempt is still recorded in SubscribeCalls and
// StoredCallbacks before the failure is raised.
|
||||
public bool FailSubscriptions { get; set; }
|
||||
|
||||
/// <summary>
/// Records the addresses of an unsubscribe request and completes immediately.
/// </summary>
/// <param name="addresses">Tag addresses the caller wants to release.</param>
/// <returns>An already-completed task.</returns>
public Task UnsubscribeByAddressAsync(IEnumerable<string> addresses)
{
    // Snapshot the sequence so later assertions see the addresses as they were
    // at call time, even if the caller's enumerable is lazy.
    var released = addresses.ToList();
    UnsubscribeCalls.Add(released);

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Records a subscribe request and either hands back a fake subscription handle
/// or reports a simulated MxAccess outage.
/// </summary>
/// <param name="addresses">Tag addresses to subscribe to.</param>
/// <param name="callback">Callback supplied by the caller for value updates; stored, never invoked here.</param>
/// <param name="ct">Cancellation token (not observed by this fake).</param>
/// <returns>
/// A completed task holding a <c>FakeSubscriptionHandle</c>, or a faulted task carrying
/// <see cref="InvalidOperationException"/> when <c>FailSubscriptions</c> is true.
/// </returns>
public Task<IAsyncDisposable> SubscribeAsync(IEnumerable<string> addresses, Action<string, Vtq> callback, CancellationToken ct = default)
{
    // Record the attempt before deciding the outcome so that tests can count
    // failed attempts as well as successful ones.
    var addressList = addresses.ToList();
    SubscribeCalls.Add(addressList);
    StoredCallbacks.Add(callback);

    if (FailSubscriptions)
    {
        // Surface the outage through the returned task instead of throwing
        // synchronously: for a Task-returning method, operational failures belong
        // on the task, so the caller observes them when it awaits.
        return Task.FromException<IAsyncDisposable>(
            new InvalidOperationException("Not connected to MxAccess"));
    }

    return Task.FromResult<IAsyncDisposable>(new FakeSubscriptionHandle());
}
|
||||
|
||||
// Suppress unused event warning
|
||||
internal void FireEvent()
{
    // Raising the event here keeps the compiler from flagging it as unused.
    ConnectionStateChanged?.Invoke(this, null!);
}
|
||||
|
||||
@@ -47,11 +71,11 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Subscribe_ReturnsChannelReader()
|
||||
public async Task Subscribe_ReturnsChannelReader()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
var reader = sm.Subscribe("client1", new[] { "Tag1", "Tag2" }, cts.Token);
|
||||
var reader = await sm.SubscribeAsync("client1", new[] { "Tag1", "Tag2" }, cts.Token);
|
||||
reader.Should().NotBeNull();
|
||||
}
|
||||
|
||||
@@ -60,7 +84,7 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
var reader = sm.Subscribe("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
|
||||
var vtq = Vtq.Good(42.0);
|
||||
sm.OnTagValueChanged("Motor.Speed", vtq);
|
||||
@@ -76,8 +100,8 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
var reader1 = sm.Subscribe("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
var reader2 = sm.Subscribe("client2", new[] { "Motor.Speed" }, cts.Token);
|
||||
var reader1 = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
var reader2 = await sm.SubscribeAsync("client2", new[] { "Motor.Speed" }, cts.Token);
|
||||
|
||||
sm.OnTagValueChanged("Motor.Speed", Vtq.Good(99.0));
|
||||
|
||||
@@ -88,11 +112,11 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void OnTagValueChanged_NonSubscribedTag_NoDelivery()
|
||||
public async Task OnTagValueChanged_NonSubscribedTag_NoDelivery()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
var reader = sm.Subscribe("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
|
||||
sm.OnTagValueChanged("Motor.Torque", Vtq.Good(10.0));
|
||||
|
||||
@@ -101,11 +125,11 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void UnsubscribeClient_CompletesChannel()
|
||||
public async Task UnsubscribeClient_CompletesChannel()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
var reader = sm.Subscribe("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
|
||||
sm.UnsubscribeClient("client1");
|
||||
|
||||
@@ -114,11 +138,11 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void UnsubscribeClient_RemovesFromTagSubscriptions()
|
||||
public async Task UnsubscribeClient_RemovesFromTagSubscriptions()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
sm.Subscribe("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
|
||||
sm.UnsubscribeClient("client1");
|
||||
|
||||
@@ -128,12 +152,12 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void RefCounting_LastClientUnsubscribeRemovesTag()
|
||||
public async Task RefCounting_LastClientUnsubscribeRemovesTag()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
sm.Subscribe("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
sm.Subscribe("client2", new[] { "Motor.Speed" }, cts.Token);
|
||||
await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
await sm.SubscribeAsync("client2", new[] { "Motor.Speed" }, cts.Token);
|
||||
|
||||
sm.GetStats().TotalTags.Should().Be(1);
|
||||
|
||||
@@ -145,11 +169,11 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void NotifyDisconnection_SendsBadQualityToAll()
|
||||
public async Task NotifyDisconnection_SendsBadQualityToAll()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
var reader = sm.Subscribe("client1", new[] { "Motor.Speed", "Motor.Torque" }, cts.Token);
|
||||
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed", "Motor.Torque" }, cts.Token);
|
||||
|
||||
sm.NotifyDisconnection();
|
||||
|
||||
@@ -161,11 +185,11 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Backpressure_DropOldest_DropsWhenFull()
|
||||
public async Task Backpressure_DropOldest_DropsWhenFull()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient(), channelCapacity: 3);
|
||||
using var cts = new CancellationTokenSource();
|
||||
var reader = sm.Subscribe("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
|
||||
|
||||
// Fill the channel beyond capacity
|
||||
for (int i = 0; i < 10; i++)
|
||||
@@ -180,17 +204,123 @@ namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void GetStats_ReturnsCorrectCounts()
|
||||
public async Task GetStats_ReturnsCorrectCounts()
|
||||
{
|
||||
using var sm = new SubscriptionManager(new FakeScadaClient());
|
||||
using var cts = new CancellationTokenSource();
|
||||
sm.Subscribe("c1", new[] { "Tag1", "Tag2" }, cts.Token);
|
||||
sm.Subscribe("c2", new[] { "Tag2", "Tag3" }, cts.Token);
|
||||
await sm.SubscribeAsync("c1", new[] { "Tag1", "Tag2" }, cts.Token);
|
||||
await sm.SubscribeAsync("c2", new[] { "Tag2", "Tag3" }, cts.Token);
|
||||
|
||||
var stats = sm.GetStats();
|
||||
stats.TotalClients.Should().Be(2);
|
||||
stats.TotalTags.Should().Be(3); // Tag1, Tag2, Tag3
|
||||
stats.ActiveSubscriptions.Should().Be(4); // c1:Tag1, c1:Tag2, c2:Tag2, c2:Tag3
|
||||
}
|
||||
|
||||
// ── New tests for the subscription pipeline fixes (race condition, duplicate delivery, pending retry) ──
|
||||
|
||||
[Fact]
|
||||
public async Task SubscribeAfterUnsubscribe_CreatesMxAccessSubscriptions()
{
    // Regression guard for FIX 1: a client that drops and comes back asking for
    // the same tag must trigger a brand-new MxAccess subscription, rather than
    // being skipped because handles from the previous session still exist.
    const string clientId = "client1";
    const string tag = "Motor.Speed";

    var scada = new FakeScadaClient();
    using var manager = new SubscriptionManager(scada);
    using var cancellation = new CancellationTokenSource();

    // Initial session: exactly one subscribe call reaches the SCADA client.
    await manager.SubscribeAsync(clientId, new[] { tag }, cancellation.Token);
    scada.SubscribeCalls.Should().HaveCount(1);
    scada.SubscribeCalls[0].Should().Contain(tag);

    // Disconnect: the last reference goes away, so the tag is unsubscribed.
    manager.UnsubscribeClient(clientId);
    scada.UnsubscribeCalls.Should().HaveCount(1);
    scada.UnsubscribeCalls[0].Should().Contain(tag);

    // Reconnect with the same id and tag: a second subscribe call is required.
    await manager.SubscribeAsync(clientId, new[] { tag }, cancellation.Token);
    scada.SubscribeCalls.Should().HaveCount(2, "new subscribe must create fresh MxAccess subscription");
    scada.SubscribeCalls[1].Should().Contain(tag);
}
|
||||
|
||||
[Fact]
|
||||
public async Task SubscribeAfterUnsubscribe_SerializedByGate()
{
    // Verifies FIX 1: a subscribe / unsubscribe / re-subscribe sequence over the
    // same tags leaves the new session with live subscriptions. The operations
    // are issued one after another here; the test checks the end state, it does
    // not itself create contention on the gate.
    var fake = new FakeScadaClient();
    using var sm = new SubscriptionManager(fake);
    using var cts = new CancellationTokenSource();

    var tags = new[] { "Tag.A", "Tag.B", "Tag.C" };

    // Old session subscribes and leaves; the new session subscribes to the same tags.
    await sm.SubscribeAsync("session1", tags, cts.Token);
    sm.UnsubscribeClient("session1");
    var reader = await sm.SubscribeAsync("session2", tags, cts.Token);

    // Both subscribes should have called SubscribeAsync on the scada client
    fake.SubscribeCalls.Should().HaveCount(2);
    // The unsubscribe in between should have cleaned up
    fake.UnsubscribeCalls.Should().HaveCount(1);

    // Data should flow to the new session. Read from session2's own channel so
    // the assertion covers the session that replaced session1, not an unrelated one.
    sm.OnTagValueChanged("Tag.A", Vtq.Good(1.0));

    // Bound the read with its own timeout so a delivery regression fails the
    // test instead of hanging the run. A separate source is used so the token
    // handed to SubscribeAsync is never cancelled by the timeout.
    using var readTimeout = new CancellationTokenSource(TimeSpan.FromSeconds(5));
    var result = await reader.ReadAsync(readTimeout.Token);
    result.vtq.Value.Should().Be(1.0);
}
|
||||
|
||||
[Fact]
|
||||
public async Task OnTagValueChanged_NoDuplicateDelivery()
{
    // Guards FIX 2: one tag update must land in a client's channel exactly once.
    // Before the fix an update was delivered twice, once through the stored
    // callback and once through the OnTagValueChanged property.
    var scada = new FakeScadaClient();
    using var manager = new SubscriptionManager(scada);
    using var cancellation = new CancellationTokenSource();

    var updates = await manager.SubscribeAsync("client1", new[] { "Motor.Speed" }, cancellation.Token);

    // Push a single update through the manager.
    manager.OnTagValueChanged("Motor.Speed", Vtq.Good(42.0));

    // The first read yields that update...
    var gotFirst = updates.TryRead(out var first);
    gotFirst.Should().BeTrue();
    first.vtq.Value.Should().Be(42.0);

    // ...and the channel is empty afterwards.
    var gotSecond = updates.TryRead(out _);
    gotSecond.Should().BeFalse("each update should be delivered exactly once");
}
|
||||
|
||||
[Fact]
|
||||
public async Task FailedSubscription_StoredAsPending_RetriedOnReconnect()
{
    // Verifies FIX 3: when MxAccess is down during subscribe, tags are stored
    // as pending and retried when NotifyReconnection is called.
    var fake = new FakeScadaClient();
    fake.FailSubscriptions = true;
    using var sm = new SubscriptionManager(fake);
    using var cts = new CancellationTokenSource();

    // Subscribe while MxAccess is "down" — should not throw (errors are logged)
    var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
    reader.Should().NotBeNull();
    fake.SubscribeCalls.Should().HaveCount(1);

    // MxAccess comes back up
    fake.FailSubscriptions = false;
    sm.NotifyReconnection();

    // NotifyReconnection is not awaited, so wait for the retry to show up. Poll
    // with a generous deadline rather than sleeping a fixed interval: the loop
    // exits as soon as the retry is recorded, and a slow machine gets more time
    // before the assertion below reports a failure.
    var deadline = DateTime.UtcNow + TimeSpan.FromSeconds(5);
    while (fake.SubscribeCalls.Count < 2 && DateTime.UtcNow < deadline)
    {
        await Task.Delay(10);
    }

    // Should have retried the subscription
    fake.SubscribeCalls.Should().HaveCount(2, "pending subscriptions should be retried on reconnect");
    fake.SubscribeCalls[1].Should().Contain("Motor.Speed");
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user