Files
scadalink-design/lmxproxy/tests/ZB.MOM.WW.LmxProxy.Host.Tests/Subscriptions/SubscriptionManagerTests.cs
Joseph Doherty fa33e1acf1 fix(lmxproxy): resolve subscribe/unsubscribe race condition on client reconnect
Three fixes for the SubscriptionManager/MxAccessClient subscription pipeline:

1. Serialize Subscribe and UnsubscribeClient with a SemaphoreSlim gate to prevent
   race where old-session unsubscribe removes new-session COM subscriptions.
   CreateMxAccessSubscriptionsAsync is now awaited instead of fire-and-forget.

2. Fix dual VTQ delivery in MxAccessClient.OnDataChange — each update was delivered
   twice (once via stored callback, once via OnTagValueChanged property). Now uses
   stored callback as the single delivery path.

3. Store pending tag addresses when CreateMxAccessSubscriptionsAsync fails (MxAccess
   down) and retry them on reconnect via NotifyReconnection/RetryPendingSubscriptionsAsync.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 23:18:08 -04:00

327 lines
14 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Channels;
using System.Threading.Tasks;
using FluentAssertions;
using Xunit;
using ZB.MOM.WW.LmxProxy.Host.Domain;
using ZB.MOM.WW.LmxProxy.Host.Subscriptions;
namespace ZB.MOM.WW.LmxProxy.Host.Tests.Subscriptions
{
public class SubscriptionManagerTests
{
/// <summary>Fake IScadaClient for testing (no COM dependency).</summary>
private class FakeScadaClient : IScadaClient
{
public bool IsConnected => true;
public ConnectionState ConnectionState => ConnectionState.Connected;
public event EventHandler<ConnectionStateChangedEventArgs>? ConnectionStateChanged;
public Task ConnectAsync(CancellationToken ct = default) => Task.CompletedTask;
public Task DisconnectAsync(CancellationToken ct = default) => Task.CompletedTask;
public Task<Vtq> ReadAsync(string address, CancellationToken ct = default) =>
Task.FromResult(Vtq.Good(42.0));
public Task<IReadOnlyDictionary<string, Vtq>> ReadBatchAsync(IEnumerable<string> addresses, CancellationToken ct = default) =>
Task.FromResult<IReadOnlyDictionary<string, Vtq>>(new Dictionary<string, Vtq>());
public Task WriteAsync(string address, object value, CancellationToken ct = default) => Task.CompletedTask;
public Task WriteBatchAsync(IReadOnlyDictionary<string, object> values, CancellationToken ct = default) => Task.CompletedTask;
public Task<(bool flagReached, int elapsedMs)> WriteBatchAndWaitAsync(
IReadOnlyDictionary<string, object> values, string flagTag, object flagValue,
int timeoutMs, int pollIntervalMs, CancellationToken ct = default) =>
Task.FromResult((false, 0));
public Task<ProbeResult> ProbeConnectionAsync(string testTagAddress, int timeoutMs, CancellationToken ct = default) =>
Task.FromResult(ProbeResult.Healthy(Quality.Good, DateTime.UtcNow));
public ValueTask DisposeAsync() => default;
// Track subscribe/unsubscribe calls for assertions
public List<List<string>> SubscribeCalls { get; } = new List<List<string>>();
public List<List<string>> UnsubscribeCalls { get; } = new List<List<string>>();
public List<Action<string, Vtq>> StoredCallbacks { get; } = new List<Action<string, Vtq>>();
// When true, SubscribeAsync throws to simulate MxAccess being down
public bool FailSubscriptions { get; set; }
public Task UnsubscribeByAddressAsync(IEnumerable<string> addresses)
{
UnsubscribeCalls.Add(addresses.ToList());
return Task.CompletedTask;
}
public Task<IAsyncDisposable> SubscribeAsync(IEnumerable<string> addresses, Action<string, Vtq> callback, CancellationToken ct = default)
{
var addressList = addresses.ToList();
SubscribeCalls.Add(addressList);
StoredCallbacks.Add(callback);
if (FailSubscriptions)
throw new InvalidOperationException("Not connected to MxAccess");
return Task.FromResult<IAsyncDisposable>(new FakeSubscriptionHandle());
}
// Suppress unused event warning
internal void FireEvent() => ConnectionStateChanged?.Invoke(this, null!);
private class FakeSubscriptionHandle : IAsyncDisposable
{
public ValueTask DisposeAsync() => default;
}
}
[Fact]
public async Task Subscribe_ReturnsChannelReader()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
var reader = await sm.SubscribeAsync("client1", new[] { "Tag1", "Tag2" }, cts.Token);
reader.Should().NotBeNull();
}
[Fact]
public async Task OnTagValueChanged_FansOutToSubscribedClients()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
var vtq = Vtq.Good(42.0);
sm.OnTagValueChanged("Motor.Speed", vtq);
var result = await reader.ReadAsync(cts.Token);
result.address.Should().Be("Motor.Speed");
result.vtq.Value.Should().Be(42.0);
result.vtq.Quality.Should().Be(Quality.Good);
}
[Fact]
public async Task OnTagValueChanged_MultipleClients_BothReceive()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
var reader1 = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
var reader2 = await sm.SubscribeAsync("client2", new[] { "Motor.Speed" }, cts.Token);
sm.OnTagValueChanged("Motor.Speed", Vtq.Good(99.0));
var r1 = await reader1.ReadAsync(cts.Token);
var r2 = await reader2.ReadAsync(cts.Token);
r1.vtq.Value.Should().Be(99.0);
r2.vtq.Value.Should().Be(99.0);
}
[Fact]
public async Task OnTagValueChanged_NonSubscribedTag_NoDelivery()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
sm.OnTagValueChanged("Motor.Torque", Vtq.Good(10.0));
// Channel should be empty
reader.TryRead(out _).Should().BeFalse();
}
[Fact]
public async Task UnsubscribeClient_CompletesChannel()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
sm.UnsubscribeClient("client1");
// Channel should be completed
reader.Completion.IsCompleted.Should().BeTrue();
}
[Fact]
public async Task UnsubscribeClient_RemovesFromTagSubscriptions()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
sm.UnsubscribeClient("client1");
var stats = sm.GetStats();
stats.TotalClients.Should().Be(0);
stats.TotalTags.Should().Be(0);
}
[Fact]
public async Task RefCounting_LastClientUnsubscribeRemovesTag()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
await sm.SubscribeAsync("client2", new[] { "Motor.Speed" }, cts.Token);
sm.GetStats().TotalTags.Should().Be(1);
sm.UnsubscribeClient("client1");
sm.GetStats().TotalTags.Should().Be(1); // client2 still subscribed
sm.UnsubscribeClient("client2");
sm.GetStats().TotalTags.Should().Be(0); // last client gone
}
[Fact]
public async Task NotifyDisconnection_SendsBadQualityToAll()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed", "Motor.Torque" }, cts.Token);
sm.NotifyDisconnection();
// Should receive 2 bad quality messages
reader.TryRead(out var r1).Should().BeTrue();
r1.vtq.Quality.Should().Be(Quality.Bad_NotConnected);
reader.TryRead(out var r2).Should().BeTrue();
r2.vtq.Quality.Should().Be(Quality.Bad_NotConnected);
}
[Fact]
public async Task Backpressure_DropOldest_DropsWhenFull()
{
using var sm = new SubscriptionManager(new FakeScadaClient(), channelCapacity: 3);
using var cts = new CancellationTokenSource();
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
// Fill the channel beyond capacity
for (int i = 0; i < 10; i++)
{
sm.OnTagValueChanged("Motor.Speed", Vtq.Good((double)i));
}
// Should have exactly 3 messages (capacity limit)
int count = 0;
while (reader.TryRead(out _)) count++;
count.Should().Be(3);
}
[Fact]
public async Task GetStats_ReturnsCorrectCounts()
{
using var sm = new SubscriptionManager(new FakeScadaClient());
using var cts = new CancellationTokenSource();
await sm.SubscribeAsync("c1", new[] { "Tag1", "Tag2" }, cts.Token);
await sm.SubscribeAsync("c2", new[] { "Tag2", "Tag3" }, cts.Token);
var stats = sm.GetStats();
stats.TotalClients.Should().Be(2);
stats.TotalTags.Should().Be(3); // Tag1, Tag2, Tag3
stats.ActiveSubscriptions.Should().Be(4); // c1:Tag1, c1:Tag2, c2:Tag2, c2:Tag3
}
// ── New tests for race condition fix ──────────────────────────
[Fact]
public async Task SubscribeAfterUnsubscribe_CreatesMxAccessSubscriptions()
{
// Verifies FIX 1: when a client disconnects and reconnects with the same tags,
// the new subscribe must create fresh MxAccess subscriptions (not skip them
// because old handles still exist).
var fake = new FakeScadaClient();
using var sm = new SubscriptionManager(fake);
using var cts = new CancellationTokenSource();
// First client subscribes
await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
fake.SubscribeCalls.Should().HaveCount(1);
fake.SubscribeCalls[0].Should().Contain("Motor.Speed");
// Client disconnects — unsubscribe removes the tag (ref count → 0)
sm.UnsubscribeClient("client1");
fake.UnsubscribeCalls.Should().HaveCount(1);
fake.UnsubscribeCalls[0].Should().Contain("Motor.Speed");
// Same client reconnects — must create a NEW MxAccess subscription
await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
fake.SubscribeCalls.Should().HaveCount(2, "new subscribe must create fresh MxAccess subscription");
fake.SubscribeCalls[1].Should().Contain("Motor.Speed");
}
[Fact]
public async Task SubscribeAfterUnsubscribe_SerializedByGate()
{
// Verifies FIX 1: subscribe and unsubscribe are serialized so they cannot
// interleave and cause the race condition.
var fake = new FakeScadaClient();
using var sm = new SubscriptionManager(fake);
using var cts = new CancellationTokenSource();
var tags = new[] { "Tag.A", "Tag.B", "Tag.C" };
// Subscribe, unsubscribe, re-subscribe in sequence
await sm.SubscribeAsync("session1", tags, cts.Token);
sm.UnsubscribeClient("session1");
await sm.SubscribeAsync("session2", tags, cts.Token);
// Both subscribes should have called SubscribeAsync on the scada client
fake.SubscribeCalls.Should().HaveCount(2);
// The unsubscribe in between should have cleaned up
fake.UnsubscribeCalls.Should().HaveCount(1);
// Data should flow to the new session
var reader = await sm.SubscribeAsync("session3", tags, cts.Token);
sm.OnTagValueChanged("Tag.A", Vtq.Good(1.0));
var result = await reader.ReadAsync(cts.Token);
result.vtq.Value.Should().Be(1.0);
}
[Fact]
public async Task OnTagValueChanged_NoDuplicateDelivery()
{
// Verifies FIX 2: each OnDataChange produces exactly one VTQ per client,
// not two (which happened when both stored callback and OnTagValueChanged
// property were invoked).
var fake = new FakeScadaClient();
using var sm = new SubscriptionManager(fake);
using var cts = new CancellationTokenSource();
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
// Deliver one update
sm.OnTagValueChanged("Motor.Speed", Vtq.Good(42.0));
// Should receive exactly one message
reader.TryRead(out var msg).Should().BeTrue();
msg.vtq.Value.Should().Be(42.0);
// No duplicate
reader.TryRead(out _).Should().BeFalse("each update should be delivered exactly once");
}
[Fact]
public async Task FailedSubscription_StoredAsPending_RetriedOnReconnect()
{
// Verifies FIX 3: when MxAccess is down during subscribe, tags are stored
// as pending and retried when NotifyReconnection is called.
var fake = new FakeScadaClient();
fake.FailSubscriptions = true;
using var sm = new SubscriptionManager(fake);
using var cts = new CancellationTokenSource();
// Subscribe while MxAccess is "down" — should not throw (errors are logged)
var reader = await sm.SubscribeAsync("client1", new[] { "Motor.Speed" }, cts.Token);
reader.Should().NotBeNull();
fake.SubscribeCalls.Should().HaveCount(1);
// MxAccess comes back up
fake.FailSubscriptions = false;
sm.NotifyReconnection();
// Give the async retry a moment to complete
await Task.Delay(100);
// Should have retried the subscription
fake.SubscribeCalls.Should().HaveCount(2, "pending subscriptions should be retried on reconnect");
fake.SubscribeCalls[1].Should().Contain("Motor.Speed");
}
}
}