fix(client-shared): resolve High code-review findings (Client.Shared-005, Client.Shared-006)
Client.Shared-005: _activeDataSubscriptions (a plain Dictionary) and the _activeAlarmSubscription tuple were mutated from the caller thread, the keep-alive failover path, and DisconnectAsync with no synchronization, risking bucket corruption / InvalidOperationException / lost entries. Added a dedicated _subscriptionLock and wrapped every read/write of that bookkeeping state inside it (Subscribe/Unsubscribe[Alarms]Async, Disconnect, Dispose, and the snapshot/clear/re-record steps of ReplaySubscriptionsAsync). Awaited adapter calls stay outside the lock so it is never held across I/O. Client.Shared-006: HandleKeepAliveFailureAsync had only a non-atomic state check guarding re-entry, so two bad keep-alives could each start a failover loop, racing to dispose/replace _session and double-replaying subscriptions. It now claims an atomic _failoverInProgress slot via Interlocked.CompareExchange; a re-entrant call returns immediately. The loop body moved to RunFailoverAsync, wrapped in try/finally that resets the flag. Tests: added KeepAliveFailure_ReentrantWhileFailoverInFlight_RunsFailoverOnce and SubscribeAndUnsubscribe_ConcurrentCalls_DoNotCorruptState regression tests; made the FakeSubscriptionAdapter / FakeSessionAdapter / FakeSessionFactory test doubles thread-safe (and added a CreateGate hook) so the concurrency tests exercise production locking rather than fake state. All 138 Client.Shared tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -920,6 +920,72 @@ public class OpcUaClientServiceTests : IDisposable
|
||||
_service.IsConnected.ShouldBeFalse();
|
||||
}
|
||||
|
||||
/// <summary>
/// Regression for Client.Shared-006: a re-entrant keep-alive failure that fires while a
/// failover loop is still in-flight must be ignored, so only one failover runs and only
/// one replacement session is created.
/// </summary>
/// <remarks>
/// The test connects to the primary endpoint, gates the session factory so the failover's
/// session creation stays pending, raises three bad keep-alives on the primary session,
/// then releases the gate and asserts that exactly one additional session was created and
/// that the service reports the backup endpoint.
/// </remarks>
[Fact]
public async Task KeepAliveFailure_ReentrantWhileFailoverInFlight_RunsFailoverOnce()
{
    const string primaryUrl = "opc.tcp://primary:4840";
    const string backupUrl = "opc.tcp://backup:4840";

    var session1 = new FakeSessionAdapter { EndpointUrl = primaryUrl };
    var session2 = new FakeSessionAdapter { EndpointUrl = backupUrl };
    _sessionFactory.EnqueueSession(session1);
    _sessionFactory.EnqueueSession(session2);

    var settings = ValidSettings(primaryUrl);
    settings.FailoverUrls = [backupUrl];

    await _service.ConnectAsync(settings);
    var createCountAfterConnect = _sessionFactory.CreateCallCount; // 1

    // Hold the failover's session creation open so it stays in-flight.
    // RunContinuationsAsynchronously keeps SetResult() from running the waiting
    // continuation inline on this test's thread.
    var gate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
    _sessionFactory.CreateGate = gate;

    // First bad keep-alive starts the failover loop (now blocked on the gate).
    session1.SimulateKeepAlive(false);

    // Re-entrant bad keep-alives while failover is still running must be ignored.
    session1.SimulateKeepAlive(false);
    session1.SimulateKeepAlive(false);

    // Release the gate so the in-flight failover completes.
    gate.SetResult();

    // Wait for the failover to land on the backup endpoint with a bounded poll
    // (at most ~5 s) rather than a single fixed sleep, so the test is neither
    // slower than necessary nor dependent on the failover finishing within 200 ms.
    for (var attempt = 0;
         attempt < 500 && _service.CurrentConnectionInfo?.EndpointUrl != backupUrl;
         attempt++)
    {
        await Task.Delay(10);
    }

    // Exactly one extra session created by the single failover loop.
    _sessionFactory.CreateCallCount.ShouldBe(createCountAfterConnect + 1);
    _service.CurrentConnectionInfo!.EndpointUrl.ShouldBe(backupUrl);
}
|
||||
|
||||
/// <summary>
/// Regression for Client.Shared-005: subscribe/unsubscribe calls issued concurrently
/// against the active-subscription bookkeeping must neither corrupt the dictionary
/// nor throw.
/// </summary>
/// <remarks>
/// Starts 50 tasks via <c>Task.Run</c>, each subscribing to and then unsubscribing from
/// its own node, and asserts that awaiting all of them does not throw.
/// </remarks>
[Fact]
public async Task SubscribeAndUnsubscribe_ConcurrentCalls_DoNotCorruptState()
{
    const int workerCount = 50;

    var subscription = new FakeSubscriptionAdapter();
    var connectedSession = new FakeSessionAdapter { NextSubscription = subscription };
    _sessionFactory.EnqueueSession(connectedSession);
    await _service.ConnectAsync(ValidSettings());

    var workers = new Task[workerCount];
    for (var index = 0; index < workers.Length; index++)
    {
        // Each worker gets its own node so the calls touch distinct keys.
        var target = new NodeId($"ns=2;s=Node{index}");
        workers[index] = Task.Run(() => SubscribeThenUnsubscribeAsync(target));
    }

    // No InvalidOperationException from concurrent Dictionary mutation.
    await Should.NotThrowAsync(() => Task.WhenAll(workers));

    // One worker's unit of work: subscribe to the node, then unsubscribe from it.
    async Task SubscribeThenUnsubscribeAsync(NodeId nodeId)
    {
        await _service.SubscribeAsync(nodeId);
        await _service.UnsubscribeAsync(nodeId);
    }
}
|
||||
|
||||
// --- Dispose tests ---
|
||||
|
||||
/// <summary>
|
||||
|
||||
Reference in New Issue
Block a user