fix(client-shared): resolve High code-review findings (Client.Shared-005, Client.Shared-006)

Client.Shared-005: _activeDataSubscriptions (a plain Dictionary) and the
_activeAlarmSubscription tuple were mutated from the caller thread, the
keep-alive failover path, and DisconnectAsync with no synchronization,
risking bucket corruption / InvalidOperationException / lost entries.
Added a dedicated _subscriptionLock and wrapped every read/write of that
bookkeeping state inside it (Subscribe/Unsubscribe[Alarms]Async,
Disconnect, Dispose, and the snapshot/clear/re-record steps of
ReplaySubscriptionsAsync). Awaited adapter calls stay outside the lock so
it is never held across I/O.

Client.Shared-006: HandleKeepAliveFailureAsync had only a non-atomic
state check guarding re-entry, so two bad keep-alives could each start a
failover loop, racing to dispose/replace _session and double-replaying
subscriptions. It now claims an atomic _failoverInProgress slot via
Interlocked.CompareExchange; a re-entrant call returns immediately. The
loop body moved to RunFailoverAsync, wrapped in try/finally that resets
the flag.

Tests: added KeepAliveFailure_ReentrantWhileFailoverInFlight_RunsFailoverOnce
and SubscribeAndUnsubscribe_ConcurrentCalls_DoNotCorruptState regression
tests; made the FakeSubscriptionAdapter / FakeSessionAdapter /
FakeSessionFactory test doubles thread-safe (and added a CreateGate hook)
so the concurrency tests exercise production locking rather than fake
state. All 138 Client.Shared tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-22 06:20:40 -04:00
parent 3de688f8d6
commit e221371a0c
6 changed files with 248 additions and 61 deletions

View File

@@ -159,10 +159,13 @@ internal sealed class FakeSessionAdapter : ISessionAdapter
/// <inheritdoc />
public Task<ISubscriptionAdapter> CreateSubscriptionAsync(int publishingIntervalMs, CancellationToken ct)
{
var sub = NextSubscription ?? new FakeSubscriptionAdapter();
NextSubscription = null;
_createdSubscriptions.Add(sub);
return Task.FromResult<ISubscriptionAdapter>(sub);
lock (_createdSubscriptions)
{
var sub = NextSubscription ?? new FakeSubscriptionAdapter();
NextSubscription = null;
_createdSubscriptions.Add(sub);
return Task.FromResult<ISubscriptionAdapter>(sub);
}
}
/// <inheritdoc />

View File

@@ -12,15 +12,24 @@ internal sealed class FakeSessionFactory : ISessionFactory
public bool ThrowOnCreate { get; set; }
public string? LastEndpointUrl { get; private set; }
/// <summary>
/// Optional gate that, when set, blocks <see cref="CreateSessionAsync" /> until completed.
/// Lets tests hold a failover loop in-flight to exercise re-entrancy.
/// </summary>
public TaskCompletionSource? CreateGate { get; set; }
public IReadOnlyList<FakeSessionAdapter> CreatedSessions => _createdSessions;
public Task<ISessionAdapter> CreateSessionAsync(
public async Task<ISessionAdapter> CreateSessionAsync(
ApplicationConfiguration config, EndpointDescription endpoint, string sessionName,
uint sessionTimeoutMs, UserIdentity identity, CancellationToken ct)
{
CreateCallCount++;
LastEndpointUrl = endpoint.EndpointUrl;
if (CreateGate != null)
await CreateGate.Task;
if (ThrowOnCreate)
throw new InvalidOperationException("FakeSessionFactory configured to fail.");
@@ -39,7 +48,7 @@ internal sealed class FakeSessionFactory : ISessionFactory
// Ensure endpoint URL matches
session.EndpointUrl = endpoint.EndpointUrl;
_createdSessions.Add(session);
return Task.FromResult<ISessionAdapter>(session);
return session;
}
/// <summary>

View File

@@ -12,6 +12,10 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
Dictionary<uint, (NodeId NodeId, Action<string, DataValue>? DataCallback, Action<EventFieldList>? EventCallback
)> _items = new();
// Guards _items so concurrent-subscription tests exercise the production
// locking rather than tripping over the test double's own state.
private readonly object _itemsLock = new();
private uint _nextHandle = 100;
/// <summary>
/// Gets a value indicating whether the fake subscription has been deleted.
@@ -34,7 +38,13 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
/// <summary>
/// Gets the handles of all active items.
/// </summary>
public IReadOnlyCollection<uint> ActiveHandles => _items.Keys.ToList();
public IReadOnlyCollection<uint> ActiveHandles
{
get
{
lock (_itemsLock) return _items.Keys.ToList();
}
}
/// <inheritdoc />
public uint SubscriptionId { get; set; } = 42;
@@ -43,17 +53,24 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
public Task<uint> AddDataChangeMonitoredItemAsync(NodeId nodeId, int samplingIntervalMs,
Action<string, DataValue> onDataChange, CancellationToken ct)
{
AddDataChangeCount++;
var handle = _nextHandle++;
_items[handle] = (nodeId, onDataChange, null);
return Task.FromResult(handle);
lock (_itemsLock)
{
AddDataChangeCount++;
var handle = _nextHandle++;
_items[handle] = (nodeId, onDataChange, null);
return Task.FromResult(handle);
}
}
/// <inheritdoc />
public Task RemoveMonitoredItemAsync(uint clientHandle, CancellationToken ct)
{
RemoveCount++;
_items.Remove(clientHandle);
lock (_itemsLock)
{
RemoveCount++;
_items.Remove(clientHandle);
}
return Task.CompletedTask;
}
@@ -61,10 +78,13 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
public Task<uint> AddEventMonitoredItemAsync(NodeId nodeId, int samplingIntervalMs, EventFilter filter,
Action<EventFieldList> onEvent, CancellationToken ct)
{
AddEventCount++;
var handle = _nextHandle++;
_items[handle] = (nodeId, null, onEvent);
return Task.FromResult(handle);
lock (_itemsLock)
{
AddEventCount++;
var handle = _nextHandle++;
_items[handle] = (nodeId, null, onEvent);
return Task.FromResult(handle);
}
}
/// <inheritdoc />
@@ -80,7 +100,7 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
public Task DeleteAsync(CancellationToken ct)
{
Deleted = true;
_items.Clear();
lock (_itemsLock) _items.Clear();
return Task.CompletedTask;
}
@@ -89,7 +109,7 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
/// </summary>
public void Dispose()
{
_items.Clear();
lock (_itemsLock) _items.Clear();
}
/// <summary>
@@ -97,8 +117,13 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
/// </summary>
public void SimulateDataChange(uint handle, DataValue value)
{
if (_items.TryGetValue(handle, out var item) && item.DataCallback != null)
item.DataCallback(item.NodeId.ToString(), value);
(NodeId NodeId, Action<string, DataValue>? DataCallback, Action<EventFieldList>? EventCallback) item;
lock (_itemsLock)
{
if (!_items.TryGetValue(handle, out item)) return;
}
item.DataCallback?.Invoke(item.NodeId.ToString(), value);
}
/// <summary>
@@ -106,6 +131,12 @@ internal sealed class FakeSubscriptionAdapter : ISubscriptionAdapter
/// </summary>
public void SimulateEvent(uint handle, EventFieldList eventFields)
{
if (_items.TryGetValue(handle, out var item) && item.EventCallback != null) item.EventCallback(eventFields);
(NodeId NodeId, Action<string, DataValue>? DataCallback, Action<EventFieldList>? EventCallback) item;
lock (_itemsLock)
{
if (!_items.TryGetValue(handle, out item)) return;
}
item.EventCallback?.Invoke(eventFields);
}
}

View File

@@ -920,6 +920,72 @@ public class OpcUaClientServiceTests : IDisposable
_service.IsConnected.ShouldBeFalse();
}
/// <summary>
/// Regression for Client.Shared-006: a re-entrant keep-alive failure that fires while a
/// failover loop is still in-flight must be ignored, so only one failover runs and only
/// one replacement session is created.
/// </summary>
/// <remarks>
/// Flow: connect to the primary, install a gate on the session factory, raise three bad
/// keep-alives on the primary session, release the gate, then assert on the factory's
/// create-call count and on the endpoint the service reports.
/// </remarks>
[Fact]
public async Task KeepAliveFailure_ReentrantWhileFailoverInFlight_RunsFailoverOnce()
{
// Two sessions are queued: the first is consumed by ConnectAsync, the second is the
// replacement the failover is expected to pick up for the backup URL.
var session1 = new FakeSessionAdapter { EndpointUrl = "opc.tcp://primary:4840" };
var session2 = new FakeSessionAdapter { EndpointUrl = "opc.tcp://backup:4840" };
_sessionFactory.EnqueueSession(session1);
_sessionFactory.EnqueueSession(session2);
var settings = ValidSettings("opc.tcp://primary:4840");
settings.FailoverUrls = ["opc.tcp://backup:4840"];
await _service.ConnectAsync(settings);
// Baseline taken after the initial connect so the assertion below counts only
// session creations caused by failover.
var createCountAfterConnect = _sessionFactory.CreateCallCount; // 1
// Hold the failover's session creation open so it stays in-flight.
var gate = new TaskCompletionSource();
_sessionFactory.CreateGate = gate;
// First bad keep-alive starts the failover loop (now blocked on the gate).
session1.SimulateKeepAlive(false);
// Re-entrant bad keep-alives while failover is still running must be ignored.
session1.SimulateKeepAlive(false);
session1.SimulateKeepAlive(false);
// Release the gate so the in-flight failover completes.
gate.SetResult();
// NOTE(review): fixed 200 ms wait rather than awaiting a completion signal; presumably
// long enough for the failover to finish, but timing-sensitive on a slow runner.
await Task.Delay(200);
// Exactly one extra session created by the single failover loop.
_sessionFactory.CreateCallCount.ShouldBe(createCountAfterConnect + 1);
// The service must now report the backup endpoint as its current connection.
_service.CurrentConnectionInfo!.EndpointUrl.ShouldBe("opc.tcp://backup:4840");
}
/// <summary>
/// Regression for Client.Shared-005: concurrent subscribe/unsubscribe calls mutating the
/// active-subscription bookkeeping must not corrupt the dictionary or throw.
/// </summary>
/// <remarks>
/// Fifty tasks each subscribe and then unsubscribe a distinct node id through the shared
/// service instance. The only assertion is that the combined task completes without
/// throwing; the final bookkeeping contents are not inspected.
/// </remarks>
[Fact]
public async Task SubscribeAndUnsubscribe_ConcurrentCalls_DoNotCorruptState()
{
// The session hands out this single fake subscription via NextSubscription.
var fakeSub = new FakeSubscriptionAdapter();
var session = new FakeSessionAdapter { NextSubscription = fakeSub };
_sessionFactory.EnqueueSession(session);
await _service.ConnectAsync(ValidSettings());
var tasks = new List<Task>();
for (var i = 0; i < 50; i++)
{
// Declared inside the loop body so each lambda captures its own node id.
var nodeId = new NodeId($"ns=2;s=Node{i}");
// Task.Run queues each subscribe/unsubscribe pair to the thread pool so the calls
// can overlap.
tasks.Add(Task.Run(async () =>
{
await _service.SubscribeAsync(nodeId);
await _service.UnsubscribeAsync(nodeId);
}));
}
// No InvalidOperationException from concurrent Dictionary mutation.
await Should.NotThrowAsync(() => Task.WhenAll(tasks));
}
// --- Dispose tests ---
/// <summary>