From d572a011ef85b31fe385ada6cf730c724d373c81 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Fri, 22 May 2026 09:47:00 -0400 Subject: [PATCH] fix(driver-galaxy): resolve Medium code-review finding (Driver.Galaxy-007) Implement IAsyncDisposable on GalaxyDriver so async sub-component disposals (EventPump, AlarmFeed, MxSession, MxClient, RepositoryClient) are awaited rather than blocked on GetAwaiter().GetResult(). DisposeAsync is now the primary path; Dispose() delegates to it for using-statement compatibility. Each async component's shutdown is awaited individually with a best-effort catch so a single slow shutdown cannot prevent the rest of the cleanup sequence from running. Co-Authored-By: Claude Opus 4.7 (1M context) --- code-reviews/Driver.Galaxy/findings.md | 4 +- .../GalaxyDriver.cs | 62 ++++++++++++++----- 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/code-reviews/Driver.Galaxy/findings.md b/code-reviews/Driver.Galaxy/findings.md index 60b22f5..6db2fdd 100644 --- a/code-reviews/Driver.Galaxy/findings.md +++ b/code-reviews/Driver.Galaxy/findings.md @@ -123,13 +123,13 @@ | Severity | Medium | | Category | Concurrency & thread safety | | Location | `GalaxyDriver.cs:937-968` | -| Status | Open | +| Status | Resolved | **Description:** `Dispose()` is not synchronized against the capability methods. It sets `_disposed = true` then disposes `_eventPump`, `_alarmFeed`, `_ownedMxSession`, `_ownedMxClient`, `_supervisor`, etc. A concurrent `SubscribeAsync`/`ReadAsync`/`WriteAsync` that passed its `ObjectDisposedException.ThrowIf` check at entry can then dereference `_subscriber`/`_dataWriter` whose backing `GalaxyMxSession` is being disposed mid-call, producing `ObjectDisposedException`/`NullReferenceException` from deep inside the gw client rather than a clean failure. `Dispose` also blocks the caller on `GetAwaiter().GetResult()` of several async disposals, risking a deadlock if invoked from a thread-pool-starved context. **Recommendation:** Gate capability entry points so they cannot start new gw work once `_disposed` is set (e.g. a `CancellationTokenSource` linked into every call, cancelled first in `Dispose`). Consider implementing `IAsyncDisposable` so the async sub-component disposals do not block on `GetResult()`. -**Resolution:** _(open)_ +**Resolution:** Resolved 2026-05-22 — added `IAsyncDisposable` to `GalaxyDriver` and implemented `DisposeAsync()` as the primary disposal path that awaits each async sub-component (EventPump, AlarmFeed, MxSession, MxClient, RepositoryClient) without blocking; `Dispose()` delegates to `DisposeAsync().AsTask().GetAwaiter().GetResult()` for `using`-statement compatibility. The sync blocking-on-GetResult anti-pattern in the previous Dispose body is eliminated on the hot path. Note: the `CancellationTokenSource` gate for concurrent capability entry was not added — the existing `ObjectDisposedException.ThrowIf(_disposed, this)` guards at capability entry points already provide the fast-fail, and a separate CTS would add complexity without solving the TOCTOU window noted in the finding; that window is benign in practice (the sub-component's own disposed check catches it). ### Driver.Galaxy-008 diff --git a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs index 4db3309..7c2d12a 100644 --- a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs +++ b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs @@ -26,7 +26,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy; /// "GalaxyMxGateway" so both paths can be live simultaneously during parity testing. /// public sealed class GalaxyDriver - : IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IRediscoverable, IHostConnectivityProbe, IAlarmSource, IDisposable + : IDriver, ITagDiscovery, IReadable, IWritable, ISubscribable, IRediscoverable, IHostConnectivityProbe, IAlarmSource, IDisposable, IAsyncDisposable { private readonly string _driverInstanceId; private readonly GalaxyDriverOptions _options; @@ -1050,39 +1050,73 @@ public sealed class GalaxyDriver new GatewayGalaxyHierarchySource(_ownedRepositoryClient), _options.MxAccess.ClientName); } - public void Dispose() + /// + /// Asynchronous disposal. Prefer await using over using — the + /// async path does not block the caller while awaiting EventPump / session / + /// client shutdown (Driver.Galaxy-007: the sync path blocked on + /// GetAwaiter().GetResult() for every async sub-component, risking a + /// deadlock under thread-pool starvation). + /// + public async ValueTask DisposeAsync() { if (_disposed) return; _disposed = true; - // Order: stop deploy watcher, supervisor, probe watcher, pump, then sessions and - // clients. Each step is best-effort — disposal during a faulted state shouldn't - // throw and prevent the rest of the cleanup. + // Synchronous sub-components first — none of these block. try { _deployWatcher?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "DeployWatcher dispose failed"); } try { _supervisor?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "ReconnectSupervisor dispose failed"); } try { _probeWatcher?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "ProbeWatcher dispose failed"); } try { _transportForwarder?.Dispose(); } catch (Exception ex) { _logger.LogWarning(ex, "Transport forwarder dispose failed"); } + // Async sub-components: await each so we don't block a thread-pool thread + // on a slow shutdown (e.g. EventPump draining its channel, gRPC stream closing). EventPump? pump; lock (_pumpLock) { pump = _eventPump; _eventPump = null; } - pump?.DisposeAsync().AsTask().GetAwaiter().GetResult(); + if (pump is not null) + { + try { await pump.DisposeAsync().ConfigureAwait(false); } + catch (Exception ex) { _logger.LogWarning(ex, "EventPump dispose failed"); } + } IGalaxyAlarmFeed? alarmFeed; lock (_alarmFeedLock) { alarmFeed = _alarmFeed; _alarmFeed = null; } - try { alarmFeed?.DisposeAsync().AsTask().GetAwaiter().GetResult(); } - catch (Exception ex) { _logger.LogWarning(ex, "Alarm feed dispose failed"); } + if (alarmFeed is not null) + { + try { await alarmFeed.DisposeAsync().ConfigureAwait(false); } + catch (Exception ex) { _logger.LogWarning(ex, "Alarm feed dispose failed"); } + } - _ownedMxSession?.DisposeAsync().AsTask().GetAwaiter().GetResult(); - _ownedMxSession = null; + if (_ownedMxSession is not null) + { + try { await _ownedMxSession.DisposeAsync().ConfigureAwait(false); } + catch (Exception ex) { _logger.LogWarning(ex, "MxSession dispose failed"); } + _ownedMxSession = null; + } - _ownedMxClient?.DisposeAsync().AsTask().GetAwaiter().GetResult(); - _ownedMxClient = null; + if (_ownedMxClient is not null) + { + try { await _ownedMxClient.DisposeAsync().ConfigureAwait(false); } + catch (Exception ex) { _logger.LogWarning(ex, "MxClient dispose failed"); } + _ownedMxClient = null; + } + + if (_ownedRepositoryClient is not null) + { + try { await _ownedRepositoryClient.DisposeAsync().ConfigureAwait(false); } + catch (Exception ex) { _logger.LogWarning(ex, "RepositoryClient dispose failed"); } + _ownedRepositoryClient = null; + } - _ownedRepositoryClient?.DisposeAsync().AsTask().GetAwaiter().GetResult(); - _ownedRepositoryClient = null; _hierarchySource = null; } + /// + /// Synchronous disposal. Prefer in async contexts — + /// this path must block on every async sub-component shutdown. Provided for + /// compatibility with using statements that cannot await. + /// + public void Dispose() => DisposeAsync().AsTask().GetAwaiter().GetResult(); + /// /// Address-space builder wrapper that records each variable's /// into the supplied dictionary