diff --git a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs
index 1bcc63b6..58412d0f 100644
--- a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs
+++ b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/GalaxyDriver.cs
@@ -282,15 +282,17 @@ public sealed class GalaxyDriver
}
///
- /// Reopen callback for : re-Register the gw session.
- /// If the session never connected, this is a fresh ConnectAsync; otherwise it's a
- /// reconnect against the existing client.
+ /// Reopen callback for : recreate the gw session. After a
+ /// gateway restart the existing session handle is faulted / stale but non-null, so a plain
+ /// ConnectAsync would no-op and the supervisor would loop forever replaying against a
+ /// dead session. disposes the stale session +
+ /// owned client and rebuilds, so a never-connected session still opens fresh.
///
private async Task ReopenAsync(CancellationToken cancellationToken)
{
if (_ownedMxSession is null) return;
var clientOptions = BuildClientOptions(_options.Gateway);
- await _ownedMxSession.ConnectAsync(clientOptions, cancellationToken).ConfigureAwait(false);
+ await _ownedMxSession.RecreateAsync(clientOptions, cancellationToken).ConfigureAwait(false);
}
///
diff --git a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Runtime/GalaxyMxSession.cs b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Runtime/GalaxyMxSession.cs
index 7913c69c..d48d3528 100644
--- a/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Runtime/GalaxyMxSession.cs
+++ b/src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy/Runtime/GalaxyMxSession.cs
@@ -28,6 +28,7 @@ public sealed class GalaxyMxSession : IAsyncDisposable
private MxGatewayClient? _ownedClient;
private MxGatewaySession? _session;
private int _serverHandle;
+ private bool _connected;
private bool _disposed;
/// Initializes a new instance of the GalaxyMxSession class.
@@ -48,24 +49,65 @@ public sealed class GalaxyMxSession : IAsyncDisposable
///
public int ServerHandle => _serverHandle;
+ ///
+ /// Test seam — when set, replaces the real "open session + register" work so unit tests can
+ /// exercise the connect / recreate orchestration without a live gateway (the SDK session/client
+ /// types are sealed + internal-ctor and cannot be faked). Null in production = real gateway path.
+ ///
+ internal Func? OpenAndRegisterOverrideForTests { get; set; }
+
///
/// Connect the underlying gateway client + open an MXAccess session + register the
- /// configured client name. Idempotent — second calls are no-ops while
- /// is true.
+ /// configured client name. Idempotent — second calls are no-ops while a connect has
+ /// already succeeded. Use to force a rebuild after the
+ /// gateway restarts and leaves a faulted / stale session handle.
///
/// The MX gateway client options.
/// The cancellation token.
public async Task ConnectAsync(MxGatewayClientOptions clientOptions, CancellationToken cancellationToken)
{
ObjectDisposedException.ThrowIf(_disposed, this);
- if (_session is not null) return;
+ if (_connected) return;
- _ownedClient = MxGatewayClient.Create(clientOptions);
- _session = await _ownedClient.OpenSessionAsync(cancellationToken: cancellationToken).ConfigureAwait(false);
- _serverHandle = await _session.RegisterAsync(_options.ClientName, cancellationToken).ConfigureAwait(false);
- _logger.LogInformation(
- "GalaxyMxSession connected — clientName={ClientName} serverHandle={Handle}",
- _options.ClientName, _serverHandle);
+ try
+ {
+ if (OpenAndRegisterOverrideForTests is not null)
+ {
+ await OpenAndRegisterOverrideForTests(cancellationToken).ConfigureAwait(false);
+ }
+ else
+ {
+ _ownedClient = MxGatewayClient.Create(clientOptions);
+ _session = await _ownedClient.OpenSessionAsync(cancellationToken: cancellationToken).ConfigureAwait(false);
+ _serverHandle = await _session.RegisterAsync(_options.ClientName, cancellationToken).ConfigureAwait(false);
+ }
+
+ _connected = true;
+ _logger.LogInformation(
+ "GalaxyMxSession connected — clientName={ClientName} serverHandle={Handle}",
+ _options.ClientName, _serverHandle);
+ }
+ catch
+ {
+ // A partial open (e.g. OpenSession succeeded but Register threw) must not leave a half-open
+ // handle that blocks the next reconnect. Tear the partial state down so a retry rebuilds cleanly.
+ await TeardownSessionAsync().ConfigureAwait(false);
+ throw;
+ }
+ }
+
+ ///
+ /// Disposes the current (faulted / stale) session + owned client and rebuilds. Use this on
+ /// reconnect: alone no-ops while a (possibly dead) session handle is
+ /// still present, so the supervisor's reopen must route through here to actually re-establish.
+ ///
+ /// The MX gateway client options.
+ /// The cancellation token.
+ public async Task RecreateAsync(MxGatewayClientOptions clientOptions, CancellationToken cancellationToken)
+ {
+ ObjectDisposedException.ThrowIf(_disposed, this);
+ await TeardownSessionAsync().ConfigureAwait(false);
+ await ConnectAsync(clientOptions, cancellationToken).ConfigureAwait(false);
}
///
@@ -93,19 +135,31 @@ public sealed class GalaxyMxSession : IAsyncDisposable
{
if (_disposed) return;
_disposed = true;
+ await TeardownSessionAsync().ConfigureAwait(false);
+ }
+
+ ///
+ /// Resets the connect flag and best-effort disposes + nulls the owned session and client.
+ /// Shared by (final teardown) and /
+ /// the partial-open recovery path (rebuild teardown) — it does NOT set _disposed, so
+ /// a torn-down session can be re-opened.
+ ///
+ private async Task TeardownSessionAsync()
+ {
+ _connected = false;
if (_session is not null)
{
try { await _session.DisposeAsync().ConfigureAwait(false); }
catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession session dispose failed (best-effort)"); }
+ _session = null;
}
- _session = null;
if (_ownedClient is not null)
{
try { await _ownedClient.DisposeAsync().ConfigureAwait(false); }
catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession client dispose failed (best-effort)"); }
+ _ownedClient = null;
}
- _ownedClient = null;
}
}
diff --git a/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests/Runtime/GalaxyMxSessionReconnectTests.cs b/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests/Runtime/GalaxyMxSessionReconnectTests.cs
new file mode 100644
index 00000000..b1e689eb
--- /dev/null
+++ b/tests/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests/Runtime/GalaxyMxSessionReconnectTests.cs
@@ -0,0 +1,106 @@
+using Shouldly;
+using Xunit;
+using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
+using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
+
+namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Tests.Runtime;
+
+///
+/// Connect / recreate orchestration tests for . The SDK
+/// session/client types are sealed with internal ctors and cannot be faked, so these
+/// drive the open body through the OpenAndRegisterOverrideForTests seam. The
+/// core regression guard is :
+/// a stale-session reconnect must rebuild rather than no-op (the gateway-restart bug).
+///
+public sealed class GalaxyMxSessionReconnectTests
+{
+ private static GalaxyMxAccessOptions MinimalOptions() => new(ClientName: "OtOpcUaTest");
+
+ private static GalaxyMxSession NewSession() => new(MinimalOptions());
+
+ /// A second ConnectAsync while connected must be a no-op (idempotent guard).
+ [Fact]
+ public async Task Connect_then_connect_again_is_a_noop()
+ {
+ var session = NewSession();
+ var openCount = 0;
+ session.OpenAndRegisterOverrideForTests = _ =>
+ {
+ openCount++;
+ return Task.CompletedTask;
+ };
+
+ await session.ConnectAsync(null!, CancellationToken.None);
+ openCount.ShouldBe(1);
+
+ await session.ConnectAsync(null!, CancellationToken.None);
+ openCount.ShouldBe(1);
+ }
+
+ ///
+ /// The regression guard for the gateway-restart bug: RecreateAsync must bypass
+ /// the no-op guard and re-run the open body even though a (stale) session is present.
+ ///
+ [Fact]
+ public async Task Recreate_after_connect_opens_a_fresh_session()
+ {
+ var session = NewSession();
+ var openCount = 0;
+ session.OpenAndRegisterOverrideForTests = _ =>
+ {
+ openCount++;
+ return Task.CompletedTask;
+ };
+
+ await session.ConnectAsync(null!, CancellationToken.None);
+ openCount.ShouldBe(1);
+
+ await session.RecreateAsync(null!, CancellationToken.None);
+ openCount.ShouldBe(2);
+ }
+
+ /// RecreateAsync on a never-connected session still opens (teardown is a no-op first).
+ [Fact]
+ public async Task Recreate_when_never_connected_still_opens()
+ {
+ var session = NewSession();
+ var openCount = 0;
+ session.OpenAndRegisterOverrideForTests = _ =>
+ {
+ openCount++;
+ return Task.CompletedTask;
+ };
+
+ await session.RecreateAsync(null!, CancellationToken.None);
+ openCount.ShouldBe(1);
+ }
+
+ ///
+ /// A failed first connect must not leave _connected set — the next attempt has
+ /// to reach the open body again (partial-open teardown).
+ ///
+ [Fact]
+ public async Task Connect_failure_is_not_left_half_open()
+ {
+ var session = NewSession();
+ var openCount = 0;
+ session.OpenAndRegisterOverrideForTests = _ =>
+ {
+ // Throw on the first attempt, succeed on the second.
+ if (openCount++ == 0)
+ {
+ throw new InvalidOperationException("simulated open failure");
+ }
+
+ return Task.CompletedTask;
+ };
+
+ await Should.ThrowAsync(
+ async () => await session.ConnectAsync(null!, CancellationToken.None));
+ openCount.ShouldBe(1);
+
+ // The failed first attempt must not have latched _connected — the retry reaches the body.
+ await session.ConnectAsync(null!, CancellationToken.None);
+ openCount.ShouldBe(2);
+ }
+}