diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs
index 1167303..1bac6a2 100644
--- a/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs
+++ b/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs
@@ -61,6 +61,12 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
private bool _disposed;
/// URL of the endpoint the driver actually connected to. Exposed via .
private string? _connectedEndpointUrl;
+ /// <summary>
+ /// SDK-provided reconnect handler that owns the retry loop + session-transfer machinery
+ /// when the session's keep-alive channel reports a bad status. Null outside the
+ /// reconnecting window; constructed lazily inside the keep-alive handler.
+ /// </summary>
+ private SessionReconnectHandler? _reconnectHandler;
public string DriverInstanceId => driverInstanceId;
public string DriverType => "OpcUaClient";
@@ -104,16 +110,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
"Tried:\n " + string.Join("\n ", attemptErrors),
attemptErrors.Select(e => new InvalidOperationException(e)));
- // Wire the session's keep-alive channel into HostState. OPC UA keep-alives are
- // authoritative for session liveness: the SDK pings on KeepAliveInterval and sets
- // KeepAliveStopped when N intervals elapse without a response. That's strictly
- // better than a driver-side polling probe — no extra round-trip, no duplicate
- // semantic.
- _keepAliveHandler = (_, e) =>
- {
- var healthy = !ServiceResult.IsBad(e.Status);
- TransitionTo(healthy ? HostState.Running : HostState.Stopped);
- };
+ // Wire the session's keep-alive channel into HostState + the reconnect trigger.
+ // OPC UA keep-alives are authoritative for session liveness: the SDK pings on
+ // KeepAliveInterval and sets KeepAliveStopped when N intervals elapse without a
+ // response. On a bad keep-alive the driver spins up a SessionReconnectHandler
+ // which transparently retries + swaps the underlying session. Subscriptions move
+ // via TransferSubscriptions so local MonitoredItem handles stay valid.
+ _keepAliveHandler = OnKeepAlive;
session.KeepAlive += _keepAliveHandler;
Session = session;
@@ -392,6 +395,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
}
_subscriptions.Clear();
+ // Abort any in-flight reconnect attempts before touching the session — BeginReconnect's
+ // retry loop holds a reference to the current session and would fight Session.CloseAsync
+ // if left spinning.
+ try { _reconnectHandler?.CancelReconnect(); } catch { }
+ _reconnectHandler?.Dispose();
+ _reconnectHandler = null;
+
if (_keepAliveHandler is not null && Session is not null)
{
try { Session.KeepAlive -= _keepAliveHandler; } catch { }
@@ -945,6 +955,76 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
return [new HostConnectivityStatus(HostName, _hostState, _hostStateChangedUtc)];
}
+ /// <summary>
+ /// Session keep-alive handler. On a healthy ping, moves HostState to Running
+ /// (typical bounce after a transient network blip). On a bad ping, moves HostState to
+ /// Stopped, flags the event with <c>CancelKeepAlive</c> and, unless one is already
+ /// active, starts a <see cref="SessionReconnectHandler"/> with
+ /// <see cref="OnReconnectComplete"/> registered as its completion callback.
+ /// </summary>
+ /// <param name="sender">Session that raised the event; handed to the reconnect handler.</param>
+ /// <param name="e">Keep-alive result; its <c>Status</c> decides healthy vs. bad.</param>
+ private void OnKeepAlive(ISession sender, KeepAliveEventArgs e)
+ {
+     if (!ServiceResult.IsBad(e.Status))
+     {
+         TransitionTo(HostState.Running);
+         return;
+     }
+
+     TransitionTo(HostState.Stopped);
+     // Recovery belongs to the reconnect handler from here on. CancelKeepAlive is the SDK's
+     // signal to stop sending further keep-alive requests on this (bad) session.
+     e.CancelKeepAlive = true;
+     // Kick off the reconnect loop exactly once per drop; OnReconnectComplete clears the field.
+     if (_reconnectHandler is not null) return;
+
+     // NOTE(review): telemetry is passed as null — confirm the SDK tolerates a null context.
+     _reconnectHandler = new SessionReconnectHandler(telemetry: null!,
+         reconnectAbort: false, maxReconnectPeriod: (int)TimeSpan.FromMinutes(2).TotalMilliseconds);
+     _ = _reconnectHandler.BeginReconnect(sender, (int)_options.ReconnectPeriod.TotalMilliseconds, OnReconnectComplete);
+ }
+
+ /// <summary>
+ /// Called by <see cref="SessionReconnectHandler"/> when its retry loop has either
+ /// landed a session or stopped. Reads the session off <c>handler.Session</c>; when it is
+ /// a different object than the current one, moves the keep-alive hook onto it, publishes
+ /// it as <c>Session</c> and disposes the replaced session. The handler is torn down in
+ /// every case. Subscriptions are not migrated here; presumably the SDK does that via
+ /// <c>TransferSubscriptions</c> during the reconnect itself (TODO confirm).
+ /// </summary>
+ /// <param name="sender">Completing handler; anything but the active handler is ignored.</param>
+ /// <param name="e">Not used.</param>
+ private void OnReconnectComplete(object? sender, EventArgs e)
+ {
+     // A handler that shutdown (or an earlier completion) already discarded must not be
+     // allowed to swap sessions behind the driver's back.
+     if (sender is not SessionReconnectHandler handler || !ReferenceEquals(handler, _reconnectHandler)) return;
+     var newSession = handler.Session;
+     var oldSession = Session;
+
+     // Null = no session was landed: keep the current session and its hook so a later bad
+     // keep-alive can start a fresh attempt. Same object = reactivated in place: nothing to rewire.
+     if (newSession is not null && !ReferenceEquals(oldSession, newSession))
+     {
+         if (_keepAliveHandler is not null)
+         {
+             try { if (oldSession is not null) oldSession.KeepAlive -= _keepAliveHandler; } catch { }
+             newSession.KeepAlive += _keepAliveHandler;
+         }
+         Session = newSession;
+         // Best-effort release of the superseded session; a failure here must not block recovery.
+         try { oldSession?.Dispose(); } catch { }
+     }
+
+     handler.Dispose();
+     _reconnectHandler = null;
+     if (newSession is null) return;
+
+     TransitionTo(HostState.Running);
+     _health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
+ }
+
private void TransitionTo(HostState newState)
{
HostState old;
diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientReconnectTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientReconnectTests.cs
new file mode 100644
index 0000000..99cd182
--- /dev/null
+++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientReconnectTests.cs
@@ -0,0 +1,36 @@
+using Shouldly;
+using Xunit;
+
+namespace ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests;
+
+/// <summary>
+/// Scaffold tests for the reconnect wiring of <see cref="OpcUaClientDriver"/>. Wire-level
+/// disconnect-reconnect-resume coverage against a live upstream server lands with the
+/// in-process fixture — too much machinery for a unit-test-only lane.
+/// </summary>
+[Trait("Category", "Unit")]
+public sealed class OpcUaClientReconnectTests
+{
+    /// <summary>Default options carry a five-second reconnect period.</summary>
+    [Fact]
+    public void Default_ReconnectPeriod_matches_driver_specs_5_seconds() =>
+        new OpcUaClientDriverOptions().ReconnectPeriod.ShouldBe(TimeSpan.FromSeconds(5));
+
+    /// <summary>An explicitly assigned reconnect period is read back unchanged.</summary>
+    [Fact]
+    public void Options_ReconnectPeriod_is_configurable_for_aggressive_or_relaxed_retry()
+    {
+        var halfSecond = TimeSpan.FromMilliseconds(500);
+        var configured = new OpcUaClientDriverOptions { ReconnectPeriod = halfSecond };
+        configured.ReconnectPeriod.ShouldBe(halfSecond);
+    }
+
+    [Fact]
+    public void Driver_starts_with_no_reconnect_handler_active_pre_init()
+    {
+        // The reconnect handler is created lazily on a bad keep-alive, so none should exist
+        // pre-init. The field is not inspected here; only the pre-init health state is asserted.
+        using var driver = new OpcUaClientDriver(new OpcUaClientDriverOptions(), "opcua-reconnect");
+        driver.GetHealth().State.ShouldBe(Core.Abstractions.DriverState.Unknown);
+    }
+}