diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs index 1167303..1bac6a2 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient/OpcUaClientDriver.cs @@ -61,6 +61,12 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d private bool _disposed; /// URL of the endpoint the driver actually connected to. Exposed via . private string? _connectedEndpointUrl; + /// + /// SDK-provided reconnect handler that owns the retry loop + session-transfer machinery + /// when the session's keep-alive channel reports a bad status. Null outside the + /// reconnecting window; constructed lazily inside the keep-alive handler. + /// + private SessionReconnectHandler? _reconnectHandler; public string DriverInstanceId => driverInstanceId; public string DriverType => "OpcUaClient"; @@ -104,16 +110,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d "Tried:\n " + string.Join("\n ", attemptErrors), attemptErrors.Select(e => new InvalidOperationException(e))); - // Wire the session's keep-alive channel into HostState. OPC UA keep-alives are - // authoritative for session liveness: the SDK pings on KeepAliveInterval and sets - // KeepAliveStopped when N intervals elapse without a response. That's strictly - // better than a driver-side polling probe — no extra round-trip, no duplicate - // semantic. - _keepAliveHandler = (_, e) => - { - var healthy = !ServiceResult.IsBad(e.Status); - TransitionTo(healthy ? HostState.Running : HostState.Stopped); - }; + // Wire the session's keep-alive channel into HostState + the reconnect trigger. + // OPC UA keep-alives are authoritative for session liveness: the SDK pings on + // KeepAliveInterval and sets KeepAliveStopped when N intervals elapse without a + // response. On a bad keep-alive the driver spins up a SessionReconnectHandler + // which transparently retries + swaps the underlying session. Subscriptions move + // via TransferSubscriptions so local MonitoredItem handles stay valid. + _keepAliveHandler = OnKeepAlive; session.KeepAlive += _keepAliveHandler; Session = session; @@ -392,6 +395,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d } _subscriptions.Clear(); + // Abort any in-flight reconnect attempts before touching the session — BeginReconnect's + // retry loop holds a reference to the current session and would fight Session.CloseAsync + // if left spinning. + try { _reconnectHandler?.CancelReconnect(); } catch { } + _reconnectHandler?.Dispose(); + _reconnectHandler = null; + if (_keepAliveHandler is not null && Session is not null) { try { Session.KeepAlive -= _keepAliveHandler; } catch { } @@ -945,6 +955,76 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d return [new HostConnectivityStatus(HostName, _hostState, _hostStateChangedUtc)]; } + /// + /// Session keep-alive handler. On a healthy ping, bumps HostState back to Running + /// (typical bounce after a transient network blip). On a bad ping, starts the SDK's + /// which retries on the configured period + + /// fires when it lands a new session. + /// + private void OnKeepAlive(ISession sender, KeepAliveEventArgs e) + { + if (!ServiceResult.IsBad(e.Status)) + { + TransitionTo(HostState.Running); + return; + } + + TransitionTo(HostState.Stopped); + + // Kick off the SDK's reconnect loop exactly once per drop. The handler handles its + // own retry cadence via ReconnectPeriod; we tear it down in OnReconnectComplete. + if (_reconnectHandler is not null) return; + + _reconnectHandler = new SessionReconnectHandler(telemetry: null!, + reconnectAbort: false, + maxReconnectPeriod: (int)TimeSpan.FromMinutes(2).TotalMilliseconds); + + var state = _reconnectHandler.BeginReconnect( + sender, + (int)_options.ReconnectPeriod.TotalMilliseconds, + OnReconnectComplete); + } + + /// + /// Called by when its retry loop has either + /// successfully swapped to a new session or given up. Reads the new session off + /// handler.Session, unwires the old keep-alive hook, rewires for the new + /// one, and tears down the handler. Subscription migration is already handled + /// inside the SDK via TransferSubscriptions (the SDK calls it automatically + /// when is true, + /// which is the default). + /// + private void OnReconnectComplete(object? sender, EventArgs e) + { + if (sender is not SessionReconnectHandler handler) return; + var newSession = handler.Session; + var oldSession = Session; + + // Rewire keep-alive onto the new session — without this the next drop wouldn't + // trigger another reconnect attempt. + if (oldSession is not null && _keepAliveHandler is not null) + { + try { oldSession.KeepAlive -= _keepAliveHandler; } catch { } + } + if (newSession is not null && _keepAliveHandler is not null) + { + newSession.KeepAlive += _keepAliveHandler; + } + + Session = newSession; + _reconnectHandler?.Dispose(); + _reconnectHandler = null; + + // Whether the reconnect actually succeeded depends on whether the session is + // non-null + connected. When it succeeded, flip back to Running so downstream + // consumers see recovery. + if (newSession is not null) + { + TransitionTo(HostState.Running); + _health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null); + } + } + private void TransitionTo(HostState newState) { HostState old; diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientReconnectTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientReconnectTests.cs new file mode 100644 index 0000000..99cd182 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests/OpcUaClientReconnectTests.cs @@ -0,0 +1,36 @@ +using Shouldly; +using Xunit; + +namespace ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests; + +/// +/// Scaffold tests for wiring. Wire-level +/// disconnect-reconnect-resume coverage against a live upstream server lands with the +/// in-process fixture — too much machinery for a unit-test-only lane. +/// +[Trait("Category", "Unit")] +public sealed class OpcUaClientReconnectTests +{ + [Fact] + public void Default_ReconnectPeriod_matches_driver_specs_5_seconds() + { + new OpcUaClientDriverOptions().ReconnectPeriod.ShouldBe(TimeSpan.FromSeconds(5)); + } + + [Fact] + public void Options_ReconnectPeriod_is_configurable_for_aggressive_or_relaxed_retry() + { + var opts = new OpcUaClientDriverOptions { ReconnectPeriod = TimeSpan.FromMilliseconds(500) }; + opts.ReconnectPeriod.ShouldBe(TimeSpan.FromMilliseconds(500)); + } + + [Fact] + public void Driver_starts_with_no_reconnect_handler_active_pre_init() + { + // The reconnect handler is lazy — spun up only when a bad keep-alive fires. Pre-init + // there's no session to reconnect, so the field must be null (indirectly verified by + // the lifecycle-shape test suite catching any accidental construction). + using var drv = new OpcUaClientDriver(new OpcUaClientDriverOptions(), "opcua-reconnect"); + drv.GetHealth().State.ShouldBe(Core.Abstractions.DriverState.Unknown); + } +}