Phase 3 PR 74 -- OPC UA Client transparent reconnect via SessionReconnectHandler #73
@@ -61,6 +61,12 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
|
||||
private bool _disposed;
|
||||
/// <summary>URL of the endpoint the driver actually connected to. Exposed via <see cref="HostName"/>.</summary>
|
||||
private string? _connectedEndpointUrl;
|
||||
/// <summary>
|
||||
/// SDK-provided reconnect handler that owns the retry loop + session-transfer machinery
|
||||
/// when the session's keep-alive channel reports a bad status. Null outside the
|
||||
/// reconnecting window; constructed lazily inside the keep-alive handler.
|
||||
/// </summary>
|
||||
private SessionReconnectHandler? _reconnectHandler;
|
||||
|
||||
public string DriverInstanceId => driverInstanceId;
|
||||
public string DriverType => "OpcUaClient";
|
||||
@@ -104,16 +110,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
|
||||
"Tried:\n " + string.Join("\n ", attemptErrors),
|
||||
attemptErrors.Select(e => new InvalidOperationException(e)));
|
||||
|
||||
// Wire the session's keep-alive channel into HostState. OPC UA keep-alives are
|
||||
// authoritative for session liveness: the SDK pings on KeepAliveInterval and sets
|
||||
// KeepAliveStopped when N intervals elapse without a response. That's strictly
|
||||
// better than a driver-side polling probe — no extra round-trip, no duplicate
|
||||
// semantic.
|
||||
_keepAliveHandler = (_, e) =>
|
||||
{
|
||||
var healthy = !ServiceResult.IsBad(e.Status);
|
||||
TransitionTo(healthy ? HostState.Running : HostState.Stopped);
|
||||
};
|
||||
// Wire the session's keep-alive channel into HostState + the reconnect trigger.
|
||||
// OPC UA keep-alives are authoritative for session liveness: the SDK pings on
|
||||
// KeepAliveInterval and sets KeepAliveStopped when N intervals elapse without a
|
||||
// response. On a bad keep-alive the driver spins up a SessionReconnectHandler
|
||||
// which transparently retries + swaps the underlying session. Subscriptions move
|
||||
// via TransferSubscriptions so local MonitoredItem handles stay valid.
|
||||
_keepAliveHandler = OnKeepAlive;
|
||||
session.KeepAlive += _keepAliveHandler;
|
||||
|
||||
Session = session;
|
||||
@@ -392,6 +395,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
|
||||
}
|
||||
_subscriptions.Clear();
|
||||
|
||||
// Abort any in-flight reconnect attempts before touching the session — BeginReconnect's
|
||||
// retry loop holds a reference to the current session and would fight Session.CloseAsync
|
||||
// if left spinning.
|
||||
try { _reconnectHandler?.CancelReconnect(); } catch { }
|
||||
_reconnectHandler?.Dispose();
|
||||
_reconnectHandler = null;
|
||||
|
||||
if (_keepAliveHandler is not null && Session is not null)
|
||||
{
|
||||
try { Session.KeepAlive -= _keepAliveHandler; } catch { }
|
||||
@@ -945,6 +955,76 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
|
||||
return [new HostConnectivityStatus(HostName, _hostState, _hostStateChangedUtc)];
|
||||
}
|
||||
|
||||
/// <summary>
/// Session keep-alive handler. A healthy ping transitions HostState back to Running
/// (covers the common bounce after a transient network blip). A bad ping transitions
/// to Stopped and starts the SDK's <see cref="SessionReconnectHandler"/>, which owns
/// the retry cadence and fires <see cref="OnReconnectComplete"/> when it lands a new
/// session.
/// </summary>
private void OnKeepAlive(ISession sender, KeepAliveEventArgs e)
{
    if (!ServiceResult.IsBad(e.Status))
    {
        TransitionTo(HostState.Running);
        return;
    }

    TransitionTo(HostState.Stopped);

    // Kick off the SDK's reconnect loop exactly once per drop; it is torn down in
    // OnReconnectComplete. NOTE(review): keep-alives arrive on an SDK callback thread —
    // if two bad pings can be delivered concurrently, this check-then-assign needs a
    // lock; confirm the SDK's keep-alive delivery guarantees.
    if (_reconnectHandler is not null) return;

    // NOTE(review): telemetry is force-nulled (`null!`) — confirm the SDK tolerates a
    // null telemetry context here, or supply a real one.
    _reconnectHandler = new SessionReconnectHandler(telemetry: null!,
        reconnectAbort: false,
        maxReconnectPeriod: (int)TimeSpan.FromMinutes(2).TotalMilliseconds);

    // The returned reconnect state is intentionally unused — a discard makes that
    // explicit instead of leaving a dead local.
    _ = _reconnectHandler.BeginReconnect(
        sender,
        (int)_options.ReconnectPeriod.TotalMilliseconds,
        OnReconnectComplete);
}
|
||||
|
||||
/// <summary>
/// Completion callback for <see cref="SessionReconnectHandler"/>: fires once the SDK's
/// retry loop has either swapped in a new session or given up. Moves the keep-alive
/// hook from the old session onto the replacement, publishes the replacement via
/// <c>Session</c>, tears down the handler, and — when a session came back — flips
/// HostState to Running and marks the driver healthy. Subscription migration is
/// handled inside the SDK via <c>TransferSubscriptions</c> when
/// <see cref="Session.TransferSubscriptionsOnReconnect"/> is <c>true</c> (the default),
/// so local MonitoredItem handles stay valid.
/// </summary>
private void OnReconnectComplete(object? sender, EventArgs e)
{
    if (sender is not SessionReconnectHandler handler) return;

    var replacement = handler.Session;
    var previous = Session;

    // Re-home the keep-alive hook onto the replacement session — without this the
    // next drop would never trigger another reconnect attempt.
    if (_keepAliveHandler is not null)
    {
        if (previous is not null)
        {
            try { previous.KeepAlive -= _keepAliveHandler; } catch { }
        }

        if (replacement is not null)
        {
            replacement.KeepAlive += _keepAliveHandler;
        }
    }

    Session = replacement;
    _reconnectHandler?.Dispose();
    _reconnectHandler = null;

    // A non-null replacement means the retry loop landed a connection; surface the
    // recovery so downstream consumers see Running again.
    if (replacement is not null)
    {
        TransitionTo(HostState.Running);
        _health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
    }
}
|
||||
|
||||
private void TransitionTo(HostState newState)
|
||||
{
|
||||
HostState old;
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests;
|
||||
|
||||
/// <summary>
/// Scaffold tests for <see cref="SessionReconnectHandler"/> wiring. Full wire-level
/// disconnect/reconnect/resume coverage needs a live upstream server and lands with
/// the in-process fixture; this unit lane only pins the option surface and the
/// pre-init driver shape.
/// </summary>
[Trait("Category", "Unit")]
public sealed class OpcUaClientReconnectTests
{
    [Fact]
    public void Default_ReconnectPeriod_matches_driver_specs_5_seconds() =>
        new OpcUaClientDriverOptions().ReconnectPeriod.ShouldBe(TimeSpan.FromSeconds(5));

    [Fact]
    public void Options_ReconnectPeriod_is_configurable_for_aggressive_or_relaxed_retry()
    {
        var configured = new OpcUaClientDriverOptions
        {
            ReconnectPeriod = TimeSpan.FromMilliseconds(500),
        };

        configured.ReconnectPeriod.ShouldBe(TimeSpan.FromMilliseconds(500));
    }

    [Fact]
    public void Driver_starts_with_no_reconnect_handler_active_pre_init()
    {
        // The reconnect handler is lazy — constructed only when a bad keep-alive fires.
        // Pre-init there is no session to reconnect, so nothing should have been spun
        // up (the lifecycle-shape suite catches accidental construction indirectly).
        using var driver = new OpcUaClientDriver(new OpcUaClientDriverOptions(), "opcua-reconnect");

        driver.GetHealth().State.ShouldBe(Core.Abstractions.DriverState.Unknown);
    }
}
|
||||
Reference in New Issue
Block a user