Merge pull request 'Phase 3 PR 74 -- OPC UA Client transparent reconnect via SessionReconnectHandler' (#73) from phase-3-pr74-opcua-client-session-reconnect into v2

This commit was merged in pull request #73.
This commit is contained in:
2026-04-19 02:06:48 -04:00
2 changed files with 126 additions and 10 deletions

View File

@@ -61,6 +61,12 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
private bool _disposed;
/// <summary>URL of the endpoint the driver actually connected to. Exposed via <see cref="HostName"/>.</summary>
private string? _connectedEndpointUrl;
/// <summary>
/// SDK-provided reconnect handler that owns the retry loop + session-transfer machinery
/// when the session's keep-alive channel reports a bad status. Null outside the
/// reconnecting window; constructed lazily inside the keep-alive handler.
/// </summary>
private SessionReconnectHandler? _reconnectHandler;
public string DriverInstanceId => driverInstanceId;
public string DriverType => "OpcUaClient";
@@ -104,16 +110,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
"Tried:\n " + string.Join("\n ", attemptErrors),
attemptErrors.Select(e => new InvalidOperationException(e)));
// Wire the session's keep-alive channel into HostState. OPC UA keep-alives are
// authoritative for session liveness: the SDK pings on KeepAliveInterval and sets
// KeepAliveStopped when N intervals elapse without a response. That's strictly
// better than a driver-side polling probe — no extra round-trip, no duplicate
// semantic.
_keepAliveHandler = (_, e) =>
{
var healthy = !ServiceResult.IsBad(e.Status);
TransitionTo(healthy ? HostState.Running : HostState.Stopped);
};
// Wire the session's keep-alive channel into HostState + the reconnect trigger.
// OPC UA keep-alives are authoritative for session liveness: the SDK pings on
// KeepAliveInterval and sets KeepAliveStopped when N intervals elapse without a
// response. On a bad keep-alive the driver spins up a SessionReconnectHandler
// which transparently retries + swaps the underlying session. Subscriptions move
// via TransferSubscriptions so local MonitoredItem handles stay valid.
_keepAliveHandler = OnKeepAlive;
session.KeepAlive += _keepAliveHandler;
Session = session;
@@ -392,6 +395,13 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
}
_subscriptions.Clear();
// Abort any in-flight reconnect attempts before touching the session — BeginReconnect's
// retry loop holds a reference to the current session and would fight Session.CloseAsync
// if left spinning.
try { _reconnectHandler?.CancelReconnect(); } catch { }
_reconnectHandler?.Dispose();
_reconnectHandler = null;
if (_keepAliveHandler is not null && Session is not null)
{
try { Session.KeepAlive -= _keepAliveHandler; } catch { }
@@ -945,6 +955,76 @@ public sealed class OpcUaClientDriver(OpcUaClientDriverOptions options, string d
return [new HostConnectivityStatus(HostName, _hostState, _hostStateChangedUtc)];
}
/// <summary>
/// Session keep-alive handler. On a healthy ping, bumps HostState back to Running
/// (typical bounce after a transient network blip). On a bad ping, starts the SDK's
/// <see cref="SessionReconnectHandler"/> which retries on the configured period +
/// fires <see cref="OnReconnectComplete"/> when it lands a new session.
/// </summary>
private void OnKeepAlive(ISession sender, KeepAliveEventArgs e)
{
if (!ServiceResult.IsBad(e.Status))
{
TransitionTo(HostState.Running);
return;
}
TransitionTo(HostState.Stopped);
// Kick off the SDK's reconnect loop exactly once per drop. The handler handles its
// own retry cadence via ReconnectPeriod; we tear it down in OnReconnectComplete.
if (_reconnectHandler is not null) return;
_reconnectHandler = new SessionReconnectHandler(telemetry: null!,
reconnectAbort: false,
maxReconnectPeriod: (int)TimeSpan.FromMinutes(2).TotalMilliseconds);
var state = _reconnectHandler.BeginReconnect(
sender,
(int)_options.ReconnectPeriod.TotalMilliseconds,
OnReconnectComplete);
}
/// <summary>
/// Called by <see cref="SessionReconnectHandler"/> when its retry loop has either
/// successfully swapped to a new session or given up. Reads the new session off
/// <c>handler.Session</c>, unwires the old keep-alive hook, rewires for the new
/// one, and tears down the handler. Subscription migration is already handled
/// inside the SDK via <c>TransferSubscriptions</c> (the SDK calls it automatically
/// when <see cref="Session.TransferSubscriptionsOnReconnect"/> is <c>true</c>,
/// which is the default).
/// </summary>
private void OnReconnectComplete(object? sender, EventArgs e)
{
if (sender is not SessionReconnectHandler handler) return;
var newSession = handler.Session;
var oldSession = Session;
// Rewire keep-alive onto the new session — without this the next drop wouldn't
// trigger another reconnect attempt.
if (oldSession is not null && _keepAliveHandler is not null)
{
try { oldSession.KeepAlive -= _keepAliveHandler; } catch { }
}
if (newSession is not null && _keepAliveHandler is not null)
{
newSession.KeepAlive += _keepAliveHandler;
}
Session = newSession;
_reconnectHandler?.Dispose();
_reconnectHandler = null;
// Whether the reconnect actually succeeded depends on whether the session is
// non-null + connected. When it succeeded, flip back to Running so downstream
// consumers see recovery.
if (newSession is not null)
{
TransitionTo(HostState.Running);
_health = new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
}
}
private void TransitionTo(HostState newState)
{
HostState old;

View File

@@ -0,0 +1,36 @@
using Shouldly;
using Xunit;
namespace ZB.MOM.WW.OtOpcUa.Driver.OpcUaClient.Tests;
/// <summary>
/// Scaffold tests for <see cref="SessionReconnectHandler"/> wiring. Wire-level
/// disconnect-reconnect-resume coverage against a live upstream server lands with the
/// in-process fixture — too much machinery for a unit-test-only lane.
/// </summary>
[Trait("Category", "Unit")]
public sealed class OpcUaClientReconnectTests
{
[Fact]
public void Default_ReconnectPeriod_matches_driver_specs_5_seconds()
{
new OpcUaClientDriverOptions().ReconnectPeriod.ShouldBe(TimeSpan.FromSeconds(5));
}
[Fact]
public void Options_ReconnectPeriod_is_configurable_for_aggressive_or_relaxed_retry()
{
var opts = new OpcUaClientDriverOptions { ReconnectPeriod = TimeSpan.FromMilliseconds(500) };
opts.ReconnectPeriod.ShouldBe(TimeSpan.FromMilliseconds(500));
}
[Fact]
public void Driver_starts_with_no_reconnect_handler_active_pre_init()
{
// The reconnect handler is lazy — spun up only when a bad keep-alive fires. Pre-init
// there's no session to reconnect, so the field must be null (indirectly verified by
// the lifecycle-shape test suite catching any accidental construction).
using var drv = new OpcUaClientDriver(new OpcUaClientDriverOptions(), "opcua-reconnect");
drv.GetHealth().State.ShouldBe(Core.Abstractions.DriverState.Unknown);
}
}