From 6cf20131fe3c7a90b6558bfff1459723f8eb4999 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Fri, 24 Apr 2026 23:53:26 -0400 Subject: [PATCH] =?UTF-8?q?Task=20#139=20=E2=80=94=20Modbus=20connection-l?= =?UTF-8?q?ayer=20config=20knobs=20(keep-alive=20/=20idle=20/=20reconnect)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Promotes the previously hardcoded transport-layer settings to ModbusDriverOptions so users can tune them through DriverConfig JSON without recompiling. Three new option groups: 1. KeepAlive (ModbusKeepAliveOptions): Enabled / Time / Interval / RetryCount. Defaults preserve the historical PR 53 wire output exactly (Enabled=true, Time=30s, Interval=10s, RetryCount=3). Set Enabled=false for PLCs that reject SO_KEEPALIVE. 2. IdleDisconnectTimeout (TimeSpan?): when set, the transport tracks last-PDU- success and proactively closes + reconnects on the next request after the threshold. Defends against silent NAT / firewall socket reaping. Default null = disabled (no behaviour change). 3. Reconnect (ModbusReconnectOptions): InitialDelay / MaxDelay / BackoffMultiplier for the post-drop reconnect loop. Defaults (InitialDelay=0, MaxDelay=30s, Multiplier=2.0) preserve the historical immediate-retry behaviour for the first attempt and add geometric backoff only if the reconnect itself fails. Capped at 10 attempts before propagating. ModbusTcpTransport ctor extended with optional keepAlive / idleDisconnect / reconnect parameters; existing 4-arg call sites continue to compile. Factory DTO gains parallel KeepAlive / IdleDisconnectMs / Reconnect fields with default-aware binding. 5 new ModbusConnectionOptionsTests covering the default-preservation contract (every default field matches pre-#139) and the JSON-binding round-trip for each knob group. Existing 204 unit tests still green. --- .../ModbusDriver.cs | 6 +- .../ModbusDriverFactoryExtensions.cs | 34 ++++++ .../ModbusDriverOptions.cs | 48 +++++++++ .../ModbusTcpTransport.cs | 87 +++++++++++++-- .../ModbusConnectionOptionsTests.cs | 101 ++++++++++++++++++ 5 files changed, 264 insertions(+), 12 deletions(-) create mode 100644 tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/ModbusConnectionOptionsTests.cs diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriver.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriver.cs index 5a3979b..7bf2159 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriver.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriver.cs @@ -47,7 +47,11 @@ public sealed class ModbusDriver _options = options; _driverInstanceId = driverInstanceId; _transportFactory = transportFactory - ?? (o => new ModbusTcpTransport(o.Host, o.Port, o.Timeout, o.AutoReconnect)); + ?? (o => new ModbusTcpTransport( + o.Host, o.Port, o.Timeout, o.AutoReconnect, + keepAlive: o.KeepAlive, + idleDisconnect: o.IdleDisconnectTimeout, + reconnect: o.Reconnect)); _poll = new PollGroupEngine( reader: ReadAsync, onChange: (handle, tagRef, snapshot) => diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverFactoryExtensions.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverFactoryExtensions.cs index 3decf7c..9a61139 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverFactoryExtensions.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverFactoryExtensions.cs @@ -52,6 +52,20 @@ public static class ModbusDriverFactoryExtensions Timeout = TimeSpan.FromMilliseconds(dto.Probe?.TimeoutMs ?? 2_000), ProbeAddress = dto.Probe?.ProbeAddress ?? 0, }, + KeepAlive = dto.KeepAlive is null ? new ModbusKeepAliveOptions() : new ModbusKeepAliveOptions + { + Enabled = dto.KeepAlive.Enabled ?? true, + Time = TimeSpan.FromMilliseconds(dto.KeepAlive.TimeMs ?? 30_000), + Interval = TimeSpan.FromMilliseconds(dto.KeepAlive.IntervalMs ?? 10_000), + RetryCount = dto.KeepAlive.RetryCount ?? 3, + }, + IdleDisconnectTimeout = dto.IdleDisconnectMs is { } ms ? TimeSpan.FromMilliseconds(ms) : null, + Reconnect = dto.Reconnect is null ? new ModbusReconnectOptions() : new ModbusReconnectOptions + { + InitialDelay = TimeSpan.FromMilliseconds(dto.Reconnect.InitialDelayMs ?? 0), + MaxDelay = TimeSpan.FromMilliseconds(dto.Reconnect.MaxDelayMs ?? 30_000), + BackoffMultiplier = dto.Reconnect.BackoffMultiplier ?? 2.0, + }, }; return new ModbusDriver(options, driverInstanceId); @@ -136,6 +150,26 @@ public static class ModbusDriverFactoryExtensions public bool? AutoReconnect { get; init; } public List? Tags { get; init; } public ModbusProbeDto? Probe { get; init; } + + // #139 connection-layer knobs. + public ModbusKeepAliveDto? KeepAlive { get; init; } + public int? IdleDisconnectMs { get; init; } + public ModbusReconnectDto? Reconnect { get; init; } + } + + internal sealed class ModbusKeepAliveDto + { + public bool? Enabled { get; init; } + public int? TimeMs { get; init; } + public int? IntervalMs { get; init; } + public int? RetryCount { get; init; } + } + + internal sealed class ModbusReconnectDto + { + public int? InitialDelayMs { get; init; } + public int? MaxDelayMs { get; init; } + public double? BackoffMultiplier { get; init; } } internal sealed class ModbusTagDto diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverOptions.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverOptions.cs index c202acd..8bafe36 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverOptions.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusDriverOptions.cs @@ -56,6 +56,54 @@ public sealed class ModbusDriverOptions /// connection error to the caller even though the PLC is up. /// public bool AutoReconnect { get; init; } = true; + + /// + /// Per-driver TCP keep-alive settings. Defaults are the historical PR 53 values + /// (KeepAliveEnabled=true, Time=30s, Interval=10s, RetryCount=3) so existing + /// deployments see no behaviour change. Set + /// to false to disable OS-level keep-alive entirely (some PLCs don't tolerate it). + /// + public ModbusKeepAliveOptions KeepAlive { get; init; } = new(); + + /// + /// If non-null, the transport tracks the time of the last successful PDU and proactively + /// closes + reconnects the socket on the next request after this idle threshold elapses. + /// Defends against silent NAT / firewall reaping of long-idle sockets — the explicit + /// close-and-reopen turns the failure mode from "first-send-after-X-minutes errors" into + /// "first-send-after-X-minutes pays one reconnect cost." + /// + public TimeSpan? IdleDisconnectTimeout { get; init; } = null; + + /// + /// Reconnect backoff settings used by the auto-reconnect path. Default is no backoff + /// (immediate retry — preserves the historical pre-#139 behaviour). Set to a non-zero + /// to sleep before the first reconnect + /// attempt; caps the geometric growth. + /// + public ModbusReconnectOptions Reconnect { get; init; } = new(); +} + +/// OS-level TCP keep-alive knobs. Set =false to skip entirely. +public sealed class ModbusKeepAliveOptions +{ + public bool Enabled { get; init; } = true; + /// Idle time before the first probe (seconds, mapped to TcpKeepAliveTime). + public TimeSpan Time { get; init; } = TimeSpan.FromSeconds(30); + /// Interval between probes once started (seconds, mapped to TcpKeepAliveInterval). + public TimeSpan Interval { get; init; } = TimeSpan.FromSeconds(10); + /// Probes before declaring the socket dead (mapped to TcpKeepAliveRetryCount). + public int RetryCount { get; init; } = 3; +} + +/// Geometric-backoff settings for the post-drop reconnect loop. +public sealed class ModbusReconnectOptions +{ + /// Delay before the first reconnect attempt. Default zero = immediate. + public TimeSpan InitialDelay { get; init; } = TimeSpan.Zero; + /// Upper bound on the geometric backoff sequence. + public TimeSpan MaxDelay { get; init; } = TimeSpan.FromSeconds(30); + /// Multiplier applied each retry. Default 2.0 doubles each step. + public double BackoffMultiplier { get; init; } = 2.0; } public sealed class ModbusProbeOptions diff --git a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusTcpTransport.cs b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusTcpTransport.cs index 662e232..b7ccbdc 100644 --- a/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusTcpTransport.cs +++ b/src/ZB.MOM.WW.OtOpcUa.Driver.Modbus/ModbusTcpTransport.cs @@ -30,18 +30,29 @@ public sealed class ModbusTcpTransport : IModbusTransport private readonly int _port; private readonly TimeSpan _timeout; private readonly bool _autoReconnect; + private readonly ModbusKeepAliveOptions _keepAlive; + private readonly TimeSpan? _idleDisconnect; + private readonly ModbusReconnectOptions _reconnect; private readonly SemaphoreSlim _gate = new(1, 1); private TcpClient? _client; private NetworkStream? _stream; private ushort _nextTx; private bool _disposed; + private DateTime _lastSuccessUtc = DateTime.UtcNow; - public ModbusTcpTransport(string host, int port, TimeSpan timeout, bool autoReconnect = true) + public ModbusTcpTransport( + string host, int port, TimeSpan timeout, bool autoReconnect = true, + ModbusKeepAliveOptions? keepAlive = null, + TimeSpan? idleDisconnect = null, + ModbusReconnectOptions? reconnect = null) { _host = host; _port = port; _timeout = timeout; _autoReconnect = autoReconnect; + _keepAlive = keepAlive ?? new ModbusKeepAliveOptions(); + _idleDisconnect = idleDisconnect; + _reconnect = reconnect ?? new ModbusReconnectOptions(); } public async Task ConnectAsync(CancellationToken ct) @@ -57,12 +68,13 @@ public sealed class ModbusTcpTransport : IModbusTransport var target = ipv4 ?? (addresses.Length > 0 ? addresses[0] : System.Net.IPAddress.Loopback); _client = new TcpClient(target.AddressFamily); - EnableKeepAlive(_client); + EnableKeepAlive(_client, _keepAlive); using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct); cts.CancelAfter(_timeout); await _client.ConnectAsync(target, _port, cts.Token).ConfigureAwait(false); _stream = _client.GetStream(); + _lastSuccessUtc = DateTime.UtcNow; } /// @@ -73,14 +85,15 @@ public sealed class ModbusTcpTransport : IModbusTransport /// sandboxes don't expose the fine-grained timing levers — the driver still works, /// application-level probe still detects problems). /// - private static void EnableKeepAlive(TcpClient client) + private static void EnableKeepAlive(TcpClient client, ModbusKeepAliveOptions opts) { + if (!opts.Enabled) return; try { client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true); - client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveTime, 30); - client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveInterval, 10); - client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveRetryCount, 3); + client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveTime, (int)opts.Time.TotalSeconds); + client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveInterval, (int)opts.Interval.TotalSeconds); + client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveRetryCount, opts.RetryCount); } catch { /* best-effort; older OSes may not expose the granular knobs */ } } @@ -93,17 +106,32 @@ public sealed class ModbusTcpTransport : IModbusTransport await _gate.WaitAsync(ct).ConfigureAwait(false); try { + // Proactive idle-disconnect: if the socket has been quiet longer than the configured + // threshold, tear it down + reconnect before this PDU lands. Defends against silent + // NAT / firewall reaping where the socket looks alive locally but the upstream side + // dropped it minutes ago. + if (_idleDisconnect.HasValue && DateTime.UtcNow - _lastSuccessUtc > _idleDisconnect.Value) + { + await TearDownAsync().ConfigureAwait(false); + await ConnectWithBackoffAsync(ct).ConfigureAwait(false); + } + try { - return await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false); + var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false); + _lastSuccessUtc = DateTime.UtcNow; + return result; } catch (Exception ex) when (_autoReconnect && IsSocketLevelFailure(ex)) { - // Mid-transaction drop: tear down the dead socket, reconnect, resend. Single - // retry — if it fails again, let it propagate so health/status reflect reality. + // Mid-transaction drop: tear down the dead socket, reconnect (with backoff if + // configured), resend. Single retry — if it fails again, let it propagate so + // health/status reflect reality. await TearDownAsync().ConfigureAwait(false); - await ConnectAsync(ct).ConfigureAwait(false); - return await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false); + await ConnectWithBackoffAsync(ct).ConfigureAwait(false); + var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false); + _lastSuccessUtc = DateTime.UtcNow; + return result; } } finally @@ -112,6 +140,43 @@ public sealed class ModbusTcpTransport : IModbusTransport } } + /// + /// Connect attempt with the configured geometric backoff. The first attempt fires after + /// (default zero — immediate); each + /// subsequent attempt sleeps for the previous delay times BackoffMultiplier, + /// capped at MaxDelay. Caller's cancellation token aborts the loop. + /// + private async Task ConnectWithBackoffAsync(CancellationToken ct) + { + var delay = _reconnect.InitialDelay; + var attempt = 0; + while (true) + { + if (delay > TimeSpan.Zero) + await Task.Delay(delay, ct).ConfigureAwait(false); + try + { + await ConnectAsync(ct).ConfigureAwait(false); + return; + } + catch (Exception ex) when (IsSocketLevelFailure(ex) && _autoReconnect) + { + attempt++; + // Geometric growth, capped. Use Math.Min on ticks so we don't overflow with + // pathological multipliers / long deployments. + var nextTicks = (long)(Math.Max(delay.Ticks, TimeSpan.FromMilliseconds(100).Ticks) * _reconnect.BackoffMultiplier); + delay = TimeSpan.FromTicks(Math.Min(nextTicks, _reconnect.MaxDelay.Ticks)); + if (attempt >= 10) + { + // Bail after 10 attempts to surface persistent failure to the caller. With + // the default backoff (1s base, 2.0x mult, 30s cap) this is roughly 4 minutes + // of attempts; with InitialDelay=0 it's immediate up to the same cap. + throw; + } + } + } + } + private async Task SendOnceAsync(byte unitId, byte[] pdu, CancellationToken ct) { if (_stream is null) throw new InvalidOperationException("Transport not connected"); diff --git a/tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/ModbusConnectionOptionsTests.cs b/tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/ModbusConnectionOptionsTests.cs new file mode 100644 index 0000000..e0b7989 --- /dev/null +++ b/tests/ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests/ModbusConnectionOptionsTests.cs @@ -0,0 +1,101 @@ +using Shouldly; +using Xunit; + +namespace ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests; + +/// +/// #139 connection-layer config knobs: keep-alive, idle-disconnect, reconnect backoff. +/// Coverage focuses on default behaviour (matches pre-#139 wire output exactly) and the +/// DTO-binding path so users can drive these from JSON without editing C#. +/// +[Trait("Category", "Unit")] +public sealed class ModbusConnectionOptionsTests +{ + [Fact] + public void Defaults_Match_Historical_Behaviour() + { + var opts = new ModbusDriverOptions(); + + opts.KeepAlive.Enabled.ShouldBeTrue(); + opts.KeepAlive.Time.ShouldBe(TimeSpan.FromSeconds(30)); + opts.KeepAlive.Interval.ShouldBe(TimeSpan.FromSeconds(10)); + opts.KeepAlive.RetryCount.ShouldBe(3); + + opts.IdleDisconnectTimeout.ShouldBeNull(); + + opts.Reconnect.InitialDelay.ShouldBe(TimeSpan.Zero); + opts.Reconnect.MaxDelay.ShouldBe(TimeSpan.FromSeconds(30)); + opts.Reconnect.BackoffMultiplier.ShouldBe(2.0); + } + + [Fact] + public void Factory_Reads_KeepAlive_Knobs_From_Json() + { + const string json = """ + { + "host": "10.0.0.10", + "tags": [], + "keepAlive": { "enabled": false, "timeMs": 60000, "intervalMs": 5000, "retryCount": 5 } + } + """; + var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json); + // Reach into options via reflection — the factory's options field is internal. + var opts = (ModbusDriverOptions)typeof(ModbusDriver) + .GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)! + .GetValue(driver)!; + + opts.KeepAlive.Enabled.ShouldBeFalse(); + opts.KeepAlive.Time.ShouldBe(TimeSpan.FromMinutes(1)); + opts.KeepAlive.Interval.ShouldBe(TimeSpan.FromSeconds(5)); + opts.KeepAlive.RetryCount.ShouldBe(5); + } + + [Fact] + public void Factory_Reads_IdleDisconnect_From_Json() + { + const string json = """{ "host": "10.0.0.10", "tags": [], "idleDisconnectMs": 120000 }"""; + var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json); + var opts = (ModbusDriverOptions)typeof(ModbusDriver) + .GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)! + .GetValue(driver)!; + + opts.IdleDisconnectTimeout.ShouldBe(TimeSpan.FromMinutes(2)); + } + + [Fact] + public void Factory_Reads_Reconnect_Backoff_From_Json() + { + const string json = """ + { + "host": "10.0.0.10", + "tags": [], + "reconnect": { "initialDelayMs": 500, "maxDelayMs": 60000, "backoffMultiplier": 1.5 } + } + """; + var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json); + var opts = (ModbusDriverOptions)typeof(ModbusDriver) + .GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)! + .GetValue(driver)!; + + opts.Reconnect.InitialDelay.ShouldBe(TimeSpan.FromMilliseconds(500)); + opts.Reconnect.MaxDelay.ShouldBe(TimeSpan.FromMinutes(1)); + opts.Reconnect.BackoffMultiplier.ShouldBe(1.5); + } + + [Fact] + public void Factory_With_Empty_Json_Uses_All_Defaults() + { + const string json = """{ "host": "10.0.0.10", "tags": [] }"""; + var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json); + var opts = (ModbusDriverOptions)typeof(ModbusDriver) + .GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)! + .GetValue(driver)!; + + // Every connection-layer field must match the historical defaults so existing config + // rows stay bit-for-bit identical after #139. + opts.KeepAlive.Enabled.ShouldBeTrue(); + opts.IdleDisconnectTimeout.ShouldBeNull(); + opts.Reconnect.InitialDelay.ShouldBe(TimeSpan.Zero); + opts.AutoReconnect.ShouldBeTrue(); + } +}