Task #139 — Modbus connection-layer config knobs (keep-alive / idle / reconnect)

Promotes the previously hardcoded transport-layer settings to ModbusDriverOptions
so users can tune them through DriverConfig JSON without recompiling.

Three new option groups:

1. KeepAlive (ModbusKeepAliveOptions): Enabled / Time / Interval / RetryCount.
   Defaults preserve the historical PR 53 wire output exactly (Enabled=true,
   Time=30s, Interval=10s, RetryCount=3). Set Enabled=false for PLCs that
   reject SO_KEEPALIVE.

2. IdleDisconnectTimeout (TimeSpan?): when set, the transport tracks last-PDU-
   success and proactively closes + reconnects on the next request after the
   threshold. Defends against silent NAT / firewall socket reaping. Default
   null = disabled (no behaviour change).

3. Reconnect (ModbusReconnectOptions): InitialDelay / MaxDelay /
   BackoffMultiplier for the post-drop reconnect loop. Defaults
   (InitialDelay=0, MaxDelay=30s, Multiplier=2.0) preserve the historical
   immediate-retry behaviour for the first attempt and add geometric backoff
   only if the reconnect itself fails. Capped at 10 attempts before propagating.

ModbusTcpTransport ctor extended with optional keepAlive / idleDisconnect /
reconnect parameters; existing 4-arg call sites continue to compile. Factory
DTO gains parallel KeepAlive / IdleDisconnectMs / Reconnect fields with
default-aware binding.

5 new ModbusConnectionOptionsTests covering the default-preservation contract
(every default field matches pre-#139) and the JSON-binding round-trip for
each knob group. Existing 204 unit tests still green.
This commit is contained in:
Joseph Doherty
2026-04-24 23:53:26 -04:00
parent 850b816873
commit 6cf20131fe
5 changed files with 264 additions and 12 deletions

View File

@@ -47,7 +47,11 @@ public sealed class ModbusDriver
_options = options; _options = options;
_driverInstanceId = driverInstanceId; _driverInstanceId = driverInstanceId;
_transportFactory = transportFactory _transportFactory = transportFactory
?? (o => new ModbusTcpTransport(o.Host, o.Port, o.Timeout, o.AutoReconnect)); ?? (o => new ModbusTcpTransport(
o.Host, o.Port, o.Timeout, o.AutoReconnect,
keepAlive: o.KeepAlive,
idleDisconnect: o.IdleDisconnectTimeout,
reconnect: o.Reconnect));
_poll = new PollGroupEngine( _poll = new PollGroupEngine(
reader: ReadAsync, reader: ReadAsync,
onChange: (handle, tagRef, snapshot) => onChange: (handle, tagRef, snapshot) =>

View File

@@ -52,6 +52,20 @@ public static class ModbusDriverFactoryExtensions
Timeout = TimeSpan.FromMilliseconds(dto.Probe?.TimeoutMs ?? 2_000), Timeout = TimeSpan.FromMilliseconds(dto.Probe?.TimeoutMs ?? 2_000),
ProbeAddress = dto.Probe?.ProbeAddress ?? 0, ProbeAddress = dto.Probe?.ProbeAddress ?? 0,
}, },
KeepAlive = dto.KeepAlive is null ? new ModbusKeepAliveOptions() : new ModbusKeepAliveOptions
{
Enabled = dto.KeepAlive.Enabled ?? true,
Time = TimeSpan.FromMilliseconds(dto.KeepAlive.TimeMs ?? 30_000),
Interval = TimeSpan.FromMilliseconds(dto.KeepAlive.IntervalMs ?? 10_000),
RetryCount = dto.KeepAlive.RetryCount ?? 3,
},
IdleDisconnectTimeout = dto.IdleDisconnectMs is { } ms ? TimeSpan.FromMilliseconds(ms) : null,
Reconnect = dto.Reconnect is null ? new ModbusReconnectOptions() : new ModbusReconnectOptions
{
InitialDelay = TimeSpan.FromMilliseconds(dto.Reconnect.InitialDelayMs ?? 0),
MaxDelay = TimeSpan.FromMilliseconds(dto.Reconnect.MaxDelayMs ?? 30_000),
BackoffMultiplier = dto.Reconnect.BackoffMultiplier ?? 2.0,
},
}; };
return new ModbusDriver(options, driverInstanceId); return new ModbusDriver(options, driverInstanceId);
@@ -136,6 +150,26 @@ public static class ModbusDriverFactoryExtensions
public bool? AutoReconnect { get; init; } public bool? AutoReconnect { get; init; }
public List<ModbusTagDto>? Tags { get; init; } public List<ModbusTagDto>? Tags { get; init; }
public ModbusProbeDto? Probe { get; init; } public ModbusProbeDto? Probe { get; init; }
// #139 connection-layer knobs.
public ModbusKeepAliveDto? KeepAlive { get; init; }
public int? IdleDisconnectMs { get; init; }
public ModbusReconnectDto? Reconnect { get; init; }
}
internal sealed class ModbusKeepAliveDto
{
public bool? Enabled { get; init; }
public int? TimeMs { get; init; }
public int? IntervalMs { get; init; }
public int? RetryCount { get; init; }
}
internal sealed class ModbusReconnectDto
{
public int? InitialDelayMs { get; init; }
public int? MaxDelayMs { get; init; }
public double? BackoffMultiplier { get; init; }
} }
internal sealed class ModbusTagDto internal sealed class ModbusTagDto

View File

@@ -56,6 +56,54 @@ public sealed class ModbusDriverOptions
/// connection error to the caller even though the PLC is up. /// connection error to the caller even though the PLC is up.
/// </summary> /// </summary>
public bool AutoReconnect { get; init; } = true; public bool AutoReconnect { get; init; } = true;
/// <summary>
/// Per-driver TCP keep-alive settings. Defaults are the historical PR 53 values
/// (KeepAliveEnabled=true, Time=30s, Interval=10s, RetryCount=3) so existing
/// deployments see no behaviour change. Set <see cref="ModbusKeepAliveOptions.Enabled"/>
/// to <c>false</c> to disable OS-level keep-alive entirely (some PLCs don't tolerate it).
/// </summary>
public ModbusKeepAliveOptions KeepAlive { get; init; } = new();
/// <summary>
/// If non-null, the transport tracks the time of the last successful PDU and proactively
/// closes + reconnects the socket on the next request after this idle threshold elapses.
/// Defends against silent NAT / firewall reaping of long-idle sockets — the explicit
/// close-and-reopen turns the failure mode from "first-send-after-X-minutes errors" into
/// "first-send-after-X-minutes pays one reconnect cost."
/// </summary>
public TimeSpan? IdleDisconnectTimeout { get; init; } = null;
/// <summary>
/// Reconnect backoff settings used by the auto-reconnect path. Default is no backoff
/// (immediate retry — preserves the historical pre-#139 behaviour). Set to a non-zero
/// <see cref="ModbusReconnectOptions.InitialDelay"/> to sleep before the first reconnect
/// attempt; <see cref="ModbusReconnectOptions.MaxDelay"/> caps the geometric growth.
/// </summary>
public ModbusReconnectOptions Reconnect { get; init; } = new();
}
/// <summary>OS-level TCP keep-alive knobs. Set <see cref="Enabled"/>=false to skip entirely.</summary>
public sealed class ModbusKeepAliveOptions
{
public bool Enabled { get; init; } = true;
/// <summary>Idle time before the first probe (seconds, mapped to <c>TcpKeepAliveTime</c>).</summary>
public TimeSpan Time { get; init; } = TimeSpan.FromSeconds(30);
/// <summary>Interval between probes once started (seconds, mapped to <c>TcpKeepAliveInterval</c>).</summary>
public TimeSpan Interval { get; init; } = TimeSpan.FromSeconds(10);
/// <summary>Probes before declaring the socket dead (mapped to <c>TcpKeepAliveRetryCount</c>).</summary>
public int RetryCount { get; init; } = 3;
}
/// <summary>Geometric-backoff settings for the post-drop reconnect loop.</summary>
public sealed class ModbusReconnectOptions
{
/// <summary>Delay before the first reconnect attempt. Default zero = immediate.</summary>
public TimeSpan InitialDelay { get; init; } = TimeSpan.Zero;
/// <summary>Upper bound on the geometric backoff sequence.</summary>
public TimeSpan MaxDelay { get; init; } = TimeSpan.FromSeconds(30);
/// <summary>Multiplier applied each retry. Default 2.0 doubles each step.</summary>
public double BackoffMultiplier { get; init; } = 2.0;
} }
public sealed class ModbusProbeOptions public sealed class ModbusProbeOptions

View File

@@ -30,18 +30,29 @@ public sealed class ModbusTcpTransport : IModbusTransport
private readonly int _port; private readonly int _port;
private readonly TimeSpan _timeout; private readonly TimeSpan _timeout;
private readonly bool _autoReconnect; private readonly bool _autoReconnect;
private readonly ModbusKeepAliveOptions _keepAlive;
private readonly TimeSpan? _idleDisconnect;
private readonly ModbusReconnectOptions _reconnect;
private readonly SemaphoreSlim _gate = new(1, 1); private readonly SemaphoreSlim _gate = new(1, 1);
private TcpClient? _client; private TcpClient? _client;
private NetworkStream? _stream; private NetworkStream? _stream;
private ushort _nextTx; private ushort _nextTx;
private bool _disposed; private bool _disposed;
private DateTime _lastSuccessUtc = DateTime.UtcNow;
public ModbusTcpTransport(string host, int port, TimeSpan timeout, bool autoReconnect = true) public ModbusTcpTransport(
string host, int port, TimeSpan timeout, bool autoReconnect = true,
ModbusKeepAliveOptions? keepAlive = null,
TimeSpan? idleDisconnect = null,
ModbusReconnectOptions? reconnect = null)
{ {
_host = host; _host = host;
_port = port; _port = port;
_timeout = timeout; _timeout = timeout;
_autoReconnect = autoReconnect; _autoReconnect = autoReconnect;
_keepAlive = keepAlive ?? new ModbusKeepAliveOptions();
_idleDisconnect = idleDisconnect;
_reconnect = reconnect ?? new ModbusReconnectOptions();
} }
public async Task ConnectAsync(CancellationToken ct) public async Task ConnectAsync(CancellationToken ct)
@@ -57,12 +68,13 @@ public sealed class ModbusTcpTransport : IModbusTransport
var target = ipv4 ?? (addresses.Length > 0 ? addresses[0] : System.Net.IPAddress.Loopback); var target = ipv4 ?? (addresses.Length > 0 ? addresses[0] : System.Net.IPAddress.Loopback);
_client = new TcpClient(target.AddressFamily); _client = new TcpClient(target.AddressFamily);
EnableKeepAlive(_client); EnableKeepAlive(_client, _keepAlive);
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct); using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(_timeout); cts.CancelAfter(_timeout);
await _client.ConnectAsync(target, _port, cts.Token).ConfigureAwait(false); await _client.ConnectAsync(target, _port, cts.Token).ConfigureAwait(false);
_stream = _client.GetStream(); _stream = _client.GetStream();
_lastSuccessUtc = DateTime.UtcNow;
} }
/// <summary> /// <summary>
@@ -73,14 +85,15 @@ public sealed class ModbusTcpTransport : IModbusTransport
/// sandboxes don't expose the fine-grained timing levers — the driver still works, /// sandboxes don't expose the fine-grained timing levers — the driver still works,
/// application-level probe still detects problems). /// application-level probe still detects problems).
/// </summary> /// </summary>
private static void EnableKeepAlive(TcpClient client) private static void EnableKeepAlive(TcpClient client, ModbusKeepAliveOptions opts)
{ {
if (!opts.Enabled) return;
try try
{ {
client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true); client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);
client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveTime, 30); client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveTime, (int)opts.Time.TotalSeconds);
client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveInterval, 10); client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveInterval, (int)opts.Interval.TotalSeconds);
client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveRetryCount, 3); client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveRetryCount, opts.RetryCount);
} }
catch { /* best-effort; older OSes may not expose the granular knobs */ } catch { /* best-effort; older OSes may not expose the granular knobs */ }
} }
@@ -93,17 +106,32 @@ public sealed class ModbusTcpTransport : IModbusTransport
await _gate.WaitAsync(ct).ConfigureAwait(false); await _gate.WaitAsync(ct).ConfigureAwait(false);
try try
{ {
// Proactive idle-disconnect: if the socket has been quiet longer than the configured
// threshold, tear it down + reconnect before this PDU lands. Defends against silent
// NAT / firewall reaping where the socket looks alive locally but the upstream side
// dropped it minutes ago.
if (_idleDisconnect.HasValue && DateTime.UtcNow - _lastSuccessUtc > _idleDisconnect.Value)
{
await TearDownAsync().ConfigureAwait(false);
await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
}
try try
{ {
return await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false); var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
_lastSuccessUtc = DateTime.UtcNow;
return result;
} }
catch (Exception ex) when (_autoReconnect && IsSocketLevelFailure(ex)) catch (Exception ex) when (_autoReconnect && IsSocketLevelFailure(ex))
{ {
// Mid-transaction drop: tear down the dead socket, reconnect, resend. Single // Mid-transaction drop: tear down the dead socket, reconnect (with backoff if
// retry — if it fails again, let it propagate so health/status reflect reality. // configured), resend. Single retry — if it fails again, let it propagate so
// health/status reflect reality.
await TearDownAsync().ConfigureAwait(false); await TearDownAsync().ConfigureAwait(false);
await ConnectAsync(ct).ConfigureAwait(false); await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
return await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false); var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
_lastSuccessUtc = DateTime.UtcNow;
return result;
} }
} }
finally finally
@@ -112,6 +140,43 @@ public sealed class ModbusTcpTransport : IModbusTransport
} }
} }
/// <summary>
/// Connect attempt with the configured geometric backoff. The first attempt fires after
/// <see cref="ModbusReconnectOptions.InitialDelay"/> (default zero — immediate); each
/// subsequent attempt sleeps for the previous delay times <c>BackoffMultiplier</c>,
/// capped at <c>MaxDelay</c>. Caller's cancellation token aborts the loop.
/// </summary>
private async Task ConnectWithBackoffAsync(CancellationToken ct)
{
var delay = _reconnect.InitialDelay;
var attempt = 0;
while (true)
{
if (delay > TimeSpan.Zero)
await Task.Delay(delay, ct).ConfigureAwait(false);
try
{
await ConnectAsync(ct).ConfigureAwait(false);
return;
}
catch (Exception ex) when (IsSocketLevelFailure(ex) && _autoReconnect)
{
attempt++;
// Geometric growth, capped. Use Math.Min on ticks so we don't overflow with
// pathological multipliers / long deployments.
var nextTicks = (long)(Math.Max(delay.Ticks, TimeSpan.FromMilliseconds(100).Ticks) * _reconnect.BackoffMultiplier);
delay = TimeSpan.FromTicks(Math.Min(nextTicks, _reconnect.MaxDelay.Ticks));
if (attempt >= 10)
{
// Bail after 10 attempts to surface persistent failure to the caller. With
// the default backoff (1s base, 2.0x mult, 30s cap) this is roughly 4 minutes
// of attempts; with InitialDelay=0 it's immediate up to the same cap.
throw;
}
}
}
}
private async Task<byte[]> SendOnceAsync(byte unitId, byte[] pdu, CancellationToken ct) private async Task<byte[]> SendOnceAsync(byte unitId, byte[] pdu, CancellationToken ct)
{ {
if (_stream is null) throw new InvalidOperationException("Transport not connected"); if (_stream is null) throw new InvalidOperationException("Transport not connected");

View File

@@ -0,0 +1,101 @@
using Shouldly;
using Xunit;
namespace ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests;
/// <summary>
/// #139 connection-layer config knobs: keep-alive, idle-disconnect, reconnect backoff.
/// Coverage focuses on default behaviour (matches pre-#139 wire output exactly) and the
/// DTO-binding path so users can drive these from JSON without editing C#.
/// </summary>
[Trait("Category", "Unit")]
public sealed class ModbusConnectionOptionsTests
{
[Fact]
public void Defaults_Match_Historical_Behaviour()
{
var opts = new ModbusDriverOptions();
opts.KeepAlive.Enabled.ShouldBeTrue();
opts.KeepAlive.Time.ShouldBe(TimeSpan.FromSeconds(30));
opts.KeepAlive.Interval.ShouldBe(TimeSpan.FromSeconds(10));
opts.KeepAlive.RetryCount.ShouldBe(3);
opts.IdleDisconnectTimeout.ShouldBeNull();
opts.Reconnect.InitialDelay.ShouldBe(TimeSpan.Zero);
opts.Reconnect.MaxDelay.ShouldBe(TimeSpan.FromSeconds(30));
opts.Reconnect.BackoffMultiplier.ShouldBe(2.0);
}
[Fact]
public void Factory_Reads_KeepAlive_Knobs_From_Json()
{
const string json = """
{
"host": "10.0.0.10",
"tags": [],
"keepAlive": { "enabled": false, "timeMs": 60000, "intervalMs": 5000, "retryCount": 5 }
}
""";
var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json);
// Reach into options via reflection — the factory's options field is internal.
var opts = (ModbusDriverOptions)typeof(ModbusDriver)
.GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!
.GetValue(driver)!;
opts.KeepAlive.Enabled.ShouldBeFalse();
opts.KeepAlive.Time.ShouldBe(TimeSpan.FromMinutes(1));
opts.KeepAlive.Interval.ShouldBe(TimeSpan.FromSeconds(5));
opts.KeepAlive.RetryCount.ShouldBe(5);
}
[Fact]
public void Factory_Reads_IdleDisconnect_From_Json()
{
const string json = """{ "host": "10.0.0.10", "tags": [], "idleDisconnectMs": 120000 }""";
var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json);
var opts = (ModbusDriverOptions)typeof(ModbusDriver)
.GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!
.GetValue(driver)!;
opts.IdleDisconnectTimeout.ShouldBe(TimeSpan.FromMinutes(2));
}
[Fact]
public void Factory_Reads_Reconnect_Backoff_From_Json()
{
const string json = """
{
"host": "10.0.0.10",
"tags": [],
"reconnect": { "initialDelayMs": 500, "maxDelayMs": 60000, "backoffMultiplier": 1.5 }
}
""";
var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json);
var opts = (ModbusDriverOptions)typeof(ModbusDriver)
.GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!
.GetValue(driver)!;
opts.Reconnect.InitialDelay.ShouldBe(TimeSpan.FromMilliseconds(500));
opts.Reconnect.MaxDelay.ShouldBe(TimeSpan.FromMinutes(1));
opts.Reconnect.BackoffMultiplier.ShouldBe(1.5);
}
[Fact]
public void Factory_With_Empty_Json_Uses_All_Defaults()
{
const string json = """{ "host": "10.0.0.10", "tags": [] }""";
var driver = ModbusDriverFactoryExtensions.CreateInstance("modbus-1", json);
var opts = (ModbusDriverOptions)typeof(ModbusDriver)
.GetField("_options", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)!
.GetValue(driver)!;
// Every connection-layer field must match the historical defaults so existing config
// rows stay bit-for-bit identical after #139.
opts.KeepAlive.Enabled.ShouldBeTrue();
opts.IdleDisconnectTimeout.ShouldBeNull();
opts.Reconnect.InitialDelay.ShouldBe(TimeSpan.Zero);
opts.AutoReconnect.ShouldBeTrue();
}
}