Task #139 — Modbus connection-layer config knobs (keep-alive / idle / reconnect)

Promotes the previously hardcoded transport-layer settings to ModbusDriverOptions
so users can tune them through DriverConfig JSON without recompiling.

Three new option groups:

1. KeepAlive (ModbusKeepAliveOptions): Enabled / Time / Interval / RetryCount.
   Defaults preserve the historical PR 53 wire output exactly (Enabled=true,
   Time=30s, Interval=10s, RetryCount=3). Set Enabled=false for PLCs that
   reject SO_KEEPALIVE.

2. IdleDisconnectTimeout (TimeSpan?): when set, the transport tracks last-PDU-
   success and proactively closes + reconnects on the next request after the
   threshold. Defends against silent NAT / firewall socket reaping. Default
   null = disabled (no behaviour change).

3. Reconnect (ModbusReconnectOptions): InitialDelay / MaxDelay /
   BackoffMultiplier for the post-drop reconnect loop. Defaults
   (InitialDelay=0, MaxDelay=30s, Multiplier=2.0) preserve the historical
   immediate-retry behaviour for the first attempt and add geometric backoff
   only if the reconnect itself fails. Capped at 10 attempts before propagating.

ModbusTcpTransport ctor extended with optional keepAlive / idleDisconnect /
reconnect parameters; existing 4-arg call sites continue to compile. Factory
DTO gains parallel KeepAlive / IdleDisconnectMs / Reconnect fields with
default-aware binding.

Adds 5 new ModbusConnectionOptionsTests that cover the default-preservation
contract (every default field matches its pre-#139 value) and the JSON-binding
round-trip for each knob group. The existing 204 unit tests are still green.
This commit is contained in:
Joseph Doherty
2026-04-24 23:53:26 -04:00
parent 850b816873
commit 6cf20131fe
5 changed files with 264 additions and 12 deletions

View File

@@ -47,7 +47,11 @@ public sealed class ModbusDriver
_options = options;
_driverInstanceId = driverInstanceId;
_transportFactory = transportFactory
?? (o => new ModbusTcpTransport(o.Host, o.Port, o.Timeout, o.AutoReconnect));
?? (o => new ModbusTcpTransport(
o.Host, o.Port, o.Timeout, o.AutoReconnect,
keepAlive: o.KeepAlive,
idleDisconnect: o.IdleDisconnectTimeout,
reconnect: o.Reconnect));
_poll = new PollGroupEngine(
reader: ReadAsync,
onChange: (handle, tagRef, snapshot) =>

View File

@@ -52,6 +52,20 @@ public static class ModbusDriverFactoryExtensions
Timeout = TimeSpan.FromMilliseconds(dto.Probe?.TimeoutMs ?? 2_000),
ProbeAddress = dto.Probe?.ProbeAddress ?? 0,
},
KeepAlive = dto.KeepAlive is null ? new ModbusKeepAliveOptions() : new ModbusKeepAliveOptions
{
Enabled = dto.KeepAlive.Enabled ?? true,
Time = TimeSpan.FromMilliseconds(dto.KeepAlive.TimeMs ?? 30_000),
Interval = TimeSpan.FromMilliseconds(dto.KeepAlive.IntervalMs ?? 10_000),
RetryCount = dto.KeepAlive.RetryCount ?? 3,
},
IdleDisconnectTimeout = dto.IdleDisconnectMs is { } ms ? TimeSpan.FromMilliseconds(ms) : null,
Reconnect = dto.Reconnect is null ? new ModbusReconnectOptions() : new ModbusReconnectOptions
{
InitialDelay = TimeSpan.FromMilliseconds(dto.Reconnect.InitialDelayMs ?? 0),
MaxDelay = TimeSpan.FromMilliseconds(dto.Reconnect.MaxDelayMs ?? 30_000),
BackoffMultiplier = dto.Reconnect.BackoffMultiplier ?? 2.0,
},
};
return new ModbusDriver(options, driverInstanceId);
@@ -136,6 +150,26 @@ public static class ModbusDriverFactoryExtensions
public bool? AutoReconnect { get; init; }
public List<ModbusTagDto>? Tags { get; init; }
public ModbusProbeDto? Probe { get; init; }
// #139 connection-layer knobs.
public ModbusKeepAliveDto? KeepAlive { get; init; }
public int? IdleDisconnectMs { get; init; }
public ModbusReconnectDto? Reconnect { get; init; }
}
/// <summary>
/// JSON-facing shape of the keep-alive option group. Every member is nullable so the
/// factory can tell "not specified" apart from an explicit value and substitute its
/// default for anything omitted.
/// </summary>
internal sealed class ModbusKeepAliveDto
{
/// <summary>Whether keep-alive is enabled; the factory falls back to <c>true</c> when omitted.</summary>
public bool? Enabled { get; init; }
/// <summary>Idle time before the first probe, in milliseconds; the factory falls back to 30 000 when omitted.</summary>
public int? TimeMs { get; init; }
/// <summary>Interval between probes, in milliseconds; the factory falls back to 10 000 when omitted.</summary>
public int? IntervalMs { get; init; }
/// <summary>Number of probes; the factory falls back to 3 when omitted.</summary>
public int? RetryCount { get; init; }
}
/// <summary>
/// JSON-facing shape of the reconnect-backoff option group. Every member is nullable so
/// the factory can substitute its default for anything omitted.
/// </summary>
internal sealed class ModbusReconnectDto
{
/// <summary>Delay before the first reconnect attempt, in milliseconds; the factory falls back to 0 when omitted.</summary>
public int? InitialDelayMs { get; init; }
/// <summary>Upper bound on the backoff delay, in milliseconds; the factory falls back to 30 000 when omitted.</summary>
public int? MaxDelayMs { get; init; }
/// <summary>Growth factor applied between attempts; the factory falls back to 2.0 when omitted.</summary>
public double? BackoffMultiplier { get; init; }
}
internal sealed class ModbusTagDto

View File

@@ -56,6 +56,54 @@ public sealed class ModbusDriverOptions
/// connection error to the caller even though the PLC is up.
/// </summary>
public bool AutoReconnect { get; init; } = true;
/// <summary>
/// Per-driver TCP keep-alive settings. Defaults are the historical PR 53 values
/// (Enabled=true, Time=30s, Interval=10s, RetryCount=3) so existing deployments see
/// no behaviour change. Set <see cref="ModbusKeepAliveOptions.Enabled"/> to
/// <c>false</c> to disable OS-level keep-alive entirely (some PLCs don't tolerate it).
/// </summary>
public ModbusKeepAliveOptions KeepAlive { get; init; } = new();
/// <summary>
/// If non-null, the transport tracks the time of the last successful PDU and proactively
/// closes + reconnects the socket on the next request after this idle threshold elapses.
/// Defends against silent NAT / firewall reaping of long-idle sockets — the explicit
/// close-and-reopen turns the failure mode from "first-send-after-X-minutes errors" into
/// "first-send-after-X-minutes pays one reconnect cost."
/// Default <c>null</c> disables the idle check. A successful connect also resets the
/// idle clock, so the threshold is measured from the later of the last connect and the
/// last successful PDU.
/// </summary>
public TimeSpan? IdleDisconnectTimeout { get; init; } = null;
/// <summary>
/// Reconnect backoff settings used by the auto-reconnect path. With the defaults the first
/// reconnect attempt is immediate (preserving the historical pre-#139 behaviour); a delay
/// is only introduced when a reconnect attempt itself fails, after which the delay grows
/// geometrically up to <see cref="ModbusReconnectOptions.MaxDelay"/>. Set a non-zero
/// <see cref="ModbusReconnectOptions.InitialDelay"/> to also sleep before the first
/// reconnect attempt.
/// </summary>
public ModbusReconnectOptions Reconnect { get; init; } = new();
}
/// <summary>
/// OS-level TCP keep-alive knobs. Set <see cref="Enabled"/>=false to skip configuring
/// keep-alive on the socket entirely.
/// </summary>
public sealed class ModbusKeepAliveOptions
{
/// <summary>Master switch. When <c>false</c> none of the other values are applied. Default <c>true</c>.</summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Idle time before the first probe (mapped to <c>TcpKeepAliveTime</c>). The value is handed
/// to the socket as a whole number of seconds, so sub-second precision is not preserved.
/// Default 30 seconds.
/// </summary>
public TimeSpan Time { get; init; } = TimeSpan.FromSeconds(30);
/// <summary>
/// Interval between probes once started (mapped to <c>TcpKeepAliveInterval</c>). The value is
/// handed to the socket as a whole number of seconds, so sub-second precision is not preserved.
/// Default 10 seconds.
/// </summary>
public TimeSpan Interval { get; init; } = TimeSpan.FromSeconds(10);
/// <summary>Probes before declaring the socket dead (mapped to <c>TcpKeepAliveRetryCount</c>). Default 3.</summary>
public int RetryCount { get; init; } = 3;
}
/// <summary>
/// Geometric-backoff settings for the post-drop reconnect loop. The defaults give an
/// immediate first attempt and only introduce delays once an attempt has failed.
/// </summary>
public sealed class ModbusReconnectOptions
{
/// <summary>Delay before the first reconnect attempt. Default zero = immediate.</summary>
public TimeSpan InitialDelay { get; init; } = TimeSpan.Zero;
/// <summary>
/// Upper bound on the delay computed after a failed attempt. Default 30 seconds.
/// </summary>
public TimeSpan MaxDelay { get; init; } = TimeSpan.FromSeconds(30);
/// <summary>
/// Multiplier applied to the previous delay after each failed attempt. Default 2.0 doubles
/// each step. The transport's backoff loop treats a previous delay below 100 ms as 100 ms
/// before multiplying, so growth still happens when <see cref="InitialDelay"/> is zero.
/// </summary>
public double BackoffMultiplier { get; init; } = 2.0;
}
public sealed class ModbusProbeOptions

View File

@@ -30,18 +30,29 @@ public sealed class ModbusTcpTransport : IModbusTransport
private readonly int _port;
private readonly TimeSpan _timeout;
private readonly bool _autoReconnect;
private readonly ModbusKeepAliveOptions _keepAlive;
private readonly TimeSpan? _idleDisconnect;
private readonly ModbusReconnectOptions _reconnect;
private readonly SemaphoreSlim _gate = new(1, 1);
private TcpClient? _client;
private NetworkStream? _stream;
private ushort _nextTx;
private bool _disposed;
private DateTime _lastSuccessUtc = DateTime.UtcNow;
public ModbusTcpTransport(string host, int port, TimeSpan timeout, bool autoReconnect = true)
/// <summary>
/// Creates a transport for the given endpoint. The constructor only stores its
/// arguments.
/// </summary>
/// <param name="host">Target host.</param>
/// <param name="port">Target TCP port.</param>
/// <param name="timeout">Timeout stored for later connect operations.</param>
/// <param name="autoReconnect">Whether a socket-level failure triggers a tear-down, reconnect and resend.</param>
/// <param name="keepAlive">Keep-alive settings; <c>null</c> selects a default-constructed <see cref="ModbusKeepAliveOptions"/>.</param>
/// <param name="idleDisconnect">Idle threshold for the proactive reconnect; <c>null</c> disables the check.</param>
/// <param name="reconnect">Backoff settings; <c>null</c> selects a default-constructed <see cref="ModbusReconnectOptions"/>.</param>
public ModbusTcpTransport(
    string host,
    int port,
    TimeSpan timeout,
    bool autoReconnect = true,
    ModbusKeepAliveOptions? keepAlive = null,
    TimeSpan? idleDisconnect = null,
    ModbusReconnectOptions? reconnect = null)
{
    // Endpoint + basic behaviour switches are taken verbatim.
    (_host, _port, _timeout, _autoReconnect) = (host, port, timeout, autoReconnect);
    _idleDisconnect = idleDisconnect;

    // Option groups are optional for callers; substitute default instances so the rest
    // of the class never has to null-check them.
    _keepAlive = keepAlive is null ? new ModbusKeepAliveOptions() : keepAlive;
    _reconnect = reconnect is null ? new ModbusReconnectOptions() : reconnect;
}
public async Task ConnectAsync(CancellationToken ct)
@@ -57,12 +68,13 @@ public sealed class ModbusTcpTransport : IModbusTransport
var target = ipv4 ?? (addresses.Length > 0 ? addresses[0] : System.Net.IPAddress.Loopback);
_client = new TcpClient(target.AddressFamily);
EnableKeepAlive(_client);
EnableKeepAlive(_client, _keepAlive);
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(_timeout);
await _client.ConnectAsync(target, _port, cts.Token).ConfigureAwait(false);
_stream = _client.GetStream();
_lastSuccessUtc = DateTime.UtcNow;
}
/// <summary>
@@ -73,14 +85,15 @@ public sealed class ModbusTcpTransport : IModbusTransport
/// sandboxes don't expose the fine-grained timing levers — the driver still works,
/// application-level probe still detects problems).
/// </summary>
private static void EnableKeepAlive(TcpClient client)
private static void EnableKeepAlive(TcpClient client, ModbusKeepAliveOptions opts)
{
    // Keep-alive explicitly switched off: leave the socket untouched.
    if (!opts.Enabled) return;

    try
    {
        var socket = client.Client;
        socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);

        // Only the configured values are applied — no hard-coded 30/10/3 pre-pass, since
        // each of those would be overwritten immediately by the calls below.
        socket.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveTime, ToWholeSeconds(opts.Time));
        socket.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveInterval, ToWholeSeconds(opts.Interval));

        // A non-positive probe count is not meaningful; clamp to a single probe rather than
        // letting the call throw and be silently swallowed below.
        socket.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveRetryCount, Math.Max(1, opts.RetryCount));
    }
    catch { /* best-effort; older OSes may not expose the granular knobs */ }

    // The socket options take whole seconds. Truncate like a plain cast would, but never
    // go below 1 s (a sub-second TimeSpan would otherwise become 0) and never overflow int.
    static int ToWholeSeconds(TimeSpan value) =>
        (int)Math.Clamp((long)value.TotalSeconds, 1L, int.MaxValue);
}
@@ -93,17 +106,32 @@ public sealed class ModbusTcpTransport : IModbusTransport
await _gate.WaitAsync(ct).ConfigureAwait(false);
try
{
// Proactive idle-disconnect: if the socket has been quiet longer than the configured
// threshold, tear it down + reconnect before this PDU lands. Defends against silent
// NAT / firewall reaping where the socket looks alive locally but the upstream side
// dropped it minutes ago.
if (_idleDisconnect.HasValue && DateTime.UtcNow - _lastSuccessUtc > _idleDisconnect.Value)
{
await TearDownAsync().ConfigureAwait(false);
await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
}
try
{
return await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
_lastSuccessUtc = DateTime.UtcNow;
return result;
}
catch (Exception ex) when (_autoReconnect && IsSocketLevelFailure(ex))
{
// Mid-transaction drop: tear down the dead socket, reconnect, resend. Single
// retry — if it fails again, let it propagate so health/status reflect reality.
// Mid-transaction drop: tear down the dead socket, reconnect (with backoff if
// configured), resend. Single retry — if it fails again, let it propagate so
// health/status reflect reality.
await TearDownAsync().ConfigureAwait(false);
await ConnectAsync(ct).ConfigureAwait(false);
return await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
_lastSuccessUtc = DateTime.UtcNow;
return result;
}
}
finally
@@ -112,6 +140,43 @@ public sealed class ModbusTcpTransport : IModbusTransport
}
}
/// <summary>
/// Connect attempt with the configured geometric backoff. The first attempt fires after
/// <see cref="ModbusReconnectOptions.InitialDelay"/> (default zero — immediate). After each
/// failed attempt the next delay is the previous delay (floored at 100 ms) times
/// <c>BackoffMultiplier</c>, capped at <c>MaxDelay</c>. Gives up and rethrows the last
/// failure after 10 attempts. When auto-reconnect is disabled the first failure propagates
/// immediately. Caller's cancellation token aborts the loop, including while sleeping.
/// </summary>
/// <param name="ct">Cancellation token observed by both the backoff sleep and the connect.</param>
private async Task ConnectWithBackoffAsync(CancellationToken ct)
{
    // Hard ceiling on attempts per call so a persistent outage surfaces to the caller.
    // With the defaults (InitialDelay=0, 2.0x, 30s cap) the sleeps between the ten attempts
    // are 0.2s, 0.4s, ... 25.6s, 30s — roughly 80 seconds in total, plus connect time.
    const int MaxAttempts = 10;

    // Growth base used when the current delay is smaller (e.g. InitialDelay = 0), so a
    // failed immediate attempt still leads to a non-zero wait before the next one.
    long floorTicks = TimeSpan.FromMilliseconds(100).Ticks;

    // A negative MaxDelay can never be slept; treat it as "no delay".
    long capTicks = Math.Max(0L, _reconnect.MaxDelay.Ticks);

    var delay = _reconnect.InitialDelay;
    var attempt = 0;
    while (true)
    {
        if (delay > TimeSpan.Zero)
        {
            await Task.Delay(delay, ct).ConfigureAwait(false);
        }

        try
        {
            await ConnectAsync(ct).ConfigureAwait(false);
            return;
        }
        catch (Exception ex) when (IsSocketLevelFailure(ex) && _autoReconnect)
        {
            attempt++;
            if (attempt >= MaxAttempts)
            {
                throw;
            }

            // Do the growth + cap in floating point and only convert to ticks once the
            // value is known to be in range. Casting an out-of-range double to long is
            // unspecified and in practice can yield a negative value, which would turn
            // the backoff into a tight retry loop for large multipliers.
            double scaled = Math.Max(delay.Ticks, floorTicks) * _reconnect.BackoffMultiplier;
            long nextTicks;
            if (double.IsNaN(scaled) || scaled <= 0d)
            {
                nextTicks = 0L;
            }
            else if (scaled >= capTicks)
            {
                nextTicks = capTicks;
            }
            else
            {
                nextTicks = (long)scaled;
            }

            delay = TimeSpan.FromTicks(nextTicks);
        }
    }
}
private async Task<byte[]> SendOnceAsync(byte unitId, byte[] pdu, CancellationToken ct)
{
if (_stream is null) throw new InvalidOperationException("Transport not connected");