64e3fbe035
v2-ci / build (push) Failing after 1m43s
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been skipped
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.Host.IntegrationTests) (push) Has been skipped
v2-ci / integration (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.IntegrationTests) (push) Has been skipped
Adds <summary>, <param>, <typeparam>, and <inheritdoc/> tags to public members surfaced by commentchecker — resolves 5,847 of 5,869 issues (99.6%) across three /fixdocs passes.
300 lines
14 KiB
C#
300 lines
14 KiB
C#
using System.Net.Sockets;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Driver.Modbus;
|
|
|
|
/// <summary>
/// Concrete Modbus TCP transport. Wraps a single <see cref="TcpClient"/> and serializes
/// requests so at most one transaction is in-flight at a time — Modbus servers typically
/// support concurrent transactions, but the single-flight model keeps the wire trace
/// easy to diagnose and avoids interleaved-response correlation bugs.
/// </summary>
/// <remarks>
/// <para>
/// Survives mid-transaction socket drops: when a send/read fails with a socket-level
/// error (<see cref="IOException"/>, <see cref="SocketException"/>, <see cref="EndOfStreamException"/>)
/// the transport disposes the dead socket, reconnects, and retries the PDU exactly
/// once. Deliberately limited to a single retry — further failures bubble up so the
/// driver's health surface reflects the real state instead of masking a dead PLC.
/// </para>
/// <para>
/// Why this matters for DL205/DL260: the AutomationDirect H2-ECOM100 does NOT send
/// TCP keepalives per <c>docs/v2/dl205.md</c> §behavioral-oddities, so any NAT/firewall
/// between the gateway and PLC can silently close an idle socket after 2-5 minutes.
/// Also enables OS-level <c>SO_KEEPALIVE</c> so the driver's own side detects a stuck
/// socket in reasonable time even when the application is mostly idle.
/// </para>
/// </remarks>
public sealed class ModbusTcpTransport : IModbusTransport
{
    /// <summary>MBAP header size: TxId(2) + ProtocolId(2) + Length(2) + UnitId(1).</summary>
    private const int MbapHeaderLength = 7;

    /// <summary>Number of connect attempts the backoff loop makes before rethrowing the last failure.</summary>
    private const int MaxConnectAttempts = 10;

    /// <summary>Seed for geometric growth so a zero (or tiny) delay still grows after a failure.</summary>
    private static readonly TimeSpan BackoffSeed = TimeSpan.FromMilliseconds(100);

    private readonly string _host;
    private readonly int _port;
    private readonly TimeSpan _timeout;
    private readonly bool _autoReconnect;
    private readonly ModbusKeepAliveOptions _keepAlive;
    private readonly TimeSpan? _idleDisconnect;
    private readonly ModbusReconnectOptions _reconnect;

    // Single-permit gate: only one request/response exchange may own the socket at a time.
    private readonly SemaphoreSlim _gate = new(1, 1);

    private TcpClient? _client;
    private NetworkStream? _stream;
    private ushort _nextTx;
    private bool _disposed;

    // Refreshed on every successful connect and exchange; drives the idle-disconnect check.
    private DateTime _lastSuccessUtc = DateTime.UtcNow;

    /// <summary>Initializes a new instance of the <see cref="ModbusTcpTransport"/> class.</summary>
    /// <param name="host">The host address or hostname of the Modbus server.</param>
    /// <param name="port">The TCP port of the Modbus server.</param>
    /// <param name="timeout">The timeout applied to each connect attempt and to each request/response exchange.</param>
    /// <param name="autoReconnect">Whether to automatically reconnect on socket failures.</param>
    /// <param name="keepAlive">Optional keep-alive configuration for the socket; a default-constructed instance is used when <c>null</c>.</param>
    /// <param name="idleDisconnect">Optional duration after which an idle socket is torn down and re-established before the next request.</param>
    /// <param name="reconnect">Optional reconnect backoff configuration; a default-constructed instance is used when <c>null</c>.</param>
    public ModbusTcpTransport(
        string host, int port, TimeSpan timeout, bool autoReconnect = true,
        ModbusKeepAliveOptions? keepAlive = null,
        TimeSpan? idleDisconnect = null,
        ModbusReconnectOptions? reconnect = null)
    {
        _host = host;
        _port = port;
        _timeout = timeout;
        _autoReconnect = autoReconnect;
        _keepAlive = keepAlive ?? new ModbusKeepAliveOptions();
        _idleDisconnect = idleDisconnect;
        _reconnect = reconnect ?? new ModbusReconnectOptions();
    }

    /// <summary>Connects to the Modbus TCP server with IPv4 preference.</summary>
    /// <param name="ct">The cancellation token for the operation.</param>
    /// <returns>A task that completes once the socket is connected and the stream is ready.</returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled, or the connect attempt exceeded the configured timeout.
    /// </exception>
    public async Task ConnectAsync(CancellationToken ct)
    {
        // Resolve the host explicitly + prefer IPv4. .NET's TcpClient default-constructor is
        // dual-stack (IPv6 first, fallback to IPv4) — but most Modbus TCP devices (PLCs and
        // simulators like pymodbus) bind 0.0.0.0 only, so the IPv6 attempt times out and we
        // burn the entire ConnectAsync budget before even trying IPv4. Resolving first +
        // dialing the IPv4 address directly sidesteps that.
        var addresses = await System.Net.Dns.GetHostAddressesAsync(_host, ct).ConfigureAwait(false);
        var ipv4 = System.Linq.Enumerable.FirstOrDefault(addresses,
            a => a.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork);
        var target = ipv4 ?? (addresses.Length > 0 ? addresses[0] : System.Net.IPAddress.Loopback);

        // Build the new connection in locals so a failed attempt never leaves a half-open
        // client parked in the fields (it used to stay there, undisposed).
        var client = new TcpClient(target.AddressFamily);
        NetworkStream stream;
        try
        {
            EnableKeepAlive(client, _keepAlive);

            using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            cts.CancelAfter(_timeout);
            await client.ConnectAsync(target, _port, cts.Token).ConfigureAwait(false);
            stream = client.GetStream();
        }
        catch
        {
            client.Dispose();
            throw;
        }

        var previousStream = _stream;
        var previousClient = _client;
        _client = client;
        _stream = stream;
        _lastSuccessUtc = DateTime.UtcNow;

        // A repeated ConnectAsync replaces the connection; release the old socket instead of
        // leaking it. Best-effort: a failure closing the old socket must not fail the new one.
        try { previousStream?.Dispose(); } catch { /* best-effort */ }
        try { previousClient?.Dispose(); } catch { /* best-effort */ }
    }

    /// <summary>
    /// Enable SO_KEEPALIVE with aggressive probe timing. DL205/DL260 doesn't send keepalives
    /// itself; having the OS probe the socket every ~30s lets the driver notice a dead PLC
    /// or broken NAT path long before the default 2-hour Windows idle timeout fires.
    /// Non-fatal if the underlying OS rejects the option (some older Linux / container
    /// sandboxes don't expose the fine-grained timing levers — the driver still works,
    /// application-level probe still detects problems).
    /// </summary>
    /// <param name="client">The client whose underlying socket is configured.</param>
    /// <param name="opts">Keep-alive settings; nothing is changed when <c>Enabled</c> is false.</param>
    private static void EnableKeepAlive(TcpClient client, ModbusKeepAliveOptions opts)
    {
        if (!opts.Enabled) return;
        try
        {
            client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);
            // Driver.Modbus-009: a TimeSpan < 1s previously truncated to 0 via the int cast,
            // which Windows / Linux interpret as "use the default" — silently defeating the
            // configured keep-alive timing. Round up to at least 1 second so a sub-second
            // configuration still produces a real keep-alive cadence. Negative values are
            // also clamped to 1 to avoid surfacing as OS errors.
            client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveTime,
                ClampToWholeSeconds(opts.Time));
            client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveInterval,
                ClampToWholeSeconds(opts.Interval));
            client.Client.SetSocketOption(SocketOptionLevel.Tcp, SocketOptionName.TcpKeepAliveRetryCount, opts.RetryCount);
        }
        catch { /* best-effort; older OSes may not expose the granular knobs */ }
    }

    /// <summary>
    /// Driver.Modbus-009: convert a <see cref="TimeSpan"/> to a whole number of seconds,
    /// rounding up, with a minimum of 1 — protects callers from the int-cast truncation that
    /// turned 500 ms keep-alive timing into "use the default" on most OSes.
    /// </summary>
    /// <param name="ts">The timespan to clamp to whole seconds.</param>
    /// <returns>
    /// The rounded-up second count, limited to the range 1..<see cref="int.MaxValue"/>.
    /// </returns>
    internal static int ClampToWholeSeconds(TimeSpan ts)
    {
        var seconds = Math.Ceiling(ts.TotalSeconds);
        if (seconds < 1d) return 1;

        // Saturate before narrowing: an out-of-range double-to-int cast is not guaranteed to
        // saturate and could come out negative, which the minimum-of-1 rule would then turn
        // into the most aggressive setting instead of the longest one.
        return seconds >= int.MaxValue ? int.MaxValue : (int)seconds;
    }

    /// <summary>Sends a Modbus PDU and returns the response, with automatic retry on socket failure.</summary>
    /// <param name="unitId">The Modbus unit/slave ID.</param>
    /// <param name="pdu">The protocol data unit to send.</param>
    /// <param name="ct">The cancellation token for the operation.</param>
    /// <returns>The response PDU (function code followed by data), without the MBAP header.</returns>
    /// <exception cref="ObjectDisposedException">The transport has been disposed.</exception>
    /// <exception cref="InvalidOperationException">The transport is not connected.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="pdu"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="pdu"/> does not fit the 16-bit MBAP length field.</exception>
    /// <exception cref="InvalidDataException">The response frame is malformed or its transaction id does not match.</exception>
    /// <exception cref="ModbusException">The server answered with a Modbus exception response.</exception>
    public async Task<byte[]> SendAsync(byte unitId, byte[] pdu, CancellationToken ct)
    {
        if (_disposed) throw new ObjectDisposedException(nameof(ModbusTcpTransport));
        if (_stream is null) throw new InvalidOperationException("Transport not connected");
        ArgumentNullException.ThrowIfNull(pdu);
        // The MBAP length field is 16 bits and counts the unit id plus the PDU; anything
        // larger would silently wrap and put a corrupt frame on the wire.
        if (pdu.Length > ushort.MaxValue - 1)
            throw new ArgumentException("Modbus PDU is too large for the MBAP length field", nameof(pdu));

        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            // Proactive idle-disconnect: if the socket has been quiet longer than the configured
            // threshold, tear it down + reconnect before this PDU lands. Defends against silent
            // NAT / firewall reaping where the socket looks alive locally but the upstream side
            // dropped it minutes ago.
            if (_idleDisconnect is { } idleLimit && DateTime.UtcNow - _lastSuccessUtc > idleLimit)
            {
                await TearDownAsync().ConfigureAwait(false);
                await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
            }

            try
            {
                var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
                _lastSuccessUtc = DateTime.UtcNow;
                return result;
            }
            // !_disposed: a concurrent DisposeAsync surfaces here as ObjectDisposedException,
            // which counts as socket-level. Without this guard the retry path would open a
            // fresh socket on an already-disposed transport and never release it.
            catch (Exception ex) when (_autoReconnect && !_disposed && IsSocketLevelFailure(ex))
            {
                // Mid-transaction drop: tear down the dead socket, reconnect (with backoff if
                // configured), resend. Single retry — if it fails again, let it propagate so
                // health/status reflect reality.
                await TearDownAsync().ConfigureAwait(false);
                await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
                var result = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
                _lastSuccessUtc = DateTime.UtcNow;
                return result;
            }
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>
    /// Connect attempt with the configured geometric backoff. The first attempt fires after
    /// <see cref="ModbusReconnectOptions.InitialDelay"/>; each subsequent attempt sleeps for
    /// the previous delay (at least 100 ms) times <c>BackoffMultiplier</c>, capped at
    /// <c>MaxDelay</c>. Caller's cancellation token aborts the loop.
    /// </summary>
    /// <param name="ct">The cancellation token for the delays and connect attempts.</param>
    /// <remarks>
    /// Only socket-level failures are retried, and only when auto-reconnect is enabled; after
    /// the tenth failed attempt the last exception is rethrown.
    /// NOTE(review): a connect attempt that exceeds the timeout surfaces as
    /// <see cref="OperationCanceledException"/>, which is not classified as socket-level, so
    /// it ends the loop immediately instead of being retried — confirm that is intended.
    /// </remarks>
    private async Task ConnectWithBackoffAsync(CancellationToken ct)
    {
        var delay = _reconnect.InitialDelay;
        var attempt = 0;
        while (true)
        {
            if (delay > TimeSpan.Zero)
                await Task.Delay(delay, ct).ConfigureAwait(false);
            try
            {
                await ConnectAsync(ct).ConfigureAwait(false);
                return;
            }
            catch (Exception ex) when (IsSocketLevelFailure(ex) && _autoReconnect)
            {
                attempt++;
                if (attempt >= MaxConnectAttempts)
                {
                    // Bail to surface persistent failure to the caller; total wall time
                    // depends on the configured delays.
                    throw;
                }

                // Geometric growth, capped. The comparison is done in floating point BEFORE
                // narrowing to ticks: casting first (as this used to) overflows on a
                // pathological multiplier, so the cap never got a chance to apply.
                var grownTicks = (double)(Math.Max(delay.Ticks, BackoffSeed.Ticks) * _reconnect.BackoffMultiplier);
                var capTicks = (double)_reconnect.MaxDelay.Ticks;
                if (double.IsNaN(grownTicks) || grownTicks >= capTicks)
                    delay = _reconnect.MaxDelay;
                else if (grownTicks <= 0d)
                    delay = TimeSpan.Zero; // a non-positive delay means "retry immediately"
                else
                    delay = TimeSpan.FromTicks((long)grownTicks);
            }
        }
    }

    /// <summary>
    /// Performs one request/response exchange on the current stream: frames the PDU in an
    /// MBAP header, writes it, then reads and validates the reply.
    /// </summary>
    /// <param name="unitId">The Modbus unit/slave ID placed in the MBAP header.</param>
    /// <param name="pdu">The request PDU.</param>
    /// <param name="ct">The caller's cancellation token; linked with the configured timeout.</param>
    /// <returns>The response PDU (function code followed by data).</returns>
    /// <exception cref="InvalidOperationException">There is no connected stream.</exception>
    /// <exception cref="InvalidDataException">Transaction id mismatch or a frame too short to be valid.</exception>
    /// <exception cref="ModbusException">The response function code has its high bit set.</exception>
    /// <remarks>
    /// NOTE(review): after a timeout or transaction-id mismatch the socket is left open, so a
    /// late or partially read reply may still be queued for the next exchange — consider
    /// tearing the connection down on those paths.
    /// </remarks>
    private async Task<byte[]> SendOnceAsync(byte unitId, byte[] pdu, CancellationToken ct)
    {
        var stream = _stream ?? throw new InvalidOperationException("Transport not connected");

        // Transaction ids wrap 65535 -> 0 by design; unchecked keeps that true even if the
        // project is compiled with overflow checking enabled.
        var txId = unchecked(++_nextTx);

        // MBAP: [TxId(2)][Proto=0(2)][Length(2)][UnitId(1)] + PDU
        var adu = new byte[MbapHeaderLength + pdu.Length];
        adu[0] = (byte)(txId >> 8);
        adu[1] = (byte)(txId & 0xFF);
        // protocol id already zero
        var len = (ushort)(1 + pdu.Length); // unit id + pdu
        adu[4] = (byte)(len >> 8);
        adu[5] = (byte)(len & 0xFF);
        adu[6] = unitId;
        Buffer.BlockCopy(pdu, 0, adu, MbapHeaderLength, pdu.Length);

        // One timeout budget covers the whole exchange (write + both reads).
        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(_timeout);
        await stream.WriteAsync(adu.AsMemory(), cts.Token).ConfigureAwait(false);
        await stream.FlushAsync(cts.Token).ConfigureAwait(false);

        var header = new byte[MbapHeaderLength];
        await ReadExactlyAsync(stream, header, cts.Token).ConfigureAwait(false);
        var respTxId = (ushort)((header[0] << 8) | header[1]);
        if (respTxId != txId)
            throw new InvalidDataException($"Modbus TxId mismatch: expected {txId} got {respTxId}");

        // Length counts the unit id (already read as part of the header) plus the PDU, and a
        // PDU carries at least a function code — so anything below 2 cannot be a valid frame.
        // (A length of exactly 1 used to slip through and crash on respPdu[0].)
        var respLen = (ushort)((header[4] << 8) | header[5]);
        if (respLen < 2) throw new InvalidDataException($"Modbus response length too small: {respLen}");
        var respPdu = new byte[respLen - 1];
        await ReadExactlyAsync(stream, respPdu, cts.Token).ConfigureAwait(false);

        // Exception PDU: function code has high bit set.
        if ((respPdu[0] & 0x80) != 0)
        {
            if (respPdu.Length < 2)
                throw new InvalidDataException("Modbus exception response is missing its exception code");
            var fc = (byte)(respPdu[0] & 0x7F);
            var code = respPdu[1];
            throw new ModbusException(fc, code, $"Modbus exception fc={fc} code={code}");
        }

        return respPdu;
    }

    /// <summary>
    /// Distinguish socket-layer failures (eligible for reconnect-and-retry) from
    /// protocol-layer failures (must propagate — retrying the same PDU won't help if the
    /// PLC just returned exception 02 Illegal Data Address).
    /// </summary>
    /// <param name="ex">The exception to classify.</param>
    /// <returns><c>true</c> when <paramref name="ex"/> is a socket-layer failure.</returns>
    private static bool IsSocketLevelFailure(Exception ex) =>
        // EndOfStreamException derives from IOException, so it is covered by that arm.
        ex is IOException or SocketException or ObjectDisposedException;

    /// <summary>
    /// Disposes the current stream and client (best-effort) and clears both fields so the
    /// transport reads as "not connected" until the next successful connect.
    /// </summary>
    private async Task TearDownAsync()
    {
        try { if (_stream is not null) await _stream.DisposeAsync().ConfigureAwait(false); }
        catch { /* best-effort */ }
        _stream = null;

        try { _client?.Dispose(); }
        catch { /* best-effort */ }
        _client = null;
    }

    /// <summary>Reads from <paramref name="s"/> until <paramref name="buf"/> is completely filled.</summary>
    /// <param name="s">The stream to read from.</param>
    /// <param name="buf">The buffer to fill; its length is the number of bytes required.</param>
    /// <param name="ct">The cancellation token for the reads.</param>
    /// <exception cref="EndOfStreamException">The stream ended before the buffer was full.</exception>
    private static async Task ReadExactlyAsync(Stream s, byte[] buf, CancellationToken ct)
    {
        var filled = 0;
        while (filled < buf.Length)
        {
            var n = await s.ReadAsync(buf.AsMemory(filled), ct).ConfigureAwait(false);
            // A zero-byte read means the peer closed the connection.
            if (n == 0) throw new EndOfStreamException("Modbus socket closed mid-response");
            filled += n;
        }
    }

    /// <summary>Asynchronously disposes the transport and underlying socket resources.</summary>
    /// <returns>A task that completes when the stream, client, and gate have been released.</returns>
    public async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;

        try
        {
            if (_stream is not null) await _stream.DisposeAsync().ConfigureAwait(false);
        }
        catch { /* best-effort */ }

        // Guarded like the stream so a failure here can neither escape DisposeAsync nor skip
        // releasing the gate below.
        try { _client?.Dispose(); }
        catch { /* best-effort */ }

        _gate.Dispose();
    }
}
|