using System.Net.Sockets;

namespace ZB.MOM.WW.OtOpcUa.Driver.Modbus;

/// <summary>
/// Concrete Modbus TCP transport. Wraps a single <see cref="TcpClient"/> and serializes
/// requests so at most one transaction is in-flight at a time — Modbus servers typically
/// support concurrent transactions, but the single-flight model keeps the wire trace
/// easy to diagnose and avoids interleaved-response correlation bugs.
/// </summary>
/// <remarks>
/// <para>
/// Survives mid-transaction socket drops: when a send/read fails with a socket-level
/// error (<see cref="IOException"/> including <see cref="EndOfStreamException"/>,
/// <see cref="SocketException"/>, <see cref="ObjectDisposedException"/>) and auto-reconnect
/// is enabled, the transport disposes the dead socket, reconnects, and retries the PDU
/// exactly once. Deliberately limited to a single retry — further failures bubble up so
/// the driver's health surface reflects the real state instead of masking a dead PLC.
/// </para>
/// <para>
/// When auto-reconnect is enabled, an exchange that times out, is cancelled, or returns a
/// mis-framed response also drops the socket (without resending): a late or half-read
/// reply would otherwise be consumed as the answer to the next request. The next
/// <see cref="SendAsync"/> call re-dials before sending.
/// </para>
/// <para>
/// Why this matters for DL205/DL260: the AutomationDirect H2-ECOM100 does NOT send
/// TCP keepalives per docs/v2/dl205.md §behavioral-oddities, so any NAT/firewall
/// between the gateway and PLC can silently close an idle socket after 2-5 minutes.
/// Also enables OS-level SO_KEEPALIVE so the driver's own side detects a stuck
/// socket in reasonable time even when the application is mostly idle.
/// </para>
/// </remarks>
public sealed class ModbusTcpTransport : IModbusTransport
{
    /// <summary>MBAP header size: TxId(2) + ProtocolId(2) + Length(2) + UnitId(1).</summary>
    private const int MbapHeaderLength = 7;

    /// <summary>Number of failed connect attempts after which the backoff loop gives up.</summary>
    private const int MaxConnectAttempts = 10;

    /// <summary>Floor used as the growth base so a zero initial delay still backs off.</summary>
    private static readonly TimeSpan MinBackoffBase = TimeSpan.FromMilliseconds(100);

    private readonly string _host;
    private readonly int _port;
    private readonly TimeSpan _timeout;
    private readonly bool _autoReconnect;
    private readonly ModbusKeepAliveOptions _keepAlive;
    private readonly TimeSpan? _idleDisconnect;
    private readonly ModbusReconnectOptions _reconnect;
    private readonly SemaphoreSlim _gate = new(1, 1);

    private TcpClient? _client;
    private NetworkStream? _stream;
    private ushort _nextTx;
    private bool _disposed;

    // True once ConnectAsync has succeeded at least once. Distinguishes "never connected"
    // (caller error) from "connection lost" (eligible for a lazy re-dial).
    private bool _everConnected;
    private DateTime _lastSuccessUtc = DateTime.UtcNow;

    /// <summary>Initializes a new instance of the <see cref="ModbusTcpTransport"/> class.</summary>
    /// <param name="host">The host address or hostname of the Modbus server.</param>
    /// <param name="port">The TCP port of the Modbus server.</param>
    /// <param name="timeout">The timeout for socket operations.</param>
    /// <param name="autoReconnect">Whether to automatically reconnect on socket failures.</param>
    /// <param name="keepAlive">Optional keep-alive configuration for the socket.</param>
    /// <param name="idleDisconnect">Optional duration after which an idle socket is disconnected.</param>
    /// <param name="reconnect">Optional reconnect backoff configuration.</param>
    public ModbusTcpTransport(
        string host,
        int port,
        TimeSpan timeout,
        bool autoReconnect = true,
        ModbusKeepAliveOptions? keepAlive = null,
        TimeSpan? idleDisconnect = null,
        ModbusReconnectOptions? reconnect = null)
    {
        _host = host;
        _port = port;
        _timeout = timeout;
        _autoReconnect = autoReconnect;
        _keepAlive = keepAlive ?? new ModbusKeepAliveOptions();
        _idleDisconnect = idleDisconnect;
        _reconnect = reconnect ?? new ModbusReconnectOptions();
    }

    /// <summary>
    /// Connects to the Modbus TCP server with IPv4 preference. Any connection this
    /// transport already holds is closed first.
    /// </summary>
    /// <param name="ct">The cancellation token for the operation.</param>
    /// <exception cref="ObjectDisposedException">The transport has been disposed.</exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled or the configured timeout elapsed.
    /// </exception>
    public async Task ConnectAsync(CancellationToken ct)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(ModbusTcpTransport));
        }

        // Resolve the host explicitly + prefer IPv4. .NET's TcpClient default-constructor is
        // dual-stack (IPv6 first, fallback to IPv4) — but most Modbus TCP devices (PLCs and
        // simulators like pymodbus) bind 0.0.0.0 only, so the IPv6 attempt times out and we
        // burn the entire ConnectAsync budget before even trying IPv4. Resolving first +
        // dialing the IPv4 address directly sidesteps that.
        var addresses = await System.Net.Dns.GetHostAddressesAsync(_host, ct).ConfigureAwait(false);
        var ipv4 = System.Linq.Enumerable.FirstOrDefault(
            addresses, a => a.AddressFamily == AddressFamily.InterNetwork);
        var target = ipv4 ?? (addresses.Length > 0 ? addresses[0] : System.Net.IPAddress.Loopback);

        // Release any previous socket before dialing so a repeated connect neither leaks
        // the old handle nor holds two sessions open against the device at once.
        await TearDownAsync().ConfigureAwait(false);

        var client = new TcpClient(target.AddressFamily);
        try
        {
            EnableKeepAlive(client, _keepAlive);

            using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            cts.CancelAfter(_timeout);
            await client.ConnectAsync(target, _port, cts.Token).ConfigureAwait(false);

            var stream = client.GetStream();
            _client = client;
            _stream = stream;
        }
        catch
        {
            // A failed attempt must not leave an orphaned socket behind — the backoff loop
            // may call this many times in a row.
            client.Dispose();
            throw;
        }

        _everConnected = true;
        _lastSuccessUtc = DateTime.UtcNow;
    }

    /// <summary>
    /// Enable SO_KEEPALIVE with aggressive probe timing. DL205/DL260 doesn't send keepalives
    /// itself; having the OS probe the socket every ~30s lets the driver notice a dead PLC
    /// or broken NAT path long before the default 2-hour Windows idle timeout fires.
    /// Non-fatal if the underlying OS rejects the option (some older Linux / container
    /// sandboxes don't expose the fine-grained timing levers — the driver still works,
    /// application-level probe still detects problems).
    /// </summary>
    /// <param name="client">The not-yet-connected client whose socket is configured.</param>
    /// <param name="opts">Keep-alive settings; nothing is touched when disabled.</param>
    private static void EnableKeepAlive(TcpClient client, ModbusKeepAliveOptions opts)
    {
        if (!opts.Enabled)
        {
            return;
        }

        var socket = client.Client;
        try
        {
            socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);
        }
        catch (Exception)
        {
            // Best-effort: without SO_KEEPALIVE the timing knobs below are meaningless.
            return;
        }

        // Driver.Modbus-009: a TimeSpan < 1s previously truncated to 0 via the int cast,
        // which Windows / Linux interpret as "use the default" — silently defeating the
        // configured keep-alive timing. Round up to at least 1 second so a sub-second
        // configuration still produces a real keep-alive cadence. Negative values are
        // also clamped to 1 to avoid surfacing as OS errors.
        // Each knob is applied on its own so one unsupported option doesn't skip the rest.
        TrySetTcpOption(socket, SocketOptionName.TcpKeepAliveTime, ClampToWholeSeconds(opts.Time));
        TrySetTcpOption(socket, SocketOptionName.TcpKeepAliveInterval, ClampToWholeSeconds(opts.Interval));
        TrySetTcpOption(socket, SocketOptionName.TcpKeepAliveRetryCount, opts.RetryCount);
    }

    /// <summary>Sets one TCP-level socket option, ignoring platforms that reject it.</summary>
    private static void TrySetTcpOption(Socket socket, SocketOptionName name, int value)
    {
        try
        {
            socket.SetSocketOption(SocketOptionLevel.Tcp, name, value);
        }
        catch (Exception)
        {
            /* best-effort; older OSes may not expose the granular knobs */
        }
    }

    /// <summary>
    /// Driver.Modbus-009: cast a <see cref="TimeSpan"/> to a whole number of seconds with a
    /// minimum of 1 — protects callers from the int-cast truncation that turned 500 ms
    /// keep-alive timing into "use the default" on most OSes. Values too large for an
    /// <see cref="int"/> saturate at <see cref="int.MaxValue"/> instead of overflowing.
    /// </summary>
    /// <param name="ts">The timespan to clamp to whole seconds.</param>
    /// <returns>The duration rounded up to whole seconds, within [1, <see cref="int.MaxValue"/>].</returns>
    internal static int ClampToWholeSeconds(TimeSpan ts)
    {
        var seconds = Math.Ceiling(ts.TotalSeconds);
        if (seconds < 1)
        {
            return 1;
        }

        // Compare in floating point first: an out-of-range double-to-int cast is not a
        // reliable saturation and could come back negative (and then be clamped to 1).
        return seconds >= int.MaxValue ? int.MaxValue : (int)seconds;
    }

    /// <summary>Sends a Modbus PDU and returns the response, with automatic retry on socket failure.</summary>
    /// <param name="unitId">The Modbus unit/slave ID.</param>
    /// <param name="pdu">The protocol data unit to send.</param>
    /// <param name="ct">The cancellation token for the operation.</param>
    /// <returns>The response PDU (function code followed by data), MBAP header removed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="pdu"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="pdu"/> is too long for the MBAP length field.</exception>
    /// <exception cref="ObjectDisposedException">The transport has been disposed.</exception>
    /// <exception cref="InvalidOperationException">
    /// The transport was never connected, or the connection is gone and auto-reconnect is disabled.
    /// </exception>
    /// <exception cref="InvalidDataException">The response frame is malformed or its TxId does not match.</exception>
    /// <exception cref="ModbusException">The server answered with a Modbus exception PDU.</exception>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled or the configured timeout elapsed.
    /// </exception>
    public async Task<byte[]> SendAsync(byte unitId, byte[] pdu, CancellationToken ct)
    {
        if (pdu is null)
        {
            throw new ArgumentNullException(nameof(pdu));
        }

        // The MBAP length field is 16 bits and counts the unit id plus the PDU.
        if (pdu.Length > ushort.MaxValue - 1)
        {
            throw new ArgumentException("PDU is too long for the MBAP length field", nameof(pdu));
        }

        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(ModbusTcpTransport));
        }

        if (!_everConnected)
        {
            throw new InvalidOperationException("Transport not connected");
        }

        await _gate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            if (_disposed)
            {
                throw new ObjectDisposedException(nameof(ModbusTcpTransport));
            }

            // Connection state is only inspected while holding the gate: another caller may
            // be mid-reconnect (stream temporarily null), which must not look like
            // "not connected" to this one.
            if (_stream is null)
            {
                if (!_autoReconnect)
                {
                    throw new InvalidOperationException("Transport not connected");
                }

                // A previous exchange or reconnect left us without a socket — re-dial now.
                await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
            }
            else if (_idleDisconnect.HasValue && DateTime.UtcNow - _lastSuccessUtc > _idleDisconnect.Value)
            {
                // Proactive idle-disconnect: if the socket has been quiet longer than the
                // configured threshold, tear it down + reconnect before this PDU lands.
                // Defends against silent NAT / firewall reaping where the socket looks alive
                // locally but the upstream side dropped it minutes ago.
                await TearDownAsync().ConfigureAwait(false);
                await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
            }

            try
            {
                return await ExchangeAsync(unitId, pdu, ct).ConfigureAwait(false);
            }
            catch (Exception ex) when (_autoReconnect && !_disposed && IsSocketLevelFailure(ex))
            {
                // Mid-transaction drop: tear down the dead socket, reconnect (with backoff if
                // configured), resend. Single retry — if it fails again, let it propagate so
                // health/status reflect reality.
                await TearDownAsync().ConfigureAwait(false);
                await ConnectWithBackoffAsync(ct).ConfigureAwait(false);
                return await ExchangeAsync(unitId, pdu, ct).ConfigureAwait(false);
            }
        }
        finally
        {
            _gate.Release();
        }
    }

    /// <summary>
    /// Runs one transaction and records the success time. When auto-reconnect is enabled and
    /// the exchange ends in a timeout, cancellation or framing error, the socket is dropped
    /// before the exception propagates: the reply may still be in flight or half-read, and
    /// reusing the stream would hand those stale bytes to the next request.
    /// </summary>
    private async Task<byte[]> ExchangeAsync(byte unitId, byte[] pdu, CancellationToken ct)
    {
        try
        {
            var response = await SendOnceAsync(unitId, pdu, ct).ConfigureAwait(false);
            _lastSuccessUtc = DateTime.UtcNow;
            return response;
        }
        catch (Exception ex) when (_autoReconnect && (ex is OperationCanceledException || ex is InvalidDataException))
        {
            await TearDownAsync().ConfigureAwait(false);
            throw;
        }
    }

    /// <summary>
    /// Connect attempt with the configured geometric backoff. The first attempt fires after
    /// <see cref="ModbusReconnectOptions.InitialDelay"/>; each subsequent attempt sleeps for
    /// the previous delay (at least 100 ms) times BackoffMultiplier, capped at MaxDelay.
    /// Retries only happen for socket-level failures with auto-reconnect enabled, and stop
    /// after ten failed attempts. Caller's cancellation token aborts the loop.
    /// </summary>
    private async Task ConnectWithBackoffAsync(CancellationToken ct)
    {
        var delay = _reconnect.InitialDelay;
        var failures = 0;
        while (true)
        {
            if (delay > TimeSpan.Zero)
            {
                await Task.Delay(delay, ct).ConfigureAwait(false);
            }

            try
            {
                await ConnectAsync(ct).ConfigureAwait(false);
                return;
            }
            catch (Exception ex) when (_autoReconnect && !_disposed && IsSocketLevelFailure(ex))
            {
                failures++;
                if (failures >= MaxConnectAttempts)
                {
                    // Bail to surface persistent failure to the caller.
                    throw;
                }

                delay = NextBackoffDelay(delay);
            }
        }
    }

    /// <summary>
    /// Computes the next backoff delay: geometric growth from at least 100 ms, capped at
    /// MaxDelay. The cap is applied in floating point before converting back to ticks so a
    /// pathological multiplier cannot overflow into a negative (i.e. zero-wait) delay.
    /// </summary>
    private TimeSpan NextBackoffDelay(TimeSpan current)
    {
        var capTicks = Math.Max(_reconnect.MaxDelay.Ticks, 0L);
        var baseTicks = Math.Max(current.Ticks, MinBackoffBase.Ticks);
        var scaled = (double)(baseTicks * _reconnect.BackoffMultiplier);

        if (double.IsNaN(scaled) || scaled <= 0)
        {
            return TimeSpan.Zero;
        }

        return scaled >= capTicks ? TimeSpan.FromTicks(capTicks) : TimeSpan.FromTicks((long)scaled);
    }

    /// <summary>
    /// Performs exactly one request/response exchange on the current stream, bounded by the
    /// configured timeout: frames the PDU with an MBAP header, writes it, then reads and
    /// validates the response frame.
    /// </summary>
    /// <returns>The response PDU with the MBAP header stripped.</returns>
    private async Task<byte[]> SendOnceAsync(byte unitId, byte[] pdu, CancellationToken ct)
    {
        var stream = _stream ?? throw new InvalidOperationException("Transport not connected");

        // The transaction id is meant to wrap at 16 bits; stay wrap-safe even if the
        // project is compiled with arithmetic overflow checks on.
        var txId = _nextTx = unchecked((ushort)(_nextTx + 1));

        // MBAP: [TxId(2)][Proto=0(2)][Length(2)][UnitId(1)] + PDU
        var adu = new byte[MbapHeaderLength + pdu.Length];
        adu[0] = (byte)(txId >> 8);
        adu[1] = (byte)(txId & 0xFF);
        // protocol id already zero
        var len = (ushort)(1 + pdu.Length); // unit id + pdu
        adu[4] = (byte)(len >> 8);
        adu[5] = (byte)(len & 0xFF);
        adu[6] = unitId;
        Buffer.BlockCopy(pdu, 0, adu, MbapHeaderLength, pdu.Length);

        using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        cts.CancelAfter(_timeout);

        await stream.WriteAsync(adu.AsMemory(), cts.Token).ConfigureAwait(false);
        await stream.FlushAsync(cts.Token).ConfigureAwait(false);

        var header = new byte[MbapHeaderLength];
        await ReadExactlyAsync(stream, header, cts.Token).ConfigureAwait(false);

        var respTxId = (ushort)((header[0] << 8) | header[1]);
        if (respTxId != txId)
        {
            throw new InvalidDataException($"Modbus TxId mismatch: expected {txId} got {respTxId}");
        }

        // Length counts the unit id (already read with the header) plus the PDU, and a PDU
        // always carries at least a function code — anything below 2 is malformed.
        var respLen = (ushort)((header[4] << 8) | header[5]);
        if (respLen < 2)
        {
            throw new InvalidDataException($"Modbus response length too small: {respLen}");
        }

        var respPdu = new byte[respLen - 1];
        await ReadExactlyAsync(stream, respPdu, cts.Token).ConfigureAwait(false);

        // Exception PDU: function code has high bit set.
        if ((respPdu[0] & 0x80) != 0)
        {
            if (respPdu.Length < 2)
            {
                throw new InvalidDataException("Modbus exception response is missing its exception code");
            }

            var fc = (byte)(respPdu[0] & 0x7F);
            var code = respPdu[1];
            throw new ModbusException(fc, code, $"Modbus exception fc={fc} code={code}");
        }

        return respPdu;
    }

    /// <summary>
    /// Distinguish socket-layer failures (eligible for reconnect-and-retry) from
    /// protocol-layer failures (must propagate — retrying the same PDU won't help if the
    /// PLC just returned exception 02 Illegal Data Address).
    /// </summary>
    private static bool IsSocketLevelFailure(Exception ex) =>
        // EndOfStreamException derives from IOException, so it is covered here too.
        ex is IOException || ex is SocketException || ex is ObjectDisposedException;

    /// <summary>Disposes the current stream and client (best-effort) and clears both fields.</summary>
    private async Task TearDownAsync()
    {
        try
        {
            if (_stream is not null)
            {
                await _stream.DisposeAsync().ConfigureAwait(false);
            }
        }
        catch
        {
            /* best-effort */
        }

        _stream = null;

        try
        {
            _client?.Dispose();
        }
        catch
        {
            /* best-effort */
        }

        _client = null;
    }

    /// <summary>Reads until <paramref name="buf"/> is full.</summary>
    /// <exception cref="EndOfStreamException">The peer closed the connection first.</exception>
    private static async Task ReadExactlyAsync(Stream s, byte[] buf, CancellationToken ct)
    {
        var read = 0;
        while (read < buf.Length)
        {
            var n = await s.ReadAsync(buf.AsMemory(read), ct).ConfigureAwait(false);
            if (n == 0)
            {
                throw new EndOfStreamException("Modbus socket closed mid-response");
            }

            read += n;
        }
    }

    /// <summary>Asynchronously disposes the transport and underlying socket resources.</summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        // TearDownAsync swallows disposal errors, so disposing never throws for socket reasons.
        await TearDownAsync().ConfigureAwait(false);
        _gate.Dispose();
    }
}