Phase 3 PR 53 -- Transport reconnect-on-drop + SO_KEEPALIVE for DL205 no-keepalive quirk. AutomationDirect H2-ECOM100 does NOT send TCP keepalives per docs/v2/dl205.md behavioral-oddities section -- any NAT/firewall device between the gateway and the PLC can silently close an idle socket after 2-5 minutes of inactivity. The PLC itself never notices and the first SendAsync after the drop would previously surface as IOException / EndOfStreamException / SocketException to the caller even though the PLC is perfectly healthy. PR 53 makes ModbusTcpTransport survive mid-session socket drops: SendAsync wraps the previous body as SendOnceAsync; on the first attempt, if the failure is a socket-layer error (IOException, SocketException, EndOfStreamException, ObjectDisposedException) AND autoReconnect is enabled (default true), the transport tears down the dead socket, calls ConnectAsync to re-establish, and resends the PDU exactly once. Deliberately single-retry -- further failures propagate so the driver health surface reflects the real state, no masking a dead PLC. Protocol-layer failures (e.g. ModbusException with exception code 02) are specifically NOT caught by the reconnect path -- they would just come back with the same exception code after the reconnect, so retrying is wasted wire time. Socket-level vs protocol-level is a discriminator inside IsSocketLevelFailure. Also enables SO_KEEPALIVE on the TcpClient with aggressive timing: TcpKeepAliveTime=30s, TcpKeepAliveInterval=10s, TcpKeepAliveRetryCount=3. Total time-to-detect-dead-socket = 30 + 10*3 = 60s, vs the Windows default 2-hour idle + 9 retries = 2h40min. Best-effort: older OSes that don't expose the fine-grained keepalive knobs silently skip them (catch {}). 
New ModbusDriverOptions.AutoReconnect bool (default true) threads through to the default transport factory in ModbusDriver -- callers wanting the old 'fail loud on drop' behavior can set AutoReconnect=false, or use a custom transportFactory that ignores the option. Unit tests: ModbusTcpReconnectTests boots a FlakeyModbusServer in-process (real TcpListener on loopback) that serves one valid FC03 response then forcibly shuts down the socket. Transport_recovers_from_mid_session_drop_and_retries_successfully issues two consecutive SendAsync calls and asserts both return valid PDUs -- the second must trigger the reconnect path transparently. Transport_without_AutoReconnect_propagates_drop_to_caller asserts the legacy behavior when the opt-out is taken. Validates real socket semantics rather than mocked exceptions. 142/142 Modbus.Tests pass (113 prior + 2 mapper + 2 reconnect + 25 accumulated across PRs 45-52); 11/11 DL205 integration tests still pass with MODBUS_SIM_PROFILE=dl205 -- no regression from the transport change.
This commit is contained in:
@@ -0,0 +1,146 @@
|
||||
using System.Collections.Generic;
using System.Net;
using System.Net.Sockets;
using Shouldly;
using Xunit;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Modbus.Tests;
|
||||
|
||||
/// <summary>
/// Exercises <see cref="ModbusTcpTransport"/> against a real TCP listener that can close
/// its socket mid-session on demand. Verifies the PR 53 reconnect-on-drop behavior: after
/// the "first" socket is forcibly torn down, the next SendAsync must re-establish the
/// connection and complete the PDU without bubbling an error to the caller.
/// </summary>
[Trait("Category", "Unit")]
public sealed class ModbusTcpReconnectTests
{
    /// <summary>FC03 (Read Holding Registers) request PDU: start address 0, quantity 1.</summary>
    private static readonly byte[] ReadOneHoldingRegister = { 0x03, 0x00, 0x00, 0x00, 0x01 };

    /// <summary>
    /// Minimal in-process Modbus-TCP stub listening on an ephemeral loopback port. Every
    /// accepted connection is served on its own task: the stub reads an MBAP header + PDU,
    /// replies with a canned response that echoes the request function code followed by
    /// <c>quantity * 2</c> zeroed data bytes, then optionally closes the socket to simulate
    /// a NAT/firewall drop. The transaction counter is shared across all connections.
    /// </summary>
    private sealed class FlakeyModbusServer : IAsyncDisposable
    {
        /// <summary>MBAP header size: transaction id (2) + protocol id (2) + length (2) + unit id (1).</summary>
        private const int MbapHeaderLength = 7;

        /// <summary>Smallest PDU the stub can answer: function code + address (2) + quantity (2).</summary>
        private const int MinRequestPduLength = 5;

        /// <summary>Largest quantity whose byte count (quantity * 2) still fits the one-byte count field of a read response.</summary>
        private const int MaxRegistersPerRead = 125;

        private readonly TcpListener _listener;
        private readonly CancellationTokenSource _stop = new();

        // Captured once so background tasks never touch _stop.Token after the source is disposed.
        private readonly CancellationToken _stopToken;

        // Only mutated by the accept loop; read by DisposeAsync after the accept loop has completed.
        private readonly List<Task> _sessions = new();
        private readonly Task _acceptLoop;

        private int _txCount;
        private int _disposed;

        /// <summary>Ephemeral loopback port the listener was bound to.</summary>
        public int Port => ((IPEndPoint)_listener.LocalEndpoint).Port;

        /// <summary>
        /// Once the total number of answered transactions (across all connections) reaches
        /// this value, the stub closes the connection right after every reply. Defaults to
        /// <see cref="int.MaxValue"/>, i.e. effectively never drop.
        /// </summary>
        public int DropAfterNTransactions { get; set; } = int.MaxValue;

        /// <summary>Binds to an ephemeral loopback port and starts accepting connections in the background.</summary>
        public FlakeyModbusServer()
        {
            _stopToken = _stop.Token;
            _listener = new TcpListener(IPAddress.Loopback, 0);
            _listener.Start();
            _acceptLoop = Task.Run(AcceptLoopAsync);
        }

        /// <summary>Accepts connections until the server is stopped, handing each one to its own session task.</summary>
        private async Task AcceptLoopAsync()
        {
            while (!_stopToken.IsCancellationRequested)
            {
                TcpClient client;
                try
                {
                    client = await _listener.AcceptTcpClientAsync(_stopToken);
                }
                catch
                {
                    // Deliberately broad: cancellation, a stopped listener, or a socket error
                    // all mean the same thing for a test stub -- stop accepting.
                    return;
                }

                _sessions.Add(Task.Run(() => ServeAsync(client)));
            }
        }

        /// <summary>
        /// Serves one connection: answers requests in a loop until the peer disconnects, the
        /// server is stopped, a malformed frame arrives, or the drop threshold is reached.
        /// Never throws; any failure simply ends the session and closes the socket.
        /// </summary>
        private async Task ServeAsync(TcpClient client)
        {
            try
            {
                using var _ = client;
                var stream = client.GetStream();
                var header = new byte[MbapHeaderLength];

                while (!_stopToken.IsCancellationRequested && client.Connected)
                {
                    if (!await ReadExactly(stream, header, _stopToken)) return;

                    // MBAP length field counts the unit id byte plus the PDU.
                    var len = (header[4] << 8) | header[5];
                    if (len < 1 + MinRequestPduLength) return; // too short to carry fc + address + quantity

                    var pdu = new byte[len - 1];
                    if (!await ReadExactly(stream, pdu, _stopToken)) return;

                    var fc = pdu[0];
                    var qty = (pdu[3] << 8) | pdu[4];
                    if (qty > MaxRegistersPerRead) return; // byte count would not fit in one byte

                    var byteCount = qty * 2;
                    var respLen = 1 + 2 + byteCount; // unit id + (fc + byte count) + data

                    var adu = new byte[MbapHeaderLength + 2 + byteCount];
                    adu[0] = header[0];              // transaction id, echoed
                    adu[1] = header[1];
                                                     // adu[2..3]: protocol id stays 0
                    adu[4] = (byte)(respLen >> 8);
                    adu[5] = (byte)(respLen & 0xFF);
                    adu[6] = header[6];              // unit id, echoed
                    adu[7] = fc;
                    adu[8] = (byte)byteCount;
                                                     // data bytes stay 0

                    await stream.WriteAsync(adu, _stopToken);
                    await stream.FlushAsync(_stopToken);

                    // Sessions run concurrently, so the shared counter must be bumped atomically.
                    if (Interlocked.Increment(ref _txCount) >= DropAfterNTransactions)
                    {
                        // Simulate NAT/firewall silent close: slam the socket without a
                        // protocol-level goodbye, which is what DL260 + an intermediate
                        // middlebox would look like from the client's perspective.
                        client.Client.Shutdown(SocketShutdown.Both);
                        client.Close();
                        return;
                    }
                }
            }
            catch
            {
                /* best-effort: a test stub just drops the connection on any failure */
            }
        }

        /// <summary>
        /// Fills <paramref name="buf"/> completely from <paramref name="s"/>.
        /// </summary>
        /// <returns><c>true</c> when the buffer was filled; <c>false</c> when the peer closed the stream first.</returns>
        private static async Task<bool> ReadExactly(NetworkStream s, byte[] buf, CancellationToken ct)
        {
            var read = 0;
            while (read < buf.Length)
            {
                var n = await s.ReadAsync(buf.AsMemory(read), ct);
                if (n == 0) return false;
                read += n;
            }

            return true;
        }

        /// <summary>
        /// Stops accepting, cancels in-flight session I/O, and waits for every background
        /// task to finish so nothing outlives the test. Safe to call more than once.
        /// </summary>
        public async ValueTask DisposeAsync()
        {
            if (Interlocked.Exchange(ref _disposed, 1) != 0) return;

            _stop.Cancel();
            _listener.Stop();

            // Neither the accept loop nor the sessions let exceptions escape, so these awaits cannot throw.
            await _acceptLoop;
            await Task.WhenAll(_sessions);

            _stop.Dispose();
        }
    }

    [Fact]
    public async Task Transport_recovers_from_mid_session_drop_and_retries_successfully()
    {
        var ct = TestContext.Current.CancellationToken;
        await using var server = new FlakeyModbusServer { DropAfterNTransactions = 1 };
        await using var transport = new ModbusTcpTransport("127.0.0.1", server.Port, TimeSpan.FromSeconds(2), autoReconnect: true);
        await transport.ConnectAsync(ct);

        // First transaction succeeds; server then closes the socket.
        var first = await transport.SendAsync(unitId: 1, ReadOneHoldingRegister, ct);
        first[0].ShouldBe((byte)0x03);

        // Second transaction: the connection is dead, but auto-reconnect must transparently
        // spin up a new socket, resend, and produce a valid response. Before PR 53 this would
        // surface as EndOfStreamException / IOException to the caller.
        var second = await transport.SendAsync(unitId: 1, ReadOneHoldingRegister, ct);
        second[0].ShouldBe((byte)0x03);
    }

    [Fact]
    public async Task Transport_without_AutoReconnect_propagates_drop_to_caller()
    {
        var ct = TestContext.Current.CancellationToken;
        await using var server = new FlakeyModbusServer { DropAfterNTransactions = 1 };
        await using var transport = new ModbusTcpTransport("127.0.0.1", server.Port, TimeSpan.FromSeconds(2), autoReconnect: false);
        await transport.ConnectAsync(ct);

        // First transaction succeeds; server then closes the socket.
        _ = await transport.SendAsync(unitId: 1, ReadOneHoldingRegister, ct);

        // With the opt-out taken, the drop must reach the caller instead of being retried.
        await Should.ThrowAsync<Exception>(async () =>
            await transport.SendAsync(unitId: 1, ReadOneHoldingRegister, ct));
    }
}
|
||||
Reference in New Issue
Block a user