mbproxy: initial commit through Phase 9 (TxId multiplexing)

Adds the mbproxy service end-to-end. Phases 00-08 implement the
production-ready single-listener / 1:1-backend transparent Modbus TCP
proxy with bidirectional BCD rewriting for the ~54-PLC DL205/DL260
fleet. Phase 9 replaces the connection layer with a single backend
socket per PLC plus MBAP TxId rewriting, lifting the H2-ECOM100's
4-concurrent-client cap as an operational ceiling.

Phase 9 additions of note:
- PlcMultiplexer + UpstreamPipe + TxIdAllocator + CorrelationMap
- InFlightRequest with IReadOnlyList<InterestedParty> (load-bearing
  for Phase 10 read coalescing — do not collapse to a single field)
- Per-request watchdog: surfaces Modbus exception 0x0B to upstream
  on BackendRequestTimeoutMs, defending against lost responses,
  dead-PLC paths, and pymodbus 3.13.0's concurrent-multiplexed-
  request bug (its ServerRequestHandler.last_pdu state race)
- Status DTO + HTML gain inFlight / maxInFlight / txIdWraps /
  disconnectCascades / queueDepth (Tier 1.6 in docs/kpi.md)

Tests: 263 unit + 38 E2E. Multiplexer correctness under truly
concurrent backend traffic is proved against a stub backend in
PlcMultiplexerTests; MultiplexerE2ETests paces requests so pymodbus
3.13's single-PDU framer stays in known-good mode.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-14 01:49:35 -04:00
parent 2e937228a0
commit 56eee3c563
105 changed files with 18430 additions and 0 deletions
@@ -0,0 +1,664 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Net.Sockets;
using System.Threading.Channels;
using Mbproxy.Options;
using Polly;
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Owner of the single backend TCP connection to one PLC. Multiplexes many
/// <see cref="UpstreamPipe"/> instances onto that one socket by rewriting MBAP transaction
/// IDs so concurrent in-flight requests from different upstream clients remain
/// distinguishable on the shared wire. The multiplexer:
///
/// <list type="bullet">
/// <item><description>Opens and re-opens the backend socket through a Polly retry pipeline
/// that matches the <see cref="ResilienceOptions.BackendConnect"/> profile.</description></item>
/// <item><description>Runs one backend writer task that drains <see cref="_outboundChannel"/>
/// into the backend socket (single writer; no socket-level synchronisation needed).</description></item>
/// <item><description>Runs one backend reader task that decodes MBAP frames from the backend,
/// looks each frame up in the <see cref="CorrelationMap"/>, restores each interested
/// party's original TxId, and hands the frame to that party's
/// <see cref="UpstreamPipe._responseChannel"/>.</description></item>
/// <item><description>Cascades a backend disconnect by closing every attached pipe and
/// freeing every allocated proxy TxId, then waits for the next upstream request to
/// arrive (which triggers a fresh backend connect via Polly).</description></item>
/// </list>
///
/// <para><b>Threading invariants:</b> a single backend writer touches the backend socket
/// for sends; a single backend reader touches the same socket for receives. Per-upstream
/// read tasks call <see cref="OnUpstreamFrameAsync"/>, which allocates a proxy TxId, queues
/// the request frame into <see cref="_outboundChannel"/>, and returns. Upstream-side writes
/// flow through each pipe's response channel — never directly through this class.</para>
///
/// <para><b>Lifecycle:</b> the multiplexer is created with the backend offline. The first
/// <see cref="OnUpstreamFrameAsync"/> call triggers backend connect through the Polly
/// pipeline; <see cref="Attach"/> by itself only registers the pipe and does not connect.
/// Subsequent in-flight requests reuse the same socket. <see cref="DisposeAsync"/> tears
/// down the backend socket, the writer/reader tasks, and every attached pipe.</para>
/// </summary>
internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvider
{
// Capacity of the bounded outbound channel below. Because the channel is created with
// FullMode = Wait, producers wait for space instead of dropping frames once this many
// request frames are queued for the backend writer.
private const int OutboundChannelCapacity = 256;
// Constructor-injected collaborators; each is assigned exactly once in the constructor.
private readonly PlcOptions _plc;
private readonly ConnectionOptions _connectionOptions;
private readonly IPduPipeline _pipeline;
private readonly PerPlcContext _ctx;
private readonly ILogger<PlcMultiplexer> _logger;
// Optional: when null, EnsureBackendConnectedAsync performs a single connect attempt
// without going through a resilience pipeline.
private readonly ResiliencePipeline? _backendConnectPipeline;
// Hands out and reclaims proxy TxIds (TryAllocate / Release) and exposes the
// InFlightCount / WrapCount values surfaced through IMultiplexCountersProvider.
private readonly TxIdAllocator _allocator = new();
// Proxy TxId -> InFlightRequest. Entries are added when a request is queued and removed
// by whichever of the backend reader, the timeout watchdog, or a teardown gets there first.
private readonly CorrelationMap _correlation = new();
// Request frames waiting to be sent to the backend. Written by OnUpstreamFrameAsync
// (many upstream pipes, hence SingleWriter = false) and drained by the backend writer task.
private readonly Channel<byte[]> _outboundChannel = Channel.CreateBounded<byte[]>(
new BoundedChannelOptions(OutboundChannelCapacity)
{
FullMode = BoundedChannelFullMode.Wait,
SingleReader = true,
SingleWriter = false,
});
// Attached pipes — Phase 9 needs the list for the status page; Phase 10 will need it for
// coalescing (fan-out). ConcurrentDictionary keyed on UpstreamPipe.Id for O(1) detach.
private readonly ConcurrentDictionary<Guid, UpstreamPipe> _pipes = new();
// Lifecycle plumbing. Backend tasks share a CTS; cascading disconnect cancels it,
// which terminates both the writer and reader tasks. The next call to
// EnsureBackendConnectedAsync constructs a fresh CTS and a fresh backend socket.
// _backendLock is held while the four backend fields below are assigned or cleared as a
// group; note the connect fast path reads _backendSocket / _backendCts without taking it.
private readonly object _backendLock = new();
private Socket? _backendSocket;
private CancellationTokenSource? _backendCts;
private Task? _backendWriterTask;
private Task? _backendReaderTask;
// Cancelled by DisposeAsync. The watchdog runs on this token and every backend CTS is
// linked to it, so disposing the multiplexer stops all background tasks.
private readonly CancellationTokenSource _disposeCts = new();
// Set at the start of DisposeAsync; checked (without synchronisation) by the entry points.
private bool _disposed;
// The per-request timeout watchdog started in the constructor; joined in DisposeAsync.
private Task? _watchdogTask;
/// <summary>
/// Creates a multiplexer for one PLC. No backend connection is opened here; the
/// constructor only stores its collaborators, publishes itself as the counters'
/// live multiplex provider, and starts the request-timeout watchdog.
/// </summary>
/// <param name="plc">Backend PLC settings (name, host, port).</param>
/// <param name="connectionOptions">Connect and per-request timeout settings.</param>
/// <param name="pipeline">PDU pipeline applied to every request and response.</param>
/// <param name="perPlcContext">Per-PLC context carrying the counters.</param>
/// <param name="logger">Logger for multiplexer events.</param>
/// <param name="backendConnectPipeline">
/// Optional resilience pipeline wrapped around backend connects; <c>null</c> means a
/// single plain connect attempt.
/// </param>
public PlcMultiplexer(
    PlcOptions plc,
    ConnectionOptions connectionOptions,
    IPduPipeline pipeline,
    PerPlcContext perPlcContext,
    ILogger<PlcMultiplexer> logger,
    ResiliencePipeline? backendConnectPipeline = null)
{
    (_plc, _connectionOptions, _pipeline, _ctx, _logger, _backendConnectPipeline) =
        (plc, connectionOptions, pipeline, perPlcContext, logger, backendConnectPipeline);

    // From here on, status snapshots for this PLC read their multiplex counters from us.
    _ctx.Counters.SetMultiplexProvider(this);

    // Start the background sweep that times out in-flight requests older than
    // BackendRequestTimeoutMs. It covers lost responses, dead-PLC paths, and backends that
    // mis-echo TxIds (e.g. pymodbus 3.13.0's concurrent-multiplexed-request bug — see
    // test files). The token is read inside the lambda, i.e. when the task actually runs.
    _watchdogTask = Task.Run(
        () => RunRequestTimeoutWatchdogAsync(_disposeCts.Token),
        CancellationToken.None);
}
// ── IMultiplexCountersProvider ────────────────────────────────────────────
/// <summary>In-flight request count, as reported by the TxId allocator.</summary>
public long InFlightCount
{
    get { return _allocator.InFlightCount; }
}

/// <summary>TxId wrap count, as reported by the TxId allocator.</summary>
public long TxIdWraps
{
    get { return _allocator.WrapCount; }
}

/// <summary>Number of request frames currently queued for the backend writer.</summary>
public long BackendQueueDepth
{
    get { return _outboundChannel.Reader.Count; }
}

// ── Public surface ────────────────────────────────────────────────────────
/// <summary>
/// Point-in-time copy of the attached upstream pipes; each access builds a new array.
/// Used by the status page.
/// </summary>
public IReadOnlyCollection<UpstreamPipe> AttachedPipes
{
    get { return _pipes.Values.ToArray(); }
}
/// <summary>
/// Registers <paramref name="pipe"/> with this multiplexer under its
/// <c>Id</c>, replacing any existing registration with the same id. This method does
/// not start the pipe's loops; <see cref="StartPipeAsync"/> attaches the pipe and then
/// runs the read and write loops, wiring frames into <see cref="OnUpstreamFrameAsync"/>.
/// </summary>
/// <param name="pipe">The upstream pipe to register.</param>
/// <exception cref="ObjectDisposedException">The multiplexer has been disposed.</exception>
public void Attach(UpstreamPipe pipe)
{
    if (!_disposed)
    {
        _pipes[pipe.Id] = pipe;
        return;
    }

    throw new ObjectDisposedException(nameof(PlcMultiplexer));
}
/// <summary>
/// Attaches <paramref name="pipe"/>, launches its write loop in the background, and
/// starts its read loop with every received frame routed to
/// <see cref="OnUpstreamFrameAsync"/>. The pipe is removed from the attached set once
/// the read loop finishes.
/// </summary>
/// <param name="pipe">The upstream pipe to run.</param>
/// <param name="ct">Token passed to both the read loop and the write loop.</param>
/// <returns>The read-loop task; it completes when the pipe's read loop ends.</returns>
/// <exception cref="ObjectDisposedException">The multiplexer has been disposed.</exception>
public Task StartPipeAsync(UpstreamPipe pipe, CancellationToken ct)
{
    Attach(pipe);

    // Fire-and-forget: the write loop is not awaited here. It ends when the pipe is
    // disposed or its channel completes, and is joined inside the pipe's DisposeAsync.
    _ = Task.Run(() => pipe.RunWriteLoopAsync(ct), CancellationToken.None);

    var readLoop = pipe.RunReadLoopAsync(
        (frame, frameCt) => OnUpstreamFrameAsync(pipe, frame, frameCt),
        ct);

    // Detach (but do not dispose) once reading stops, however it stopped. Disposal is
    // owned by the listener or by the cascade walker.
    _ = readLoop.ContinueWith(
        finishedReadLoop =>
        {
            _pipes.TryRemove(pipe.Id, out _);
        },
        TaskScheduler.Default);

    return readLoop;
}
/// <summary>
/// Shuts the multiplexer down. In order: unlinks the counters provider, cancels the
/// dispose token, waits up to two seconds for the watchdog, tears down the backend
/// (cascading to upstream pipes), completes the outbound channel, and disposes any
/// pipes that are still attached. Calls after the first return immediately.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    // Unlink from the counters first so a status snapshot taken during teardown does not
    // read internal state that is about to be emptied.
    _ctx.Counters.SetMultiplexProvider(null);

    await _disposeCts.CancelAsync().ConfigureAwait(false);

    // Give the watchdog a bounded chance to finish any log/dispatch it is in the middle
    // of, so tests asserting on counter state see a settled value.
    if (_watchdogTask is { } watchdog)
    {
        try
        {
            await watchdog.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false);
        }
        catch
        {
            /* swallow */
        }
    }

    await TearDownBackendAsync("disposing", cascadeUpstreams: true).ConfigureAwait(false);
    _outboundChannel.Writer.TryComplete();

    // Anything still attached (e.g. no backend was connected, so the teardown above
    // returned early) is disposed here, best effort.
    foreach (var remaining in _pipes.Values)
    {
        try
        {
            await remaining.DisposeAsync().ConfigureAwait(false);
        }
        catch
        {
            /* best effort */
        }
    }

    _pipes.Clear();
    _disposeCts.Dispose();
}
// ── Backend connect / teardown ────────────────────────────────────────────
// Serialises concurrent connect attempts coming from many upstream pipes.
private readonly SemaphoreSlim _connectGate = new(1, 1);

/// <summary>
/// Makes sure a live backend connection exists, connecting if necessary. On a
/// successful connect the backend writer and reader tasks are started on a fresh
/// cancellation source linked to the dispose token.
/// </summary>
/// <param name="ct">Cancels waiting for the connect gate and the connect itself.</param>
/// <returns>
/// <c>true</c> when a backend connection is available; <c>false</c> when the multiplexer
/// is disposed or the connect failed, timed out, or was cancelled (failures are logged
/// and counted, not thrown).
/// </returns>
private async Task<bool> EnsureBackendConnectedAsync(CancellationToken ct)
{
    if (_disposed)
    {
        return false;
    }

    // Fast path: already connected.
    if (_backendSocket is { Connected: true } && _backendCts is { IsCancellationRequested: false })
    {
        return true;
    }

    await _connectGate.WaitAsync(ct).ConfigureAwait(false);
    try
    {
        // Re-check after acquiring the gate: another caller may have connected, or the
        // multiplexer may have been disposed, while we were waiting.
        if (_disposed)
        {
            return false;
        }

        if (_backendSocket is { Connected: true } && _backendCts is { IsCancellationRequested: false })
        {
            return true;
        }

        Socket? backend = null;
        try
        {
            if (_backendConnectPipeline is not null)
            {
                // Every retry attempt gets its own socket: a socket that has failed or
                // been cancelled mid-connect is not reliably reusable for another attempt.
                await _backendConnectPipeline.ExecuteAsync(async attemptToken =>
                {
                    backend = await ConnectFreshSocketAsync(attemptToken).ConfigureAwait(false);
                }, ct).ConfigureAwait(false);
            }
            else
            {
                backend = await ConnectFreshSocketAsync(ct).ConfigureAwait(false);
            }
        }
        catch (Exception ex)
        {
            string reason = ex is OperationCanceledException
                ? $"Backend connect timed out or cancelled after {_connectionOptions.BackendConnectTimeoutMs} ms"
                : ex.Message;
            MultiplexerLogEvents.BackendFailed(_logger, _plc.Name, reason);
            _ctx.Counters.IncrementConnectFailed();
            return false;
        }

        if (backend is null)
        {
            // Defensive: the pipeline returned without running the connect callback.
            MultiplexerLogEvents.BackendFailed(_logger, _plc.Name, "Backend connect pipeline completed without connecting");
            _ctx.Counters.IncrementConnectFailed();
            return false;
        }

        // Disposal may have started while the connect was in flight; do not publish a
        // socket that nothing would ever tear down.
        CancellationTokenSource backendCts;
        try
        {
            if (_disposed)
            {
                backend.Dispose();
                return false;
            }

            backendCts = CancellationTokenSource.CreateLinkedTokenSource(_disposeCts.Token);
        }
        catch (ObjectDisposedException)
        {
            // _disposeCts was disposed between the check and the link.
            backend.Dispose();
            return false;
        }

        // Successful connect. Wire up the backend tasks.
        lock (_backendLock)
        {
            _backendSocket = backend;
            _backendCts = backendCts;
            _backendWriterTask = Task.Run(() => RunBackendWriterAsync(backend, backendCts.Token), CancellationToken.None);
            _backendReaderTask = Task.Run(() => RunBackendReaderAsync(backend, backendCts.Token), CancellationToken.None);
        }

        _ctx.Counters.IncrementConnectSuccess();
        MultiplexerLogEvents.BackendConnected(_logger, _plc.Name, _plc.Host, _plc.Port);
        return true;
    }
    finally
    {
        _connectGate.Release();
    }
}

/// <summary>
/// Performs one connect attempt on a brand-new socket, bounded by
/// BackendConnectTimeoutMs. The socket is disposed if the attempt fails.
/// </summary>
private async Task<Socket> ConnectFreshSocketAsync(CancellationToken attemptToken)
{
    var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
    {
        NoDelay = true,
    };

    try
    {
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(attemptToken);
        timeoutCts.CancelAfter(_connectionOptions.BackendConnectTimeoutMs);
        await socket.ConnectAsync(_plc.Host, _plc.Port, timeoutCts.Token).ConfigureAwait(false);
        return socket;
    }
    catch
    {
        socket.Dispose();
        throw;
    }
}
/// <summary>
/// Tears down the current backend connection, if any: detaches the backend fields,
/// cancels the backend tasks, closes the socket, drops every in-flight correlation
/// entry (freeing its proxy TxId), optionally disposes all attached upstream pipes,
/// and finally waits briefly for the writer and reader tasks to finish.
/// </summary>
/// <param name="reason">Human-readable cause, included in the disconnect log event.</param>
/// <param name="cascadeUpstreams">
/// When <c>true</c>, every attached upstream pipe is disposed and the disconnect-cascade
/// counter is increased by the number of pipes that were attached.
/// </param>
private async Task TearDownBackendAsync(string reason, bool cascadeUpstreams)
{
    Socket? oldSocket;
    CancellationTokenSource? oldCts;
    Task? writer, reader;

    // Detach all backend state atomically so concurrent teardown calls see nothing to do
    // and the next EnsureBackendConnectedAsync starts from a clean slate.
    lock (_backendLock)
    {
        oldSocket = _backendSocket;
        oldCts = _backendCts;
        writer = _backendWriterTask;
        reader = _backendReaderTask;
        _backendSocket = null;
        _backendCts = null;
        _backendWriterTask = null;
        _backendReaderTask = null;
    }

    if (oldSocket is null && oldCts is null)
    {
        return;
    }

    // Cancel before closing the socket so the backend tasks can recognise the resulting
    // I/O failures as part of a teardown.
    try { oldCts?.Cancel(); } catch { /* best effort */ }
    try { oldSocket?.Shutdown(SocketShutdown.Both); } catch { /* already closed */ }
    try { oldSocket?.Dispose(); } catch { /* best effort */ }

    // No response can arrive for these requests any more: drop them and free their TxIds.
    var dropped = _correlation.DrainAll();
    foreach (var kvp in dropped)
    {
        _allocator.Release(kvp.Key);
    }

    int upstreamCount = 0;
    if (cascadeUpstreams)
    {
        // Per the design doc, ALL attached upstreams cascade on backend disconnect —
        // not only those that had a request in flight.
        upstreamCount = _pipes.Count;

        // Snapshot before disposing: disposal may detach pipes from the dictionary.
        var pipeList = _pipes.Values.ToArray();
        foreach (var pipe in pipeList)
        {
            try { await pipe.DisposeAsync().ConfigureAwait(false); }
            catch { /* best effort */ }
        }

        _pipes.Clear();
        _ctx.Counters.AddDisconnectCascades(upstreamCount);
    }

    // Best-effort join, bounded so a stuck task cannot hang the teardown.
    try { if (writer is not null) await writer.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }
    try { if (reader is not null) await reader.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }
    oldCts?.Dispose();

    if (upstreamCount > 0 || dropped.Count > 0)
    {
        MultiplexerLogEvents.BackendDisconnected(_logger, _plc.Name, upstreamCount, dropped.Count, reason);
    }
}
// ── Backend writer / reader tasks ─────────────────────────────────────────
/// <summary>
/// Backend writer loop: takes request frames from the outbound channel and sends each
/// one completely to <paramref name="backend"/>. A send failure on a live connection
/// triggers a cascading backend teardown.
/// </summary>
/// <param name="backend">The backend socket this writer was started for.</param>
/// <param name="ct">Cancelled when this backend connection is torn down or the multiplexer is disposed.</param>
private async Task RunBackendWriterAsync(Socket backend, CancellationToken ct)
{
    try
    {
        await foreach (var frame in _outboundChannel.Reader.ReadAllAsync(ct).ConfigureAwait(false))
        {
            // SendAsync may accept fewer bytes than requested; loop until the frame is out.
            int sent = 0;
            while (sent < frame.Length)
            {
                int n = await backend.SendAsync(
                    frame.AsMemory(sent, frame.Length - sent),
                    SocketFlags.None,
                    ct).ConfigureAwait(false);
                if (n == 0)
                {
                    throw new SocketException((int)SocketError.ConnectionReset);
                }

                sent += n;
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception) when (ct.IsCancellationRequested)
    {
        // The token is only cancelled by a teardown or by disposal, both of which also
        // close this socket. The failure is a side effect of that; requesting another
        // teardown here could hit a backend connection opened in the meantime.
    }
    catch (Exception ex)
    {
        // Backend failure — cascade.
        _ = TearDownBackendAsync($"writer fault: {ex.Message}", cascadeUpstreams: true);
    }
}
/// <summary>
/// Backend reader loop: reads MBAP frames from <paramref name="backend"/>, matches each
/// to its in-flight request by proxy TxId, frees the TxId, records the round-trip time,
/// runs the response through the PDU pipeline, and forwards it to every interested
/// upstream pipe with that party's original TxId restored. End of stream, a malformed
/// header, an oversized frame, or a socket fault triggers a cascading backend teardown.
/// </summary>
/// <param name="backend">The backend socket this reader was started for.</param>
/// <param name="ct">Cancelled when this backend connection is torn down or the multiplexer is disposed.</param>
private async Task RunBackendReaderAsync(Socket backend, CancellationToken ct)
{
    byte[] headerBuf = new byte[MbapFrame.HeaderSize];
    try
    {
        while (!ct.IsCancellationRequested)
        {
            if (!await FillAsync(backend, headerBuf, 0, MbapFrame.HeaderSize, ct).ConfigureAwait(false))
            {
                break;
            }

            if (!MbapFrame.TryParseHeader(headerBuf.AsSpan(),
                    out ushort proxyTxId, out _, out ushort length, out _))
            {
                break;
            }

            if (length < 1)
            {
                // Degenerate frame — drop.
                continue;
            }

            // The MBAP length field counts the unit id (already read with the header)
            // plus the PDU, so the bytes still to read are length - 1.
            int pduBodyLen = length - 1;
            if (pduBodyLen > MbapFrame.MaxPduBodySize)
            {
                // Frame too large — backend is misbehaving; force teardown.
                _logger.LogWarning(
                    "Oversized backend frame: Plc={Plc} PduBody={Body} > Max={Max}",
                    _plc.Name, pduBodyLen, MbapFrame.MaxPduBodySize);
                break;
            }

            byte[] frame = new byte[MbapFrame.HeaderSize + pduBodyLen];
            Buffer.BlockCopy(headerBuf, 0, frame, 0, MbapFrame.HeaderSize);
            if (!await FillAsync(backend, frame, MbapFrame.HeaderSize, pduBodyLen, ct).ConfigureAwait(false))
            {
                break;
            }

            if (!_correlation.TryRemove(proxyTxId, out var inFlight))
            {
                // No correlation entry — a stale response after a cascade or timeout, or
                // something unsolicited from the PLC. Drop the frame.
                continue;
            }

            // Free the allocator slot immediately so it can be reused.
            _allocator.Release(proxyTxId);

            // The send time is wall-clock, but UpdateRoundTripEwma is fed Stopwatch
            // ticks, so convert the elapsed time using Stopwatch.Frequency.
            TimeSpan roundTrip = DateTimeOffset.UtcNow - inFlight.SentAtUtc;
            long ticks = (long)(roundTrip.TotalSeconds * Stopwatch.Frequency);
            if (ticks > 0)
            {
                _ctx.Counters.UpdateRoundTripEwma(ticks);
            }

            // Apply the BCD rewriter on the response. Build a per-call context clone
            // that carries CurrentRequest so the rewriter can decode FC03/04 slots.
            var responseCtx = _ctx.WithCurrentRequest(inFlight);
            _pipeline.Process(
                MbapDirection.ResponseToClient,
                frame.AsSpan(0, MbapFrame.HeaderSize),
                frame.AsSpan(MbapFrame.HeaderSize, pduBodyLen),
                responseCtx);

            // Fan out to each interested party with their original TxId restored.
            // Phase 9: always exactly one party. Phase 10: N parties (read coalescing).
            bool singleParty = inFlight.InterestedParties.Count == 1;
            foreach (var party in inFlight.InterestedParties)
            {
                if (!party.Pipe.IsAlive)
                {
                    continue;
                }

                // With several parties each one needs its own copy, because the TxId
                // bytes are patched per party; a single party can take the buffer as is.
                byte[] outFrame = singleParty ? frame : (byte[])frame.Clone();
                outFrame[0] = (byte)(party.OriginalTxId >> 8);
                outFrame[1] = (byte)(party.OriginalTxId & 0xFF);

                try
                {
                    await party.Pipe.SendResponseAsync(outFrame, ct).ConfigureAwait(false);
                }
                catch (Exception ex) when (!ct.IsCancellationRequested)
                {
                    // One upstream pipe going away between the IsAlive check and the send
                    // must not take down the backend connection shared by everyone else.
                    // Delivery is best-effort, as in the timeout watchdog.
                    _logger.LogDebug(
                        ex,
                        "Response delivery to upstream failed: Plc={Plc} ProxyTxId={ProxyTxId}",
                        _plc.Name, proxyTxId);
                }
            }
        }

        // Leaving the loop with the token still live means the backend closed or sent
        // garbage — cascade. If the token is cancelled, a teardown is already under way.
        if (!ct.IsCancellationRequested)
        {
            _ = TearDownBackendAsync("backend reader EOF", cascadeUpstreams: true);
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception) when (ct.IsCancellationRequested)
    {
        // The socket was closed by a teardown or disposal that is already in progress;
        // requesting another teardown could hit a backend connection opened since.
    }
    catch (Exception ex)
    {
        _ = TearDownBackendAsync($"reader fault: {ex.Message}", cascadeUpstreams: true);
    }
}
// ── Upstream → multiplexer entry point ────────────────────────────────────
/// <summary>
/// Handles one request frame read from an upstream pipe: ensures the backend is
/// connected, allocates a proxy TxId, records the request in the correlation map, runs
/// the request through the PDU pipeline, rewrites the MBAP TxId to the proxy TxId, and
/// queues the frame for the backend writer.
/// </summary>
/// <param name="pipe">The upstream pipe the frame came from.</param>
/// <param name="frame">The complete MBAP frame; it is modified in place.</param>
/// <param name="ct">Cancels the backend connect and the enqueue.</param>
/// <remarks>
/// Frames too short to contain a function code, or whose header does not parse, are
/// dropped silently. If the backend cannot be connected the pipe is disposed. If no
/// proxy TxId is free the client receives Modbus exception 04.
/// </remarks>
private async ValueTask OnUpstreamFrameAsync(UpstreamPipe pipe, byte[] frame, CancellationToken ct)
{
    if (_disposed)
    {
        return;
    }

    // Ensure backend is connected. Failure here means we cannot service the request;
    // close the upstream pipe (consistent with the 1:1 model's behaviour on connect
    // failure).
    if (!await EnsureBackendConnectedAsync(ct).ConfigureAwait(false))
    {
        try { await pipe.DisposeAsync().ConfigureAwait(false); } catch { /* best effort */ }
        return;
    }

    // A usable request needs the full MBAP header plus at least the function-code byte.
    // Rejecting header-only frames here keeps the frame[pduOffset] read below in bounds
    // and guarantees nothing has been allocated yet when we bail out.
    if (frame.Length <= MbapFrame.HeaderSize)
    {
        return;
    }

    if (!MbapFrame.TryParseHeader(frame.AsSpan(0, MbapFrame.HeaderSize),
            out ushort originalTxId, out _, out _, out byte unitId))
    {
        return;
    }

    int pduOffset = MbapFrame.HeaderSize;
    byte fcByte = frame[pduOffset];

    if (!_allocator.TryAllocate(out ushort proxyTxId))
    {
        MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");

        // Synthesize Modbus exception 04 (Slave Device Failure).
        byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
        await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
        return;
    }

    // Capture start/qty for FC03/04 so the response decoder has the correlation it needs.
    ushort startAddr = 0;
    ushort qty = 0;
    if (fcByte is 0x03 or 0x04 && frame.Length >= pduOffset + 5)
    {
        startAddr = (ushort)((frame[pduOffset + 1] << 8) | frame[pduOffset + 2]);
        qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
    }

    var inFlight = new InFlightRequest(
        UnitId: unitId,
        Fc: fcByte,
        StartAddress: startAddr,
        Qty: qty,
        InterestedParties: [new InterestedParty(pipe, originalTxId)],
        SentAtUtc: DateTimeOffset.UtcNow);

    if (!_correlation.TryAdd(proxyTxId, inFlight))
    {
        // Should be impossible: the allocator just guaranteed proxyTxId is free.
        _allocator.Release(proxyTxId);
        _logger.LogError("CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}", proxyTxId);
        return;
    }

    // Peak in-flight tracking.
    _ctx.Counters.ObserveInFlight(_allocator.InFlightCount);

    try
    {
        // Apply the BCD rewriter on the request. Use a per-call context with CurrentRequest
        // (the rewriter doesn't currently need it on request, but Phase 10 may).
        var requestCtx = _ctx.WithCurrentRequest(inFlight);
        _pipeline.Process(
            MbapDirection.RequestToBackend,
            frame.AsSpan(0, MbapFrame.HeaderSize),
            frame.AsSpan(MbapFrame.HeaderSize, frame.Length - MbapFrame.HeaderSize),
            requestCtx);

        // Overwrite the MBAP TxId with the proxy TxId.
        frame[0] = (byte)(proxyTxId >> 8);
        frame[1] = (byte)(proxyTxId & 0xFF);

        // Enqueue for the backend writer task.
        await _outboundChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
    }
    catch (ChannelClosedException)
    {
        // Channel completed during shutdown — the request will never be sent.
        ReleaseUnsentRequest(proxyTxId);
    }
    catch
    {
        // Cancellation or a pipeline failure: the request was never queued, so free its
        // slot now instead of leaving it for the timeout watchdog, then let the caller
        // see the original exception.
        ReleaseUnsentRequest(proxyTxId);
        throw;
    }
}

/// <summary>
/// Removes the correlation entry for a request that was never queued and frees its proxy
/// TxId. Does nothing if another path (watchdog, teardown) already removed the entry.
/// </summary>
private void ReleaseUnsentRequest(ushort proxyTxId)
{
    if (_correlation.TryRemove(proxyTxId, out _))
    {
        _allocator.Release(proxyTxId);
    }
}
// ── Per-request timeout watchdog ──────────────────────────────────────────
/// <summary>
/// Periodically scans the correlation map for in-flight requests whose response has
/// not arrived within <see cref="ConnectionOptions.BackendRequestTimeoutMs"/>. For each
/// stale entry: removes it from the map, frees its allocator slot, and delivers a
/// Modbus exception (code 0x0B / Gateway Target Device Failed To Respond) to each
/// interested party with the original TxId restored.
///
/// <para><b>Why this exists.</b> In the 1:1 connection model, a lost response would
/// fault the dedicated backend socket and the upstream pair would close. The multiplexed
/// model needs an explicit per-request timer because a single missing or mis-routed
/// response would otherwise leak a correlation entry forever and hang the upstream
/// pipe indefinitely. Real-world causes: PLC drops a response, network packet loss,
/// backend that mis-echoes MBAP TxIds.</para>
///
/// <para>An unexpected failure during one sweep is logged and the watchdog keeps
/// running; only cancellation of <paramref name="ct"/> ends it.</para>
/// </summary>
/// <param name="ct">Cancelled when the multiplexer is disposed.</param>
private async Task RunRequestTimeoutWatchdogAsync(CancellationToken ct)
{
    // Tick at a quarter of the request timeout for responsive cleanup, but never more
    // often than every 100 ms so very small timeouts don't make the watchdog busy-wake.
    int tickMs = Math.Max(100, _connectionOptions.BackendRequestTimeoutMs / 4);
    try
    {
        while (!ct.IsCancellationRequested)
        {
            await Task.Delay(tickMs, ct).ConfigureAwait(false);

            try
            {
                await SweepTimedOutRequestsAsync(ct).ConfigureAwait(false);
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Keep the watchdog alive: if it stopped here, later lost responses
                // would never be timed out. Entries this sweep did not reach are
                // retried on the next tick.
                _logger.LogError(ex, "Request-timeout watchdog faulted: Plc={Plc}", _plc.Name);
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Request-timeout watchdog faulted: Plc={Plc}", _plc.Name);
    }
}

/// <summary>
/// One watchdog pass: times out every in-flight request older than
/// BackendRequestTimeoutMs and answers each interested party with exception 0x0B.
/// </summary>
private async Task SweepTimedOutRequestsAsync(CancellationToken ct)
{
    var threshold = DateTimeOffset.UtcNow.AddMilliseconds(-_connectionOptions.BackendRequestTimeoutMs);
    var stale = _correlation.SnapshotOlderThan(threshold);
    if (stale.Count == 0)
    {
        return;
    }

    foreach (var kvp in stale)
    {
        ushort proxyTxId = kvp.Key;

        // Try to claim the entry; if another path (response, cascade) already removed it,
        // skip — no work to do.
        if (!_correlation.TryRemove(proxyTxId, out var req))
        {
            continue;
        }

        _allocator.Release(proxyTxId);
        long elapsedMs = (long)(DateTimeOffset.UtcNow - req.SentAtUtc).TotalMilliseconds;

        foreach (var party in req.InterestedParties)
        {
            MultiplexerLogEvents.RequestTimeout(
                _logger, _plc.Name, proxyTxId, party.OriginalTxId, req.Fc, elapsedMs);

            if (!party.Pipe.IsAlive)
            {
                continue;
            }

            // Deliver Modbus exception 0x0B (Gateway Target Device Failed To Respond)
            // to the upstream client. This lets the client's library raise a clean
            // ModbusException rather than hanging on a timeout.
            byte[] excFrame = BuildExceptionFrame(party.OriginalTxId, req.UnitId, req.Fc, exceptionCode: 0x0B);
            try
            {
                await party.Pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
            }
            catch
            {
                // Best-effort delivery; if the pipe is going down, the client
                // discovers the failure through its own socket close path.
            }
        }
    }
}
// ── Helpers ───────────────────────────────────────────────────────────────
/// <summary>
/// Receives exactly <paramref name="count"/> bytes from <paramref name="socket"/> into
/// <paramref name="buf"/>, starting at <paramref name="offset"/>, issuing as many
/// receive calls as it takes.
/// </summary>
/// <returns>
/// <c>true</c> once all requested bytes have been stored (immediately when
/// <paramref name="count"/> is zero); <c>false</c> if a receive returned zero bytes
/// before the requested amount arrived.
/// </returns>
private static async Task<bool> FillAsync(
    Socket socket, byte[] buf, int offset, int count, CancellationToken ct)
{
    for (int filled = 0; filled < count;)
    {
        Memory<byte> window = buf.AsMemory(offset + filled, count - filled);
        int received = await socket.ReceiveAsync(window, SocketFlags.None, ct).ConfigureAwait(false);
        if (received == 0)
        {
            return false;
        }

        filled += received;
    }

    return true;
}
/// <summary>
/// Builds a complete Modbus TCP exception response: the MBAP header followed by the
/// two-byte exception PDU <c>[fc | 0x80][exceptionCode]</c>.
/// </summary>
/// <param name="originalTxId">Transaction id to place in the header, high byte first.</param>
/// <param name="unitId">Unit id to place in the header.</param>
/// <param name="fc">Function code of the request being answered.</param>
/// <param name="exceptionCode">Modbus exception code to report.</param>
/// <returns>A newly allocated frame of <c>MbapFrame.HeaderSize + 2</c> bytes.</returns>
private static byte[] BuildExceptionFrame(ushort originalTxId, byte unitId, byte fc, byte exceptionCode)
{
    byte txIdHigh = (byte)(originalTxId >> 8);
    byte txIdLow = (byte)(originalTxId & 0xFF);
    byte exceptionFc = (byte)(fc | 0x80);

    var response = new byte[MbapFrame.HeaderSize + 2];

    // MBAP header.
    response[0] = txIdHigh;
    response[1] = txIdLow;
    response[2] = 0; // ProtocolId high
    response[3] = 0; // ProtocolId low
    response[4] = 0; // Length high
    response[5] = 3; // Length low: UnitId(1) + ExFc(1) + ExCode(1)
    response[6] = unitId;

    // Exception PDU.
    response[7] = exceptionFc;
    response[8] = exceptionCode;

    return response;
}
}