using System.Collections.Concurrent;
using System.Diagnostics;
using System.Net.Sockets;
using System.Threading.Channels;
using Mbproxy.Options;
using Mbproxy.Proxy.Cache;
using Polly;
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Owner of the single backend TCP connection to one PLC. Multiplexes many
/// <see cref="UpstreamPipe"/> instances onto that one socket by rewriting MBAP transaction
/// IDs so concurrent in-flight requests from different upstream clients remain
/// distinguishable on the shared wire. The multiplexer:
/// <list type="bullet">
/// <item>Opens and re-opens the backend socket through the Polly retry pipeline
/// supplied at construction (<c>backendConnectPipeline</c>), when one is configured.</item>
/// <item>Runs one backend writer task that drains the outbound channel
/// into the backend socket (single writer; no socket-level synchronisation needed).</item>
/// <item>Runs one backend reader task that decodes MBAP frames from the backend,
/// looks each frame up in the <see cref="CorrelationMap"/>, restores each interested
/// party's original TxId, and hands the frame to that party's
/// <see cref="UpstreamPipe"/> response channel.</item>
/// <item>Cascades a backend disconnect by closing every attached pipe and
/// freeing every allocated proxy TxId, then waits for the next upstream request to
/// arrive (which triggers a fresh backend connect via Polly).</item>
/// </list>
/// </summary>
/// <remarks>
/// <para><b>Threading invariants:</b> a single backend writer touches the backend socket
/// for sends; a single backend reader touches the same socket for receives. Per-upstream
/// read tasks call <c>OnUpstreamFrameAsync</c>, which allocates a proxy TxId, queues
/// the request frame into the outbound channel, and returns. Upstream-side writes
/// flow through each pipe's response channel — never directly through this class.</para>
/// <para><b>Lifecycle:</b> the multiplexer is created with the backend offline. The first
/// <c>OnUpstreamFrameAsync</c> call (or the first <c>EnsureBackendConnectedAsync</c> call,
/// if you prefer eager-start) triggers backend connect through the Polly pipeline.
/// Subsequent in-flight requests reuse the same socket. <see cref="DisposeAsync"/> tears
/// down the backend socket, the writer/reader tasks, and every attached pipe.</para>
/// </remarks>
internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvider
{
// Capacity of the bounded outbound (proxy → backend) frame channel. The channel is
// created with FullMode = Wait, so producers wait rather than drop when it is full.
private const int OutboundChannelCapacity = 256;
// Construction-time inputs: the target PLC settings (name, host, port), the connection
// settings (connect timeout, keepalive fallback), and the PDU pipeline that the reader
// task applies to backend responses.
private readonly PlcOptions _plc;
private readonly ConnectionOptions _connectionOptions;
private readonly IPduPipeline _pipeline;
// `_ctx` is volatile so a hot-reload reseat can swap it on the running
// multiplexer. Each method that uses the context snapshots it into a local at the start
// of the operation so a single PDU sees a consistent (TagMap, Cache) pair even if the
// swap fires mid-PDU. ReplaceContext is the single mutator.
private volatile PerPlcContext _ctx;
private readonly ILogger _logger;
// Optional Polly pipeline wrapped around the backend connect. When null, a single
// connect attempt bounded by BackendConnectTimeoutMs is made instead.
private readonly ResiliencePipeline? _backendConnectPipeline;
// Live read-coalescing config accessor. The accessor is read per-PDU on the
// request path so a hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled`
// propagates immediately. Production wires this to
// `() => optionsMonitor.CurrentValue.Resilience.ReadCoalescing`. Tests default to a
// fresh `ReadCoalescingOptions()` (Enabled = true, MaxParties = 32).
private readonly Func _coalescingOptions;
// Live keepalive config accessor. Read at backend-connect time (TCP SO_KEEPALIVE) and
// on each heartbeat-loop tick (idle threshold + probe address) so a hot-reload of
// `Connection.Keepalive` propagates without a listener restart. Production wires this
// to `() => optionsMonitor.CurrentValue.Connection.Keepalive`; the fallback reads the
// construction-time `ConnectionOptions` snapshot.
private readonly Func _keepaliveOptions;
// Request bookkeeping: the proxy TxId allocator, the proxy-TxId → in-flight request map
// consulted by the backend reader, and the coalescing-key → in-flight map used for
// FC03/FC04 read coalescing.
private readonly TxIdAllocator _allocator = new();
private readonly CorrelationMap _correlation = new();
private readonly InFlightByKeyMap _inFlightByKey = new();
// Frames queued for the backend writer task: one reader (the writer task), many
// producers.
private readonly Channel _outboundChannel = Channel.CreateBounded(
new BoundedChannelOptions(OutboundChannelCapacity)
{
FullMode = BoundedChannelFullMode.Wait,
SingleReader = true,
SingleWriter = false,
});
// Attached pipes — used by the status page and by coalescing fan-out.
// ConcurrentDictionary keyed on UpstreamPipe.Id for O(1) detach.
private readonly ConcurrentDictionary _pipes = new();
// Lifecycle plumbing. Backend tasks share a CTS; cascading disconnect cancels it,
// which terminates both the writer and reader tasks. The next call to
// EnsureBackendConnectedAsync constructs a fresh CTS and a fresh backend socket.
// `_backendLock` guards the five backend fields below as a group (they are swapped
// together on connect and on tear-down).
private readonly object _backendLock = new();
private Socket? _backendSocket;
private CancellationTokenSource? _backendCts;
private Task? _backendWriterTask;
private Task? _backendReaderTask;
private Task? _backendHeartbeatTask;
// UTC ticks of the last backend socket activity (a send OR a received frame). Updated
// by the writer and reader tasks; read by the heartbeat loop to decide whether the
// socket has been idle long enough to warrant a probe. Interlocked for cross-task
// coherence.
private long _lastBackendActivityTicks;
// Unit ID of the most recent upstream request. The synthetic heartbeat reuses it so
// the probe targets the same Modbus unit the real clients successfully talk to.
// Defaults to 0 until the first upstream frame is seen; by the time a heartbeat can
// fire the backend socket exists, which means at least one upstream frame arrived.
private int _lastSeenUnitId;
// Cancelled by DisposeAsync. Its token drives the request-timeout watchdog and is the
// parent of every per-backend CTS created on connect.
private readonly CancellationTokenSource _disposeCts = new();
// Volatile so the disposing thread's write is observed by every hot-path reader
// (OnUpstreamFrameAsync, ReplaceContext, Attach, etc.) without a separate fence.
// On x86/x64 plain reads happen to give acquire-release semantics, so this is
// defense for ARM hosts and future portability.
private volatile bool _disposed;
// Request-timeout watchdog started by the constructor; joined (best effort) by
// DisposeAsync.
private Task? _watchdogTask;
/// <summary>
/// Creates a multiplexer for one PLC. The backend stays offline until the first request
/// needs it; the only background work started here is the request-timeout watchdog.
/// </summary>
/// <param name="plc">Target PLC settings (name, host, port).</param>
/// <param name="connectionOptions">Connect timeout and the fallback keepalive settings.</param>
/// <param name="pipeline">PDU pipeline applied to backend responses.</param>
/// <param name="perPlcContext">Initial per-PLC context (tag map, optional cache, counters).</param>
/// <param name="logger">Logger used for multiplexer, cache and coalescing events.</param>
/// <param name="backendConnectPipeline">Optional Polly pipeline wrapped around backend connect.</param>
/// <param name="coalescingOptions">Live read-coalescing accessor; defaults to a fresh <c>ReadCoalescingOptions</c>.</param>
/// <param name="keepaliveOptions">Live keepalive accessor; defaults to <c>connectionOptions.Keepalive</c>.</param>
public PlcMultiplexer(
    PlcOptions plc,
    ConnectionOptions connectionOptions,
    IPduPipeline pipeline,
    PerPlcContext perPlcContext,
    ILogger logger,
    ResiliencePipeline? backendConnectPipeline = null,
    Func? coalescingOptions = null,
    Func? keepaliveOptions = null)
{
    // Plain captures of the constructor arguments.
    _plc = plc;
    _connectionOptions = connectionOptions;
    _pipeline = pipeline;
    _logger = logger;
    _backendConnectPipeline = backendConnectPipeline;
    _ctx = perPlcContext;

    // Live-config accessors, each with a fallback for callers that do not supply one.
    _coalescingOptions = coalescingOptions ?? (static () => new ReadCoalescingOptions());
    _keepaliveOptions = keepaliveOptions ?? (() => _connectionOptions.Keepalive);

    // The per-PLC cache (when there is one) becomes the live stats source for the
    // snapshot path. It is absent when every tag is uncached, and in unit tests.
    if (_ctx.Cache is { } cache)
    {
        _ctx.Counters.SetCacheStatsProvider(new CacheStatsAdapter(cache));
    }

    // This instance is the live telemetry source for the PLC's counters.
    _ctx.Counters.SetMultiplexProvider(this);

    // Start the per-request timeout watchdog. It periodically scans the correlation map
    // and times out any in-flight request older than BackendRequestTimeoutMs, which
    // covers lost responses, dead-PLC paths, and backends that mis-echo TxIds
    // (e.g. pymodbus 3.13.0's concurrent-multiplexed-request bug — see test files).
    _watchdogTask = Task.Run(
        () => RunRequestTimeoutWatchdogAsync(_disposeCts.Token),
        CancellationToken.None);
}
// ── IMultiplexCountersProvider ────────────────────────────────────────────
/// <summary>In-flight count as reported by the proxy TxId allocator.</summary>
public long InFlightCount
{
    get { return _allocator.InFlightCount; }
}

/// <summary>Wrap count as reported by the proxy TxId allocator.</summary>
public long TxIdWraps
{
    get { return _allocator.WrapCount; }
}

/// <summary>Number of frames currently waiting in the outbound channel for the backend writer.</summary>
public long BackendQueueDepth
{
    get { return _outboundChannel.Reader.Count; }
}
// ── Public surface ────────────────────────────────────────────────────────
/// <summary>
/// Read-only, point-in-time copy of the currently attached upstream pipes. Used by the
/// status page. A new array is built on every access.
/// </summary>
public IReadOnlyCollection AttachedPipes
{
    get
    {
        var attached = _pipes.Values;
        return attached.ToArray();
    }
}
/// <summary>
/// Attaches an upstream pipe to this multiplexer, replacing any entry already stored
/// under the same pipe id. The caller is responsible for running the pipe's read+write
/// loops (typically via <see cref="StartPipeAsync"/>), which wires the pipe's frame
/// callback back into <c>OnUpstreamFrameAsync</c>.
/// </summary>
/// <param name="pipe">The upstream pipe to register.</param>
/// <exception cref="ObjectDisposedException">The multiplexer has been disposed.</exception>
public void Attach(UpstreamPipe pipe)
{
    if (!_disposed)
    {
        _pipes[pipe.Id] = pipe;
        return;
    }

    throw new ObjectDisposedException(nameof(PlcMultiplexer));
}
/// <summary>
/// Atomically swaps the per-PLC context on a running multiplexer. Called (per the notes
/// below, by <c>PlcListenerSupervisor.ReplaceContextAsync</c>) when a hot-reload tag-list
/// change is applied to a PLC whose listener is already bound. Does nothing once the
/// multiplexer is disposed.
/// </summary>
/// <remarks>
/// <para>The new context's tag map and (optional) response cache become visible on the
/// next PDU through the volatile <c>_ctx</c> field. Counters are PRESERVED across reseat
/// (the supervisor builds the new context with the running counters), so only the cache
/// stats provider has to be re-registered — the multiplex provider already points at
/// this same instance.</para>
/// <para>Snapshots of the old context held by in-flight PDUs (via
/// <c>WithCurrentRequest</c>) finish on the old map; PDUs that start after this call see
/// the new map. Per the design contract a one-PDU "old map" tail is acceptable;
/// partial-BCD rewrites mid-request would be worse.</para>
/// </remarks>
/// <param name="newContext">The context to install.</param>
public void ReplaceContext(PerPlcContext newContext)
{
    if (!_disposed)
    {
        // Ordering is deliberate: provider FIRST, then _ctx.
        //
        // The status page's snapshot path reads `_cacheStatsProvider` independently of
        // `_ctx`. Were `_ctx` swapped first, a snapshot landing between the two writes
        // would still hold the OLD adapter wrapping the OLD cache — which the supervisor
        // is about to dispose (`PlcListenerSupervisor.ReplaceContextAsync` runs
        // `oldCache.Dispose()` after we return). With the provider written first, a
        // snapshot in the swap window reads either (old provider, old ctx) or
        // (new provider, new ctx) — both coherent — and never (old provider after the
        // old cache was disposed).
        //
        // In the typical reseat `oldContext.Counters == newContext.Counters` (the
        // reconciler preserves counters), so this updates the one instance both paths
        // share. The order still matters because the snapshot reads the provider field,
        // not the per-context counters reference.
        var counters = newContext.Counters;
        CacheStatsAdapter? statsAdapter = null;
        if (newContext.Cache is not null)
        {
            statsAdapter = new CacheStatsAdapter(newContext.Cache);
        }

        counters.SetCacheStatsProvider(statsAdapter);
        _ctx = newContext;
    }
}
/// <summary>
/// Attaches <paramref name="pipe"/>, starts its write and read loops, and returns the
/// read-loop task. When that task finishes, the pipe is removed from the attached set.
/// </summary>
/// <param name="pipe">The upstream pipe to attach and run.</param>
/// <param name="ct">Token handed to both the write loop and the read loop.</param>
/// <returns>The task representing the pipe's read loop.</returns>
/// <exception cref="ObjectDisposedException">The multiplexer has been disposed.</exception>
public Task StartPipeAsync(UpstreamPipe pipe, CancellationToken ct)
{
    Attach(pipe);

    // Fire-and-forget: the write loop ends when the pipe is disposed or its channel
    // completes, and is joined inside the pipe's own DisposeAsync rather than here.
    _ = Task.Run(() => pipe.RunWriteLoopAsync(ct), CancellationToken.None);

    // Every frame the pipe reads is routed back into this multiplexer.
    var readLoopTask = pipe.RunReadLoopAsync(
        (upstreamFrame, upstreamCt) => OnUpstreamFrameAsync(pipe, upstreamFrame, upstreamCt),
        ct);

    // Detach once the read loop is over. Disposal is NOT done here; the listener (or
    // the cascade walker) owns it.
    _ = readLoopTask.ContinueWith(
        finishedReadLoop =>
        {
            _pipes.TryRemove(pipe.Id, out _);
        },
        TaskScheduler.Default);

    return readLoopTask;
}
/// <summary>
/// Tears down the multiplexer: closes the backend connection, cancels both backend
/// tasks, drains every in-flight correlation entry, and closes every attached pipe.
/// A second call returns immediately.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    // Unhook the counters' providers first so a status snapshot taken during teardown
    // doesn't see live-but-soon-to-be-empty internal state.
    _ctx.Counters.SetMultiplexProvider(null);
    _ctx.Counters.SetCacheStatsProvider(null);

    await _disposeCts.CancelAsync().ConfigureAwait(false);

    // Best-effort join of the watchdog so its in-flight log/dispatch settles before
    // tests assert on counter state.
    if (_watchdogTask is { } watchdog)
    {
        try
        {
            await watchdog.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false);
        }
        catch
        {
            /* swallow */
        }
    }

    await TearDownBackendAsync("disposing", cascadeUpstreams: true).ConfigureAwait(false);
    _outboundChannel.Writer.TryComplete();

    // Dispose whatever pipes are still attached.
    foreach (var attachedPipe in _pipes.Values)
    {
        try
        {
            await attachedPipe.DisposeAsync().ConfigureAwait(false);
        }
        catch
        {
            /* best effort */
        }
    }

    _pipes.Clear();

    // The CTS dispose is guarded against a watchdog tick that raced past the WaitAsync
    // above (e.g. a slow Task.Delay completion observing cancellation late). The
    // connect-gate semaphore is disposed the same way.
    DisposeToleratingDisposed(_disposeCts);
    DisposeToleratingDisposed(_connectGate);

    static void DisposeToleratingDisposed(IDisposable resource)
    {
        try
        {
            resource.Dispose();
        }
        catch (ObjectDisposedException)
        {
            /* already disposed */
        }
    }
}
// ── Backend connect / teardown ────────────────────────────────────────────
/// <summary>
/// Ensures a live backend socket exists, connecting (through the optional Polly
/// pipeline) when there is none. Concurrent callers are serialised on the connect gate,
/// so at most one connect runs at a time. On success the backend writer, reader and
/// heartbeat tasks are started on the new socket.
/// </summary>
/// <param name="ct">Cancels the wait for the connect gate and the connect itself.</param>
/// <returns>
/// <c>true</c> when a connected backend is available; <c>false</c> when the multiplexer
/// is disposed or the connect failed (a failure is logged and counted).
/// </returns>
/// <exception cref="OperationCanceledException">
/// <paramref name="ct"/> was cancelled while waiting for the connect gate.
/// </exception>
private async Task<bool> EnsureBackendConnectedAsync(CancellationToken ct)
{
    if (_disposed) return false;

    // Fast path: already connected.
    if (BackendIsLive()) return true;

    // Serialise concurrent connect attempts from many upstream pipes.
    try
    {
        await _connectGate.WaitAsync(ct).ConfigureAwait(false);
    }
    catch (ObjectDisposedException)
    {
        // DisposeAsync already disposed the gate — there is nothing left to connect for.
        return false;
    }

    // Socket of the most recent connect attempt; replaced on every attempt.
    Socket? backend = null;
    try
    {
        // Re-check after acquiring the gate: the multiplexer may have been disposed, or
        // another caller may have finished the connect, while we were waiting.
        if (_disposed) return false;
        if (BackendIsLive()) return true;

        try
        {
            if (_backendConnectPipeline is not null)
            {
                await _backendConnectPipeline.ExecuteAsync(
                    attemptToken => ConnectOnceAsync(attemptToken), ct).ConfigureAwait(false);
            }
            else
            {
                await ConnectOnceAsync(ct).ConfigureAwait(false);
            }
        }
        catch (Exception ex)
        {
            string reason = ex is OperationCanceledException
                ? $"Backend connect timed out or cancelled after {_connectionOptions.BackendConnectTimeoutMs} ms"
                : ex.Message;
            MultiplexerLogEvents.BackendFailed(_logger, _plc.Name, reason);
            _ctx.Counters.IncrementConnectFailed();
            backend?.Dispose();
            return false;
        }

        // Disposal may have started while the connect was in flight (tear-down only
        // waits a bounded time for the gate). Do not start backend tasks on a disposed
        // multiplexer — release the fresh socket instead.
        if (backend is null || _disposed)
        {
            backend?.Dispose();
            return false;
        }

        // Successful connect. Wire up the backend tasks.
        Socket connected = backend;
        var backendCts = CancellationTokenSource.CreateLinkedTokenSource(_disposeCts.Token);
        lock (_backendLock)
        {
            _backendSocket = connected;
            _backendCts = backendCts;
            // Seed the idle timer so the heartbeat loop measures idleness from connect.
            Interlocked.Exchange(ref _lastBackendActivityTicks, DateTime.UtcNow.Ticks);
            _backendWriterTask = Task.Run(() => RunBackendWriterAsync(connected, backendCts.Token), CancellationToken.None);
            _backendReaderTask = Task.Run(() => RunBackendReaderAsync(connected, backendCts.Token), CancellationToken.None);
            _backendHeartbeatTask = Task.Run(() => RunBackendHeartbeatAsync(backendCts.Token), CancellationToken.None);
        }
        _ctx.Counters.IncrementConnectSuccess();
        MultiplexerLogEvents.BackendConnected(_logger, _plc.Name, _plc.Host, _plc.Port);
        return true;
    }
    finally
    {
        // Same guard TearDownBackendAsync uses: DisposeAsync may dispose the gate while
        // a connect is still holding it.
        try { _connectGate.Release(); }
        catch (ObjectDisposedException) { /* dispose race — harmless */ }
    }

    // True when a connected socket exists and its backend CTS has not been cancelled.
    bool BackendIsLive() =>
        _backendSocket is { Connected: true } && _backendCts is { IsCancellationRequested: false };

    // One connect attempt on a brand-new socket, bounded by BackendConnectTimeoutMs.
    // A socket whose connect failed or timed out is not guaranteed to be reusable (on
    // some platforms .NET can reject a second connect on the same Socket), so every
    // attempt — including each Polly retry — discards the previous socket.
    async ValueTask ConnectOnceAsync(CancellationToken attemptToken)
    {
        backend?.Dispose();
        var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
        { NoDelay = true };
        backend = socket;
        SocketKeepalive.Apply(socket, _keepaliveOptions());

        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(attemptToken);
        timeoutCts.CancelAfter(_connectionOptions.BackendConnectTimeoutMs);
        await socket.ConnectAsync(_plc.Host, _plc.Port, timeoutCts.Token).ConfigureAwait(false);
    }
}
// Async gate (one holder at a time) taken by EnsureBackendConnectedAsync for the whole
// connect and by TearDownBackendAsync for the tear-down, so the two normally do not
// interleave (tear-down proceeds without it if it cannot be acquired within 2 s).
// Disposed at the end of DisposeAsync.
private readonly SemaphoreSlim _connectGate = new(1, 1);
/// <summary>
/// Tears down the current backend connection: cancels the backend tasks, closes the
/// socket, releases every in-flight proxy TxId, optionally disposes every attached
/// upstream pipe, and releases the TxIds of frames still stranded in the outbound
/// channel. Returns without doing anything when there is no backend to tear down.
/// </summary>
/// <param name="reason">Free-text reason included in the backend-disconnected log event.</param>
/// <param name="cascadeUpstreams">
/// When <c>true</c>, every attached upstream pipe is disposed and the number of pipes
/// closed is added to the disconnect-cascade counter.
/// </param>
private async Task TearDownBackendAsync(string reason, bool cascadeUpstreams)
{
    // Serialise tear-down vs connect-up via the connect gate. Without this, a fresh
    // EnsureBackendConnectedAsync racing with the channel drain below could see
    // stranded frames sent on its new socket with old (already-released) TxIds,
    // producing orphaned responses that hang upstream peers via the watchdog.
    //
    // Bounded wait: a long Polly-wrapped EnsureBackendConnectedAsync against an
    // unreachable host can hold the gate for the full BackendConnectTimeoutMs *
    // MaxAttempts window, blocking DisposeAsync (and therefore ProxyWorker.StopAsync)
    // for that duration. A 2 s teardown deadline bounds disposal latency; if the gate
    // is unavailable we proceed best-effort without it (the worst-case consequence is
    // one orphaned in-flight cycle on the dying backend, which the upstream watchdog
    // will surface as exception 0x0B).
    //
    // KNOWN RACE on the gate-not-held path: a concurrent EnsureBackendConnectedAsync
    // that DOES hold the gate may TryAllocate a TxId that collides (after wraparound
    // in the allocator's forward scan) with a TxId we're about to release from the
    // channel-drain step below. The double-release would mark the new request's slot
    // as free even though it's legitimately in-flight, allowing the next allocation
    // to reuse the same slot and CorrelationMap.TryAdd to fail (silent request drop).
    // Probability is very low (requires gate timeout + new accept landing during
    // cascade + TxId collision in a 65,536-slot space); the only consequence is one
    // dropped request that the client retries. Accepted as best-effort behaviour.
    bool gateHeld = false;
    try
    {
        using var teardownCts = new CancellationTokenSource(TimeSpan.FromSeconds(2));
        await _connectGate.WaitAsync(teardownCts.Token).ConfigureAwait(false);
        gateHeld = true;
    }
    catch (OperationCanceledException)
    {
        // Best-effort: proceed without the gate. Concurrent connect attempts will
        // observe _disposed (or the now-null _backendSocket) and short-circuit.
    }
    catch (ObjectDisposedException)
    {
        // _connectGate already disposed — TearDown is racing past DisposeAsync.
        // Skip the body entirely; there's nothing useful to do at this point.
        return;
    }

    try
    {
        // Detach the whole backend state in one step so exactly one tear-down owns it.
        Socket? oldSocket;
        CancellationTokenSource? oldCts;
        Task? writer, reader, heartbeat;
        lock (_backendLock)
        {
            oldSocket = _backendSocket;
            oldCts = _backendCts;
            writer = _backendWriterTask;
            reader = _backendReaderTask;
            heartbeat = _backendHeartbeatTask;
            _backendSocket = null;
            _backendCts = null;
            _backendWriterTask = null;
            _backendReaderTask = null;
            _backendHeartbeatTask = null;
        }

        // Nothing was connected (or another tear-down already took it).
        if (oldSocket is null && oldCts is null) return;

        try { oldCts?.Cancel(); } catch { /* best effort */ }
        try { oldSocket?.Shutdown(SocketShutdown.Both); } catch { /* already closed */ }
        try { oldSocket?.Dispose(); } catch { /* best effort */ }

        // Drain the correlation map and hand every proxy TxId back to the allocator.
        var dropped = _correlation.DrainAll();
        foreach (var kvp in dropped)
        {
            _allocator.Release(kvp.Key);
        }

        // Also drain the in-flight-by-key map so a brand-new identical request through
        // the freshly-reconnected backend is treated as a miss (no stale entries
        // outlive the backend they were destined for).
        _inFlightByKey.DrainAll();

        int upstreamCount = 0;
        if (cascadeUpstreams)
        {
            // Per docs/Architecture/ConnectionModel.md, ALL attached upstreams cascade
            // on backend disconnect — not only those with a request in flight.
            //
            // One snapshot drives both the count and the disposal, so the counter and
            // the log report exactly the pipes that were closed. Only those pipes are
            // detached: a pipe attached after the snapshot was never disposed here and
            // must stay visible to AttachedPipes / DisposeAsync.
            var pipeList = _pipes.Values.ToArray();
            upstreamCount = pipeList.Length;
            foreach (var pipe in pipeList)
            {
                try { await pipe.DisposeAsync().ConfigureAwait(false); }
                catch { /* best effort */ }
                _pipes.TryRemove(pipe.Id, out _);
            }
            _ctx.Counters.AddDisconnectCascades(upstreamCount);
        }

        // Drain any stranded frames left in the outbound channel by the writer task
        // that just faulted/cancelled, and release their proxy TxIds back to the
        // allocator. The writer's CTS has been cancelled and the snapshot of upstream
        // pipes has been cascaded by this point, so the channel is expected to be
        // quiescent.
        int strandedDropped = 0;
        while (_outboundChannel.Reader.TryRead(out byte[]? stranded))
        {
            if (stranded.Length >= 2)
            {
                // The first two bytes of an MBAP frame are the big-endian (proxy) TxId.
                ushort strandedTxId = (ushort)((stranded[0] << 8) | stranded[1]);
                _allocator.Release(strandedTxId);
            }
            strandedDropped++;
        }

        // Best-effort join.
        try { if (writer is not null) await writer.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }
        try { if (reader is not null) await reader.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }
        try { if (heartbeat is not null) await heartbeat.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }
        oldCts?.Dispose();

        if (upstreamCount > 0 || dropped.Count > 0 || strandedDropped > 0)
            MultiplexerLogEvents.BackendDisconnected(_logger, _plc.Name, upstreamCount, dropped.Count + strandedDropped, reason);
    }
    finally
    {
        // Only release if we acquired — best-effort path may have skipped.
        if (gateHeld)
        {
            try { _connectGate.Release(); }
            catch (ObjectDisposedException) { /* dispose race — harmless */ }
        }
    }
}
// ── Backend writer / reader tasks ─────────────────────────────────────────
/// <summary>
/// Backend writer loop: takes frames from the outbound channel one at a time and writes
/// each one completely to <paramref name="backend"/>. Cancellation ends the loop
/// quietly; any other failure triggers a cascading backend tear-down unless disposal is
/// already under way.
/// </summary>
/// <param name="backend">The connected backend socket this writer sends on.</param>
/// <param name="ct">Token of the per-backend CTS; cancelled on tear-down.</param>
private async Task RunBackendWriterAsync(Socket backend, CancellationToken ct)
{
    try
    {
        await foreach (var outbound in _outboundChannel.Reader.ReadAllAsync(ct).ConfigureAwait(false))
        {
            // SendAsync may accept only part of the buffer, so keep sending the tail
            // until the whole frame is on the wire.
            for (int offset = 0; offset < outbound.Length;)
            {
                int written = await backend.SendAsync(
                    outbound.AsMemory(offset, outbound.Length - offset),
                    SocketFlags.None,
                    ct).ConfigureAwait(false);

                // A zero-byte send is treated as a reset connection.
                if (written == 0)
                {
                    throw new SocketException((int)SocketError.ConnectionReset);
                }

                offset += written;
            }

            // A send counts as backend activity — it suppresses the idle heartbeat.
            Interlocked.Exchange(ref _lastBackendActivityTicks, DateTime.UtcNow.Ticks);
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception ex)
    {
        // Backend failure — cascade, but not while disposal is in progress:
        // DisposeAsync runs its own explicit TearDown, and a fire-and-forget one from
        // here would race against it, hit a disposed _connectGate and surface as an
        // unobserved-task exception.
        bool disposing = _disposeCts.IsCancellationRequested;
        if (!disposing)
        {
            _ = TearDownBackendAsync($"writer fault: {ex.Message}", cascadeUpstreams: true);
        }
    }
}
/// <summary>
/// Backend reader loop: reads MBAP frames from <paramref name="backend"/>, matches each
/// one to its in-flight request by proxy TxId, runs the response pipeline, updates or
/// invalidates the response cache, and fans the frame out to every interested upstream
/// party with that party's original TxId restored. When the loop ends for any reason
/// other than cancellation, a cascading backend tear-down is started unless disposal is
/// already under way.
/// </summary>
/// <param name="backend">The connected backend socket this reader receives on.</param>
/// <param name="ct">Token of the per-backend CTS; cancelled on tear-down.</param>
private async Task RunBackendReaderAsync(Socket backend, CancellationToken ct)
{
    byte[] headerBuf = new byte[MbapFrame.HeaderSize];
    try
    {
        while (!ct.IsCancellationRequested)
        {
            // FillAsync returning false ends the loop (treated as backend EOF below).
            if (!await FillAsync(backend, headerBuf, 0, MbapFrame.HeaderSize, ct).ConfigureAwait(false))
                break;
            if (!MbapFrame.TryParseHeader(headerBuf.AsSpan(),
                    out ushort proxyTxId, out _, out ushort length, out _))
                break;
            if (length < 1)
            {
                // Degenerate frame — drop.
                continue;
            }

            // The header bytes already read include one byte counted by `length`;
            // the remainder is the PDU body.
            int pduBodyLen = length - 1;
            if (pduBodyLen > MbapFrame.MaxPduBodySize)
            {
                // Frame too large — backend is misbehaving; force teardown.
                _logger.LogWarning(
                    "Oversized backend frame: Plc={Plc} PduBody={Body} > Max={Max}",
                    _plc.Name, pduBodyLen, MbapFrame.MaxPduBodySize);
                break;
            }

            byte[] frame = new byte[MbapFrame.HeaderSize + pduBodyLen];
            Buffer.BlockCopy(headerBuf, 0, frame, 0, MbapFrame.HeaderSize);
            if (!await FillAsync(backend, frame, MbapFrame.HeaderSize, pduBodyLen, ct).ConfigureAwait(false))
                break;

            // A received frame counts as backend activity — it suppresses (and, for a
            // heartbeat response, satisfies) the idle heartbeat.
            Interlocked.Exchange(ref _lastBackendActivityTicks, DateTime.UtcNow.Ticks);

            if (!_correlation.TryRemove(proxyTxId, out var inFlight))
            {
                // No correlation entry — either a stale response after cascade, or
                // the PLC sent something unsolicited. Drop the frame.
                continue;
            }

            // Free the allocator slot immediately so it can be reused.
            _allocator.Release(proxyTxId);

            // Keepalive heartbeat response — the probe came back, the backend is alive.
            // The activity timestamp was already refreshed above. There is no upstream
            // party, no cache eligibility, and no rewriting to do: drop the payload and
            // skip the EWMA update so the synthetic probe never pollutes the
            // client-facing round-trip metric.
            if (inFlight.IsHeartbeat)
                continue;

            // Snapshot the context ONCE for this frame. `_ctx` can be swapped by
            // ReplaceContext at any moment; re-reading it below could rewrite the
            // response with one tag map and then store it into a different context's
            // cache. Everything from here on uses `ctx`.
            var ctx = _ctx;

            // For FC03/FC04 reads, also clear the coalescing-by-key entry so a
            // brand-new identical request issued AFTER this response is treated as a
            // miss (opens a fresh round-trip). The TryRemove is best-effort: a
            // watchdog timeout or cascade may have already removed it.
            if (inFlight.Fc is 0x03 or 0x04)
            {
                var coalKey = new CoalescingKey(inFlight.UnitId, inFlight.Fc,
                    inFlight.StartAddress, inFlight.Qty);
                _inFlightByKey.TryRemove(coalKey, out _);
            }

            // Update EWMA round-trip from when we sent the request. UpdateRoundTripEwma
            // expects Stopwatch ticks; convert from the wall-clock SentAtUtc timestamp.
            long ticks = (long)((DateTimeOffset.UtcNow - inFlight.SentAtUtc).TotalSeconds * Stopwatch.Frequency);
            if (ticks > 0)
                ctx.Counters.UpdateRoundTripEwma(ticks);

            // Apply the BCD rewriter on the response. Build a per-call context clone
            // that carries CurrentRequest so the rewriter can decode FC03/04 slots.
            var responseCtx = ctx.WithCurrentRequest(inFlight);
            _pipeline.Process(
                MbapDirection.ResponseToClient,
                frame.AsSpan(0, MbapFrame.HeaderSize),
                frame.AsSpan(MbapFrame.HeaderSize, pduBodyLen),
                responseCtx);

            // Post-rewriter cache update:
            //   * FC03/FC04 successful responses are stored when the request was
            //     cache-eligible (resolvedTtlMs > 0).
            //   * FC06/FC16 successful responses invalidate every cached entry whose
            //     address range overlaps the write.
            //
            // Exception bit comes from the post-rewriter buffer (the rewriter never
            // touches the FC byte today, but reading from inFlight.Fc would lose the
            // exception bit). The base FC for routing decisions uses inFlight.Fc —
            // the request side knows what was sent.
            //
            // A response with an empty PDU body has no FC byte to inspect, so it is
            // neither cached nor used for invalidation (indexing it would throw and
            // turn one malformed frame into a full backend cascade).
            if (pduBodyLen > 0 && ctx.Cache is { } postCache)
            {
                byte fcInResponse = frame[MbapFrame.HeaderSize];
                bool isException = (fcInResponse & 0x80) != 0;
                if (!isException)
                {
                    if ((inFlight.Fc is 0x03 or 0x04) && inFlight.ResolvedCacheTtlMs > 0)
                    {
                        // Snapshot the post-rewriter PDU body so the cached entry is
                        // independent of this frame's lifetime.
                        byte[] pduSnapshot = new byte[pduBodyLen];
                        Buffer.BlockCopy(frame, MbapFrame.HeaderSize, pduSnapshot, 0, pduBodyLen);
                        var cacheKey = new CacheKey(
                            inFlight.UnitId, inFlight.Fc,
                            inFlight.StartAddress, inFlight.Qty);
                        var now = DateTimeOffset.UtcNow;
                        var entry = new CacheEntry(
                            PduBytes: pduSnapshot,
                            CachedAtUtc: now,
                            ExpiresAtUtc: now.AddMilliseconds(inFlight.ResolvedCacheTtlMs),
                            Length: pduSnapshot.Length,
                            LastUsedTick: 0); // ResponseCache.Set stamps the real tick
                        postCache.Set(cacheKey, entry);
                        CacheLogEvents.Store(_logger, _plc.Name,
                            inFlight.UnitId, inFlight.Fc,
                            inFlight.StartAddress, inFlight.Qty,
                            inFlight.ResolvedCacheTtlMs);
                    }
                    else if (inFlight.Fc is 0x06 or 0x10)
                    {
                        // The design contract "invalidations during a recovering
                        // listener state are skipped" is upheld IMPLICITLY here:
                        // invalidation only fires inside the backend reader task when
                        // a non-exception FC06/FC16 response arrives. A `Recovering`
                        // listener has no backend reader (the multiplexer is torn
                        // down between recovery attempts), so no response can land
                        // here, so no invalidation. The gating is structural, not
                        // conditional. If a future change ever produces a write
                        // response off the live backend, an explicit recovering-state
                        // check would need to be added.
                        int invalidated = postCache.Invalidate(
                            inFlight.UnitId, inFlight.StartAddress, inFlight.Qty);
                        if (invalidated > 0)
                        {
                            ctx.Counters.AddCacheInvalidations(invalidated);
                            CacheLogEvents.Invalidated(_logger, _plc.Name,
                                inFlight.UnitId, inFlight.StartAddress, inFlight.Qty,
                                invalidated);
                        }
                    }
                }
            }

            // Fan out to each interested party with their original TxId restored.
            // Without coalescing there is exactly one party; with coalescing there
            // are N. The InFlightByKey TryRemove above (for FC03/FC04) guarantees no
            // further attaches can occur — the parties list is now a stable snapshot.
            //
            // Non-blocking fan-out via `TrySendResponse`. The single backend reader
            // task must NEVER `await` a per-upstream channel write: a wedged upstream
            // (full bounded response channel) would otherwise stall the reader and
            // starve every other client on this PLC. A drop here is recorded via
            // `responseDropForFullUpstream`; the wedged upstream loses its own
            // response and will be reaped by its own socket-close path.
            foreach (var party in inFlight.InterestedParties)
            {
                if (!party.Pipe.IsAlive)
                {
                    // Record the dead-upstream skip only for FC03/FC04 (the only
                    // function codes that take the coalescing path). For
                    // non-coalescing FCs this branch is silent.
                    if (inFlight.Fc is 0x03 or 0x04
                        && inFlight.InterestedParties.Count > 1)
                    {
                        ctx.Counters.IncrementCoalescedResponseToDeadUpstream();
                        CoalescingLogEvents.DeadUpstream(
                            _logger, _plc.Name, inFlight.UnitId, inFlight.Fc,
                            inFlight.StartAddress, inFlight.Qty);
                    }
                    continue;
                }

                // The frame buffer is private to this iteration; if there are
                // multiple coalesced parties, each gets its own copy with its own
                // original TxId patched in. The single-party case reuses the buffer
                // directly as the common-case fast path.
                byte[] outFrame = inFlight.InterestedParties.Count == 1
                    ? frame
                    : (byte[])frame.Clone();
                outFrame[0] = (byte)(party.OriginalTxId >> 8);
                outFrame[1] = (byte)(party.OriginalTxId & 0xFF);
                if (!party.Pipe.TrySendResponse(outFrame))
                {
                    ctx.Counters.IncrementResponseDropForFullUpstream();
                }
                else
                {
                    // Count outbound bytes per delivered party. With coalescing, one
                    // backend response fans out to N parties and produces
                    // N × frame.Length bytes leaving the proxy upstream-side.
                    ctx.Counters.AddBytes(up: 0, down: outFrame.Length);
                }
            }
        }

        // Reader exited cleanly — backend closed by remote. Cascade. Skip if dispose
        // is already in progress: DisposeAsync runs its own explicit TearDown and a
        // fire-and-forget one from here would race against it.
        if (!_disposeCts.IsCancellationRequested)
            _ = TearDownBackendAsync("backend reader EOF", cascadeUpstreams: true);
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception ex)
    {
        if (!_disposeCts.IsCancellationRequested)
            _ = TearDownBackendAsync($"reader fault: {ex.Message}", cascadeUpstreams: true);
    }
}
// ── Upstream → multiplexer entry point ────────────────────────────────────
private async ValueTask OnUpstreamFrameAsync(UpstreamPipe pipe, byte[] frame, CancellationToken ct)
{
if (_disposed) return;
if (frame.Length < MbapFrame.HeaderSize)
return;
if (!MbapFrame.TryParseHeader(frame.AsSpan(0, MbapFrame.HeaderSize),
out ushort originalTxId, out _, out _, out byte unitId))
return;
// Remember the unit ID so the backend keepalive heartbeat probes the same Modbus
// unit the real clients are known to reach successfully.
Volatile.Write(ref _lastSeenUnitId, unitId);
// Count inbound bytes from the upstream client. Surfaces in bytes.upstreamIn on
// the status page. Counted ONCE per parsed frame regardless of subsequent
// routing (cache hit, coalesce, backend round-trip, exception).
_ctx.Counters.AddBytes(up: frame.Length, down: 0);
// Parse the PDU FC + start/qty. FC03/FC04 reads use start/qty for the coalescing
// key and for the cache lookup. FC06 writes carry [addr][value]; we treat qty as
// 1 for invalidation. FC16 carries [start][qty][byteCount]...; qty is the write
// span used for cache invalidation. FC06/FC16 start/qty drive cache invalidation
// by overlap rather than exact key.
int pduOffset = MbapFrame.HeaderSize;
byte fcByte = frame.Length > pduOffset ? frame[pduOffset] : (byte)0;
ushort startAddr = 0;
ushort qty = 0;
if (fcByte is 0x03 or 0x04 && frame.Length >= pduOffset + 5)
{
startAddr = (ushort)((frame[pduOffset + 1] << 8) | frame[pduOffset + 2]);
qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
}
else if (fcByte == 0x06 && frame.Length >= pduOffset + 5)
{
// FC06 = Write Single Register. PDU: [fc=06][addrHi][addrLo][valHi][valLo].
// For cache invalidation we represent this as qty=1 at addr.
startAddr = (ushort)((frame[pduOffset + 1] << 8) | frame[pduOffset + 2]);
qty = 1;
}
else if (fcByte == 0x10 && frame.Length >= pduOffset + 5)
{
// FC16 = Write Multiple Registers. PDU: [fc=10][startHi][startLo][qtyHi][qtyLo][byteCount]...
startAddr = (ushort)((frame[pduOffset + 1] << 8) | frame[pduOffset + 2]);
qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
}
// Response-cache path. Cache check happens BEFORE coalescing AND before we
// attempt to bring up the backend connection. A hit short-circuits everything,
// including the EnsureBackendConnectedAsync call — operators with all reads
// cached and the backend down still get served (the cache survives backend
// disconnects per the design contract). The cache only fires for FC03/FC04 and
// only when the read range's resolved TTL > 0.
int resolvedCacheTtlMs = 0;
if (fcByte is 0x03 or 0x04 && _ctx.Cache is { } responseCache)
{
resolvedCacheTtlMs = _ctx.TagMap.ResolveCacheTtlMs(startAddr, qty);
if (resolvedCacheTtlMs > 0)
{
var cacheKey = new CacheKey(unitId, fcByte, startAddr, qty);
if (responseCache.TryGet(cacheKey, out var cached))
{
_ctx.Counters.IncrementCacheHit();
CacheLogEvents.Hit(_logger, _plc.Name, unitId, fcByte, startAddr, qty);
byte[] hitFrame = BuildCacheHitFrame(originalTxId, unitId, cached.PduBytes);
await pipe.SendResponseAsync(hitFrame, ct).ConfigureAwait(false);
// Outbound bytes for cache-hit response.
_ctx.Counters.AddBytes(up: 0, down: hitFrame.Length);
return;
}
// Per design contract: "miss" = "fell through to coalescing/backend".
// When two upstream peers issue the same cache-eligible read, both increment
// CacheMiss; only one then opens a backend round-trip (the second coalesces
// onto the first via the InFlightByKey path below). So `CacheMiss` does NOT
// equal "produced a backend round-trip" — it equals "did not find a fresh
// cache entry". The identity `Hit + Miss = cache-eligible requests` holds.
_ctx.Counters.IncrementCacheMiss();
CacheLogEvents.Miss(_logger, _plc.Name, unitId, fcByte, startAddr, qty);
}
}
// Ensure backend is connected. Failure here means we cannot service the request;
// close the upstream pipe.
if (!await EnsureBackendConnectedAsync(ct).ConfigureAwait(false))
{
try { await pipe.DisposeAsync().ConfigureAwait(false); } catch { /* best effort */ }
return;
}
// Read-coalescing path. Only FC03/FC04 are coalescable; only when the feature
// is enabled in the live config. If the late-arriving request matches an
// already-in-flight peer, we attach to the existing entry and skip the backend
// round-trip entirely. The existing entry's response will fan out to both parties.
var coalescingOpts = _coalescingOptions();
if (fcByte is 0x03 or 0x04 && coalescingOpts.Enabled)
{
var key = new CoalescingKey(unitId, fcByte, startAddr, qty);
var newParty = new InterestedParty(pipe, originalTxId);
// The factory allocates a proxy TxId, builds the InFlightRequest with a
// mutable List, and adds to the correlation map. We
// deliberately do NOT enqueue to the outbound channel inside the factory —
// that's done outside the InFlightByKey lock to keep the lock scope tight
// and to avoid holding the lock across an async send.
//
// proxyTxIdForSend / inFlightForSend communicate the factory's allocation
// back out of the lock so the post-lock code can finish the send.
ushort proxyTxIdForSend = 0;
InFlightRequest? inFlightForSend = null;
_inFlightByKey.AttachOrCreate(
key,
newParty,
factory: () =>
{
if (!_allocator.TryAllocate(out ushort proxyTxId))
{
// Saturation — record an empty placeholder InFlightRequest that the
// caller will detect via inFlightForSend == null. We can't easily
// signal failure through the bool return, so we leave the saturation
// exception delivery to the caller.
return new InFlightRequest(
UnitId: unitId,
Fc: fcByte,
StartAddress: startAddr,
Qty: qty,
InterestedParties: new List { newParty },
SentAtUtc: DateTimeOffset.UtcNow,
ResolvedCacheTtlMs: resolvedCacheTtlMs);
}
var partyList = new List(capacity: 1) { newParty };
var inFlight = new InFlightRequest(
UnitId: unitId,
Fc: fcByte,
StartAddress: startAddr,
Qty: qty,
InterestedParties: partyList,
SentAtUtc: DateTimeOffset.UtcNow,
ResolvedCacheTtlMs: resolvedCacheTtlMs);
if (!_correlation.TryAdd(proxyTxId, inFlight))
{
_allocator.Release(proxyTxId);
_logger.LogError(
"CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}",
proxyTxId);
// Return the stub anyway; outer code detects via inFlightForSend == null.
return inFlight;
}
_ctx.Counters.ObserveInFlight(_allocator.InFlightCount);
proxyTxIdForSend = proxyTxId;
inFlightForSend = inFlight;
return inFlight;
},
maxParties: coalescingOpts.MaxParties,
out _,
out bool wasNew);
if (!wasNew)
{
// Coalesce hit: attached to an existing in-flight entry. No backend traffic.
_ctx.Counters.IncrementCoalescedHit();
CoalescingLogEvents.Hit(_logger, _plc.Name, unitId, fcByte, startAddr, qty,
partyCount: _inFlightByKey.Count);
return;
}
// Coalesce miss: this request did not attach to an in-flight peer. Per the
// design contract `coalescedHitCount + coalescedMissCount = total FC03/FC04`,
// so even saturation-failure paths (factory below returns null inFlightForSend)
// count as a miss — every FC03/FC04 entered the coalescing path exactly once.
// "Miss" here means "did not coalesce", NOT "produced a backend round-trip".
_ctx.Counters.IncrementCoalescedMiss();
CoalescingLogEvents.Miss(_logger, _plc.Name, unitId, fcByte, startAddr, qty);
if (inFlightForSend is null)
{
// The factory hit the allocator-saturation path or a duplicate-key race
// and stored a stub `InFlightRequest` under `key`. Late attachers may
// have joined the stub between the factory call and this cleanup; we
// must deliver the saturation exception to ALL of them, not just the
// leader, otherwise the late attachers wait forever for a response that
// never comes (the stub has no proxy TxId, so no backend round-trip will
// ever fire).
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
if (_inFlightByKey.TryRemove(key, out var stub))
{
// Non-blocking delivery via TrySendResponse — the per-PLC fan-out
// path must never await per-pipe writes (a wedged late-attacher's
// full bounded channel would otherwise stall delivery to its peers).
foreach (var party in stub.InterestedParties)
{
byte[] excFrame = BuildExceptionFrame(party.OriginalTxId, unitId, fcByte, exceptionCode: 4);
if (!party.Pipe.TrySendResponse(excFrame))
_ctx.Counters.IncrementResponseDropForFullUpstream();
else
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
}
}
else
{
// The stub was already removed by another path (extremely unlikely,
// but defensive). Surface the exception to the original requester.
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
if (!pipe.TrySendResponse(excFrame))
_ctx.Counters.IncrementResponseDropForFullUpstream();
else
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
}
return;
}
// Apply the BCD rewriter on the request, then send to the backend. We are now
// OUTSIDE the InFlightByKey lock — late attaches arriving after this point will
// attach to the same entry while it sits in the channel/wire.
var requestCtx = _ctx.WithCurrentRequest(inFlightForSend);
_pipeline.Process(
MbapDirection.RequestToBackend,
frame.AsSpan(0, MbapFrame.HeaderSize),
frame.AsSpan(MbapFrame.HeaderSize, frame.Length - MbapFrame.HeaderSize),
requestCtx);
frame[0] = (byte)(proxyTxIdForSend >> 8);
frame[1] = (byte)(proxyTxIdForSend & 0xFF);
try
{
await _outboundChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
}
catch (ChannelClosedException)
{
if (_correlation.TryRemove(proxyTxIdForSend, out _))
_allocator.Release(proxyTxIdForSend);
_inFlightByKey.TryRemove(key, out _);
}
return;
}
// Non-coalescing path (FC06/FC16 writes, FC03/04 with coalescing disabled, or
// any other FC). Every request gets its own proxy TxId and its own backend
// round-trip.
if (!_allocator.TryAllocate(out ushort proxyTxIdFc))
{
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
return;
}
var partyListNc = new List(capacity: 1) { new InterestedParty(pipe, originalTxId) };
var inFlightNc = new InFlightRequest(
UnitId: unitId,
Fc: fcByte,
StartAddress: startAddr,
Qty: qty,
InterestedParties: partyListNc,
SentAtUtc: DateTimeOffset.UtcNow,
ResolvedCacheTtlMs: resolvedCacheTtlMs);
if (!_correlation.TryAdd(proxyTxIdFc, inFlightNc))
{
// Should be impossible: the allocator just guaranteed proxyTxId is free.
_allocator.Release(proxyTxIdFc);
_logger.LogError("CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}", proxyTxIdFc);
return;
}
// Even when the coalescing path is bypassed (e.g. coalescing disabled for
// FC03/04), we still report the request as a Miss so Hit + Miss = total
// FC03/FC04 requests across snapshots. FC06/FC16 are not counted here (they
// are not coalescable in any sense).
if (fcByte is 0x03 or 0x04)
_ctx.Counters.IncrementCoalescedMiss();
// Peak in-flight tracking.
_ctx.Counters.ObserveInFlight(_allocator.InFlightCount);
// Apply the BCD rewriter on the request.
var requestCtxNc = _ctx.WithCurrentRequest(inFlightNc);
_pipeline.Process(
MbapDirection.RequestToBackend,
frame.AsSpan(0, MbapFrame.HeaderSize),
frame.AsSpan(MbapFrame.HeaderSize, frame.Length - MbapFrame.HeaderSize),
requestCtxNc);
// Overwrite the MBAP TxId with the proxy TxId.
frame[0] = (byte)(proxyTxIdFc >> 8);
frame[1] = (byte)(proxyTxIdFc & 0xFF);
// Enqueue for the backend writer task.
try
{
await _outboundChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
}
catch (ChannelClosedException)
{
// Channel completed during shutdown — release the proxy TxId.
if (_correlation.TryRemove(proxyTxIdFc, out _))
_allocator.Release(proxyTxIdFc);
}
}
// ── Per-request timeout watchdog ──────────────────────────────────────────
/// <summary>
/// Periodically scans the correlation map for in-flight requests whose response has
/// not arrived within <c>BackendRequestTimeoutMs</c>. For each stale entry it removes the
/// entry from the map, frees its allocator slot, and delivers a Modbus exception
/// (code 0x0B / Gateway Target Device Failed To Respond) to each interested party with
/// that party's original TxId restored.
/// </summary>
/// <remarks>
/// <para><b>Why this exists.</b> In a multiplexed connection model a single missing
/// or mis-routed response would otherwise leak a correlation entry forever and hang
/// the upstream pipe indefinitely. Real-world causes: PLC drops a response, network
/// packet loss, backend that mis-echoes MBAP TxIds.</para>
/// <para>Entries flagged <c>IsHeartbeat</c> have no upstream party; a timed-out heartbeat
/// instead starts <c>TearDownBackendAsync</c> with <c>cascadeUpstreams: true</c> (unless
/// disposal has already been requested).</para>
/// <para>Exception delivery is non-blocking (<c>TrySendResponse</c>): this loop serves every
/// party of every stale entry, so it must not wait on one upstream's full response
/// channel. A rejected frame is counted via <c>IncrementResponseDropForFullUpstream</c>.</para>
/// </remarks>
/// <param name="ct">Cancels the loop; cancellation is treated as normal teardown.</param>
/// <returns>
/// A task that completes when the loop exits: on cancellation, or after an unexpected
/// exception has been logged.
/// </returns>
private async Task RunRequestTimeoutWatchdogAsync(CancellationToken ct)
{
    // Tick at ~quarter of the request timeout for responsive cleanup, but cap to a
    // 100 ms floor so the watchdog doesn't busy-wake on very small timeouts.
    int tickMs = Math.Max(100, _connectionOptions.BackendRequestTimeoutMs / 4);
    try
    {
        while (!ct.IsCancellationRequested)
        {
            await Task.Delay(tickMs, ct).ConfigureAwait(false);

            var threshold = DateTimeOffset.UtcNow.AddMilliseconds(-_connectionOptions.BackendRequestTimeoutMs);
            var stale = _correlation.SnapshotOlderThan(threshold);
            if (stale.Count == 0)
                continue;

            foreach (var kvp in stale)
            {
                ushort proxyTxId = kvp.Key;

                // Try to claim the entry; if another path (response, cascade) already removed it,
                // skip — no work to do.
                if (!_correlation.TryRemove(proxyTxId, out var req))
                    continue;
                _allocator.Release(proxyTxId);

                // Keepalive heartbeat that never came back. The backend is no longer
                // answering Modbus even though the socket may still look connected —
                // tear it down proactively (cascading every attached pipe) so the
                // failure is found here, during idle, instead of corrupting the next
                // real client request. There is no upstream party to send a 0x0B to.
                if (req.IsHeartbeat)
                {
                    long hbElapsedMs = (long)(DateTimeOffset.UtcNow - req.SentAtUtc).TotalMilliseconds;
                    KeepaliveLogEvents.HeartbeatTimeout(_logger, _plc.Name, proxyTxId, hbElapsedMs);
                    _ctx.Counters.IncrementBackendHeartbeatFailed();
                    _ctx.Counters.IncrementBackendIdleDisconnect();
                    KeepaliveLogEvents.BackendIdleDisconnect(_logger, _plc.Name, hbElapsedMs);
                    if (!_disposeCts.IsCancellationRequested)
                        _ = TearDownBackendAsync("keepalive heartbeat timeout", cascadeUpstreams: true);
                    continue;
                }

                // Also clear the coalescing-by-key entry. A late attach that raced
                // in just before the watchdog claim will still receive the 0x0B
                // exception via this entry's InterestedParties list (List
                // mutations happen before fan-out begins).
                if (req.Fc is 0x03 or 0x04)
                {
                    var coalKey = new CoalescingKey(req.UnitId, req.Fc, req.StartAddress, req.Qty);
                    _inFlightByKey.TryRemove(coalKey, out _);
                }

                long elapsedMs = (long)(DateTimeOffset.UtcNow - req.SentAtUtc).TotalMilliseconds;
                foreach (var party in req.InterestedParties)
                {
                    MultiplexerLogEvents.RequestTimeout(
                        _logger, _plc.Name, proxyTxId, party.OriginalTxId, req.Fc, elapsedMs);
                    if (!party.Pipe.IsAlive)
                        continue;

                    // Deliver Modbus exception 0x0B (Gateway Target Device Failed To Respond)
                    // to the upstream client. This lets the client's library raise a clean
                    // ModbusException rather than hanging on a timeout.
                    byte[] excFrame = BuildExceptionFrame(party.OriginalTxId, req.UnitId, req.Fc, exceptionCode: 0x0B);
                    try
                    {
                        // Non-blocking on purpose: awaiting a per-pipe write here would let one
                        // wedged upstream (full bounded response channel) stall timeout
                        // delivery for every other party and every other stale entry.
                        if (party.Pipe.TrySendResponse(excFrame))
                            _ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
                        else
                            _ctx.Counters.IncrementResponseDropForFullUpstream();
                    }
                    catch (Exception)
                    {
                        // Best-effort delivery; if the pipe is going down, the client
                        // discovers the failure through its own socket close path.
                    }
                }
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Request-timeout watchdog faulted: Plc={Plc}", _plc.Name);
    }
}
// ── Backend keepalive heartbeat ───────────────────────────────────────────
/// <summary>
/// Backend keepalive heartbeat loop. On every pass it re-reads the keepalive options,
/// sleeps for one poll interval, and — when keepalive is enabled and the time since
/// <c>_lastBackendActivityTicks</c> has reached <c>BackendHeartbeatIdleMs</c> (floored at
/// 1000 ms) — calls <see cref="SendHeartbeat"/> to enqueue one synthetic probe.
/// </summary>
/// <remarks>
/// The loop only enqueues probes. A probe that never returns is handled by
/// <see cref="RunRequestTimeoutWatchdogAsync"/>, which tears the backend down when a
/// heartbeat entry goes stale. Nothing in this loop attempts a reconnect.
/// </remarks>
/// <param name="ct">Cancels the loop; cancellation is treated as normal teardown.</param>
/// <returns>
/// A task that completes when the loop exits: on cancellation, or after an unexpected
/// exception has been logged.
/// </returns>
private async Task RunBackendHeartbeatAsync(CancellationToken ct)
{
    try
    {
        while (!ct.IsCancellationRequested)
        {
            var options = _keepaliveOptions();
            int idleWindowMs = Math.Max(1000, options.BackendHeartbeatIdleMs);

            // Poll four times per idle window so an elapsed window is noticed promptly;
            // the 500 ms floor stops the loop from busy-waking.
            int pollMs = Math.Max(500, idleWindowMs / 4);
            await Task.Delay(pollMs, ct).ConfigureAwait(false);

            if (options.Enabled)
            {
                var lastActivityUtc = new DateTime(
                    Interlocked.Read(ref _lastBackendActivityTicks), DateTimeKind.Utc);
                TimeSpan idleFor = DateTime.UtcNow - lastActivityUtc;

                // Only probe once the backend has been quiet for the whole window.
                if (idleFor.TotalMilliseconds >= idleWindowMs)
                {
                    SendHeartbeat(options);
                }
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Backend heartbeat loop faulted: Plc={Plc}", _plc.Name);
    }
}
/// <summary>
/// Builds and enqueues one synthetic FC03 qty=1 heartbeat request onto the backend
/// outbound channel. The correlation entry is created with <c>IsHeartbeat: true</c>, an
/// empty interested-party list and <c>ResolvedCacheTtlMs: 0</c>; it is never registered
/// in the coalescing-by-key map, so no upstream pipe is involved.
/// </summary>
/// <remarks>
/// The method is best-effort and never blocks: if no proxy TxId is available, the
/// correlation map rejects the entry, or the outbound channel refuses the frame, any
/// allocation made so far is rolled back and the probe is skipped.
/// </remarks>
/// <param name="ka">
/// Keepalive options; only <c>BackendHeartbeatProbeAddress</c> is read here, as the
/// register address to probe.
/// </param>
private void SendHeartbeat(KeepaliveOptions ka)
{
    // A saturated TxId space means the backend is busy (65,536 requests in flight),
    // which is the opposite of idle — skip this tick rather than force a probe.
    if (!_allocator.TryAllocate(out ushort proxyTxId))
        return;

    byte unitId = (byte)Volatile.Read(ref _lastSeenUnitId);
    ushort address = (ushort)ka.BackendHeartbeatProbeAddress;

    var inFlight = new InFlightRequest(
        UnitId: unitId,
        Fc: 0x03,
        StartAddress: address,
        Qty: 1,
        // Explicit element type: with no arguments there is nothing to infer it from.
        InterestedParties: Array.Empty<InterestedParty>(),
        SentAtUtc: DateTimeOffset.UtcNow,
        ResolvedCacheTtlMs: 0,
        IsHeartbeat: true);

    if (!_correlation.TryAdd(proxyTxId, inFlight))
    {
        _allocator.Release(proxyTxId);
        return;
    }

    byte[] frame = BuildHeartbeatFrame(proxyTxId, unitId, address);

    // Non-blocking enqueue: if the channel is full the backend is not idle (a race), and
    // if it is completed the backend is tearing down — either way, undo and skip.
    if (!_outboundChannel.Writer.TryWrite(frame))
    {
        // Release the TxId only if this call is the one that removed the entry, so a
        // concurrent claimant (watchdog, teardown) never causes a double release.
        if (_correlation.TryRemove(proxyTxId, out _))
            _allocator.Release(proxyTxId);
        return;
    }

    _ctx.Counters.IncrementBackendHeartbeatSent();
    KeepaliveLogEvents.HeartbeatSent(_logger, _plc.Name, proxyTxId, address);
}
/// <summary>
/// Builds the MBAP-framed FC03 (Read Holding Registers) request used as the keepalive
/// heartbeat probe: a read of one register at <paramref name="address"/>.
/// </summary>
/// <param name="proxyTxId">Proxy transaction id written big-endian into bytes 0-1.</param>
/// <param name="unitId">Unit identifier written into byte 6.</param>
/// <param name="address">Register address written big-endian into bytes 8-9.</param>
/// <returns>A new array of <c>MbapFrame.HeaderSize + 5</c> bytes holding the request.</returns>
private static byte[] BuildHeartbeatFrame(ushort proxyTxId, byte unitId, ushort address)
{
    // PDU = [fc=03][addrHi][addrLo][qtyHi][qtyLo].
    const int PduLength = 5;
    const ushort RegisterCount = 1;

    byte[] probe = new byte[MbapFrame.HeaderSize + PduLength];

    PutUInt16(0, proxyTxId);        // Transaction id
    PutUInt16(2, 0);                // ProtocolId
    PutUInt16(4, 1 + PduLength);    // Length = UnitId(1) + PDU(5) = 6
    probe[6] = unitId;
    probe[7] = 0x03;                // FC03 Read Holding Registers
    PutUInt16(8, address);
    PutUInt16(10, RegisterCount);   // Qty = 1
    return probe;

    // Writes a 16-bit value big-endian at the given offset of the probe buffer.
    void PutUInt16(int offset, ushort value)
    {
        probe[offset] = (byte)(value >> 8);
        probe[offset + 1] = (byte)(value & 0xFF);
    }
}
// ── Helpers ───────────────────────────────────────────────────────────────
/// <summary>
/// Read-only adapter that surfaces a <c>ResponseCache</c>'s <c>Count</c> and
/// <c>ApproximateBytes</c> through <c>ICacheStatsProvider</c> for the snapshot path.
/// Kept as a separate sealed class so the cache type itself does not have to
/// implement the stats interface.
/// </summary>
private sealed class CacheStatsAdapter : ICacheStatsProvider
{
    private readonly Cache.ResponseCache _inner;

    /// <summary>Wraps <paramref name="cache"/>; every property reads through to it.</summary>
    public CacheStatsAdapter(Cache.ResponseCache cache)
    {
        _inner = cache;
    }

    /// <summary>Current value of the wrapped cache's <c>Count</c>.</summary>
    public long EntryCount
    {
        get { return _inner.Count; }
    }

    /// <summary>Current value of the wrapped cache's <c>ApproximateBytes</c>.</summary>
    public long ApproximateBytes
    {
        get { return _inner.ApproximateBytes; }
    }
}
/// <summary>
/// Receives exactly <paramref name="count"/> bytes from <paramref name="socket"/> into
/// <paramref name="buf"/>, starting at <paramref name="offset"/>, looping over partial
/// receives until the requested span is full.
/// </summary>
/// <param name="socket">Socket to receive from.</param>
/// <param name="buf">Destination buffer.</param>
/// <param name="offset">Index in <paramref name="buf"/> at which the first byte is stored.</param>
/// <param name="count">Number of bytes to receive; zero or less completes immediately.</param>
/// <param name="ct">Cancellation token forwarded to each receive call.</param>
/// <returns>
/// <c>true</c> once all <paramref name="count"/> bytes have been stored; <c>false</c> if a
/// receive returned 0 bytes before the span was full.
/// </returns>
private static async Task<bool> FillAsync(
    Socket socket, byte[] buf, int offset, int count, CancellationToken ct)
{
    int filled = 0;
    while (filled < count)
    {
        int n = await socket.ReceiveAsync(
            buf.AsMemory(offset + filled, count - filled),
            SocketFlags.None, ct).ConfigureAwait(false);

        // A zero-byte receive means no more data will arrive; report a short read.
        if (n == 0)
            return false;

        filled += n;
    }
    return true;
}
/// <summary>
/// Wraps cached PDU bytes in a fresh MBAP header for one upstream party. The header
/// carries the requesting party's original TxId, protocol id 0, a length of
/// UnitId(1) + PDU, and the unit id; the PDU bytes are copied verbatim after it.
/// </summary>
/// <param name="originalTxId">Transaction id the upstream client used, written big-endian.</param>
/// <param name="unitId">Unit identifier written into byte 6.</param>
/// <param name="cachedPdu">PDU body (no MBAP header) to append after the header.</param>
/// <returns>A new array of <c>MbapFrame.HeaderSize + cachedPdu.Length</c> bytes.</returns>
private static byte[] BuildCacheHitFrame(ushort originalTxId, byte unitId, byte[] cachedPdu)
{
    int bodyLength = cachedPdu.Length;

    // MBAP length field counts the unit id byte plus the PDU body.
    // (Modbus limits a PDU to 253 bytes, so this fits in 16 bits.)
    ushort mbapLength = (ushort)(bodyLength + 1);

    byte[] reply = new byte[MbapFrame.HeaderSize + bodyLength];

    // Transaction id, big-endian.
    reply[0] = (byte)(originalTxId >> 8);
    reply[1] = (byte)(originalTxId & 0xFF);

    // Protocol id is always zero.
    reply[2] = 0;
    reply[3] = 0;

    // Length, big-endian, followed by the unit id.
    reply[4] = (byte)(mbapLength >> 8);
    reply[5] = (byte)(mbapLength & 0xFF);
    reply[6] = unitId;

    cachedPdu.CopyTo(reply, MbapFrame.HeaderSize);
    return reply;
}
/// <summary>
/// Builds an MBAP-framed Modbus exception response: the function code with its high
/// bit set, followed by the exception code.
/// </summary>
/// <param name="originalTxId">Transaction id to echo, written big-endian into bytes 0-1.</param>
/// <param name="unitId">Unit identifier written into byte 6.</param>
/// <param name="fc">Function code of the failed request; emitted as <c>fc | 0x80</c>.</param>
/// <param name="exceptionCode">Modbus exception code written as the final byte.</param>
/// <returns>A new array of <c>MbapFrame.HeaderSize + 2</c> bytes holding the response.</returns>
private static byte[] BuildExceptionFrame(ushort originalTxId, byte unitId, byte fc, byte exceptionCode)
{
    // Exception PDU = [fc | 0x80][exceptionCode].
    const int ExceptionPduLength = 2;
    byte[] reply = new byte[MbapFrame.HeaderSize + ExceptionPduLength];

    int cursor = 0;
    reply[cursor++] = (byte)(originalTxId >> 8);    // TxId high
    reply[cursor++] = (byte)(originalTxId & 0xFF);  // TxId low
    reply[cursor++] = 0;                            // ProtocolId high
    reply[cursor++] = 0;                            // ProtocolId low
    reply[cursor++] = 0;                            // Length high
    reply[cursor++] = 3;                            // Length low: UnitId(1) + ExFc(1) + ExCode(1)
    reply[cursor++] = unitId;
    reply[cursor++] = (byte)(fc | 0x80);
    reply[cursor] = exceptionCode;
    return reply;
}
}