mbproxy: initial commit through Phase 9 (TxId multiplexing)
Adds the mbproxy service end-to-end. Phases 00-08 implement the production-ready single-listener / 1:1-backend transparent Modbus TCP proxy with bidirectional BCD rewriting for the ~54-PLC DL205/DL260 fleet. Phase 9 replaces the connection layer with a single backend socket per PLC plus MBAP TxId rewriting, lifting the H2-ECOM100's 4-concurrent-client cap as an operational ceiling. Phase 9 additions of note: - PlcMultiplexer + UpstreamPipe + TxIdAllocator + CorrelationMap - InFlightRequest with IReadOnlyList<InterestedParty> (load-bearing for Phase 10 read coalescing — do not collapse to a single field) - Per-request watchdog: surfaces Modbus exception 0x0B to upstream on BackendRequestTimeoutMs, defending against lost responses, dead-PLC paths, and pymodbus 3.13.0's concurrent-multiplexed-request bug (its ServerRequestHandler.last_pdu state race) - Status DTO + HTML gain inFlight / maxInFlight / txIdWraps / disconnectCascades / queueDepth (Tier 1.6 in docs/kpi.md) Tests: 263 unit + 38 E2E. Multiplexer correctness under truly concurrent backend traffic is proved against a stub backend in PlcMultiplexerTests; MultiplexerE2ETests paces requests so pymodbus 3.13's single-PDU framer stays in known-good mode. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,142 @@
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
|
||||
/// Allocates 16-bit MBAP transaction IDs (proxy TxIds) used to multiplex many upstream
|
||||
/// clients onto a single shared backend connection per PLC. The allocator tracks which
|
||||
/// IDs are currently in flight and scans forward from a rolling cursor to find the next
|
||||
/// free slot, mimicking the natural cadence of Modbus clients while keeping reuse
|
||||
/// distance maximally large in steady state.
|
||||
///
|
||||
/// <para>State is protected by a single <see cref="object"/> lock. Contention is
|
||||
/// negligible in practice — the allocator is per-PLC and one PLC's wire rate is bounded
|
||||
/// by the controller's internal scan time (a few ms per request on an H2-ECOM100).
|
||||
/// The lock is preferred over a lock-free approach for readability and worst-case
|
||||
/// determinism (Polly retries, cascade cleanup, and saturation paths must not race).</para>
|
||||
///
|
||||
/// <para><b>Memory:</b> <c>bool[65536]</c> (~64 KB) per PLC. With ~54 PLCs that is
|
||||
/// ~3.4 MB total — well within budget for a service that already ships at ~30 MB working
|
||||
/// set under load.</para>
|
||||
///
|
||||
/// <para><b>Wrap counter:</b> increments every time the rolling cursor rolls over
|
||||
/// 0xFFFF → 0x0000 during a successful allocation scan. Frequent wraps indicate either
|
||||
/// very high churn or extreme in-flight depth and are surfaced as a telemetry signal,
|
||||
/// not an error.</para>
|
||||
/// </summary>
|
||||
internal sealed class TxIdAllocator
{
    // 65,536 slots total — the full uint16 space.
    private const int SlotCount = 65536;

    // SlotCount is a power of two, so masking with (SlotCount - 1) is the same as
    // reducing an unwrapped position modulo the 16-bit ID space.
    private const int SlotMask = SlotCount - 1;

    private readonly object _lock = new();
    private readonly bool[] _inUse = new bool[SlotCount];
    private ushort _next;               // rolling cursor; 0 on construction
    private int _inFlightCount;         // 0..65536
    private long _wrapCount;            // monotonic; never resets

    /// <summary>
    /// Number of currently-in-flight proxy TxIds (i.e., allocated but not yet released).
    /// The value is read under the same lock that guards every mutation so the snapshot
    /// is consistent with the <c>_inUse</c> table.
    /// </summary>
    public int InFlightCount
    {
        get
        {
            lock (_lock)
            {
                return _inFlightCount;
            }
        }
    }

    /// <summary>
    /// Number of times the rolling cursor has wrapped 0xFFFF → 0x0000 during a
    /// successful allocation since the allocator was constructed. A wrap is counted
    /// whether the cursor rolls over while skipping occupied slots or when it advances
    /// past a freshly allocated 0xFFFF. Read without taking the lock via
    /// <see cref="Interlocked.Read(ref long)"/>.
    /// </summary>
    public long WrapCount => Interlocked.Read(ref _wrapCount);

    /// <summary>
    /// Attempts to allocate the next free proxy TxId, scanning forward from the
    /// rolling cursor.
    /// </summary>
    /// <param name="id">
    /// Receives the allocated ID on success; set to <c>0</c> on failure (the value is
    /// then meaningless and must not be used).
    /// </param>
    /// <returns>
    /// <c>true</c> when an ID was allocated; <c>false</c> when every slot in the 16-bit
    /// space is currently in use. On <c>false</c> the caller is responsible for emitting
    /// <c>mbproxy.multiplex.saturated</c> and returning a Modbus exception
    /// (code 04 / Slave Device Failure) to the upstream.
    /// </returns>
    public bool TryAllocate(out ushort id)
    {
        lock (_lock)
        {
            if (_inFlightCount >= SlotCount)
            {
                id = 0;
                return false;
            }

            // Walk at most SlotCount positions starting at the cursor. Positions are
            // kept "unwrapped" (start + offset, up to 2 * SlotCount - 1) so that a
            // rollover of the 16-bit cursor is detectable by a simple comparison.
            int start = _next;
            for (int offset = 0; offset < SlotCount; offset++)
            {
                int unwrapped = start + offset;
                int slot = unwrapped & SlotMask;
                if (_inUse[slot])
                {
                    continue;
                }

                _inUse[slot] = true;
                _inFlightCount++;

                // The cursor lands one past the allocated slot. If that unwrapped
                // position reaches SlotCount, the cursor crossed 0xFFFF → 0x0000
                // somewhere in this call — either while skipping busy slots or on
                // this final step. start and offset are each below SlotCount, so at
                // most one rollover can happen per allocation.
                int advanced = unwrapped + 1;
                if (advanced >= SlotCount)
                {
                    Interlocked.Increment(ref _wrapCount);
                }

                _next = (ushort)(advanced & SlotMask);
                id = (ushort)slot;
                return true;
            }

            // Defensive: should be unreachable given the in-flight count check above.
            id = 0;
            return false;
        }
    }

    /// <summary>
    /// Releases a previously-allocated proxy TxId. Releasing an ID that is not currently
    /// allocated is a no-op (defensive: cascade-on-disconnect can call <see cref="Release"/>
    /// after a concurrent timeout path has already done so).
    /// </summary>
    /// <param name="id">The proxy TxId to return to the free pool.</param>
    public void Release(ushort id)
    {
        lock (_lock)
        {
            if (!_inUse[id])
            {
                return;
            }

            _inUse[id] = false;
            _inFlightCount--;
        }
    }

    /// <summary>
    /// Test-only: returns whether the given proxy TxId is currently marked in use.
    /// Internal so it remains usable from unit tests via InternalsVisibleTo.
    /// </summary>
    /// <param name="id">The proxy TxId to query.</param>
    /// <returns><c>true</c> if the slot for <paramref name="id"/> is marked in use.</returns>
    internal bool IsAllocated(ushort id)
    {
        lock (_lock)
        {
            return _inUse[id];
        }
    }
}
|
||||
Reference in New Issue
Block a user