Files
wwtools/mbproxy/src/Mbproxy/Proxy/Multiplexing/TxIdAllocator.cs
T
Joseph Doherty b222362ce0 mbproxy: remediate the 2026-05-16 code-review findings
Fixes every finding from the codereviews/2026-05-16 multi-agent review
(2 Critical, 20 Major, 38 Minor) and adds that review to the repo.

Highlights: dashboard XSS escape; response cache invalidated on the
write request (not just the response); ReloadValidator now runs at
startup so port collisions / duplicate names / malformed Resilience
profiles fail fast; AdminPort 0 genuinely disables the admin endpoint;
PlcListener accept-loop faults propagate to the supervisor's faulted
path; reconciler Restart builds before removing; Resilience pipelines
are restart-only from a frozen snapshot; multiplexer connect-race leak,
watchdog party-list snapshot, backend-response and FC16 framing
validation; frontend reconnect retry and util.js load guard; plus the
log-event/doc drift sweep and test-port hygiene.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 18:08:06 -04:00

158 lines
6.0 KiB
C#

namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Allocates 16-bit MBAP transaction IDs (proxy TxIds) used to multiplex many upstream
/// clients onto a single shared backend connection per PLC. The allocator tracks which
/// IDs are currently in flight and scans forward from a rolling cursor to find the next
/// free slot, mimicking the natural cadence of Modbus clients while keeping reuse
/// distance maximally large in steady state.
///
/// <para>State is protected by a single <see cref="object"/> lock. Contention is
/// negligible in practice — the allocator is per-PLC and one PLC's wire rate is bounded
/// by the controller's internal scan time (a few ms per request on an H2-ECOM100).
/// The lock is preferred over a lock-free approach for readability and worst-case
/// determinism (Polly retries, cascade cleanup, and saturation paths must not race).</para>
///
/// <para><b>Memory:</b> <c>bool[65536]</c> (~64 KB) per PLC. With ~54 PLCs that is
/// ~3.4 MB total — well within budget for a service that already ships at ~30 MB working
/// set under load.</para>
///
/// <para><b>Wrap counter:</b> increments every time the rolling cursor rolls over
/// 0xFFFF → 0x0000 as part of a successful allocation — whether the rollover happens
/// while scanning past in-use slots or when the cursor advances past a freshly
/// allocated 0xFFFF. A single allocation can wrap at most once. Frequent wraps indicate
/// either very high churn or extreme in-flight depth and are surfaced as a telemetry
/// signal, not an error.</para>
/// </summary>
internal sealed class TxIdAllocator
{
    // 65,536 slots total — the full uint16 space.
    private const int SlotCount = 65536;

    private readonly object _lock = new();
    private readonly bool[] _inUse = new bool[SlotCount];
    private ushort _next;               // rolling cursor; 0 on construction
    private int _inFlightCount;         // 0..65536
    private long _wrapCount;            // monotonic; never resets
    private long _doubleReleaseCount;   // monotonic; Release called on an already-free slot

    /// <summary>
    /// Number of currently-in-flight proxy TxIds (i.e., allocated but not yet released).
    /// Read under the same lock that mutates it; the snapshot is a simple atomic read of
    /// an int but we still hold the lock for cross-field consistency with <c>_inUse</c>.
    /// </summary>
    public int InFlightCount
    {
        get
        {
            lock (_lock)
            {
                return _inFlightCount;
            }
        }
    }

    /// <summary>
    /// Number of times the rolling cursor has wrapped 0xFFFF → 0x0000 during a
    /// successful allocation since the allocator was constructed. Read without locking
    /// via <see cref="Interlocked.Read"/> for the hot status-page path.
    /// </summary>
    public long WrapCount => Interlocked.Read(ref _wrapCount);

    /// <summary>
    /// Number of times <see cref="Release"/> was called on a slot that was already free.
    /// A double-release is normally a benign cascade-vs-timeout race, but a sustained
    /// non-zero rate points at the documented <c>TearDownBackendAsync</c> gate-not-held
    /// race actually firing — making the otherwise-silent request drop observable.
    /// </summary>
    public long DoubleReleaseCount => Interlocked.Read(ref _doubleReleaseCount);

    /// <summary>
    /// Attempts to allocate the next free proxy TxId.
    /// Returns <c>true</c> with <paramref name="id"/> set when an ID was allocated.
    /// Returns <c>false</c> when every slot in the 16-bit space is currently in use;
    /// the caller is responsible for emitting <c>mbproxy.multiplex.saturated</c> and
    /// returning a Modbus exception (code 04 / Slave Device Failure) to the upstream.
    /// </summary>
    /// <param name="id">
    /// The allocated proxy TxId on success; <c>0</c> when the method returns <c>false</c>.
    /// </param>
    /// <returns><c>true</c> if a free slot was found and marked in use; otherwise <c>false</c>.</returns>
    public bool TryAllocate(out ushort id)
    {
        lock (_lock)
        {
            if (_inFlightCount >= SlotCount)
            {
                id = 0;
                return false;
            }

            // Scan forward from _next for the next free slot. _inFlightCount < SlotCount
            // guarantees at least one free slot, so the loop finds it within SlotCount
            // probes even in the pathological full-minus-one case.
            //
            // The scan uses a linear (non-modular) int offset so that a rollover is
            // detectable no matter where it happens: the cursor has wrapped exactly when
            // its un-wrapped landing position reaches SlotCount.
            int start = _next;
            for (int offset = 0; offset < SlotCount; offset++)
            {
                int slot = (start + offset) % SlotCount;
                if (_inUse[slot])
                {
                    continue;
                }

                _inUse[slot] = true;
                _inFlightCount++;

                // Un-wrapped position the cursor lands on (one past the allocated slot).
                // start <= 65535 and offset <= 65535, so this is < 2 * SlotCount: the
                // cursor can cross 0xFFFF → 0x0000 at most once per allocation.
                int landing = start + offset + 1;
                if (landing >= SlotCount)
                {
                    // Interlocked keeps the lock-free WrapCount reader tear-free.
                    Interlocked.Increment(ref _wrapCount);
                }

                _next = (ushort)(landing % SlotCount);
                id = (ushort)slot;
                return true;
            }

            // Defensive: should be unreachable given the _inFlightCount check above.
            id = 0;
            return false;
        }
    }

    /// <summary>
    /// Releases a previously-allocated proxy TxId. Releasing an ID that is not currently
    /// allocated leaves the allocation state untouched (defensive: cascade-on-disconnect
    /// can call <see cref="Release"/> after a concurrent timeout path has already done so)
    /// and is recorded in <see cref="DoubleReleaseCount"/>.
    /// </summary>
    /// <param name="id">The proxy TxId to return to the free pool.</param>
    public void Release(ushort id)
    {
        lock (_lock)
        {
            if (_inUse[id])
            {
                _inUse[id] = false;
                _inFlightCount--;
            }
            else
            {
                // Double-release: the slot was already free. Harmless to the allocator
                // (idempotent) but tracked so the rare cascade-vs-timeout race is visible.
                Interlocked.Increment(ref _doubleReleaseCount);
            }
        }
    }

    /// <summary>
    /// Test-only: returns whether the given proxy TxId is currently marked in use.
    /// Internal so it remains usable from unit tests via InternalsVisibleTo.
    /// </summary>
    /// <param name="id">The proxy TxId to inspect.</param>
    /// <returns><c>true</c> if the slot is currently allocated; otherwise <c>false</c>.</returns>
    internal bool IsAllocated(ushort id)
    {
        lock (_lock)
        {
            return _inUse[id];
        }
    }
}