mbproxy: add in-flight read coalescing (Phase 10)

When two or more upstream clients send the same FC03/FC04 read while a
matching request is already in flight on the same PLC's multiplexed
backend socket, attach the late arrivals to the existing
InFlightRequest.InterestedParties list instead of opening a second
backend round-trip.
The single backend response fans out to every attached party with each
party's original MBAP TxId restored individually. Zero post-response
staleness — coalescing operates entirely within the in-flight window
(microseconds to ~10 ms typical); the proxy is NOT a cache layer.

Headline mechanism:

- New record struct CoalescingKey(UnitId, Fc, StartAddress, Qty) keys
  the per-PLC InFlightByKeyMap. FC03 and FC04 are separate Modbus
  tables and never share a key; different unit IDs never coalesce;
  writes (FC06/FC16) bypass the coalescing path entirely.
- InFlightByKeyMap uses a simple lock around a Dictionary; atomic
  TryAttachOrCreate either appends a new party to the in-flight
  request's mutable List<InterestedParty> or invokes a factory to
  build a fresh entry. Per-entry MaxParties cap (default 32) bounds
  fan-out cost; past the cap, the next arrival opens a new entry.
- PlcMultiplexer.OnUpstreamFrameAsync takes the coalescing path for
  FC03/FC04 when Mbproxy.Resilience.ReadCoalescing.Enabled. The
  factory closure does the Phase-9 work (allocate TxId, add to
  CorrelationMap); the channel send happens AFTER returning from
  TryAttachOrCreate so the map lock is not held across the async send.
- Response fan-out in RunBackendReaderAsync removes the entry from
  InFlightByKeyMap before iterating InterestedParties, ensuring no
  concurrent attach can mutate the list during iteration.
- Cascade + watchdog paths also drain the key map so a stale entry
  cannot outlive its backend round-trip.

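Counter accounting balance (per snapshot): CoalescedHitCount +
CoalescedMissCount equals the total FC03 + FC04 requests dispatched
since startup. Even with coalescing disabled, every dispatched read
still bumps Miss so dashboard math stays balanced. Caveat: on the
coalescing-disabled path, a read rejected with exception 04 (TxId
allocator saturated) returns before the Miss bump, whereas the
coalescing path counts such a read as a Miss.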

New surface (additive only):
- src/Mbproxy/Proxy/Multiplexing/CoalescingKey.cs
- src/Mbproxy/Proxy/Multiplexing/InFlightByKeyMap.cs
- src/Mbproxy/Proxy/Multiplexing/CoalescingLogEvents.cs
- ReadCoalescingOptions on ResilienceOptions
- CoalescedHitCount / CoalescedMissCount /
  CoalescedResponseToDeadUpstream counters surfaced on /status.json
  per PLC and as a compact "Coal" cell on the HTML status page.

Phase 9 test patch: TwoUpstreams_ProxyTxIds_AreDistinct_OnTheWire
previously read the same register from both clients (which now
coalesces). Patched to read two different addresses so the test still
proves distinct backend TxIds without violating the coalescing
contract.

Tests added: 24 new (19 unit + 5 E2E):
- CoalescingKeyTests (5)
- InFlightByKeyMapTests (6, includes concurrent stress)
- ReadCoalescingTests (8, stub-backend with deterministic delay)
- ReadCoalescingE2ETests (5, pymodbus simulator; coalescing-active
  during overlap is proven against the stub, not the sim, due to
  pymodbus 3.13's known concurrent-frame bug)

Total: 325 tests passing (282 unit + 43 E2E).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-14 02:26:06 -04:00
parent 56eee3c563
commit a2dba4bd07
25 changed files with 1888 additions and 52 deletions
+8 -2
View File
@@ -71,7 +71,10 @@ public sealed record FcCounts(
/// <summary>
/// Backend connect, exception, and multiplexer telemetry. Phase 9 added
/// <c>InFlight</c>, <c>MaxInFlight</c>, <c>TxIdWraps</c>, <c>DisconnectCascades</c>, and
/// <c>QueueDepth</c> to surface the live state of the per-PLC TxId-multiplexed connection.
/// <c>QueueDepth</c>. Phase 10 added the three coalescing counters
/// (<c>CoalescedHitCount</c>, <c>CoalescedMissCount</c>, <c>CoalescedResponseToDeadUpstream</c>);
/// the dashboard-side derived <c>coalescingRatio</c> is intentionally NOT carried on the wire
/// — consumers compute <c>Hit / (Hit + Miss)</c>.
/// </summary>
public sealed record PlcBackendStatus(
long ConnectsSuccess,
@@ -82,7 +85,10 @@ public sealed record PlcBackendStatus(
long MaxInFlight,
long TxIdWraps,
long DisconnectCascades,
long QueueDepth);
long QueueDepth,
long CoalescedHitCount,
long CoalescedMissCount,
long CoalescedResponseToDeadUpstream);
/// <summary>Modbus exception counts by code.</summary>
public sealed record ExceptionCounts(
@@ -80,6 +80,10 @@ internal static class StatusHtmlRenderer
// Phase 9: multiplexer telemetry columns.
sb.Append("<th>In-flight</th><th>Max in-flight</th><th>TxId wraps</th>");
sb.Append("<th>Cascades</th><th>Queue</th>");
// Phase 10: coalescing column. Single cell carries hit / (hit + miss) ratio as
// a percentage plus the raw hit count for context. Kept compact (one cell) to
// stay under the 50 KB page-weight budget.
sb.Append("<th>Coal</th>");
sb.Append("</tr></thead><tbody>");
foreach (var plc in status.Plcs)
@@ -146,6 +150,21 @@ internal static class StatusHtmlRenderer
sb.Append("<td>").Append(plc.Backend.TxIdWraps).Append("</td>");
sb.Append("<td>").Append(plc.Backend.DisconnectCascades).Append("</td>");
sb.Append("<td>").Append(plc.Backend.QueueDepth).Append("</td>");
// Phase 10: coalescing ratio cell — "<pct>% (<hit>)". When no coalesced reads
// have been seen, render an em-dash to keep the cell narrow.
long coalHit = plc.Backend.CoalescedHitCount;
long coalMiss = plc.Backend.CoalescedMissCount;
sb.Append("<td>");
if (coalHit + coalMiss == 0)
{
sb.Append("&mdash;");
}
else
{
int pct = (int)Math.Round(100.0 * coalHit / (coalHit + coalMiss));
sb.Append(pct).Append("% (").Append(coalHit).Append(')');
}
sb.Append("</td>");
sb.Append("</tr>");
}
@@ -99,7 +99,10 @@ internal sealed class StatusSnapshotBuilder
MaxInFlight: 0,
TxIdWraps: 0,
BackendDisconnectCascades: 0,
BackendQueueDepth: 0);
BackendQueueDepth: 0,
CoalescedHitCount: 0,
CoalescedMissCount: 0,
CoalescedResponseToDeadUpstream: 0);
// Phase 08: ConnectsSuccess / ConnectsFailed are now tracked in ProxyCounters.
long connectsSuccess = counters.ConnectsSuccess;
@@ -134,7 +137,10 @@ internal sealed class StatusSnapshotBuilder
MaxInFlight: counters.MaxInFlight,
TxIdWraps: counters.TxIdWraps,
DisconnectCascades: counters.BackendDisconnectCascades,
QueueDepth: counters.BackendQueueDepth),
QueueDepth: counters.BackendQueueDepth,
CoalescedHitCount: counters.CoalescedHitCount,
CoalescedMissCount: counters.CoalescedMissCount,
CoalescedResponseToDeadUpstream: counters.CoalescedResponseToDeadUpstream),
Bytes: new PlcBytesStatus(
UpstreamIn: counters.BytesUpstreamIn,
UpstreamOut: counters.BytesUpstreamOut)));
@@ -8,6 +8,12 @@ public sealed class ResilienceOptions
InitialBackoffMs = [1000, 2000, 5000, 15000, 30000],
SteadyStateMs = 30000,
};
/// <summary>
/// Phase 10 — in-flight read coalescing options. Defaults to enabled with a 32-party
/// cap so unconfigured deployments get the de-duplication benefit immediately.
/// </summary>
public ReadCoalescingOptions ReadCoalescing { get; init; } = new();
}
public sealed class RetryProfile
@@ -21,3 +27,32 @@ public sealed class RecoveryProfile
public IReadOnlyList<int> InitialBackoffMs { get; init; } = [];
public int SteadyStateMs { get; init; }
}
/// <summary>
/// Phase 10 — knobs for the in-flight read-coalescing feature. The feature attaches
/// late-arriving FC03/FC04 reads of identical <c>(unitId, fc, start, qty)</c> tuples to an
/// already-in-flight peer, fanning out the single backend response to every attached
/// upstream client.
///
/// <para>Zero post-response staleness — coalescing operates entirely within the in-flight
/// window (microseconds to ~10 ms typical). Once the response is delivered, the coalescing
/// entry dies.</para>
///
/// <para>Both properties are <c>init</c>-only with defaults, so a parameterless
/// <c>new ReadCoalescingOptions()</c> yields <c>Enabled = true</c>, <c>MaxParties = 32</c>.</para>
/// </summary>
public sealed class ReadCoalescingOptions
{
/// <summary>
/// Master switch. When <c>false</c>, every FC03/FC04 request takes the Phase-9 path
/// (allocate a fresh proxy TxId and round-trip to the backend) and is still counted as
/// a coalescing miss by the multiplexer. Hot-reloadable via
/// <c>IOptionsMonitor</c>; flipping to <c>false</c> at runtime does not disturb already-
/// coalesced entries — they drain naturally.
/// </summary>
public bool Enabled { get; init; } = true;
/// <summary>
/// Per-entry cap on the number of interested parties on a single in-flight request.
/// The cap counts the party that opened the entry: the in-flight map only appends a
/// late arrival while the entry's party list holds fewer than <c>MaxParties</c> items,
/// and a fresh entry starts with one party. The default of 32 therefore allows the
/// originator plus 31 attached peers; a value of 1 or less means no request ever
/// attaches (every read opens its own round-trip).
/// Past this cap, the next identical request opens a fresh
/// in-flight entry (a fresh backend round-trip). Bounds the response-fanout cost at
/// O(MaxParties) per entry.
/// NOTE(review): no range validation is visible on this property — confirm whether
/// configuration binding rejects zero or negative values.
/// </summary>
public int MaxParties { get; init; } = 32;
}
@@ -0,0 +1,25 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Hash key for the in-flight-by-key map. Two FC03/FC04 requests with identical
/// <see cref="UnitId"/>, <see cref="Fc"/>, <see cref="StartAddress"/>, and <see cref="Qty"/>
/// can be coalesced onto a single backend round-trip — the second request's response is
/// fanned out from the first request's reply, with each upstream party's original MBAP
/// transaction ID restored individually.
///
/// <para><b>Equality semantics:</b> auto-generated record-struct value equality. FC03 and
/// FC04 produce different keys for the same address (different Modbus tables); different
/// <see cref="UnitId"/> bytes never coalesce (different PLC personalities behind a shared
/// socket); reads of different <see cref="Qty"/> never coalesce (the responses carry
/// different register counts and would not be interchangeable on a fan-out).</para>
///
/// <para><b>Scope:</b> only FC03 (Read Holding Registers) and FC04 (Read Input Registers)
/// are coalescable. FC06 (Write Single Register), FC16 (Write Multiple Registers), and
/// any non-read function code bypass coalescing entirely — writes are non-idempotent and
/// must hit the backend each time.</para>
///
/// <para>The type itself does not enforce the FC03/FC04 restriction; it is a plain value
/// tuple and callers decide which requests get a key.</para>
/// </summary>
/// <param name="UnitId">Modbus unit identifier byte of the request.</param>
/// <param name="Fc">Modbus function-code byte of the request (0x03 or 0x04 for coalescable reads).</param>
/// <param name="StartAddress">Starting register address of the read.</param>
/// <param name="Qty">Number of registers requested by the read.</param>
internal readonly record struct CoalescingKey(
byte UnitId,
byte Fc,
ushort StartAddress,
ushort Qty);
@@ -0,0 +1,65 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Source-generated <see cref="LoggerMessage"/> definitions for the Phase-10 read-coalescing
/// feature. Event names are stable — do not rename without updating docs/design.md's
/// "Logging" event-name table.
///
/// <para>Levels are intentionally conservative — coalescing fires on every overlapping
/// read in a busy fleet (HMIs/historians polling the same screen tags), so the steady-state
/// log volume would be deafening at Information. The counters surface the same data at
/// far lower cost.</para>
///
/// <para>Event IDs 120–122 are used by this class; all three events log at Debug and
/// share the same <c>Plc / Unit / Fc / Start / Qty</c> field layout so they can be
/// correlated by coalescing key.</para>
/// </summary>
internal static partial class CoalescingLogEvents
{
/// <summary>
/// Emitted at Debug when an FC03/FC04 request attaches to an already-in-flight peer.
/// </summary>
/// <param name="logger">Logger the event is written to.</param>
/// <param name="plc">Name of the PLC whose multiplexer handled the request.</param>
/// <param name="unitId">Modbus unit identifier of the request.</param>
/// <param name="fc">Modbus function code of the request.</param>
/// <param name="start">Starting register address of the read.</param>
/// <param name="qty">Number of registers requested.</param>
/// <param name="partyCount">
/// Value rendered as <c>PartyCount</c> in the message.
/// NOTE(review): the PlcMultiplexer call site passes the in-flight-by-key map's entry
/// count here, not the number of parties attached to the matched entry — confirm which
/// one is intended.
/// </param>
[LoggerMessage(
EventId = 120,
EventName = "mbproxy.coalesce.hit",
Level = LogLevel.Debug,
Message = "Coalesce hit: Plc={Plc} Unit={UnitId} Fc={Fc} Start={Start} Qty={Qty} PartyCount={PartyCount}")]
public static partial void Hit(
ILogger logger,
string plc,
byte unitId,
byte fc,
ushort start,
ushort qty,
int partyCount);
/// <summary>
/// Emitted at Debug when an FC03/FC04 request opens a fresh in-flight entry (no
/// matching peer was found, or the matching peer had reached its <c>MaxParties</c> cap).
/// </summary>
/// <param name="logger">Logger the event is written to.</param>
/// <param name="plc">Name of the PLC whose multiplexer handled the request.</param>
/// <param name="unitId">Modbus unit identifier of the request.</param>
/// <param name="fc">Modbus function code of the request.</param>
/// <param name="start">Starting register address of the read.</param>
/// <param name="qty">Number of registers requested.</param>
[LoggerMessage(
EventId = 121,
EventName = "mbproxy.coalesce.miss",
Level = LogLevel.Debug,
Message = "Coalesce miss: Plc={Plc} Unit={UnitId} Fc={Fc} Start={Start} Qty={Qty}")]
public static partial void Miss(
ILogger logger,
string plc,
byte unitId,
byte fc,
ushort start,
ushort qty);
/// <summary>
/// Emitted at Debug when fan-out skips a coalesced party because its upstream pipe is
/// no longer alive. The corresponding counter increments at every occurrence.
/// </summary>
/// <param name="logger">Logger the event is written to.</param>
/// <param name="plc">Name of the PLC whose multiplexer handled the request.</param>
/// <param name="unitId">Modbus unit identifier of the in-flight request.</param>
/// <param name="fc">Modbus function code of the in-flight request.</param>
/// <param name="start">Starting register address of the read.</param>
/// <param name="qty">Number of registers requested.</param>
[LoggerMessage(
EventId = 122,
EventName = "mbproxy.coalesce.dead_upstream",
Level = LogLevel.Debug,
Message = "Coalesce dead upstream: Plc={Plc} Unit={UnitId} Fc={Fc} Start={Start} Qty={Qty}")]
public static partial void DeadUpstream(
ILogger logger,
string plc,
byte unitId,
byte fc,
ushort start,
ushort qty);
}
@@ -0,0 +1,122 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Per-PLC "in-flight by key" map that powers <b>Phase 10 read coalescing</b>. Holds the
/// currently-in-flight FC03/FC04 requests keyed by their <see cref="CoalescingKey"/> so a
/// late-arriving request with an identical key can attach to the existing in-flight entry
/// instead of opening a second backend round-trip.
///
/// <para><b>Concurrency model.</b> A single <see cref="object"/> lock serialises every
/// state-touching method. The simpler-lock-over-CAS choice is deliberate (per the phase
/// doc) — the map is per-PLC and the wire rate per PLC is bounded by the ECOM's internal
/// scan cadence (~210 ms per request). The lock-free <c>AddOrUpdate</c> alternative is not
/// worth the read-and-prove-it-correct burden.</para>
///
/// <para><b>Mutable-list seam.</b> Each entry stores a <see cref="List{InterestedParty}"/>
/// that is also exposed through the parent <see cref="InFlightRequest.InterestedParties"/>
/// property (typed as <c>IReadOnlyList&lt;InterestedParty&gt;</c>). Attaches mutate this
/// list under the map lock. A consumer that wants to iterate the list safely must first
/// make the entry unreachable through this map (via <see cref="TryRemove"/> or
/// <see cref="DrainAll"/>); once the entry is no longer mapped, no attach can find it and
/// therefore no further appends can occur.</para>
///
/// <para><b>Ordering around removal.</b> Attach and remove are serialised by the lock. An
/// attach that runs before the removal appends to the list and is visible to whoever
/// iterates it afterwards. An attach that runs after the removal finds no entry and
/// builds a fresh one through its factory — it is a miss, not a lost request.</para>
///
/// <para><b>One entry per key.</b> The map holds at most one request per key. When an
/// entry is at its party cap, the next arrival's freshly created request <i>replaces</i>
/// the mapping for that key; the capped request stays in flight but is no longer
/// reachable here. Because <see cref="TryRemove"/> is key-based, a removal triggered by
/// the capped request's response removes whichever request is currently mapped, which
/// may be the replacement. That costs coalescing opportunities but never attaches a
/// party to a request whose list is already being iterated.</para>
/// </summary>
internal sealed class InFlightByKeyMap
{
    private readonly object _lock = new();
    private readonly Dictionary<CoalescingKey, InFlightRequest> _entries = new();

    /// <summary>Current entry count. Read under the lock for a stable snapshot.</summary>
    public int Count
    {
        get
        {
            lock (_lock)
            {
                return _entries.Count;
            }
        }
    }

    /// <summary>
    /// Atomic attach-or-create. If <paramref name="key"/> already maps to an in-flight
    /// entry whose interested-party list has fewer than <paramref name="maxParties"/>
    /// items, appends <paramref name="party"/> to that entry and returns with
    /// <paramref name="wasNew"/> = <c>false</c>. Otherwise invokes
    /// <paramref name="factory"/> (while holding the lock) to build a fresh
    /// <see cref="InFlightRequest"/>, maps it under <paramref name="key"/> — replacing any
    /// entry already mapped there — and returns with <paramref name="wasNew"/> = <c>true</c>.
    ///
    /// <para>The factory must build a request whose <c>InterestedParties</c> list is a
    /// <see cref="List{T}"/> (cast to <see cref="IReadOnlyList{T}"/>). The map relies on
    /// being able to <see cref="List{T}.Add(T)"/> to that same instance under its lock when
    /// later attaches arrive. An entry whose list is any other type is never attached to;
    /// the next arrival replaces it.</para>
    ///
    /// <para><b>maxParties cap</b> — load-shedding safety valve. The cap counts every
    /// party on the list, including the one the factory put there. If an existing entry
    /// already has <paramref name="maxParties"/> parties, the next arrival opens a fresh
    /// entry (and a fresh backend round-trip). This bounds the response-fanout cost per
    /// entry at O(maxParties). A cap of 1 or less means nothing ever attaches.</para>
    /// </summary>
    /// <param name="key">Coalescing key of the incoming request.</param>
    /// <param name="party">Party to append when an attachable entry exists.</param>
    /// <param name="factory">
    /// Builds the new request when no attachable entry exists. Runs under the map lock,
    /// so it must not block or call back into this map.
    /// </param>
    /// <param name="maxParties">Upper bound on the party-list size of one entry.</param>
    /// <param name="req">The entry the party was attached to, or the newly created entry.</param>
    /// <param name="wasNew"><c>true</c> when <paramref name="factory"/> was invoked.</param>
    /// <returns>
    /// Always <c>true</c> (the bool return matches the phase doc's signature; future
    /// evolution could introduce a refusal path).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="factory"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="factory"/> returned <c>null</c>. Nothing is stored in that case, so
    /// the failure surfaces at the offending caller instead of as a null dereference in a
    /// later, unrelated attach for the same key.
    /// </exception>
    public bool TryAttachOrCreate(
        CoalescingKey key,
        InterestedParty party,
        Func<InFlightRequest> factory,
        int maxParties,
        out InFlightRequest req,
        out bool wasNew)
    {
        ArgumentNullException.ThrowIfNull(factory);

        lock (_lock)
        {
            if (_entries.TryGetValue(key, out var existing)
                && existing.InterestedParties is List<InterestedParty> existingList
                && existingList.Count < maxParties)
            {
                existingList.Add(party);
                req = existing;
                wasNew = false;
                return true;
            }

            // No entry, entry at cap, or entry with a non-List party collection:
            // build a fresh request and make it the mapped entry for this key.
            InFlightRequest created = factory()
                ?? throw new InvalidOperationException(
                    "The in-flight request factory returned null.");

            _entries[key] = created;
            req = created;
            wasNew = true;
            return true;
        }
    }

    /// <summary>
    /// Removes whichever entry is currently mapped under <paramref name="key"/>, if any.
    /// Returns <c>false</c> when no entry exists — which is normal when the cascade path
    /// beat the reader to the entry or when a watchdog timeout removed the entry while a
    /// response was in flight.
    ///
    /// <para>The removal is by key only: if a cap overflow replaced the mapping, the
    /// removed entry may be a different request than the one the caller is completing.</para>
    /// </summary>
    /// <param name="key">Coalescing key to remove.</param>
    /// <param name="req">The removed entry; <c>null</c> when the method returns <c>false</c>.</param>
    /// <returns><c>true</c> when an entry was removed.</returns>
    public bool TryRemove(CoalescingKey key, out InFlightRequest req)
    {
        lock (_lock)
        {
            // Single lookup: Remove(key, out value) both finds and deletes the entry.
            if (_entries.Remove(key, out var removed))
            {
                req = removed;
                return true;
            }

            req = default!;
            return false;
        }
    }

    /// <summary>
    /// Removes every entry and returns the removed requests as a detached snapshot.
    /// Used by the multiplexer's cascade path on backend disconnect.
    /// </summary>
    /// <returns>The entries that were mapped at the moment of the call (possibly empty).</returns>
    public IReadOnlyList<InFlightRequest> DrainAll()
    {
        lock (_lock)
        {
            var drained = new List<InFlightRequest>(_entries.Values);
            _entries.Clear();
            return drained;
        }
    }
}
@@ -49,9 +49,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
private readonly PerPlcContext _ctx;
private readonly ILogger<PlcMultiplexer> _logger;
private readonly ResiliencePipeline? _backendConnectPipeline;
// Phase 10: live read-coalescing config accessor. The accessor is read per-PDU on the
// request path so a hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled`
// propagates immediately. Production wires this to
// `() => optionsMonitor.CurrentValue.Resilience.ReadCoalescing`. Tests default to a
// fresh `ReadCoalescingOptions()` (Enabled = true, MaxParties = 32).
private readonly Func<ReadCoalescingOptions> _coalescingOptions;
private readonly TxIdAllocator _allocator = new();
private readonly CorrelationMap _correlation = new();
private readonly InFlightByKeyMap _inFlightByKey = new();
private readonly Channel<byte[]> _outboundChannel = Channel.CreateBounded<byte[]>(
new BoundedChannelOptions(OutboundChannelCapacity)
@@ -84,7 +91,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
IPduPipeline pipeline,
PerPlcContext perPlcContext,
ILogger<PlcMultiplexer> logger,
ResiliencePipeline? backendConnectPipeline = null)
ResiliencePipeline? backendConnectPipeline = null,
Func<ReadCoalescingOptions>? coalescingOptions = null)
{
_plc = plc;
_connectionOptions = connectionOptions;
@@ -92,6 +100,7 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
_ctx = perPlcContext;
_logger = logger;
_backendConnectPipeline = backendConnectPipeline;
_coalescingOptions = coalescingOptions ?? (static () => new ReadCoalescingOptions());
// Register this multiplexer as the live telemetry source for the PLC's counters.
_ctx.Counters.SetMultiplexProvider(this);
@@ -301,6 +310,11 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
cascadeIds.Add(party.Pipe.Id);
}
// Phase 10 — also drain the in-flight-by-key map so a brand-new identical request
// through the freshly-reconnected backend is treated as a miss (no stale entries
// outlive the backend they were destined for).
_inFlightByKey.DrainAll();
int upstreamCount = 0;
if (cascadeUpstreams)
{
@@ -408,6 +422,17 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
// Free the allocator slot immediately so it can be reused.
_allocator.Release(proxyTxId);
// Phase 10 — for FC03/FC04 reads, also clear the coalescing-by-key entry so
// a brand-new identical request issued AFTER this response is treated as a
// miss (opens a fresh round-trip). The TryRemove is best-effort: a watchdog
// timeout or cascade may have already removed it.
if (inFlight.Fc is 0x03 or 0x04)
{
var coalKey = new CoalescingKey(inFlight.UnitId, inFlight.Fc,
inFlight.StartAddress, inFlight.Qty);
_inFlightByKey.TryRemove(coalKey, out _);
}
// Update EWMA round-trip from when we sent the request.
long elapsedMs = (DateTimeOffset.UtcNow - inFlight.SentAtUtc).Ticks * 100; // 100 ns per tick
// UpdateRoundTripEwma expects Stopwatch ticks, but we have wall-clock.
@@ -427,10 +452,25 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
// Fan out to each interested party with their original TxId restored.
// Phase 9: always exactly one party. Phase 10: N parties (read coalescing).
// Note: the InFlightByKey TryRemove above (for FC03/FC04) guarantees no
// further attaches can occur — the parties list is now a stable snapshot.
foreach (var party in inFlight.InterestedParties)
{
if (!party.Pipe.IsAlive)
{
// Phase 10 — record the dead-upstream skip only for FC03/FC04 (the
// only function codes that take the coalescing path). For non-
// coalescing FCs this branch is silent — the Phase-9 behaviour.
if (inFlight.Fc is 0x03 or 0x04
&& inFlight.InterestedParties.Count > 1)
{
_ctx.Counters.IncrementCoalescedResponseToDeadUpstream();
CoalescingLogEvents.DeadUpstream(
_logger, _plc.Name, inFlight.UnitId, inFlight.Fc,
inFlight.StartAddress, inFlight.Qty);
}
continue;
}
// The frame buffer is private to this iteration; if there are multiple
// parties (Phase 10), each gets its own copy with its own original TxId
@@ -482,20 +522,11 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
out ushort originalTxId, out _, out _, out byte unitId))
return;
if (!_allocator.TryAllocate(out ushort proxyTxId))
{
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
// Synthesize Modbus exception 04 (Slave Device Failure).
byte fc = frame.Length > MbapFrame.HeaderSize ? frame[MbapFrame.HeaderSize] : (byte)0;
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fc, exceptionCode: 4);
await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
return;
}
// Parse the PDU FC + start/qty (for FC03/04) so the response decoder has the
// correlation it needs.
// Parse the PDU FC + start/qty (for FC03/04) — needed for both the coalescing-key
// path and the response correlation slot. FC06/FC16 (writes) keep startAddr/qty = 0;
// they bypass coalescing entirely.
int pduOffset = MbapFrame.HeaderSize;
byte fcByte = frame[pduOffset];
byte fcByte = frame.Length > pduOffset ? frame[pduOffset] : (byte)0;
ushort startAddr = 0;
ushort qty = 0;
if (fcByte is 0x03 or 0x04 && frame.Length >= pduOffset + 5)
@@ -504,37 +535,175 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
}
var inFlight = new InFlightRequest(
// Phase 10 — read-coalescing path. Only FC03/FC04 are coalescable; only when the
// feature is enabled in the live config. If the late-arriving request matches an
// already-in-flight peer, we attach to the existing entry and skip the backend
// round-trip entirely. The existing entry's response will fan out to both parties.
var coalescingOpts = _coalescingOptions();
if (fcByte is 0x03 or 0x04 && coalescingOpts.Enabled)
{
var key = new CoalescingKey(unitId, fcByte, startAddr, qty);
var newParty = new InterestedParty(pipe, originalTxId);
// The factory does the Phase-9 work: allocate a proxy TxId, build the
// InFlightRequest with a mutable List<InterestedParty>, add to the correlation
// map. We deliberately do NOT enqueue to the outbound channel inside the
// factory — that's done outside the InFlightByKey lock to keep the lock
// scope tight and to avoid holding the lock across an async send.
//
// proxyTxIdForSend / inFlightForSend communicate the factory's allocation back
// out of the lock so the post-lock code can finish the send.
ushort proxyTxIdForSend = 0;
InFlightRequest? inFlightForSend = null;
_inFlightByKey.TryAttachOrCreate(
key,
newParty,
factory: () =>
{
if (!_allocator.TryAllocate(out ushort proxyTxId))
{
// Saturation — record an empty placeholder InFlightRequest that the
// caller will detect via inFlightForSend == null. We can't easily
// signal failure through the bool return, so we leave the saturation
// exception delivery to the caller.
return new InFlightRequest(
UnitId: unitId,
Fc: fcByte,
StartAddress: startAddr,
Qty: qty,
InterestedParties: new List<InterestedParty> { newParty },
SentAtUtc: DateTimeOffset.UtcNow);
}
var partyList = new List<InterestedParty>(capacity: 1) { newParty };
var inFlight = new InFlightRequest(
UnitId: unitId,
Fc: fcByte,
StartAddress: startAddr,
Qty: qty,
InterestedParties: partyList,
SentAtUtc: DateTimeOffset.UtcNow);
if (!_correlation.TryAdd(proxyTxId, inFlight))
{
_allocator.Release(proxyTxId);
_logger.LogError(
"CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}",
proxyTxId);
// Return the stub anyway; outer code detects via inFlightForSend == null.
return inFlight;
}
_ctx.Counters.ObserveInFlight(_allocator.InFlightCount);
proxyTxIdForSend = proxyTxId;
inFlightForSend = inFlight;
return inFlight;
},
maxParties: coalescingOpts.MaxParties,
out _,
out bool wasNew);
if (!wasNew)
{
// Coalesce hit: attached to an existing in-flight entry. No backend traffic.
_ctx.Counters.IncrementCoalescedHit();
CoalescingLogEvents.Hit(_logger, _plc.Name, unitId, fcByte, startAddr, qty,
partyCount: _inFlightByKey.Count);
return;
}
// Coalesce miss: we just opened a fresh in-flight entry.
_ctx.Counters.IncrementCoalescedMiss();
CoalescingLogEvents.Miss(_logger, _plc.Name, unitId, fcByte, startAddr, qty);
if (inFlightForSend is null)
{
// The factory hit the allocator-saturation path or a duplicate-key race.
// Surface a Modbus exception 04 to the upstream and clean up.
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
_inFlightByKey.TryRemove(key, out _);
await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
return;
}
// Apply the BCD rewriter on the request, then send to the backend. We are now
// OUTSIDE the InFlightByKey lock — late attaches arriving after this point will
// attach to the same entry while it sits in the channel/wire.
var requestCtx = _ctx.WithCurrentRequest(inFlightForSend);
_pipeline.Process(
MbapDirection.RequestToBackend,
frame.AsSpan(0, MbapFrame.HeaderSize),
frame.AsSpan(MbapFrame.HeaderSize, frame.Length - MbapFrame.HeaderSize),
requestCtx);
frame[0] = (byte)(proxyTxIdForSend >> 8);
frame[1] = (byte)(proxyTxIdForSend & 0xFF);
try
{
await _outboundChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
}
catch (ChannelClosedException)
{
if (_correlation.TryRemove(proxyTxIdForSend, out _))
_allocator.Release(proxyTxIdForSend);
_inFlightByKey.TryRemove(key, out _);
}
return;
}
// Non-coalescing path (FC06/FC16 writes, FC03/04 with coalescing disabled, or any
// other FC). This is the Phase-9 path verbatim — every request gets its own proxy
// TxId and its own backend round-trip.
if (!_allocator.TryAllocate(out ushort proxyTxIdFc))
{
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
return;
}
var partyListNc = new List<InterestedParty>(capacity: 1) { new InterestedParty(pipe, originalTxId) };
var inFlightNc = new InFlightRequest(
UnitId: unitId,
Fc: fcByte,
StartAddress: startAddr,
Qty: qty,
InterestedParties: [new InterestedParty(pipe, originalTxId)],
InterestedParties: partyListNc,
SentAtUtc: DateTimeOffset.UtcNow);
if (!_correlation.TryAdd(proxyTxId, inFlight))
if (!_correlation.TryAdd(proxyTxIdFc, inFlightNc))
{
// Should be impossible: the allocator just guaranteed proxyTxId is free.
_allocator.Release(proxyTxId);
_logger.LogError("CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}", proxyTxId);
_allocator.Release(proxyTxIdFc);
_logger.LogError("CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}", proxyTxIdFc);
return;
}
// Phase 10 — even when the coalescing path is bypassed (e.g. coalescing disabled
// for FC03/04), we still report the request as a Miss so Hit + Miss = total
// FC03/FC04 requests across snapshots. FC06/FC16 are not counted here (they are
// not coalescable in any sense).
if (fcByte is 0x03 or 0x04)
_ctx.Counters.IncrementCoalescedMiss();
// Peak in-flight tracking.
_ctx.Counters.ObserveInFlight(_allocator.InFlightCount);
// Apply the BCD rewriter on the request. Use a per-call context with CurrentRequest
// (the rewriter doesn't currently need it on request, but Phase 10 may).
var requestCtx = _ctx.WithCurrentRequest(inFlight);
// Apply the BCD rewriter on the request.
var requestCtxNc = _ctx.WithCurrentRequest(inFlightNc);
_pipeline.Process(
MbapDirection.RequestToBackend,
frame.AsSpan(0, MbapFrame.HeaderSize),
frame.AsSpan(MbapFrame.HeaderSize, frame.Length - MbapFrame.HeaderSize),
requestCtx);
requestCtxNc);
// Overwrite the MBAP TxId with the proxy TxId.
frame[0] = (byte)(proxyTxId >> 8);
frame[1] = (byte)(proxyTxId & 0xFF);
frame[0] = (byte)(proxyTxIdFc >> 8);
frame[1] = (byte)(proxyTxIdFc & 0xFF);
// Enqueue for the backend writer task.
try
@@ -544,8 +713,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
catch (ChannelClosedException)
{
// Channel completed during shutdown — release the proxy TxId.
if (_correlation.TryRemove(proxyTxId, out _))
_allocator.Release(proxyTxId);
if (_correlation.TryRemove(proxyTxIdFc, out _))
_allocator.Release(proxyTxIdFc);
}
}
@@ -591,6 +760,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
_allocator.Release(proxyTxId);
// Phase 10 — also clear the coalescing-by-key entry. A late attach that
// raced in just before the watchdog claim will still receive the 0x0B
// exception via this entry's InterestedParties list (List<T> mutations
// happen before fan-out begins).
if (req.Fc is 0x03 or 0x04)
{
var coalKey = new CoalescingKey(req.UnitId, req.Fc, req.StartAddress, req.Qty);
_inFlightByKey.TryRemove(coalKey, out _);
}
long elapsedMs = (long)(DateTimeOffset.UtcNow - req.SentAtUtc).TotalMilliseconds;
foreach (var party in req.InterestedParties)
+6 -2
View File
@@ -31,6 +31,7 @@ internal sealed partial class PlcListener : IAsyncDisposable
private readonly ILogger _pipeLogger;
private readonly PerPlcContext? _perPlcContext;
private readonly ResiliencePipeline? _backendConnectPipeline;
private readonly Func<ReadCoalescingOptions>? _coalescingOptions;
private TcpListener? _listener;
private PlcMultiplexer? _multiplexer;
@@ -55,7 +56,8 @@ internal sealed partial class PlcListener : IAsyncDisposable
ILogger<PlcMultiplexer> multiplexerLogger,
ILogger pipeLogger,
PerPlcContext? perPlcContext = null,
ResiliencePipeline? backendConnectPipeline = null)
ResiliencePipeline? backendConnectPipeline = null,
Func<ReadCoalescingOptions>? coalescingOptions = null)
{
_plc = plc;
_connectionOptions = connectionOptions;
@@ -65,6 +67,7 @@ internal sealed partial class PlcListener : IAsyncDisposable
_pipeLogger = pipeLogger;
_perPlcContext = perPlcContext;
_backendConnectPipeline = backendConnectPipeline;
_coalescingOptions = coalescingOptions;
}
/// <summary>
@@ -94,7 +97,8 @@ internal sealed partial class PlcListener : IAsyncDisposable
_pipeline,
ctx,
_multiplexerLogger,
_backendConnectPipeline);
_backendConnectPipeline,
_coalescingOptions);
}
/// <summary>
+48 -2
View File
@@ -79,7 +79,25 @@ public sealed record CounterSnapshot(
/// (frames queued, not yet on the wire). A sustained non-zero value indicates the
/// backend is slower than upstream demand. Phase 9.
/// </summary>
long BackendQueueDepth);
long BackendQueueDepth,
/// <summary>
/// Phase 10 — cumulative count of FC03/FC04 requests that attached to an already-in-flight
/// peer instead of opening a fresh backend round-trip. <c>CoalescedHitCount + CoalescedMissCount</c>
/// equals total FC03/FC04 requests seen by the multiplexer.
/// </summary>
long CoalescedHitCount,
/// <summary>
/// Phase 10 — cumulative count of FC03/FC04 requests that opened a fresh in-flight entry
/// (no matching peer was in flight, or the matching peer had reached its <c>MaxParties</c>
/// cap). With <c>ReadCoalescing.Enabled = false</c>, every FC03/FC04 request becomes a miss.
/// </summary>
long CoalescedMissCount,
/// <summary>
/// Phase 10 — cumulative count of coalesced response fan-outs that were skipped because
/// the attached upstream pipe had already disconnected. A spike indicates upstream
/// connection churn; the metric itself is informational only (Tier 2 in <c>docs/kpi.md</c>).
/// </summary>
long CoalescedResponseToDeadUpstream);
/// <summary>
/// Thread-safe per-PLC counters backed by <see cref="System.Threading.Interlocked"/> longs.
@@ -114,6 +132,11 @@ internal sealed class ProxyCounters
private long _maxInFlight;
private long _backendDisconnectCascades;
// Phase 10 — coalescing counters. Hit + Miss = total FC03/FC04 requests.
private long _coalescedHitCount;
private long _coalescedMissCount;
private long _coalescedResponseToDeadUpstream;
// Phase 9: live state pulled from the multiplexer's allocator/map/queue on each
// snapshot. The multiplexer registers a single provider via SetMultiplexProvider.
// We use a volatile reference for lock-free read on the snapshot path.
@@ -201,6 +224,26 @@ internal sealed class ProxyCounters
public void AddDisconnectCascades(int n)
{
    // Atomically fold n into the running cascade total; the updated sum is not needed here.
    Interlocked.Add(ref _backendDisconnectCascades, n);
}
/// <summary>
/// Phase 10 — counts a single FC03/FC04 request that joined a peer already in flight
/// rather than opening its own in-flight entry.
/// </summary>
public void IncrementCoalescedHit()
{
    // Atomic bump of the hit counter.
    Interlocked.Increment(ref _coalescedHitCount);
}
/// <summary>
/// Phase 10 — counts a single FC03/FC04 request that had to open a fresh in-flight entry,
/// either because no matching peer was in flight or because the matching peer had
/// already reached MaxParties.
/// </summary>
public void IncrementCoalescedMiss()
{
    // Atomic bump of the miss counter.
    Interlocked.Increment(ref _coalescedMissCount);
}
/// <summary>
/// Phase 10 — counts a single coalesced response fan-out that was skipped because the
/// attached upstream pipe had already disconnected. Informational only.
/// </summary>
public void IncrementCoalescedResponseToDeadUpstream()
{
    // Atomic bump of the dead-upstream counter.
    Interlocked.Increment(ref _coalescedResponseToDeadUpstream);
}
/// <summary>
/// CAS-updates the peak in-flight high-water mark. Called on every successful
/// allocation by the multiplexer. Phase 9.
@@ -311,7 +354,10 @@ internal sealed class ProxyCounters
MaxInFlight: Interlocked.Read(ref _maxInFlight),
TxIdWraps: txWraps,
BackendDisconnectCascades: Interlocked.Read(ref _backendDisconnectCascades),
BackendQueueDepth: queueDepth);
BackendQueueDepth: queueDepth,
CoalescedHitCount: Interlocked.Read(ref _coalescedHitCount),
CoalescedMissCount: Interlocked.Read(ref _coalescedMissCount),
CoalescedResponseToDeadUpstream: Interlocked.Read(ref _coalescedResponseToDeadUpstream));
}
}
+8 -1
View File
@@ -112,6 +112,12 @@ internal sealed partial class ProxyWorker : BackgroundService
resilienceOpts.ListenerRecovery,
_loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plc.Name}"));
// Phase 10 — give the supervisor a live accessor for ReadCoalescingOptions
// so a hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled` propagates
// to the multiplexer's per-PDU coalescing decision.
Func<ReadCoalescingOptions> coalescingAccessor =
() => _options.CurrentValue.Resilience.ReadCoalescing;
var supervisor = new PlcListenerSupervisor(
plc,
opts.Connection,
@@ -122,7 +128,8 @@ internal sealed partial class ProxyWorker : BackgroundService
perPlcContext,
recoveryPipeline,
_loggerFactory.CreateLogger<PlcListenerSupervisor>(),
backendPipeline);
backendPipeline,
coalescingAccessor);
_supervisors[plc.Name] = supervisor;
}
@@ -36,6 +36,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
private readonly ResiliencePipeline _recoveryPipeline;
private readonly ILogger<PlcListenerSupervisor> _logger;
private readonly ResiliencePipeline? _backendConnectPipeline;
private readonly Func<ReadCoalescingOptions>? _coalescingOptions;
// ── Mutable state ────────────────────────────────────────────────────────────────────
@@ -79,7 +80,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
PerPlcContext? perPlcContext,
ResiliencePipeline recoveryPipeline,
ILogger<PlcListenerSupervisor> logger,
ResiliencePipeline? backendConnectPipeline = null)
ResiliencePipeline? backendConnectPipeline = null,
Func<ReadCoalescingOptions>? coalescingOptions = null)
{
_plc = plc;
_connectionOptions = connectionOptions;
@@ -92,6 +94,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
_recoveryPipeline = recoveryPipeline;
_logger = logger;
_backendConnectPipeline = backendConnectPipeline;
_coalescingOptions = coalescingOptions;
}
/// <summary>
@@ -232,7 +235,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
_multiplexerLogger,
_pipeLogger,
_currentContext,
_backendConnectPipeline);
_backendConnectPipeline,
_coalescingOptions);
// Phase 07: expose the current listener for status-page pair enumeration.
_currentListener = listener;
+4
View File
@@ -17,6 +17,10 @@
"ListenerRecovery": {
"InitialBackoffMs": [ 1000, 2000, 5000, 15000, 30000 ],
"SteadyStateMs": 30000
},
"ReadCoalescing": {
"Enabled": true,
"MaxParties": 32
}
}
},