mbproxy: strip historical phase/wave/plan references from source comments
Comments described the *history* of how the code arrived (phase numbers, wave IDs, review IDs, dated TODOs) instead of what it does today. That scaffolding rotted as the codebase evolved. Cleaned 60 source files + .gitignore; behaviour unchanged (387/387 tests still pass).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,7 +4,7 @@ namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
|
||||
/// BCD-rewriting PDU pipeline. Registered as the singleton <see cref="IPduPipeline"/>
|
||||
/// in production (replaces <see cref="NoopPduPipeline"/> from Phase 03).
|
||||
/// in production.
|
||||
///
|
||||
/// FC scope (per design.md):
|
||||
/// FC03 / FC04 response — decode covered BCD slots from raw nibbles → binary integer.
|
||||
@@ -15,13 +15,13 @@ namespace Mbproxy.Proxy;
|
||||
/// MBAP transparency contract: the MBAP length field is NEVER modified. Re-encoded slots
|
||||
/// are the same byte width as the originals (ushort → ushort), so the PDU length is stable.
|
||||
///
|
||||
/// <para><b>Phase 9 — request correlation:</b> FC03/FC04 responses do not carry the
|
||||
/// original start address. The multiplexer builds an <see cref="Multiplexing.InFlightRequest"/>
|
||||
/// <para><b>Request correlation:</b> FC03/FC04 responses do not carry the original
|
||||
/// start address. The multiplexer builds an <see cref="Multiplexing.InFlightRequest"/>
|
||||
/// on the request path, stores it in its <see cref="Multiplexing.CorrelationMap"/>, and
|
||||
/// attaches it to the per-call <see cref="PerPlcContext.CurrentRequest"/> on the response
|
||||
/// path. The rewriter consumes <c>CurrentRequest</c> instead of a per-pair last-request
|
||||
/// slot, so concurrent responses from different upstream clients each decode against
|
||||
/// their own request range without cross-talk.</para>
|
||||
/// attaches it to the per-call <see cref="PerPlcContext.CurrentRequest"/> on the
|
||||
/// response path. The rewriter consumes <c>CurrentRequest</c>, so concurrent responses
|
||||
/// from different upstream clients each decode against their own request range without
|
||||
/// cross-talk.</para>
|
||||
///
|
||||
/// <para>This class is stateless. All per-call state arrives via <see cref="PduContext"/>
|
||||
/// (specifically <see cref="PerPlcContext.CurrentRequest"/> on response). It is safe to
|
||||
@@ -157,12 +157,12 @@ internal sealed class BcdPduPipeline : IPduPipeline
|
||||
ushort startAddress = (ushort)((pdu[1] << 8) | pdu[2]);
|
||||
ushort qty = (ushort)((pdu[3] << 8) | pdu[4]);
|
||||
|
||||
// Phase 12 (W2.14) — validate the request is fully sized for `qty` registers
|
||||
// (each 2 bytes after the byteCount byte). A client claiming qty=10 with only
|
||||
// 4 bytes of register data would otherwise have its BCD slots silently skipped
|
||||
// by the per-slot bounds check below — half the request rewritten, half not.
|
||||
// Returning here passes the malformed PDU through unchanged so the PLC's own
|
||||
// validator surfaces the protocol error.
|
||||
// Validate the request is fully sized for `qty` registers (each 2 bytes after
|
||||
// the byteCount byte). A client claiming qty=10 with only 4 bytes of register
|
||||
// data would otherwise have its BCD slots silently skipped by the per-slot
|
||||
// bounds check below — half the request rewritten, half not. Returning here
|
||||
// passes the malformed PDU through unchanged so the PLC's own validator
|
||||
// surfaces the protocol error.
|
||||
if (pdu.Length < 6 + qty * 2)
|
||||
return;
|
||||
|
||||
@@ -210,14 +210,14 @@ internal sealed class BcdPduPipeline : IPduPipeline
|
||||
ushort clientLow = (ushort)((pdu[lowByteOff] << 8) | pdu[lowByteOff + 1]);
|
||||
ushort clientHigh = (ushort)((pdu[highByteOff] << 8) | pdu[highByteOff + 1]);
|
||||
|
||||
// Phase 12 (W2.13) — validate that BOTH input words are within the
|
||||
// base-10000-digit range BEFORE reconstructing. Without this guard, a
|
||||
// client writing (high=9999, low=9999) silently mutates to (high=9998,
|
||||
// low=9999) because `9999 * 10_000 + 9999 = 99_989_999` is still <= the
|
||||
// 32-bit BCD ceiling, so Encode32 accepts it and rewrites — losing 1 from
|
||||
// the high word. The unconventional wire format ("two base-10000 CDAB
|
||||
// digits", per design.md:123) means each word independently must be
|
||||
// 0..9999 to round-trip cleanly.
|
||||
// Validate that BOTH input words are within the base-10000-digit range
|
||||
// BEFORE reconstructing. Without this guard, a client writing
|
||||
// (high=9999, low=9999) silently mutates to (high=9998, low=9999)
|
||||
// because `9999 * 10_000 + 9999 = 99_989_999` is still <= the 32-bit
|
||||
// BCD ceiling, so Encode32 accepts it and rewrites — losing 1 from the
|
||||
// high word. The unconventional wire format ("two base-10000 CDAB
|
||||
// digits", per design.md) means each word independently must be 0..9999
|
||||
// to round-trip cleanly.
|
||||
if (clientLow > 9999 || clientHigh > 9999)
|
||||
{
|
||||
RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address,
|
||||
@@ -473,6 +473,4 @@ internal sealed class BcdPduPipeline : IPduPipeline
|
||||
// already counted this slot on the way out. Incrementing again would double-count.
|
||||
}
|
||||
|
||||
// Phase 12 (W3 cleanup) — HasBadNibble was previously duplicated here; the canonical
|
||||
// implementation now lives in BcdCodec.HasBadNibble (internal).
|
||||
}
|
||||
|
||||
@@ -4,10 +4,10 @@ namespace Mbproxy.Proxy.Cache;
|
||||
|
||||
/// <summary>
|
||||
/// Hash key for the per-PLC <see cref="ResponseCache"/>. Structurally identical to
|
||||
/// Phase 10's <see cref="CoalescingKey"/> — both keys discriminate the same dimensions
|
||||
/// (UnitId, FunctionCode, StartAddress, Quantity), but the two type aliases live in
|
||||
/// different namespaces so the two phases can evolve independently without one shaping
|
||||
/// the other's API surface.
|
||||
/// the read-coalescing <see cref="CoalescingKey"/> — both keys discriminate the same
|
||||
/// dimensions (UnitId, FunctionCode, StartAddress, Quantity), but the two type aliases
|
||||
/// live in different namespaces so the cache and the coalescer can evolve independently
|
||||
/// without one shaping the other's API surface.
|
||||
///
|
||||
/// <para><b>Equality semantics:</b> record-struct value equality. FC03 and FC04 produce
|
||||
/// different keys for the same address (different Modbus tables); different
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
namespace Mbproxy.Proxy.Cache;
|
||||
|
||||
/// <summary>
|
||||
/// Source-generated <see cref="LoggerMessage"/> definitions for the Phase-11 response
|
||||
/// cache. Event names are stable — do not rename without updating <c>docs/design.md</c>'s
|
||||
/// Source-generated <see cref="LoggerMessage"/> definitions for the response cache.
|
||||
/// Event names are stable — do not rename without updating <c>docs/design.md</c>'s
|
||||
/// Logging event-name table.
|
||||
///
|
||||
/// <para>Levels are conservative — a busy PLC under steady cache pressure would emit one
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
namespace Mbproxy.Proxy.Cache;
|
||||
|
||||
/// <summary>
|
||||
/// Per-PLC opt-in response cache for FC03 / FC04 read responses. Phase 11.
|
||||
/// Per-PLC opt-in response cache for FC03 / FC04 read responses.
|
||||
///
|
||||
/// <para><b>Lifecycle.</b> One instance per PLC, owned by the per-PLC context. The cache
|
||||
/// is consulted on every FC03/FC04 request before coalescing; populated by the backend
|
||||
|
||||
@@ -13,17 +13,15 @@ public enum MbapDirection
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Per-pair context carried through each PDU pipeline call.
|
||||
/// Phase 03: carries only <see cref="PlcName"/>.
|
||||
/// Phase 04 extends this via <see cref="PerPlcContext"/>, which carries the BcdTagMap,
|
||||
/// counters, and logger. Phase 09 added the per-call <c>CurrentRequest</c> slot to
|
||||
/// <see cref="PerPlcContext"/> for multiplexer-aware response correlation.
|
||||
/// Per-pair context carried through each PDU pipeline call. Carries only
|
||||
/// <see cref="PlcName"/> at the base level; <see cref="PerPlcContext"/> extends it with
|
||||
/// the BcdTagMap, counters, logger, and per-call <c>CurrentRequest</c> slot for
|
||||
/// multiplexer-aware response correlation.
|
||||
/// </summary>
|
||||
public class PduContext
|
||||
{
|
||||
/// <summary>The configured PLC name (from <c>MbproxyOptions.Plcs[i].Name</c>).</summary>
|
||||
public string PlcName { get; init; } = "";
|
||||
// Phase 04 adds: BcdTagMap, counters, logger
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -31,8 +29,8 @@ public class PduContext
|
||||
/// Called once per frame in each direction (request and response).
|
||||
///
|
||||
/// Implementations must be safe to call concurrently from multiple connection pairs.
|
||||
/// In Phase 03 the only implementation is <see cref="NoopPduPipeline"/> (pass-through).
|
||||
/// Phase 04 replaces it with a BCD rewriter registered via DI.
|
||||
/// Production wires <see cref="BcdPduPipeline"/>; <see cref="NoopPduPipeline"/> is a
|
||||
/// pass-through fallback used by tests.
|
||||
/// </summary>
|
||||
public interface IPduPipeline
|
||||
{
|
||||
@@ -42,6 +40,6 @@ public interface IPduPipeline
|
||||
/// <param name="direction">Whether this is a request or a response frame.</param>
|
||||
/// <param name="mbapHeader">The 7-byte MBAP header (read-only; includes TxId and UnitId — the function code is not part of the header, it is in pdu[0]).</param>
|
||||
/// <param name="pdu">The PDU bytes starting at the function code. May be mutated in place.</param>
|
||||
/// <param name="context">Per-pair context (PLC name; extended in phase 04).</param>
|
||||
/// <param name="context">Per-pair context (PLC name; extended via <see cref="PerPlcContext"/>).</param>
|
||||
void Process(MbapDirection direction, ReadOnlySpan<byte> mbapHeader, Span<byte> pdu, PduContext context);
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
|
||||
/// Source-generated <see cref="LoggerMessage"/> definitions for the Phase-10 read-coalescing
|
||||
/// Source-generated <see cref="LoggerMessage"/> definitions for the read-coalescing
|
||||
/// feature. Event names are stable — do not rename without updating docs/design.md's
|
||||
/// "Logging" event-name table.
|
||||
///
|
||||
/// <para>Levels are intentionally conservative — coalescing fires on every overlapping
|
||||
/// read in a busy fleet (HMIs/historians polling the same screen tags), so the steady-state
|
||||
/// log volume would be deafening at Information. The counters surface the same data at
|
||||
/// far lower cost.</para>
|
||||
/// read in a busy fleet (HMIs/historians polling the same screen tags), so the
|
||||
/// steady-state log volume would be deafening at Information. The counters surface the
|
||||
/// same data at far lower cost.</para>
|
||||
/// </summary>
|
||||
internal static partial class CoalescingLogEvents
|
||||
{
|
||||
|
||||
@@ -8,9 +8,9 @@ namespace Mbproxy.Proxy.Multiplexing;
|
||||
/// when the matching response arrives.
|
||||
///
|
||||
/// <para>Backed by <see cref="ConcurrentDictionary{TKey, TValue}"/>. The single-writer /
|
||||
/// single-remover pattern in Phase 9 does not strictly require it — but cascade-on-
|
||||
/// disconnect walks the map from a separate task and Phase 10 adds upstream-side
|
||||
/// cancellation paths, so the safer primitive is worth the negligible cost.</para>
|
||||
/// single-remover pattern does not strictly require it — but cascade-on-disconnect walks
|
||||
/// the map from a separate task and the coalescing path adds upstream-side cancellation
|
||||
/// paths, so the safer primitive is worth the negligible cost.</para>
|
||||
/// </summary>
|
||||
internal sealed class CorrelationMap
|
||||
{
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
|
||||
/// Per-PLC "in-flight by key" map that powers <b>Phase 10 read coalescing</b>. Holds the
|
||||
/// currently-in-flight FC03/FC04 requests keyed by their <see cref="CoalescingKey"/> so a
|
||||
/// Per-PLC "in-flight by key" map that powers read coalescing. Holds the currently-
|
||||
/// in-flight FC03/FC04 requests keyed by their <see cref="CoalescingKey"/> so a
|
||||
/// late-arriving request with an identical key can attach to the existing in-flight entry
|
||||
/// instead of opening a second backend round-trip.
|
||||
///
|
||||
/// <para><b>Concurrency model.</b> A single <see cref="object"/> lock serialises every
|
||||
/// state-touching method. The simpler-lock-over-CAS choice is deliberate (per the phase
|
||||
/// doc) — the map is per-PLC and the wire rate per PLC is bounded by the ECOM's internal
|
||||
/// scan cadence (~2–10 ms per request). The lock-free <c>AddOrUpdate</c> alternative is not
|
||||
/// worth the read-and-prove-it-correct burden.</para>
|
||||
/// state-touching method. The simpler-lock-over-CAS choice is deliberate — the map is
|
||||
/// per-PLC and the wire rate per PLC is bounded by the ECOM's internal scan cadence
|
||||
/// (~2–10 ms per request). The lock-free <c>AddOrUpdate</c> alternative is not worth the
|
||||
/// read-and-prove-it-correct burden.</para>
|
||||
///
|
||||
/// <para><b>Mutable-list seam.</b> Each entry stores a <see cref="List{InterestedParty}"/>
|
||||
/// that is also exposed through the parent <see cref="InFlightRequest.InterestedParties"/>
|
||||
@@ -55,10 +55,6 @@ internal sealed class InFlightByKeyMap
|
||||
/// already has <paramref name="maxParties"/> attached parties, the next arrival opens
|
||||
/// a fresh entry (and a fresh backend round-trip). This bounds the response-fanout
|
||||
/// cost per entry at O(maxParties).</para>
|
||||
///
|
||||
/// <para>Phase 12 (W3 cleanup) — was previously declared as <c>bool TryAttachOrCreate</c>
|
||||
/// but always returned <c>true</c>. The bool was dead; the result is in the
|
||||
/// <paramref name="wasNew"/> out parameter.</para>
|
||||
/// </summary>
|
||||
public void AttachOrCreate(
|
||||
CoalescingKey key,
|
||||
|
||||
@@ -6,10 +6,9 @@ namespace Mbproxy.Proxy.Multiplexing;
|
||||
/// multiplexer must rewrite the response's MBAP TxId back to <see cref="OriginalTxId"/>
|
||||
/// before handing the frame to the pipe, so each upstream sees the proxy as transparent.
|
||||
///
|
||||
/// <para><b>Phase 9 invariant:</b> exactly one <see cref="InterestedParty"/> per
|
||||
/// <see cref="InFlightRequest"/>. <b>Phase 10 (read coalescing)</b> reuses this exact
|
||||
/// shape to fan-out a single backend response to multiple upstream parties. Do not
|
||||
/// collapse this into a single field on <see cref="InFlightRequest"/>.</para>
|
||||
/// <para>Read coalescing fans out a single backend response to multiple upstream parties
|
||||
/// via this record. Do not collapse this into a single field on
|
||||
/// <see cref="InFlightRequest"/>.</para>
|
||||
/// </summary>
|
||||
internal sealed record InterestedParty(UpstreamPipe Pipe, ushort OriginalTxId);
|
||||
|
||||
@@ -22,15 +21,12 @@ internal sealed record InterestedParty(UpstreamPipe Pipe, ushort OriginalTxId);
|
||||
/// <item><description>Provide the BCD rewriter with the originating request's
|
||||
/// <c>StartAddress</c> / <c>Qty</c> for FC03/FC04 response decoding — the response
|
||||
/// PDU itself does not carry the start address.</description></item>
|
||||
/// <item><description>Measure backend round-trip time via <see cref="SentAtUtc"/>
|
||||
/// (replaces the per-pair stopwatch slot from the 1:1 model).</description></item>
|
||||
/// <item><description>Measure backend round-trip time via <see cref="SentAtUtc"/>.</description></item>
|
||||
/// </list>
|
||||
///
|
||||
/// <para><b>Phase 9:</b> <see cref="InterestedParties"/> always has exactly one element.
|
||||
/// The list shape is the load-bearing seam that <b>Phase 10 — read coalescing</b> hooks
|
||||
/// into to fan out a single PLC response to multiple upstream clients without further
|
||||
/// refactor of the multiplexer's data model. Reviewer note: do <i>not</i> simplify back
|
||||
/// to a single <c>UpstreamPipe</c> field.</para>
|
||||
/// <para>The <see cref="InterestedParties"/> list shape is the load-bearing seam that
|
||||
/// read coalescing uses to fan out a single PLC response to multiple upstream clients.
|
||||
/// Reviewer note: do <i>not</i> simplify back to a single <c>UpstreamPipe</c> field.</para>
|
||||
/// </summary>
|
||||
internal sealed record InFlightRequest(
|
||||
byte UnitId,
|
||||
|
||||
@@ -8,9 +8,7 @@ namespace Mbproxy.Proxy.Multiplexing;
|
||||
internal static partial class MultiplexerLogEvents
|
||||
{
|
||||
/// <summary>
|
||||
/// Emitted once per upstream client accept. Replaces the per-pair
|
||||
/// <c>mbproxy.client.connected</c> event from the 1:1 model (same event name,
|
||||
/// same property shape — operators' log queries are unchanged).
|
||||
/// Emitted once per upstream client accept.
|
||||
/// </summary>
|
||||
[LoggerMessage(
|
||||
EventId = 110,
|
||||
@@ -84,9 +82,7 @@ internal static partial class MultiplexerLogEvents
|
||||
string remoteEp);
|
||||
|
||||
/// <summary>
|
||||
/// Emitted when the backend connect Polly pipeline fails. Mirrors the existing
|
||||
/// <c>mbproxy.backend.failed</c> event from the 1:1 model so operators' alerts keep
|
||||
/// working unchanged after Phase 9.
|
||||
/// Emitted when the backend connect Polly pipeline fails.
|
||||
/// </summary>
|
||||
[LoggerMessage(
|
||||
EventId = 115,
|
||||
|
||||
@@ -48,14 +48,14 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
private readonly ConnectionOptions _connectionOptions;
|
||||
private readonly IPduPipeline _pipeline;
|
||||
|
||||
// Phase 12 (W1.1) — `_ctx` is volatile so a hot-reload reseat can swap it on the running
|
||||
// `_ctx` is volatile so a hot-reload reseat can swap it on the running
|
||||
// multiplexer. Each method that uses the context snapshots it into a local at the start
|
||||
// of the operation so a single PDU sees a consistent (TagMap, Cache) pair even if the
|
||||
// swap fires mid-PDU. ReplaceContext is the single mutator.
|
||||
private volatile PerPlcContext _ctx;
|
||||
private readonly ILogger<PlcMultiplexer> _logger;
|
||||
private readonly ResiliencePipeline? _backendConnectPipeline;
|
||||
// Phase 10: live read-coalescing config accessor. The accessor is read per-PDU on the
|
||||
// Live read-coalescing config accessor. The accessor is read per-PDU on the
|
||||
// request path so a hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled`
|
||||
// propagates immediately. Production wires this to
|
||||
// `() => optionsMonitor.CurrentValue.Resilience.ReadCoalescing`. Tests default to a
|
||||
@@ -74,8 +74,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
SingleWriter = false,
|
||||
});
|
||||
|
||||
// Attached pipes — Phase 9 needs the list for the status page; Phase 10 will need it for
|
||||
// coalescing (fan-out). ConcurrentDictionary keyed on UpstreamPipe.Id for O(1) detach.
|
||||
// Attached pipes — used by the status page and by coalescing fan-out.
|
||||
// ConcurrentDictionary keyed on UpstreamPipe.Id for O(1) detach.
|
||||
private readonly ConcurrentDictionary<Guid, UpstreamPipe> _pipes = new();
|
||||
|
||||
// Lifecycle plumbing. Backend tasks share a CTS; cascading disconnect cancels it,
|
||||
@@ -88,10 +88,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
private Task? _backendReaderTask;
|
||||
|
||||
private readonly CancellationTokenSource _disposeCts = new();
|
||||
// Phase 12 (W2.2) — volatile so the disposing thread's write is observed by every
|
||||
// hot-path reader (OnUpstreamFrameAsync, ReplaceContext, Attach, etc.) without a
|
||||
// separate fence. On x86/x64 plain reads happen to give acquire-release semantics, so
|
||||
// this is defense for ARM hosts and future portability.
|
||||
// Volatile so the disposing thread's write is observed by every hot-path reader
|
||||
// (OnUpstreamFrameAsync, ReplaceContext, Attach, etc.) without a separate fence.
|
||||
// On x86/x64 plain reads happen to give acquire-release semantics, so this is
|
||||
// defense for ARM hosts and future portability.
|
||||
private volatile bool _disposed;
|
||||
private Task? _watchdogTask;
|
||||
|
||||
@@ -112,8 +112,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
_backendConnectPipeline = backendConnectPipeline;
|
||||
_coalescingOptions = coalescingOptions ?? (static () => new ReadCoalescingOptions());
|
||||
|
||||
// Phase 11 — register the per-PLC cache as the live stats source for the snapshot
|
||||
// path. Cache may be null when the per-PLC context has not been wired with one
|
||||
// Register the per-PLC cache as the live stats source for the snapshot path.
|
||||
// Cache may be null when the per-PLC context has not been wired with one
|
||||
// (every tag uncached, or unit tests).
|
||||
if (_ctx.Cache is not null)
|
||||
_ctx.Counters.SetCacheStatsProvider(new CacheStatsAdapter(_ctx.Cache));
|
||||
@@ -155,8 +155,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Phase 12 (W1.1) — atomically swaps the per-PLC context on a running multiplexer.
|
||||
/// Called by <see cref="Supervision.PlcListenerSupervisor.ReplaceContextAsync"/> when a
|
||||
/// Atomically swaps the per-PLC context on a running multiplexer. Called by
|
||||
/// <see cref="Supervision.PlcListenerSupervisor.ReplaceContextAsync"/> when a
|
||||
/// hot-reload tag-list change is applied to a PLC whose listener is already bound.
|
||||
///
|
||||
/// <para>The new context's tag map and (optional) response cache become visible on the
|
||||
@@ -174,10 +174,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
{
|
||||
if (_disposed) return;
|
||||
|
||||
// Phase 12 (W4 / NM2) — provider FIRST, then _ctx. The status page's snapshot
|
||||
// path reads `_cacheStatsProvider` independently of `_ctx`. If we swapped `_ctx`
|
||||
// first, a snapshot taken in the gap between the two writes would still hold the
|
||||
// OLD adapter wrapping the OLD cache — which the supervisor is about to dispose
|
||||
// Provider FIRST, then _ctx. The status page's snapshot path reads
|
||||
// `_cacheStatsProvider` independently of `_ctx`. If we swapped `_ctx` first, a
|
||||
// snapshot taken in the gap between the two writes would still hold the OLD
|
||||
// adapter wrapping the OLD cache — which the supervisor is about to dispose
|
||||
// (`PlcListenerSupervisor.ReplaceContextAsync` runs `oldCache.Dispose()` after we
|
||||
// return). Setting the provider first means snapshots in the swap window read
|
||||
// either (old provider, old ctx) or (new provider, new ctx) — both coherent —
|
||||
@@ -254,9 +254,9 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
_pipes.Clear();
|
||||
|
||||
// Phase 12 (W2.5, W2.6) — guard the CTS dispose against a watchdog tick that
|
||||
// raced past the WaitAsync above (e.g. a slow Task.Delay completion observing
|
||||
// cancellation late). Also dispose the connect-gate semaphore.
|
||||
// Guard the CTS dispose against a watchdog tick that raced past the WaitAsync
|
||||
// above (e.g. a slow Task.Delay completion observing cancellation late). Also
|
||||
// dispose the connect-gate semaphore.
|
||||
try { _disposeCts.Dispose(); } catch (ObjectDisposedException) { /* already disposed */ }
|
||||
try { _connectGate.Dispose(); } catch (ObjectDisposedException) { /* already disposed */ }
|
||||
}
|
||||
@@ -336,30 +336,28 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
|
||||
private async Task TearDownBackendAsync(string reason, bool cascadeUpstreams)
|
||||
{
|
||||
// Phase 12 (W1.4) — serialise tear-down vs connect-up via the connect gate. Without
|
||||
// this, a fresh EnsureBackendConnectedAsync racing with the channel drain below
|
||||
// could see stranded frames sent on its new socket with old (already-released) TxIds,
|
||||
// Serialise tear-down vs connect-up via the connect gate. Without this, a fresh
|
||||
// EnsureBackendConnectedAsync racing with the channel drain below could see
|
||||
// stranded frames sent on its new socket with old (already-released) TxIds,
|
||||
// producing orphaned responses that hang upstream peers via the watchdog.
|
||||
//
|
||||
// Phase 12 (W4 / NM1) — bound the wait. Without a timeout, a long Polly-wrapped
|
||||
// EnsureBackendConnectedAsync against an unreachable host can hold the gate for
|
||||
// the full BackendConnectTimeoutMs * MaxAttempts window, blocking DisposeAsync (and
|
||||
// therefore ProxyWorker.StopAsync) for that duration. A 2 s teardown deadline
|
||||
// bounds disposal latency; if the gate is unavailable we proceed best-effort
|
||||
// without it (the worst-case consequence is one orphaned in-flight cycle on the
|
||||
// dying backend, which the upstream watchdog will surface as exception 0x0B).
|
||||
// Bounded wait: a long Polly-wrapped EnsureBackendConnectedAsync against an
|
||||
// unreachable host can hold the gate for the full BackendConnectTimeoutMs *
|
||||
// MaxAttempts window, blocking DisposeAsync (and therefore ProxyWorker.StopAsync)
|
||||
// for that duration. A 2 s teardown deadline bounds disposal latency; if the gate
|
||||
// is unavailable we proceed best-effort without it (the worst-case consequence is
|
||||
// one orphaned in-flight cycle on the dying backend, which the upstream watchdog
|
||||
// will surface as exception 0x0B).
|
||||
//
|
||||
// Phase 12 (W5 / m1) — KNOWN RACE on the gate-not-held path: a concurrent
|
||||
// EnsureBackendConnectedAsync that DOES hold the gate may TryAllocate a TxId
|
||||
// that collides (after wraparound in the allocator's forward scan) with a TxId
|
||||
// we're about to release from the channel-drain step below. The double-release
|
||||
// would mark the new request's slot as free even though it's legitimately
|
||||
// in-flight, allowing the next allocation to reuse the same slot and
|
||||
// CorrelationMap.TryAdd to fail (silent request drop). Probability is very low
|
||||
// (requires gate timeout + new accept landing during cascade + TxId collision in
|
||||
// a 65,536-slot space); the only consequence is one dropped request that the
|
||||
// client retries. Documented as accepted best-effort behaviour in
|
||||
// codereviews/2026-05-14/ReReviewAfterRemediation.md (m1).
|
||||
// KNOWN RACE on the gate-not-held path: a concurrent EnsureBackendConnectedAsync
|
||||
// that DOES hold the gate may TryAllocate a TxId that collides (after wraparound
|
||||
// in the allocator's forward scan) with a TxId we're about to release from the
|
||||
// channel-drain step below. The double-release would mark the new request's slot
|
||||
// as free even though it's legitimately in-flight, allowing the next allocation
|
||||
// to reuse the same slot and CorrelationMap.TryAdd to fail (silent request drop).
|
||||
// Probability is very low (requires gate timeout + new accept landing during
|
||||
// cascade + TxId collision in a 65,536-slot space); the only consequence is one
|
||||
// dropped request that the client retries. Accepted as best-effort behaviour.
|
||||
bool gateHeld = false;
|
||||
try
|
||||
{
|
||||
@@ -412,8 +410,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
_allocator.Release(kvp.Key);
|
||||
}
|
||||
|
||||
// Phase 10 — also drain the in-flight-by-key map so a brand-new identical request
|
||||
// through the freshly-reconnected backend is treated as a miss (no stale entries
|
||||
// Also drain the in-flight-by-key map so a brand-new identical request through
|
||||
// the freshly-reconnected backend is treated as a miss (no stale entries
|
||||
// outlive the backend they were destined for).
|
||||
_inFlightByKey.DrainAll();
|
||||
|
||||
@@ -437,11 +435,11 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
_ctx.Counters.AddDisconnectCascades(upstreamCount);
|
||||
}
|
||||
|
||||
// Phase 12 (W1.4) — drain any stranded frames left in the outbound channel by
|
||||
// the writer task that just faulted/cancelled. Released their proxy TxIds back
|
||||
// to the allocator. By the time we reach this line the writer has stopped
|
||||
// reading from the channel (cancelled CTS) and the upstream pipes have been
|
||||
// cascaded (no more enqueues), so the channel state is stable.
|
||||
// Drain any stranded frames left in the outbound channel by the writer task
|
||||
// that just faulted/cancelled. Release their proxy TxIds back to the
|
||||
// allocator. By the time we reach this line the writer has stopped reading
|
||||
// from the channel (cancelled CTS) and the upstream pipes have been cascaded
|
||||
// (no more enqueues), so the channel state is stable.
|
||||
int strandedDropped = 0;
|
||||
while (_outboundChannel.Reader.TryRead(out byte[]? stranded))
|
||||
{
|
||||
@@ -464,7 +462,7 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
finally
|
||||
{
|
||||
// Only release if we acquired (W4 / NM1) — best-effort path may have skipped.
|
||||
// Only release if we acquired — best-effort path may have skipped.
|
||||
if (gateHeld)
|
||||
{
|
||||
try { _connectGate.Release(); }
|
||||
@@ -499,10 +497,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Backend failure — cascade. Phase 12 (W4 / NM5) — skip if disposal is
|
||||
// already in progress; DisposeAsync runs an explicit TearDown and the
|
||||
// fire-and-forget here would race against it, hitting a disposed
|
||||
// _connectGate and producing an unobserved-task exception.
|
||||
// Backend failure — cascade. Skip if disposal is already in progress;
|
||||
// DisposeAsync runs an explicit TearDown and the fire-and-forget here would
|
||||
// race against it, hitting a disposed _connectGate and producing an
|
||||
// unobserved-task exception.
|
||||
if (!_disposeCts.IsCancellationRequested)
|
||||
_ = TearDownBackendAsync($"writer fault: {ex.Message}", cascadeUpstreams: true);
|
||||
}
|
||||
@@ -554,10 +552,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
// Free the allocator slot immediately so it can be reused.
|
||||
_allocator.Release(proxyTxId);
|
||||
|
||||
// Phase 10 — for FC03/FC04 reads, also clear the coalescing-by-key entry so
|
||||
// a brand-new identical request issued AFTER this response is treated as a
|
||||
// miss (opens a fresh round-trip). The TryRemove is best-effort: a watchdog
|
||||
// timeout or cascade may have already removed it.
|
||||
// For FC03/FC04 reads, also clear the coalescing-by-key entry so a
|
||||
// brand-new identical request issued AFTER this response is treated as a
|
||||
// miss (opens a fresh round-trip). The TryRemove is best-effort: a
|
||||
// watchdog timeout or cascade may have already removed it.
|
||||
if (inFlight.Fc is 0x03 or 0x04)
|
||||
{
|
||||
var coalKey = new CoalescingKey(inFlight.UnitId, inFlight.Fc,
|
||||
@@ -580,16 +578,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
frame.AsSpan(MbapFrame.HeaderSize, pduBodyLen),
|
||||
responseCtx);
|
||||
|
||||
// Phase 11 — post-rewriter cache update:
|
||||
// Post-rewriter cache update:
|
||||
// * FC03/FC04 successful responses are stored when the request was
|
||||
// cache-eligible (resolvedTtlMs > 0).
|
||||
// * FC06/FC16 successful responses invalidate every cached entry whose
|
||||
// address range overlaps the write.
|
||||
//
|
||||
// Phase 12 (W2.7) — exception bit comes from the post-rewriter buffer
|
||||
// (the rewriter never touches the FC byte today, but reading from
|
||||
// inFlight.Fc would lose the exception bit). The base FC for routing
|
||||
// decisions uses inFlight.Fc — the request side knows what was sent.
|
||||
// Exception bit comes from the post-rewriter buffer (the rewriter never
|
||||
// touches the FC byte today, but reading from inFlight.Fc would lose the
|
||||
// exception bit). The base FC for routing decisions uses inFlight.Fc —
|
||||
// the request side knows what was sent.
|
||||
if (_ctx.Cache is { } postCache)
|
||||
{
|
||||
byte fcInResponse = frame[MbapFrame.HeaderSize];
|
||||
@@ -623,16 +621,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
else if (inFlight.Fc is 0x06 or 0x10)
|
||||
{
|
||||
// Phase 12 (W2.9) — the design contract "invalidations during a
|
||||
// recovering listener state are skipped" (design.md:203) is
|
||||
// upheld IMPLICITLY here: invalidation only fires inside the
|
||||
// backend reader task when a non-exception FC06/FC16 response
|
||||
// arrives. A `Recovering` listener has no backend reader (the
|
||||
// multiplexer is torn down between recovery attempts), so no
|
||||
// response can land here, so no invalidation. The gating is
|
||||
// structural, not conditional. If a future change ever produces
|
||||
// a write response off the live backend, an explicit recovering-
|
||||
// state check would need to be added.
|
||||
// The design contract "invalidations during a recovering
|
||||
// listener state are skipped" is upheld IMPLICITLY here:
|
||||
// invalidation only fires inside the backend reader task when
|
||||
// a non-exception FC06/FC16 response arrives. A `Recovering`
|
||||
// listener has no backend reader (the multiplexer is torn
|
||||
// down between recovery attempts), so no response can land
|
||||
// here, so no invalidation. The gating is structural, not
|
||||
// conditional. If a future change ever produces a write
|
||||
// response off the live backend, an explicit recovering-state
|
||||
// check would need to be added.
|
||||
int invalidated = postCache.Invalidate(
|
||||
inFlight.UnitId, inFlight.StartAddress, inFlight.Qty);
|
||||
if (invalidated > 0)
|
||||
@@ -647,23 +645,23 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
|
||||
// Fan out to each interested party with their original TxId restored.
|
||||
// Phase 9: always exactly one party. Phase 10: N parties (read coalescing).
|
||||
// Note: the InFlightByKey TryRemove above (for FC03/FC04) guarantees no
|
||||
// Without coalescing there is exactly one party; with coalescing there
|
||||
// are N. The InFlightByKey TryRemove above (for FC03/FC04) guarantees no
|
||||
// further attaches can occur — the parties list is now a stable snapshot.
|
||||
//
|
||||
// Phase 12 (W1.3) — non-blocking fan-out via `TrySendResponse`. The
|
||||
// single backend reader task must NEVER `await` a per-upstream channel
|
||||
// write: a wedged upstream (full bounded response channel) would otherwise
|
||||
// stall the reader and starve every other client on this PLC. A drop here
|
||||
// is recorded via `responseDropForFullUpstream`; the wedged upstream loses
|
||||
// its own response and will be reaped by its own socket-close path.
|
||||
// Non-blocking fan-out via `TrySendResponse`. The single backend reader
|
||||
// task must NEVER `await` a per-upstream channel write: a wedged upstream
|
||||
// (full bounded response channel) would otherwise stall the reader and
|
||||
// starve every other client on this PLC. A drop here is recorded via
|
||||
// `responseDropForFullUpstream`; the wedged upstream loses its own
|
||||
// response and will be reaped by its own socket-close path.
|
||||
foreach (var party in inFlight.InterestedParties)
|
||||
{
|
||||
if (!party.Pipe.IsAlive)
|
||||
{
|
||||
// Phase 10 — record the dead-upstream skip only for FC03/FC04 (the
|
||||
// only function codes that take the coalescing path). For non-
|
||||
// coalescing FCs this branch is silent — the Phase-9 behaviour.
|
||||
// Record the dead-upstream skip only for FC03/FC04 (the only
|
||||
// function codes that take the coalescing path) with more than one
|
||||
// interested party. For every other case this branch is silent.
|
||||
if (inFlight.Fc is 0x03 or 0x04
|
||||
&& inFlight.InterestedParties.Count > 1)
|
||||
{
|
||||
@@ -675,10 +673,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
continue;
|
||||
}
|
||||
|
||||
// The frame buffer is private to this iteration; if there are multiple
|
||||
// parties (Phase 10), each gets its own copy with its own original TxId
|
||||
// patched in. Phase 9 always has Count == 1, so the single-buffer path
|
||||
// is the common case; we copy to keep Phase-10 forward compatibility.
|
||||
// The frame buffer is private to this iteration; if there are
|
||||
// multiple coalesced parties, each gets its own copy with its own
|
||||
// original TxId patched in. The single-party case reuses the buffer
|
||||
// directly as the common-case fast path.
|
||||
byte[] outFrame = inFlight.InterestedParties.Count == 1
|
||||
? frame
|
||||
: (byte[])frame.Clone();
|
||||
@@ -692,17 +690,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
else
|
||||
{
|
||||
// Phase 12 (W6) — count outbound bytes per delivered party.
|
||||
// With coalescing, one backend response fans out to N parties and
|
||||
// produces N × frame.Length bytes leaving the proxy upstream-side.
|
||||
// Count outbound bytes per delivered party. With coalescing, one
|
||||
// backend response fans out to N parties and produces
|
||||
// N × frame.Length bytes leaving the proxy upstream-side.
|
||||
_ctx.Counters.AddBytes(up: 0, down: outFrame.Length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reader exited cleanly — backend closed by remote. Cascade.
|
||||
// Phase 12 (W4 / NM5) — skip if dispose is already in progress (see writer-side
|
||||
// comment above for rationale).
|
||||
// Reader exited cleanly — backend closed by remote. Cascade. Skip if
|
||||
// dispose is already in progress (see writer-side comment above).
|
||||
if (!_disposeCts.IsCancellationRequested)
|
||||
_ = TearDownBackendAsync("backend reader EOF", cascadeUpstreams: true);
|
||||
}
|
||||
@@ -730,16 +727,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
out ushort originalTxId, out _, out _, out byte unitId))
|
||||
return;
|
||||
|
||||
// Phase 12 (W6) — count inbound bytes from the upstream client. Surfaces in
|
||||
// bytes.upstreamIn on the status page. Counted ONCE per parsed frame regardless
|
||||
// of subsequent routing (cache hit, coalesce, backend round-trip, exception).
|
||||
// Count inbound bytes from the upstream client. Surfaces in bytes.upstreamIn on
|
||||
// the status page. Counted ONCE per parsed frame regardless of subsequent
|
||||
// routing (cache hit, coalesce, backend round-trip, exception).
|
||||
_ctx.Counters.AddBytes(up: frame.Length, down: 0);
|
||||
|
||||
// Parse the PDU FC + start/qty. FC03/FC04 reads use start/qty for the coalescing key
|
||||
// and (Phase 11) for the cache lookup. FC06 writes carry [addr][value]; we treat qty
|
||||
// as 1 for invalidation. FC16 carries [start][qty][byteCount]...; qty is the write
|
||||
// span used for cache invalidation. Phase 11: FC06/FC16 start/qty drive cache
|
||||
// invalidation by overlap rather than exact key.
|
||||
// Parse the PDU FC + start/qty. FC03/FC04 reads use start/qty for the coalescing
|
||||
// key and for the cache lookup. FC06 writes carry [addr][value]; we treat qty as
|
||||
// 1 for invalidation. FC16 carries [start][qty][byteCount]...; qty is the write
|
||||
// span used for cache invalidation. FC06/FC16 start/qty drive cache invalidation
|
||||
// by overlap rather than exact key.
|
||||
int pduOffset = MbapFrame.HeaderSize;
|
||||
byte fcByte = frame.Length > pduOffset ? frame[pduOffset] : (byte)0;
|
||||
ushort startAddr = 0;
|
||||
@@ -763,12 +760,12 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
|
||||
}
|
||||
|
||||
// Phase 11 — response-cache path. Cache check happens BEFORE coalescing AND before
|
||||
// we attempt to bring up the backend connection. A hit short-circuits everything,
|
||||
// including the EnsureBackendConnectedAsync call — operators with all reads cached
|
||||
// and the backend down still get served (the cache survives backend disconnects per
|
||||
// the design contract). The cache only fires for FC03/FC04 and only when the read
|
||||
// range's resolved TTL > 0.
|
||||
// Response-cache path. Cache check happens BEFORE coalescing AND before we
|
||||
// attempt to bring up the backend connection. A hit short-circuits everything,
|
||||
// including the EnsureBackendConnectedAsync call — operators with all reads
|
||||
// cached and the backend down still get served (the cache survives backend
|
||||
// disconnects per the design contract). The cache only fires for FC03/FC04 and
|
||||
// only when the read range's resolved TTL > 0.
|
||||
int resolvedCacheTtlMs = 0;
|
||||
if (fcByte is 0x03 or 0x04 && _ctx.Cache is { } responseCache)
|
||||
{
|
||||
@@ -783,7 +780,7 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
|
||||
byte[] hitFrame = BuildCacheHitFrame(originalTxId, unitId, cached.PduBytes);
|
||||
await pipe.SendResponseAsync(hitFrame, ct).ConfigureAwait(false);
|
||||
// Phase 12 (W6) — outbound bytes for cache-hit response.
|
||||
// Outbound bytes for cache-hit response.
|
||||
_ctx.Counters.AddBytes(up: 0, down: hitFrame.Length);
|
||||
return;
|
||||
}
|
||||
@@ -800,16 +797,15 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
|
||||
// Ensure backend is connected. Failure here means we cannot service the request;
|
||||
// close the upstream pipe (consistent with the 1:1 model's behaviour on connect
|
||||
// failure).
|
||||
// close the upstream pipe.
|
||||
if (!await EnsureBackendConnectedAsync(ct).ConfigureAwait(false))
|
||||
{
|
||||
try { await pipe.DisposeAsync().ConfigureAwait(false); } catch { /* best effort */ }
|
||||
return;
|
||||
}
|
||||
|
||||
// Phase 10 — read-coalescing path. Only FC03/FC04 are coalescable; only when the
|
||||
// feature is enabled in the live config. If the late-arriving request matches an
|
||||
// Read-coalescing path. Only FC03/FC04 are coalescable; only when the feature
|
||||
// is enabled in the live config. If the late-arriving request matches an
|
||||
// already-in-flight peer, we attach to the existing entry and skip the backend
|
||||
// round-trip entirely. The existing entry's response will fan out to both parties.
|
||||
var coalescingOpts = _coalescingOptions();
|
||||
@@ -818,14 +814,14 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
var key = new CoalescingKey(unitId, fcByte, startAddr, qty);
|
||||
var newParty = new InterestedParty(pipe, originalTxId);
|
||||
|
||||
// The factory does the Phase-9 work: allocate a proxy TxId, build the
|
||||
// InFlightRequest with a mutable List<InterestedParty>, add to the correlation
|
||||
// map. We deliberately do NOT enqueue to the outbound channel inside the
|
||||
// factory — that's done outside the InFlightByKey lock to keep the lock
|
||||
// scope tight and to avoid holding the lock across an async send.
|
||||
// The factory allocates a proxy TxId, builds the InFlightRequest with a
|
||||
// mutable List<InterestedParty>, and adds to the correlation map. We
|
||||
// deliberately do NOT enqueue to the outbound channel inside the factory —
|
||||
// that's done outside the InFlightByKey lock to keep the lock scope tight
|
||||
// and to avoid holding the lock across an async send.
|
||||
//
|
||||
// proxyTxIdForSend / inFlightForSend communicate the factory's allocation back
|
||||
// out of the lock so the post-lock code can finish the send.
|
||||
// proxyTxIdForSend / inFlightForSend communicate the factory's allocation
|
||||
// back out of the lock so the post-lock code can finish the send.
|
||||
ushort proxyTxIdForSend = 0;
|
||||
InFlightRequest? inFlightForSend = null;
|
||||
|
||||
@@ -898,40 +894,38 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
|
||||
if (inFlightForSend is null)
|
||||
{
|
||||
// Phase 12 (W1.2) — the factory hit the allocator-saturation path or a
|
||||
// duplicate-key race and stored a stub `InFlightRequest` under `key`. Late
|
||||
// attachers may have joined the stub between the factory call and this
|
||||
// cleanup; we must deliver the saturation exception to ALL of them, not just
|
||||
// the leader, otherwise the late attachers wait forever for a response that
|
||||
// The factory hit the allocator-saturation path or a duplicate-key race
|
||||
// and stored a stub `InFlightRequest` under `key`. Late attachers may
|
||||
// have joined the stub between the factory call and this cleanup; we
|
||||
// must deliver the saturation exception to ALL of them, not just the
|
||||
// leader, otherwise the late attachers wait forever for a response that
|
||||
// never comes (the stub has no proxy TxId, so no backend round-trip will
|
||||
// ever fire).
|
||||
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
|
||||
|
||||
if (_inFlightByKey.TryRemove(key, out var stub))
|
||||
{
|
||||
// Phase 12 (W4 / Nm1) — non-blocking delivery via TrySendResponse.
|
||||
// Previously this loop awaited SendResponseAsync per party, which would
|
||||
// serialise on a wedged late-attacher's full bounded channel and stall
|
||||
// delivery to its peers. Same doctrine as the W1.3 backend-reader fix:
|
||||
// the per-PLC fan-out path must never await per-pipe writes.
|
||||
// Non-blocking delivery via TrySendResponse — the per-PLC fan-out
|
||||
// path must never await per-pipe writes (a wedged late-attacher's
|
||||
// full bounded channel would otherwise stall delivery to its peers).
|
||||
foreach (var party in stub.InterestedParties)
|
||||
{
|
||||
byte[] excFrame = BuildExceptionFrame(party.OriginalTxId, unitId, fcByte, exceptionCode: 4);
|
||||
if (!party.Pipe.TrySendResponse(excFrame))
|
||||
_ctx.Counters.IncrementResponseDropForFullUpstream();
|
||||
else
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// The stub was already removed by another path (extremely unlikely, but
|
||||
// defensive). Surface the exception to the original requester.
|
||||
// The stub was already removed by another path (extremely unlikely,
|
||||
// but defensive). Surface the exception to the original requester.
|
||||
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
|
||||
if (!pipe.TrySendResponse(excFrame))
|
||||
_ctx.Counters.IncrementResponseDropForFullUpstream();
|
||||
else
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -962,16 +956,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
return;
|
||||
}
|
||||
|
||||
// Non-coalescing path (FC06/FC16 writes, FC03/04 with coalescing disabled, or any
|
||||
// other FC). This is the Phase-9 path verbatim — every request gets its own proxy
|
||||
// TxId and its own backend round-trip.
|
||||
// Non-coalescing path (FC06/FC16 writes, FC03/04 with coalescing disabled, or
|
||||
// any other FC). Every request gets its own proxy TxId and its own backend
|
||||
// round-trip.
|
||||
|
||||
if (!_allocator.TryAllocate(out ushort proxyTxIdFc))
|
||||
{
|
||||
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
|
||||
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
|
||||
await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -993,10 +987,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
return;
|
||||
}
|
||||
|
||||
// Phase 10 — even when the coalescing path is bypassed (e.g. coalescing disabled
|
||||
// for FC03/04), we still report the request as a Miss so Hit + Miss = total
|
||||
// FC03/FC04 requests across snapshots. FC06/FC16 are not counted here (they are
|
||||
// not coalescable in any sense).
|
||||
// Even when the coalescing path is bypassed (e.g. coalescing disabled for
|
||||
// FC03/04), we still report the request as a Miss so Hit + Miss = total
|
||||
// FC03/FC04 requests across snapshots. FC06/FC16 are not counted here (they
|
||||
// are not coalescable in any sense).
|
||||
if (fcByte is 0x03 or 0x04)
|
||||
_ctx.Counters.IncrementCoalescedMiss();
|
||||
|
||||
@@ -1037,12 +1031,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
/// Modbus exception (code 0x0B / Gateway Target Device Failed To Respond) to each
|
||||
/// interested party with the original TxId restored.
|
||||
///
|
||||
/// <para><b>Why this exists.</b> In the 1:1 connection model, a lost response would
|
||||
/// fault the dedicated backend socket and the upstream pair would close. The multiplexed
|
||||
/// model needs an explicit per-request timer because a single missing or mis-routed
|
||||
/// response would otherwise leak a correlation entry forever and hang the upstream
|
||||
/// pipe indefinitely. Real-world causes: PLC drops a response, network packet loss,
|
||||
/// backend that mis-echoes MBAP TxIds.</para>
|
||||
/// <para><b>Why this exists.</b> Without a per-request timeout, a single missing or
|
||||
/// mis-routed response on the multiplexed connection would leak a correlation entry
|
||||
/// forever and hang the upstream pipe indefinitely. Real-world causes: PLC drops a
|
||||
/// response, network packet loss, backend that mis-echoes MBAP TxIds.</para>
|
||||
/// </summary>
|
||||
private async Task RunRequestTimeoutWatchdogAsync(CancellationToken ct)
|
||||
{
|
||||
@@ -1070,10 +1062,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
|
||||
_allocator.Release(proxyTxId);
|
||||
|
||||
// Phase 10 — also clear the coalescing-by-key entry. A late attach that
|
||||
// raced in just before the watchdog claim will still receive the 0x0B
|
||||
// exception via this entry's InterestedParties list (List<T> mutations
|
||||
// happen before fan-out begins).
|
||||
// Also clear the coalescing-by-key entry. A late attach that raced
|
||||
// in just before the watchdog claim will still receive the 0x0B
|
||||
// exception via this entry's InterestedParties list (List<T>
|
||||
// mutations happen before fan-out begins).
|
||||
if (req.Fc is 0x03 or 0x04)
|
||||
{
|
||||
var coalKey = new CoalescingKey(req.UnitId, req.Fc, req.StartAddress, req.Qty);
|
||||
@@ -1097,7 +1089,7 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
try
|
||||
{
|
||||
await party.Pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
|
||||
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
|
||||
}
|
||||
catch
|
||||
{
|
||||
@@ -1150,10 +1142,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Phase 11 — builds an MBAP-framed response from cached PDU bytes for the given
|
||||
/// upstream party. The cache stores POST-rewriter PDU bodies (no MBAP); each hit
|
||||
/// stamps a fresh MBAP header carrying the requesting party's original TxId so the
|
||||
/// response looks indistinguishable from a fresh backend reply.
|
||||
/// Builds an MBAP-framed response from cached PDU bytes for the given upstream
|
||||
/// party. The cache stores POST-rewriter PDU bodies (no MBAP); each hit stamps a
|
||||
/// fresh MBAP header carrying the requesting party's original TxId so the response
|
||||
/// looks indistinguishable from a fresh backend reply.
|
||||
/// </summary>
|
||||
private static byte[] BuildCacheHitFrame(ushort originalTxId, byte unitId, byte[] cachedPdu)
|
||||
{
|
||||
|
||||
@@ -49,12 +49,11 @@ internal sealed partial class UpstreamPipe : IAsyncDisposable
|
||||
// Internal CTS lets the multiplexer signal "drop this pipe now" without waiting for
|
||||
// the upstream socket to close cleanly.
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
// Phase 12 (W2.2) — volatile so writes from DisposeAsync are observed by IsAlive /
|
||||
// TrySendResponse on other threads without a fence.
|
||||
// Volatile so writes from DisposeAsync are observed by IsAlive / TrySendResponse on
|
||||
// other threads without a fence.
|
||||
private volatile bool _disposed;
|
||||
|
||||
// Phase 9: per-pipe forwarded-PDU counter (replaces the per-pair counter from the
|
||||
// 1:1 model). Read by the status page.
|
||||
// Per-pipe forwarded-PDU counter. Read by the status page.
|
||||
private long _pdusForwardedCount;
|
||||
|
||||
/// <summary>Stable identity for status-page reporting and cascade cleanup.</summary>
|
||||
@@ -227,11 +226,11 @@ internal sealed partial class UpstreamPipe : IAsyncDisposable
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Phase 12 (W1.3) — non-blocking response enqueue. Returns <c>true</c> when the frame
|
||||
/// was queued for delivery, <c>false</c> when the pipe is dead OR the response channel
|
||||
/// is full. Used by the per-PLC backend reader's fan-out loop so a single wedged
|
||||
/// upstream cannot stall responses to peers sharing the same backend socket — without
|
||||
/// this, a full <c>_responseChannel</c> on one pipe would block the reader task.
|
||||
/// Non-blocking response enqueue. Returns <c>true</c> when the frame was queued for
|
||||
/// delivery, <c>false</c> when the pipe is dead OR the response channel is full.
|
||||
/// Used by the per-PLC backend reader's fan-out loop so a single wedged upstream
|
||||
/// cannot stall responses to peers sharing the same backend socket — without this, a
|
||||
/// full <c>_responseChannel</c> on one pipe would block the reader task.
|
||||
///
|
||||
/// <para>A <c>false</c> return indicates the frame is the multiplexer's responsibility
|
||||
/// to drop and (optionally) account for via a counter. The wedged upstream's socket
|
||||
|
||||
@@ -2,8 +2,8 @@ namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
|
||||
/// No-op PDU pipeline: passes every frame through byte-for-byte without rewriting.
|
||||
/// Registered as the <see cref="IPduPipeline"/> singleton in Phase 03.
|
||||
/// Phase 04 replaces this registration with BcdPduPipeline.
|
||||
/// Used by tests and fallback paths; production wires
|
||||
/// <see cref="BcdPduPipeline"/> as the <see cref="IPduPipeline"/> singleton.
|
||||
/// </summary>
|
||||
internal sealed class NoopPduPipeline : IPduPipeline
|
||||
{
|
||||
@@ -14,6 +14,5 @@ internal sealed class NoopPduPipeline : IPduPipeline
|
||||
PduContext context)
|
||||
{
|
||||
// Intentional no-op: bytes forwarded unmodified.
|
||||
// Phase 04: replace this registration with BcdPduPipeline.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,20 +14,20 @@ namespace Mbproxy.Proxy;
|
||||
/// served by the same <see cref="Multiplexing.PlcMultiplexer"/>; all mutable state is
|
||||
/// accessed through <see cref="ProxyCounters"/> which uses Interlocked for thread-safety.
|
||||
///
|
||||
/// <para><b>Phase 9 — request correlation:</b> the multiplexer sets <see cref="CurrentRequest"/>
|
||||
/// <para><b>Request correlation:</b> the multiplexer sets <see cref="CurrentRequest"/>
|
||||
/// before calling the pipeline on each direction. On the request path the pipeline can
|
||||
/// peek at the future correlation entry it just enqueued; on the response path the pipeline
|
||||
/// uses the request's <c>StartAddress</c>/<c>Qty</c> to decode FC03/FC04 BCD slots. Different
|
||||
/// in-flight responses use different <see cref="InFlightRequest"/> instances, so there is no
|
||||
/// cross-talk between concurrent multiplexed requests.</para>
|
||||
/// peek at the future correlation entry it just enqueued; on the response path the
|
||||
/// pipeline uses the request's <c>StartAddress</c>/<c>Qty</c> to decode FC03/FC04 BCD
|
||||
/// slots. Different in-flight responses use different <see cref="InFlightRequest"/>
|
||||
/// instances, so there is no cross-talk between concurrent multiplexed requests.</para>
|
||||
///
|
||||
/// <para><b>Concurrency:</b> a single <see cref="PerPlcContext"/> instance is shared across
|
||||
/// the per-upstream read tasks (which call the pipeline on the request path) and the
|
||||
/// single backend reader task (which calls the pipeline on the response path). Because the
|
||||
/// per-call <see cref="CurrentRequest"/> would be racy if mutated on the shared context,
|
||||
/// the multiplexer constructs a lightweight per-call clone (<see cref="WithCurrentRequest"/>)
|
||||
/// for each pipeline invocation. The shared mutable state — the tag map, counters, logger —
|
||||
/// is read-only or Interlocked.</para>
|
||||
/// <para><b>Concurrency:</b> a single <see cref="PerPlcContext"/> instance is shared
|
||||
/// across the per-upstream read tasks (which call the pipeline on the request path) and
|
||||
/// the single backend reader task (which calls the pipeline on the response path).
|
||||
/// Because the per-call <see cref="CurrentRequest"/> would be racy if mutated on the
|
||||
/// shared context, the multiplexer constructs a lightweight per-call clone
|
||||
/// (<see cref="WithCurrentRequest"/>) for each pipeline invocation. The shared mutable
|
||||
/// state — the tag map, counters, logger — is read-only or Interlocked.</para>
|
||||
/// </summary>
|
||||
internal class PerPlcContext : PduContext
|
||||
{
|
||||
@@ -46,10 +46,9 @@ internal class PerPlcContext : PduContext
|
||||
internal InFlightRequest? CurrentRequest { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Phase 11 — optional per-PLC response cache. <c>null</c> on contexts that opt out
|
||||
/// (every BCD tag has <see cref="BcdTag.CacheTtlMs"/> = 0) or in unit tests that don't
|
||||
/// exercise the cache. The multiplexer constructs and disposes the cache alongside
|
||||
/// itself.
|
||||
/// Optional per-PLC response cache. <c>null</c> on contexts that opt out (every BCD
|
||||
/// tag has <see cref="BcdTag.CacheTtlMs"/> = 0) or in unit tests that don't exercise
|
||||
/// the cache. The multiplexer constructs and disposes the cache alongside itself.
|
||||
/// </summary>
|
||||
internal ResponseCache? Cache { get; init; }
|
||||
|
||||
|
||||
@@ -11,15 +11,13 @@ namespace Mbproxy.Proxy;
|
||||
/// Owns one <see cref="TcpListener"/> bound to a PLC's configured listen port and one
|
||||
/// <see cref="PlcMultiplexer"/> that owns the single backend connection to the PLC.
|
||||
///
|
||||
/// <para><b>Phase 9 — TxId multiplexing:</b> the listener no longer pairs each upstream
|
||||
/// socket with a dedicated backend socket. Instead, every accepted upstream is wrapped
|
||||
/// in an <see cref="UpstreamPipe"/> and handed to the multiplexer. The multiplexer holds
|
||||
/// at most one TCP connection to the PLC, eliminating the H2-ECOM100's 4-concurrent-client
|
||||
/// cap from the upstream side.</para>
|
||||
/// <para>Every accepted upstream is wrapped in an <see cref="UpstreamPipe"/> and handed
|
||||
/// to the multiplexer, which TxId-multiplexes them onto a single backend socket — this
|
||||
/// eliminates the H2-ECOM100's 4-concurrent-client cap from the upstream side.</para>
|
||||
///
|
||||
/// <para>The listener's accept loop is otherwise unchanged. <see cref="StartAsync"/>
|
||||
/// binds the socket; <see cref="RunAsync"/> runs until cancelled or the listener faults;
|
||||
/// <see cref="DisposeAsync"/> tears down both the listener and the multiplexer.</para>
|
||||
/// <para><see cref="StartAsync"/> binds the socket; <see cref="RunAsync"/> runs until
|
||||
/// cancelled or the listener faults; <see cref="DisposeAsync"/> tears down both the
|
||||
/// listener and the multiplexer.</para>
|
||||
/// </summary>
|
||||
internal sealed partial class PlcListener : IAsyncDisposable
|
||||
{
|
||||
@@ -49,9 +47,9 @@ internal sealed partial class PlcListener : IAsyncDisposable
|
||||
=> _multiplexer?.AttachedPipes ?? Array.Empty<UpstreamPipe>();
|
||||
|
||||
/// <summary>
|
||||
/// Phase 12 (W1.1) — exposes the running multiplexer so a hot-reload reseat can swap
|
||||
/// the per-PLC context on the live instance. <c>null</c> between StopAsync and a fresh
|
||||
/// start; callers must null-check.
|
||||
/// Exposes the running multiplexer so a hot-reload reseat can swap the per-PLC
|
||||
/// context on the live instance. <c>null</c> between StopAsync and a fresh start;
|
||||
/// callers must null-check.
|
||||
/// </summary>
|
||||
internal PlcMultiplexer? Multiplexer => _multiplexer;
|
||||
|
||||
@@ -89,10 +87,10 @@ internal sealed partial class PlcListener : IAsyncDisposable
|
||||
_listener.Start();
|
||||
LogBound(_listenerLogger, _plc.Name, _plc.ListenPort);
|
||||
|
||||
// The multiplexer needs a PerPlcContext to share the BCD tag map and counters with
|
||||
// the pipeline. If the caller (typically a test or pre-Phase-6 startup path) didn't
|
||||
// supply one, construct a minimal context that exposes only the PlcName so the
|
||||
// multiplexer + a noop/passthrough pipeline still round-trip frames correctly.
|
||||
// The multiplexer needs a PerPlcContext to share the BCD tag map and counters
|
||||
// with the pipeline. If the caller (typically a test) didn't supply one,
|
||||
// construct a minimal context that exposes only the PlcName so the multiplexer
|
||||
// + a noop/passthrough pipeline still round-trip frames correctly.
|
||||
var ctx = _perPlcContext ?? new PerPlcContext
|
||||
{
|
||||
PlcName = _plc.Name,
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
|
||||
/// Immutable snapshot of per-PLC counters. Consumed by Phase 07's status page.
|
||||
/// Immutable snapshot of per-PLC counters. Consumed by the status page.
|
||||
/// All fields are point-in-time reads; no ordering guarantees across fields.
|
||||
///
|
||||
/// <para><b>Backwards-compat policy (see docs/kpi.md):</b> fields are <i>added</i>, never
|
||||
/// renamed or removed. Phase 9 appended <c>InFlightCount</c>, <c>MaxInFlight</c>,
|
||||
/// <c>TxIdWraps</c>, <c>BackendDisconnectCascades</c>, and <c>BackendQueueDepth</c> for
|
||||
/// the TxId-multiplexer telemetry surface (Tier 1.6 in docs/kpi.md).</para>
|
||||
/// renamed or removed.</para>
|
||||
/// </summary>
|
||||
public sealed record CounterSnapshot(
|
||||
long PdusForwarded,
|
||||
@@ -53,82 +51,85 @@ public sealed record CounterSnapshot(
|
||||
long ConnectsFailed,
|
||||
/// <summary>
|
||||
/// Number of Modbus requests currently in flight on this PLC's multiplexed backend
|
||||
/// connection (point-in-time snapshot of the correlation map size). Phase 9.
|
||||
/// connection (point-in-time snapshot of the correlation map size).
|
||||
/// </summary>
|
||||
long InFlightCount,
|
||||
/// <summary>
|
||||
/// Peak <see cref="InFlightCount"/> observed since the multiplexer was constructed.
|
||||
/// Updated via <see cref="Interlocked"/> CAS so concurrent in-flight increments do not
|
||||
/// lose the high-water mark. Phase 9.
|
||||
/// Updated via <see cref="Interlocked"/> CAS so concurrent in-flight increments do
|
||||
/// not lose the high-water mark.
|
||||
/// </summary>
|
||||
long MaxInFlight,
|
||||
/// <summary>
|
||||
/// Number of times the per-PLC TxId allocator's rolling cursor has wrapped
|
||||
/// 0xFFFF → 0x0000. A non-zero value is benign; a sudden burst suggests extreme
|
||||
/// in-flight churn. Phase 9.
|
||||
/// in-flight churn.
|
||||
/// </summary>
|
||||
long TxIdWraps,
|
||||
/// <summary>
|
||||
/// Cumulative count of upstream pipes closed as a side effect of a backend disconnect.
|
||||
/// Each backend reconnect cycle adds the number of attached upstream clients at the
|
||||
/// time of the disconnect. Phase 9.
|
||||
/// Cumulative count of upstream pipes closed as a side effect of a backend
|
||||
/// disconnect. Each backend reconnect cycle adds the number of attached upstream
|
||||
/// clients at the time of the disconnect.
|
||||
/// </summary>
|
||||
long BackendDisconnectCascades,
|
||||
/// <summary>
|
||||
/// Current depth of the per-PLC outbound channel feeding the backend writer task
|
||||
/// (frames queued, not yet on the wire). A sustained non-zero value indicates the
|
||||
/// backend is slower than upstream demand. Phase 9.
|
||||
/// backend is slower than upstream demand.
|
||||
/// </summary>
|
||||
long BackendQueueDepth,
|
||||
/// <summary>
|
||||
/// Phase 10 — cumulative count of FC03/FC04 requests that attached to an already-in-flight
|
||||
/// peer instead of opening a fresh backend round-trip. <c>CoalescedHitCount + CoalescedMissCount</c>
|
||||
/// equals total FC03/FC04 requests seen by the multiplexer.
|
||||
/// Cumulative count of FC03/FC04 requests that attached to an already-in-flight
|
||||
/// peer instead of opening a fresh backend round-trip.
|
||||
/// <c>CoalescedHitCount + CoalescedMissCount</c> equals total FC03/FC04 requests
|
||||
/// seen by the multiplexer.
|
||||
/// </summary>
|
||||
long CoalescedHitCount,
|
||||
/// <summary>
|
||||
/// Phase 10 — cumulative count of FC03/FC04 requests that opened a fresh in-flight entry
|
||||
/// (no matching peer was in flight, or the matching peer had reached its <c>MaxParties</c>
|
||||
/// cap). With <c>ReadCoalescing.Enabled = false</c>, every FC03/FC04 request becomes a miss.
|
||||
/// Cumulative count of FC03/FC04 requests that opened a fresh in-flight entry (no
|
||||
/// matching peer was in flight, or the matching peer had reached its
|
||||
/// <c>MaxParties</c> cap). With <c>ReadCoalescing.Enabled = false</c>, every
|
||||
/// FC03/FC04 request becomes a miss.
|
||||
/// </summary>
|
||||
long CoalescedMissCount,
|
||||
/// <summary>
|
||||
/// Phase 10 — count of coalesced response fan-outs that were skipped because the
|
||||
/// attached upstream pipe had already disconnected. A spike is a churn indicator; the
|
||||
/// metric itself is informational (Tier 2 in <c>docs/kpi.md</c>).
|
||||
/// Count of coalesced response fan-outs that were skipped because the attached
|
||||
/// upstream pipe had already disconnected. A spike is a churn indicator; the metric
|
||||
/// itself is informational (Tier 2 in <c>docs/kpi.md</c>).
|
||||
/// </summary>
|
||||
long CoalescedResponseToDeadUpstream,
|
||||
/// <summary>
|
||||
/// Phase 11 — cumulative count of FC03/FC04 requests served from the response cache.
|
||||
/// <c>CacheHitCount + CacheMissCount</c> equals total FC03/FC04 requests whose resolved
|
||||
/// TTL was > 0 (cache-eligible). Reads against tags with TTL = 0 increment neither.
|
||||
/// Cumulative count of FC03/FC04 requests served from the response cache.
|
||||
/// <c>CacheHitCount + CacheMissCount</c> equals total FC03/FC04 requests whose
|
||||
/// resolved TTL was > 0 (cache-eligible). Reads against tags with TTL = 0
|
||||
/// increment neither.
|
||||
/// </summary>
|
||||
long CacheHitCount,
|
||||
/// <summary>
|
||||
/// Phase 11 — cumulative count of cache-eligible FC03/FC04 requests that fell through
|
||||
/// to coalescing / backend (no fresh entry was present or the entry had expired).
|
||||
/// Cumulative count of cache-eligible FC03/FC04 requests that fell through to
|
||||
/// coalescing / backend (no fresh entry was present or the entry had expired).
|
||||
/// </summary>
|
||||
long CacheMissCount,
|
||||
/// <summary>
|
||||
/// Phase 11 — cumulative count of cache entries invalidated by overlapping FC06/FC16
|
||||
/// write responses. A high rate suggests caching is fighting writes; consider lower
|
||||
/// TTLs on cache-overlapping tags.
|
||||
/// Cumulative count of cache entries invalidated by overlapping FC06/FC16 write
|
||||
/// responses. A high rate suggests caching is fighting writes; consider lower TTLs
|
||||
/// on cache-overlapping tags.
|
||||
/// </summary>
|
||||
long CacheInvalidations,
|
||||
/// <summary>
|
||||
/// Phase 11 — point-in-time snapshot of the per-PLC <see cref="Cache.ResponseCache"/>
|
||||
/// entry count. Read on the snapshot path; 0 when no cache is wired.
|
||||
/// Point-in-time snapshot of the per-PLC <see cref="Cache.ResponseCache"/> entry
|
||||
/// count. Read on the snapshot path; 0 when no cache is wired.
|
||||
/// </summary>
|
||||
long CacheEntryCount,
|
||||
/// <summary>
|
||||
/// Phase 11 — point-in-time approximation of cached PDU bytes for this PLC. Sum of
|
||||
/// Point-in-time approximation of cached PDU bytes for this PLC. Sum of
|
||||
/// <see cref="Cache.CacheEntry.Length"/> across entries. Read on the snapshot path.
|
||||
/// </summary>
|
||||
long CacheBytes,
|
||||
/// <summary>
|
||||
/// Phase 12 (W1.3) — cumulative count of backend response frames the per-PLC reader
|
||||
/// task dropped because the destination upstream pipe's bounded response channel was
|
||||
/// full. A non-zero value indicates one or more upstream clients are not draining their
|
||||
/// Cumulative count of backend response frames the per-PLC reader task dropped
|
||||
/// because the destination upstream pipe's bounded response channel was full. A
|
||||
/// non-zero value indicates one or more upstream clients are not draining their
|
||||
/// socket fast enough to keep up with the backend; the wedged client loses its own
|
||||
/// responses but its peers on the same PLC continue to receive theirs.
|
||||
/// </summary>
|
||||
@@ -163,34 +164,34 @@ internal sealed class ProxyCounters
|
||||
private long _connectsSuccess;
|
||||
private long _connectsFailed;
|
||||
|
||||
// Phase 9 multiplexer telemetry.
|
||||
// Multiplexer telemetry.
|
||||
private long _maxInFlight;
|
||||
private long _backendDisconnectCascades;
|
||||
|
||||
// Phase 10 — coalescing counters. Hit + Miss = total FC03/FC04 requests.
|
||||
// Coalescing counters. Hit + Miss = total FC03/FC04 requests.
|
||||
private long _coalescedHitCount;
|
||||
private long _coalescedMissCount;
|
||||
private long _coalescedResponseToDeadUpstream;
|
||||
|
||||
// Phase 11 — response-cache counters. Hit + Miss = total cache-eligible FC03/FC04.
|
||||
// Response-cache counters. Hit + Miss = total cache-eligible FC03/FC04.
|
||||
private long _cacheHitCount;
|
||||
private long _cacheMissCount;
|
||||
private long _cacheInvalidations;
|
||||
|
||||
// Phase 12 (W1.3) — backend-reader fan-out drop counter. Increments when the reader
|
||||
// task tried to enqueue a response to an upstream pipe whose bounded response channel
|
||||
// was full. Without the non-blocking enqueue this would deadlock the reader; with it
|
||||
// we drop and account.
|
||||
// Backend-reader fan-out drop counter. Increments when the reader task tried to
|
||||
// enqueue a response to an upstream pipe whose bounded response channel was full.
|
||||
// Without the non-blocking enqueue this would deadlock the reader; with it we drop
|
||||
// the frame and count it.
|
||||
private long _responseDropForFullUpstream;
|
||||
|
||||
// Phase 11 — live cache state pulled from a per-PLC ResponseCache on each snapshot.
|
||||
// The multiplexer registers a single provider via SetCacheStatsProvider so the status
|
||||
// Live cache state pulled from a per-PLC ResponseCache on each snapshot. The
|
||||
// multiplexer registers a single provider via SetCacheStatsProvider so the status
|
||||
// page sees current entry-count / bytes without a separate poll.
|
||||
private volatile ICacheStatsProvider? _cacheStatsProvider;
|
||||
|
||||
// Phase 9: live state pulled from the multiplexer's allocator/map/queue on each
|
||||
// snapshot. The multiplexer registers a single provider via SetMultiplexProvider.
|
||||
// We use a volatile reference for lock-free read on the snapshot path.
|
||||
// Live state pulled from the multiplexer's allocator/map/queue on each snapshot.
|
||||
// The multiplexer registers a single provider via SetMultiplexProvider. We use a
|
||||
// volatile reference for lock-free read on the snapshot path.
|
||||
private volatile IMultiplexCountersProvider? _multiplexProvider;
|
||||
// LastBindError is a string (not a long); accessed via volatile field on ProxyCounters
|
||||
// but actually stored on the supervisor. We expose it here for snapshot parity.
|
||||
@@ -269,61 +270,61 @@ internal sealed class ProxyCounters
|
||||
=> Interlocked.Increment(ref _connectsFailed);
|
||||
|
||||
/// <summary>
|
||||
/// Records <paramref name="n"/> upstream pipes closed by a backend disconnect cascade.
|
||||
/// Phase 9.
|
||||
/// Records <paramref name="n"/> upstream pipes closed by a backend disconnect
|
||||
/// cascade.
|
||||
/// </summary>
|
||||
public void AddDisconnectCascades(int n)
|
||||
=> Interlocked.Add(ref _backendDisconnectCascades, n);
|
||||
|
||||
/// <summary>
|
||||
/// Phase 10 — records one FC03/FC04 request that attached to an already-in-flight peer.
|
||||
/// Records one FC03/FC04 request that attached to an already-in-flight peer.
|
||||
/// </summary>
|
||||
public void IncrementCoalescedHit()
|
||||
=> Interlocked.Increment(ref _coalescedHitCount);
|
||||
|
||||
/// <summary>
|
||||
/// Phase 10 — records one FC03/FC04 request that opened a fresh in-flight entry
|
||||
/// (no matching peer was in flight, or the matching peer had reached MaxParties).
|
||||
/// Records one FC03/FC04 request that opened a fresh in-flight entry (no matching
|
||||
/// peer was in flight, or the matching peer had reached MaxParties).
|
||||
/// </summary>
|
||||
public void IncrementCoalescedMiss()
|
||||
=> Interlocked.Increment(ref _coalescedMissCount);
|
||||
|
||||
/// <summary>
|
||||
/// Phase 10 — records one coalesced response fan-out that was skipped because the
|
||||
/// attached upstream pipe had already disconnected. Informational only.
|
||||
/// Records one coalesced response fan-out that was skipped because the attached
|
||||
/// upstream pipe had already disconnected. Informational only.
|
||||
/// </summary>
|
||||
public void IncrementCoalescedResponseToDeadUpstream()
|
||||
=> Interlocked.Increment(ref _coalescedResponseToDeadUpstream);
|
||||
|
||||
/// <summary>Phase 11 — records one FC03/FC04 cache hit.</summary>
|
||||
/// <summary>Records one FC03/FC04 cache hit.</summary>
|
||||
public void IncrementCacheHit()
|
||||
=> Interlocked.Increment(ref _cacheHitCount);
|
||||
|
||||
/// <summary>Phase 11 — records one cache-eligible FC03/FC04 read that missed.</summary>
|
||||
/// <summary>Records one cache-eligible FC03/FC04 read that missed.</summary>
|
||||
public void IncrementCacheMiss()
|
||||
=> Interlocked.Increment(ref _cacheMissCount);
|
||||
|
||||
/// <summary>Phase 11 — records <paramref name="n"/> cache entries invalidated by a write.</summary>
|
||||
/// <summary>Records <paramref name="n"/> cache entries invalidated by a write.</summary>
|
||||
public void AddCacheInvalidations(int n)
|
||||
=> Interlocked.Add(ref _cacheInvalidations, n);
|
||||
|
||||
/// <summary>
|
||||
/// Phase 12 (W1.3) — records one backend response frame dropped because the destination
|
||||
/// upstream pipe's response channel was full.
|
||||
/// Records one backend response frame dropped because the destination upstream
|
||||
/// pipe's response channel was full.
|
||||
/// </summary>
|
||||
public void IncrementResponseDropForFullUpstream()
|
||||
=> Interlocked.Increment(ref _responseDropForFullUpstream);
|
||||
|
||||
/// <summary>
|
||||
/// Phase 11 — wires the per-PLC <see cref="Cache.ResponseCache"/> as the live stats
|
||||
/// source for the snapshot path. Pass <c>null</c> to detach during disposal.
|
||||
/// Wires the per-PLC <see cref="Cache.ResponseCache"/> as the live stats source for
|
||||
/// the snapshot path. Pass <c>null</c> to detach during disposal.
|
||||
/// </summary>
|
||||
internal void SetCacheStatsProvider(ICacheStatsProvider? provider)
|
||||
=> _cacheStatsProvider = provider;
|
||||
|
||||
/// <summary>
|
||||
/// CAS-updates the peak in-flight high-water mark. Called on every successful
|
||||
/// allocation by the multiplexer. Phase 9.
|
||||
/// allocation by the multiplexer.
|
||||
/// </summary>
|
||||
public void ObserveInFlight(int currentInFlight)
|
||||
{
|
||||
@@ -341,7 +342,7 @@ internal sealed class ProxyCounters
|
||||
/// Wires the live multiplexer telemetry source into this counter set. Called by
|
||||
/// <see cref="Mbproxy.Proxy.Multiplexing.PlcMultiplexer"/> at construction time so
|
||||
/// the status page's <see cref="Snapshot"/> can include live in-flight / queue-depth
|
||||
/// values without polling the multiplexer separately. Phase 9.
|
||||
/// values without polling the multiplexer separately.
|
||||
/// </summary>
|
||||
internal void SetMultiplexProvider(IMultiplexCountersProvider? provider)
|
||||
=> _multiplexProvider = provider;
|
||||
@@ -454,7 +455,7 @@ internal sealed class ProxyCounters
|
||||
/// and registered with <see cref="ProxyCounters.SetMultiplexProvider"/> so
|
||||
/// <see cref="ProxyCounters.Snapshot"/> can include live mux telemetry without holding
|
||||
/// a direct reference to the multiplexer (which would couple counter snapshots to the
|
||||
/// connection layer's lifecycle). Phase 9.
|
||||
/// connection layer's lifecycle).
|
||||
/// </summary>
|
||||
internal interface IMultiplexCountersProvider
|
||||
{
|
||||
@@ -469,8 +470,8 @@ internal interface IMultiplexCountersProvider
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Phase 11 — read-only window into the per-PLC <see cref="Cache.ResponseCache"/>'s live
|
||||
/// state for the snapshot path. The multiplexer wires this on cache construction so the
|
||||
/// Read-only window into the per-PLC <see cref="Cache.ResponseCache"/>'s live state
|
||||
/// for the snapshot path. The multiplexer wires this on cache construction so the
|
||||
/// status page sees live counts without holding a direct reference to the cache.
|
||||
/// </summary>
|
||||
internal interface ICacheStatsProvider
|
||||
|
||||
@@ -24,8 +24,8 @@ namespace Mbproxy.Proxy;
|
||||
/// log <c>mbproxy.startup.ready</c> with bound/configured counts.</item>
|
||||
/// </list>
|
||||
///
|
||||
/// Phase 06: passes the supervisor dictionary to <see cref="ConfigReconciler.Attach"/>
|
||||
/// after initial startup so hot-reload changes are applied by the reconciler.
|
||||
/// Passes the supervisor dictionary to <see cref="ConfigReconciler.Attach"/> after
|
||||
/// initial startup so hot-reload changes are applied by the reconciler.
|
||||
///
|
||||
/// Stop: cancels all supervisors in parallel with a 5-second hard deadline.
|
||||
/// </summary>
|
||||
@@ -36,30 +36,30 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
private readonly ILogger<ProxyWorker> _logger;
|
||||
private readonly ILoggerFactory _loggerFactory;
|
||||
private readonly ConfigReconciler _reconciler;
|
||||
// Phase 12 (W1.5) — admin endpoint is no longer IHostedService; ProxyWorker drives its
|
||||
// Admin endpoint is not registered as IHostedService; ProxyWorker drives its
|
||||
// lifecycle directly so the design's "drain THEN stop admin" ordering is honoured.
|
||||
//
|
||||
// Resolved LAZILY (in ExecuteAsync) rather than in the constructor because the DI graph
|
||||
// is circular: AdminEndpointHost → StatusSnapshotBuilder → ProxyWorker. A constructor
|
||||
// GetService<AdminEndpointHost>() during ProxyWorker's own construction returns null
|
||||
// silently. Lazy resolution sidesteps the cycle — by the time ExecuteAsync runs the DI
|
||||
// container is fully built.
|
||||
// Resolved LAZILY (in ExecuteAsync) rather than in the constructor because the DI
|
||||
// graph is circular: AdminEndpointHost → StatusSnapshotBuilder → ProxyWorker. A
|
||||
// constructor GetService<AdminEndpointHost>() during ProxyWorker's own construction
|
||||
// returns null silently. Lazy resolution sidesteps the cycle — by the time
|
||||
// ExecuteAsync runs the DI container is fully built.
|
||||
private readonly IServiceProvider _services;
|
||||
private AdminEndpointHost? _admin;
|
||||
|
||||
// Phase 06: supervisors are now managed jointly by ProxyWorker (initial bootstrap)
|
||||
// and ConfigReconciler (subsequent hot-reload changes). The dictionary is shared
|
||||
// via ConfigReconciler.Attach() after initial startup.
|
||||
// Supervisors are managed jointly by ProxyWorker (initial bootstrap) and
|
||||
// ConfigReconciler (subsequent hot-reload changes). The dictionary is shared via
|
||||
// ConfigReconciler.Attach() after initial startup.
|
||||
//
|
||||
// Phase 12 (W2.3) — ConcurrentDictionary because ConfigReconciler mutates this from
|
||||
// parallel Task.WhenAll continuations (Add/Remove/Restart paths). The outer Apply is
|
||||
// serialised by a semaphore but the inner per-PLC tasks run concurrently. Status-page
|
||||
// reads via IReadOnlyDictionary still work without locking.
|
||||
// ConcurrentDictionary because ConfigReconciler mutates this from parallel
|
||||
// Task.WhenAll continuations (Add/Remove/Restart paths). The outer Apply is
|
||||
// serialised by a semaphore but the inner per-PLC tasks run concurrently.
|
||||
// Status-page reads via IReadOnlyDictionary still work without locking.
|
||||
private readonly ConcurrentDictionary<string, PlcListenerSupervisor> _supervisors =
|
||||
new(StringComparer.Ordinal);
|
||||
|
||||
/// <summary>
|
||||
/// Read-only view of the live supervisor dictionary. Consumed by Phase 07's
|
||||
/// Read-only view of the live supervisor dictionary. Consumed by
|
||||
/// <see cref="Admin.StatusSnapshotBuilder"/> to enumerate per-PLC state.
|
||||
/// The caller should read this on the status-page path only (not the hot path).
|
||||
/// </summary>
|
||||
@@ -79,7 +79,7 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
_loggerFactory = loggerFactory;
|
||||
_reconciler = reconciler;
|
||||
_services = services;
|
||||
// Phase 12 (W1.5) — admin endpoint resolved lazily in ExecuteAsync (see field comment).
|
||||
// Admin endpoint resolved lazily in ExecuteAsync (see field comment).
|
||||
}
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
@@ -107,11 +107,11 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
continue;
|
||||
}
|
||||
|
||||
// Phase 11 — construct a per-PLC response cache only when at least one
|
||||
// resolved tag opts in (CacheTtlMs > 0). Skipping cache construction for a
|
||||
// PLC with no cacheable tags keeps the no-cache path free of the eviction
|
||||
// timer and the per-call resolution cost, preserving "default behaviour =
|
||||
// Phase 10 unchanged" when no operator has opted any tag in.
|
||||
// Construct a per-PLC response cache only when at least one resolved tag
|
||||
// opts in (CacheTtlMs > 0). Skipping cache construction for a PLC with no
|
||||
// cacheable tags keeps the no-cache path free of the eviction timer and the
|
||||
// per-call resolution cost, preserving the "no caching" default behaviour
|
||||
// when no operator has opted any tag in.
|
||||
var cache = HasAnyCacheableTag(result.Map)
|
||||
? new ResponseCache(opts.Cache.MaxEntriesPerPlc, opts.Cache.EvictionIntervalMs)
|
||||
: null;
|
||||
@@ -144,9 +144,9 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
resilienceOpts.ListenerRecovery,
|
||||
_loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plc.Name}"));
|
||||
|
||||
// Phase 10 — give the supervisor a live accessor for ReadCoalescingOptions
|
||||
// so a hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled` propagates
|
||||
// to the multiplexer's per-PDU coalescing decision.
|
||||
// Give the supervisor a live accessor for ReadCoalescingOptions so a
|
||||
// hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled` propagates to
|
||||
// the multiplexer's per-PDU coalescing decision.
|
||||
Func<ReadCoalescingOptions> coalescingAccessor =
|
||||
() => _options.CurrentValue.Resilience.ReadCoalescing;
|
||||
|
||||
@@ -166,13 +166,13 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
_supervisors[plc.Name] = supervisor;
|
||||
}
|
||||
|
||||
// ── Phase 06: wire reconciler BEFORE starting supervisors ─────────────────
|
||||
// ── Wire reconciler BEFORE starting supervisors ──────────────────────────
|
||||
// Attach hands the reconciler the authoritative supervisor dictionary and the
|
||||
// initial options snapshot. The reconciler won't process OnChange events until
|
||||
// after this call — the brief window between Attach and first supervisor start
|
||||
// is safe because the channel signal only enqueues; apply runs asynchronously.
|
||||
// Phase 12 (W2.1) — also pass the live coalescing accessor so reconciler-built
|
||||
// supervisors (add/restart paths) honour hot-reloaded ReadCoalescing values.
|
||||
// Pass the live coalescing accessor so reconciler-built supervisors
|
||||
// (add/restart paths) honour hot-reloaded ReadCoalescing values.
|
||||
Func<ReadCoalescingOptions> reconcilerCoalescingAccessor =
|
||||
() => _options.CurrentValue.Resilience.ReadCoalescing;
|
||||
_reconciler.Attach(_supervisors, opts, reconcilerCoalescingAccessor);
|
||||
@@ -213,10 +213,10 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
int boundCount = _supervisors.Values.Count(s => s.Snapshot().State == SupervisorState.Bound);
|
||||
LogStartupReady(_logger, boundCount, plcsConfigured);
|
||||
|
||||
// Phase 12 (W1.5) — start the admin endpoint AFTER listeners are bound so the
|
||||
// status page can never observe the service in a "no PLCs configured yet" state.
|
||||
// The admin endpoint is no longer registered as IHostedService (the host's reverse
|
||||
// stop order would tear it down BEFORE drain). ProxyWorker drives both ends.
|
||||
// Start the admin endpoint AFTER listeners are bound so the status page can
|
||||
// never observe the service in a "no PLCs configured yet" state. The admin
|
||||
// endpoint is not registered as IHostedService (the host's reverse stop order
|
||||
// would tear it down BEFORE drain) — ProxyWorker drives both its start and stop.
|
||||
//
|
||||
// Resolution happens here, not in the constructor — the DI graph is circular
|
||||
// (admin → StatusSnapshotBuilder → ProxyWorker) and a constructor-time lookup
|
||||
@@ -235,10 +235,9 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
}
|
||||
else
|
||||
{
|
||||
// Phase 12 (W4 / Nm6) — surface the absence. The previous IHostedService
|
||||
// registration would have hard-errored in DI if AddMbproxyAdmin() was missing
|
||||
// from Program.cs; the W1.5 lazy lookup returns null silently. A single warning
|
||||
// makes a botched composition observable without blocking startup.
|
||||
// Surface a missing admin endpoint. The lazy lookup returns null silently if
|
||||
// AddMbproxyAdmin() is missing from Program.cs; a single warning makes a
|
||||
// botched composition observable without blocking startup.
|
||||
_logger.LogWarning(
|
||||
"Admin endpoint not registered (AddMbproxyAdmin() missing from composition). " +
|
||||
"Status page will be unavailable; service continues without it.");
|
||||
@@ -250,8 +249,7 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Phase 12 (W1.5) — graceful shutdown sequence (replaces the deleted
|
||||
/// <c>ShutdownCoordinator</c>):
|
||||
/// Graceful shutdown sequence:
|
||||
/// <list type="number">
|
||||
/// <item>Cancel <see cref="ExecuteAsync"/> via <c>base.StopAsync</c>.</item>
|
||||
/// <item><b>Snapshot</b> per-PLC in-flight counts BEFORE stopping supervisors —
|
||||
@@ -263,10 +261,7 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
/// stop is the actual drain — it cancels the listener, which exits its
|
||||
/// accept loop, which disposes the multiplexer, which cascades all attached
|
||||
/// pipes. There is no separate "drain in-flight" phase because there is
|
||||
/// nothing to drain that wouldn't be killed by the supervisor stop itself
|
||||
/// (the original Phase-08 ShutdownCoordinator's drain loop had this same
|
||||
/// shape and was structurally always-zero — call out from
|
||||
/// codereviews/2026-05-14/ReReviewAfterRemediation.md NC1).</item>
|
||||
/// nothing to drain that wouldn't be killed by the supervisor stop itself.</item>
|
||||
/// <item>Stop the admin endpoint LAST so the status page survives the supervisor
|
||||
/// stop phase and operators can observe the live state right up to shutdown.</item>
|
||||
/// <item>Dispose every supervisor to release sockets, channels, and watchdog timers.</item>
|
||||
@@ -277,16 +272,15 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
/// </summary>
|
||||
public override async Task StopAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
// Phase 12 (W5 / m2) — snapshot in-flight BEFORE base.StopAsync so the field
|
||||
// matches its name: "the count at the moment the host signalled stop", not "the
|
||||
// count at the moment we got around to computing it." `base.StopAsync` cancels the
|
||||
// ExecuteAsync stoppingToken; in the milliseconds before it returns, in-flight
|
||||
// requests whose responses arrive will be removed from _correlation and the
|
||||
// watchdog can clear stale entries — the count would otherwise drift downward.
|
||||
// Snapshot in-flight BEFORE base.StopAsync so the field matches its name: "the
|
||||
// count at the moment the host signalled stop", not "the count at the moment we
|
||||
// got around to computing it." `base.StopAsync` cancels the ExecuteAsync
|
||||
// stoppingToken; in the milliseconds before it returns, in-flight requests
|
||||
// whose responses arrive will be removed from _correlation and the watchdog can
|
||||
// clear stale entries — the count would otherwise drift downward.
|
||||
//
|
||||
// Phase 12 (W4 / NC1) — must run BEFORE supervisor stop too: after
|
||||
// supervisor.StopAsync, multiplexers are disposed and CountInFlight returns 0
|
||||
// unconditionally (the original ShutdownCoordinator had the same defect).
|
||||
// Must run BEFORE supervisor stop too: after supervisor.StopAsync, multiplexers
|
||||
// are disposed and CountInFlight returns 0 unconditionally.
|
||||
int inFlightAtCancel = CountInFlight();
|
||||
|
||||
// Cancel ExecuteAsync first.
|
||||
@@ -294,9 +288,9 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
|
||||
var sw = Stopwatch.StartNew();
|
||||
|
||||
// Phase 12 (W2.20) — supervisor stop deadline read from the live config so a
|
||||
// hot-reloaded GracefulShutdownTimeoutMs is honoured. Supervisor stop is the
|
||||
// drain: cancelling the supervisor cancels the listener, which exits accept, which
|
||||
// Supervisor stop deadline read from the live config so a hot-reloaded
|
||||
// GracefulShutdownTimeoutMs is honoured. Supervisor stop is the drain:
|
||||
// cancelling the supervisor cancels the listener, which exits accept, which
|
||||
// disposes the multiplexer, which cascades all attached pipes.
|
||||
int gracefulMs = _options.CurrentValue.Connection.GracefulShutdownTimeoutMs;
|
||||
|
||||
@@ -352,11 +346,11 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
// ── Logging ───────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
|
||||
/// Phase 11 — returns <c>true</c> when at least one BcdTag in the resolved map has a
|
||||
/// positive <see cref="BcdTag.CacheTtlMs"/>. A PLC with no cacheable tags skips the
|
||||
/// Returns <c>true</c> when at least one BcdTag in the resolved map has a positive
|
||||
/// <see cref="BcdTag.CacheTtlMs"/>. A PLC with no cacheable tags skips the
|
||||
/// <see cref="Mbproxy.Proxy.Cache.ResponseCache"/> entirely (no eviction timer, no
|
||||
/// per-call cache resolution cost), so the default-OFF deployment is byte-identical
|
||||
/// to a Phase-10 deployment.
|
||||
/// per-call cache resolution cost), so the default-OFF deployment runs the
|
||||
/// no-cache code path.
|
||||
/// </summary>
|
||||
private static bool HasAnyCacheableTag(BcdTagMap map)
|
||||
{
|
||||
@@ -375,7 +369,6 @@ internal sealed partial class ProxyWorker : BackgroundService
|
||||
Message = "Failed to bind listener: Plc={Plc} Port={Port} Reason={Reason}")]
|
||||
private static partial void LogBindFailed(ILogger logger, string plc, int port, string reason);
|
||||
|
||||
// Phase 12 (W1.5) — moved here from the deleted ShutdownCoordinator.
|
||||
[LoggerMessage(EventId = 80, EventName = "mbproxy.shutdown.complete",
|
||||
Level = LogLevel.Information,
|
||||
Message = "Graceful shutdown complete: InFlightAtCancel={InFlightAtCancel} ElapsedMs={ElapsedMs}")]
|
||||
|
||||
@@ -46,15 +46,15 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
private volatile string? _lastBindError;
|
||||
private int _recoveryAttempts; // Interlocked
|
||||
|
||||
// Phase 07: current active listener for status-page pair enumeration.
|
||||
// Current active listener for status-page pair enumeration.
|
||||
private volatile PlcListener? _currentListener;
|
||||
|
||||
// Phase 06: _perPlcContext is now mutable so ReplaceContextAsync can swap it.
|
||||
// Access from the accept loop (RunAsync) and from ReplaceContextAsync must be
|
||||
// coherent; we use a volatile reference so the accept loop always reads the latest
|
||||
// context without locking. The PlcListener created on each Polly attempt holds
|
||||
// its own copy of the context at construction time; existing in-flight connections
|
||||
// keep their old reference until they complete.
|
||||
// _perPlcContext is mutable so ReplaceContextAsync can swap it. Access from the accept
|
||||
// loop (RunAsync) and from ReplaceContextAsync must be coherent; we use a volatile
|
||||
// reference so the accept loop always reads the latest context without locking. The
|
||||
// PlcListener created on each Polly attempt holds its own copy of the context at
|
||||
// construction time; existing in-flight connections keep their old reference until they
|
||||
// complete.
|
||||
private volatile PerPlcContext? _currentContext;
|
||||
|
||||
/// <summary>
|
||||
@@ -67,16 +67,15 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
|
||||
private bool _disposed;
|
||||
|
||||
// Phase 12 (W2.15) — completes when the supervisor has transitioned out of Stopped
|
||||
// for the first time (reached Bound or Recovering). Replaces the previous busy-poll
|
||||
// implementation in WaitForInitialBindAttemptAsync, which raced fast Stopped→Bound→
|
||||
// Stopped transitions and never exited if the supervisor task threw inside Polly.
|
||||
// Completes when the supervisor has transitioned out of Stopped for the first time
|
||||
// (reached Bound or Recovering). Used by WaitForInitialBindAttemptAsync to avoid
|
||||
// racing fast Stopped→Bound→Stopped transitions or hanging if the supervisor task
|
||||
// throws inside Polly.
|
||||
//
|
||||
// Phase 12 (W4 / NM4) — non-readonly so StartAsync can re-arm it for a re-Started
|
||||
// supervisor. Without re-arming, a restart-after-stop scenario would have
|
||||
// WaitForInitialBindAttemptAsync return immediately on the previous run's signal,
|
||||
// never observing the new run's bind status. No production caller currently re-Starts,
|
||||
// but the supervisor's state machine should be consistent.
|
||||
// Non-readonly so StartAsync can re-arm it for a re-Started supervisor. Without
|
||||
// re-arming, a restart-after-stop scenario would have WaitForInitialBindAttemptAsync
|
||||
// return immediately on the previous run's signal, never observing the new run's
|
||||
// bind status.
|
||||
private TaskCompletionSource _firstAttemptCompleted = new(
|
||||
TaskCreationOptions.RunContinuationsAsynchronously);
|
||||
|
||||
@@ -104,7 +103,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
_multiplexerLogger = multiplexerLogger;
|
||||
_pipeLogger = pipeLogger;
|
||||
_perPlcContext = perPlcContext;
|
||||
_currentContext = perPlcContext; // Phase 06: live context slot
|
||||
_currentContext = perPlcContext; // live context slot
|
||||
_recoveryPipeline = recoveryPipeline;
|
||||
_logger = logger;
|
||||
_backendConnectPipeline = backendConnectPipeline;
|
||||
@@ -121,7 +120,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
/// <summary>
|
||||
/// Live collection of active <see cref="UpstreamPipe"/> instances attached to this
|
||||
/// PLC's multiplexer. Returns an empty collection when the listener is not bound.
|
||||
/// Consumed by Phase 07's status page (renamed from <c>ActivePairs</c> in Phase 9).
|
||||
/// Consumed by the status page.
|
||||
/// </summary>
|
||||
public IReadOnlyCollection<UpstreamPipe> ActiveUpstreams
{
    get
    {
        // Read the listener slot once; it is null while no listener is published,
        // in which case callers get an empty collection rather than null.
        var upstreams = _currentListener?.ActiveUpstreams;
        return upstreams ?? Array.Empty<UpstreamPipe>();
    }
}
|
||||
@@ -137,26 +136,25 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
/// </summary>
|
||||
public Task StartAsync(CancellationToken ct)
|
||||
{
|
||||
// Phase 12 (W2.16) — refuse to re-Start an already-running or already-disposed
|
||||
// supervisor. After Stop the state machine returns to Stopped and StartAsync
|
||||
// can re-arm; W4/NM3+NM4 below ensure the per-Start state (CTS, TCS) is fresh
|
||||
// each time so no leak or stale signal carries across cycles.
|
||||
// Refuse to re-Start an already-running or already-disposed supervisor. After
|
||||
// Stop the state machine returns to Stopped and StartAsync can re-arm; the per-
|
||||
// Start state (CTS, TCS) is refreshed below so no leak or stale signal carries
|
||||
// across cycles.
|
||||
if (_disposed)
|
||||
throw new ObjectDisposedException(nameof(PlcListenerSupervisor));
|
||||
if (_state != SupervisorState.Stopped || !_supervisorTask.IsCompleted)
|
||||
throw new InvalidOperationException(
|
||||
$"Supervisor for Plc='{_plc.Name}' has already been started.");
|
||||
|
||||
// Phase 12 (W4 / NM3) — dispose the previous CTS before reassigning. The original
|
||||
// code overwrote _supervisorCts unconditionally, leaking the prior CTS on every
|
||||
// re-Start cycle (and any registrations linked to it). Idempotent: ObjectDisposed
|
||||
// catch covers the very-first-Start case where the field-init CTS is still fresh.
|
||||
// Dispose the previous CTS before reassigning so a re-Start cycle does not leak
|
||||
// the prior CTS (and any registrations linked to it). Idempotent: the
|
||||
// ObjectDisposed catch covers the very-first-Start case where the field-init CTS
|
||||
// is still fresh.
|
||||
try { _supervisorCts.Dispose(); } catch (ObjectDisposedException) { /* fresh */ }
|
||||
_supervisorCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
|
||||
|
||||
// Phase 12 (W4 / NM4) — re-arm the first-attempt TCS so a re-Started supervisor
|
||||
// doesn't immediately observe the previous run's signal in
|
||||
// WaitForInitialBindAttemptAsync.
|
||||
// Re-arm the first-attempt TCS so a re-Started supervisor doesn't immediately
|
||||
// observe the previous run's signal in WaitForInitialBindAttemptAsync.
|
||||
_firstAttemptCompleted = new TaskCompletionSource(
|
||||
TaskCreationOptions.RunContinuationsAsynchronously);
|
||||
|
||||
@@ -170,10 +168,10 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
/// <see cref="SupervisorState.Recovering"/>).
|
||||
/// Returns immediately if the supervisor is already past that point.
|
||||
///
|
||||
/// <para><b>Phase 12 (W2.15)</b> — backed by a <see cref="TaskCompletionSource"/> set
|
||||
/// when the supervisor task first transitions out of <see cref="SupervisorState.Stopped"/>.
|
||||
/// Replaces the previous 10 ms busy-poll which raced fast bind+stop sequences and could
|
||||
/// hang if the supervisor task threw before any state write happened.</para>
|
||||
/// <para>Backed by a <see cref="TaskCompletionSource"/> set when the supervisor task
|
||||
/// first transitions out of <see cref="SupervisorState.Stopped"/>. This avoids both
|
||||
/// racing fast bind+stop sequences and hanging if the supervisor task throws before
|
||||
/// any state write happens.</para>
|
||||
/// </summary>
|
||||
public async Task WaitForInitialBindAttemptAsync(CancellationToken ct)
|
||||
{
|
||||
@@ -184,7 +182,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
}
|
||||
catch (OperationCanceledException)
|
||||
{
|
||||
// Caller cancelled; not a fault — same observable behaviour as the prior poll.
|
||||
// Caller cancelled; not a fault.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,8 +219,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
/// <summary>
|
||||
/// Returns a point-in-time snapshot of this supervisor's state.
|
||||
///
|
||||
/// <para><b>Phase 12 (W2.17)</b> — reads the three observable fields under a single
|
||||
/// lock so the status page can never report inconsistent triples like
|
||||
/// <para>Reads the three observable fields under a single lock so the status page
|
||||
/// can never report inconsistent triples like
|
||||
/// <c>(State=Bound, LastBindError=&lt;previous&gt;, RecoveryAttempts&gt;0)</c>. The
|
||||
/// supervisor task uses <see cref="TransitionTo"/> which takes the same lock, so a
|
||||
/// snapshot reads a transition-consistent view.</para>
|
||||
@@ -241,9 +239,9 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
private readonly object _snapshotLock = new();
|
||||
|
||||
/// <summary>
|
||||
/// Phase 12 (W2.17) — atomic three-field transition. State, lastBindError, and
|
||||
/// (optionally) the recoveryAttempts increment all happen under one lock so a
|
||||
/// concurrent <see cref="Snapshot"/> never sees a half-applied transition.
|
||||
/// Atomic three-field transition. State, lastBindError, and (optionally) the
|
||||
/// recoveryAttempts increment all happen under one lock so a concurrent
|
||||
/// <see cref="Snapshot"/> never sees a half-applied transition.
|
||||
/// </summary>
|
||||
private void TransitionTo(SupervisorState newState, string? lastBindError, bool incrementRecoveryAttempt)
|
||||
{
|
||||
@@ -258,15 +256,10 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
|
||||
/// <summary>
|
||||
/// Atomically swaps the per-PLC context (tag map + optional response cache) on the
|
||||
/// running listener AND its live multiplexer.
|
||||
///
|
||||
/// <para><b>Phase 12 (W1.1)</b> — previously this method only updated the supervisor's
|
||||
/// <c>_currentContext</c> slot, which meant the running <see cref="PlcMultiplexer"/>
|
||||
/// kept using the OLD context (it captured the reference at construction). A reload
|
||||
/// only became visible on the next listener fault. Now the swap propagates into the
|
||||
/// running mux via <see cref="PlcMultiplexer.ReplaceContext"/>, so the very next PDU
|
||||
/// sees the new tag map / new cache. Counters are preserved (the new context carries
|
||||
/// the same <c>ProxyCounters</c> instance) so operator history is not reset.</para>
|
||||
/// running listener AND its live multiplexer. The swap propagates into the running
|
||||
/// mux via <see cref="PlcMultiplexer.ReplaceContext"/>, so the very next PDU sees
|
||||
/// the new tag map / new cache. Counters are preserved (the new context carries the
|
||||
/// same <c>ProxyCounters</c> instance) so operator history is not reset.
|
||||
///
|
||||
/// <para><b>Old cache lifecycle</b>: the supervisor disposes the outgoing context's
|
||||
/// cache AFTER the multiplexer has been swapped to the new context. By that point no
|
||||
@@ -281,16 +274,16 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
// subsequent fault recovery) will pick up newCtx through this slot.
|
||||
_currentContext = newCtx;
|
||||
|
||||
// Phase 12 (W1.1) — push the swap into the running multiplexer so existing
|
||||
// connections see the new tag map / new cache on their next PDU. _currentListener
|
||||
// may be null between Polly retry attempts; in that case the next listener built
|
||||
// inside the Polly loop will pick up newCtx through _currentContext above.
|
||||
// Push the swap into the running multiplexer so existing connections see the new
|
||||
// tag map / new cache on their next PDU. _currentListener may be null between
|
||||
// Polly retry attempts; in that case the next listener built inside the Polly loop
|
||||
// will pick up newCtx through _currentContext above.
|
||||
_currentListener?.Multiplexer?.ReplaceContext(newCtx);
|
||||
|
||||
// Phase 12 (W1.1 + W2.8) — drop the outgoing cache AFTER the swap so the running
|
||||
// multiplexer can no longer reach it. Clear() snapshots the entry count for the
|
||||
// mbproxy.cache.flushed log event before disposing the cache (which stops the
|
||||
// eviction loop and releases the timer).
|
||||
// Drop the outgoing cache AFTER the swap so the running multiplexer can no longer
|
||||
// reach it. Clear() snapshots the entry count for the mbproxy.cache.flushed log
|
||||
// event before disposing the cache (which stops the eviction loop and releases
|
||||
// the timer).
|
||||
if (oldCache is not null && !ReferenceEquals(oldCache, newCtx.Cache))
|
||||
{
|
||||
int dropped = oldCache.Clear();
|
||||
@@ -318,11 +311,11 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
// A faulted listener's TcpListener socket must be disposed before
|
||||
// re-binding. We create a new PlcListener on each attempt.
|
||||
//
|
||||
// Phase 06: use _currentContext (volatile) so that a ReplaceContextAsync
|
||||
// call between Polly retry attempts is picked up here. Each listener
|
||||
// captures the context at construction time; existing in-flight pairs
|
||||
// keep their own reference. See ReplaceContextAsync for the transition
|
||||
// window documentation.
|
||||
// Use _currentContext (volatile) so that a ReplaceContextAsync call
|
||||
// between Polly retry attempts is picked up here. Each listener captures
|
||||
// the context at construction time; existing in-flight pairs keep their
|
||||
// own reference. See ReplaceContextAsync for the transition window
|
||||
// documentation.
|
||||
var listener = new PlcListener(
|
||||
_plc,
|
||||
_connectionOptions,
|
||||
@@ -334,7 +327,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
_backendConnectPipeline,
|
||||
_coalescingOptions);
|
||||
|
||||
// Phase 07: expose the current listener for status-page pair enumeration.
|
||||
// Expose the current listener for status-page pair enumeration.
|
||||
_currentListener = listener;
|
||||
|
||||
try
|
||||
@@ -351,10 +344,10 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
|
||||
string truncated = Truncate(bindEx.Message, 256);
|
||||
TransitionTo(SupervisorState.Recovering, truncated, incrementRecoveryAttempt: true);
|
||||
// Phase 12 (W2.15) — signal the first transition out of Stopped.
|
||||
// Signal the first transition out of Stopped.
|
||||
_firstAttemptCompleted.TrySetResult();
|
||||
|
||||
// Also update the per-PLC counters if available (Phase 07 reads these).
|
||||
// Also update the per-PLC counters if available (status page reads these).
|
||||
_currentContext?.Counters.IncrementRecoveryAttempt(truncated);
|
||||
|
||||
LogBindFailed(_logger, _plc.Name, _plc.ListenPort, truncated);
|
||||
@@ -379,7 +372,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
// Clear the last bind error on a successful bind.
|
||||
TransitionTo(SupervisorState.Bound, lastBindError: null, incrementRecoveryAttempt: false);
|
||||
_currentContext?.Counters.ClearLastBindError();
|
||||
// Phase 12 (W2.15) — signal the first transition out of Stopped.
|
||||
// Signal the first transition out of Stopped.
|
||||
_firstAttemptCompleted.TrySetResult();
|
||||
|
||||
// ── Run the accept loop ──────────────────────────────────────────
|
||||
@@ -407,9 +400,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
|
||||
string truncated = Truncate(runEx.Message, 256);
|
||||
TransitionTo(SupervisorState.Recovering, truncated, incrementRecoveryAttempt: true);
|
||||
// Phase 12 (W2.15) — also signal first-attempt-completed in case the
|
||||
// very first listener.RunAsync faulted before the bind-success path
|
||||
// signalled it.
|
||||
// Also signal first-attempt-completed in case the very first
|
||||
// listener.RunAsync faulted before the bind-success path signalled it.
|
||||
_firstAttemptCompleted.TrySetResult();
|
||||
|
||||
// Also update the per-PLC counters if available.
|
||||
@@ -457,16 +449,16 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
_state = SupervisorState.Stopped;
|
||||
}
|
||||
_currentListener = null;
|
||||
// Phase 12 (W2.15) — defensive: if RunSupervisorAsync exits before any bind
|
||||
// attempt fired (e.g. construction-time fault), unblock any awaiting
|
||||
// Defensive: if RunSupervisorAsync exits before any bind attempt fired
|
||||
// (e.g. construction-time fault), unblock any awaiting
|
||||
// WaitForInitialBindAttemptAsync caller so it doesn't hang.
|
||||
_firstAttemptCompleted.TrySetResult();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Phase 12 (W2 cleanup) — single helper for the truncate-exception-message pattern
|
||||
/// previously copy-pasted across three call sites.
|
||||
/// Single helper for the truncate-exception-message pattern shared across the
|
||||
/// supervisor's bind/run/end recovery paths.
|
||||
/// </summary>
|
||||
private static string Truncate(string s, int max)
{
    // Already within the limit: hand back the same instance, no allocation.
    if (s.Length <= max)
    {
        return s;
    }

    // Do not cut between the two halves of a surrogate pair: that would leave a
    // dangling high surrogate (malformed UTF-16) at the end of the message. Backing
    // off by one char keeps the result within max. Both indices are valid here
    // because 0 < max < s.Length. A negative max falls through to the range
    // expression and throws ArgumentOutOfRangeException, as it did before.
    int cut = max > 0 && char.IsSurrogatePair(s[max - 1], s[max]) ? max - 1 : max;
    return s[..cut];
}
|
||||
|
||||
@@ -487,8 +479,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
// Best-effort cleanup.
|
||||
}
|
||||
|
||||
// Phase 11: dispose the response cache (if any) — its eviction timer would
|
||||
// otherwise outlive the supervisor.
|
||||
// Dispose the response cache (if any) — its eviction timer would otherwise
|
||||
// outlive the supervisor.
|
||||
_currentContext?.Cache?.Dispose();
|
||||
|
||||
_supervisorCts.Dispose();
|
||||
|
||||
@@ -26,14 +26,14 @@ public enum SupervisorState
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Immutable point-in-time snapshot of a supervisor's state. Consumed by Phase 07's
|
||||
/// status page via <see cref="PlcListenerSupervisor.Snapshot"/>.
|
||||
/// Immutable point-in-time snapshot of a supervisor's state. Consumed by the status
|
||||
/// page via <see cref="PlcListenerSupervisor.Snapshot"/>.
|
||||
///
|
||||
/// <para><b>RecoveryAttempts semantics</b>: this counter <em>accumulates over the lifetime
|
||||
/// of the supervisor</em> and is never reset. Operators reading the status page should
|
||||
/// interpret it as "how many times has this listener faulted or failed to bind since
|
||||
/// the service started" — useful for detecting port-flapping or repeated OS network
|
||||
/// resets. Phase 07 surfaces it as-is.</para>
|
||||
/// resets.</para>
|
||||
/// </summary>
|
||||
/// <param name="State">Current state of the supervisor.</param>
|
||||
/// <param name="LastBindError">
|
||||
|
||||
Reference in New Issue
Block a user