mbproxy: strip historical phase/wave/plan references from source comments

Comments described the *history* of how the code arrived (phase numbers,
wave IDs, review IDs, dated TODOs) instead of what it does today. That
scaffolding rotted as the codebase evolved. Cleaned 60 source files +
.gitignore; behaviour unchanged (387/387 tests still pass).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-14 13:04:30 -04:00
parent b3b8313e9c
commit 1a2856526a
60 changed files with 750 additions and 811 deletions
+2 -1
View File
@@ -1,13 +1,14 @@
# Build output
bin/
obj/
publish-out/
# Visual Studio artifacts
.vs/
*.user
*.suo
# Test simulator Python venv (phase 01 onward)
# Test simulator Python venv
tests/sim/.venv/
# mbproxy runtime logs (default location, see appsettings.json)
+15 -15
View File
@@ -25,11 +25,11 @@ namespace Mbproxy.Admin;
///
/// <para>Routes: exactly two — <c>GET /</c> (HTML) and <c>GET /status.json</c> (JSON).</para>
///
/// <para><b>Phase 12 (W1.5)</b> — was previously also registered as <see cref="IHostedService"/>,
/// but the host's automatic stop ordering (reverse of registration) ran admin.StopAsync
/// BEFORE ProxyWorker.StopAsync, which broke the design's "drain THEN stop admin" guarantee
/// and caused a double-stop with the now-deleted <c>ShutdownCoordinator</c>. Now a plain
/// singleton with explicit lifecycle calls from ProxyWorker.</para>
/// <para>Registered as a plain singleton (not <see cref="IHostedService"/>) so
/// <see cref="Proxy.ProxyWorker"/> can drive its lifecycle explicitly. This is required to
/// honour the design contract that the in-flight drain finishes BEFORE admin stops; an
/// IHostedService registration would let the host stop admin in reverse-registration order
/// and break that ordering.</para>
/// </summary>
internal sealed partial class AdminEndpointHost : IAsyncDisposable
{
@@ -44,11 +44,11 @@ internal sealed partial class AdminEndpointHost : IAsyncDisposable
// Protects concurrent Start/Stop calls (hot-reload + StopAsync racing).
private readonly SemaphoreSlim _lock = new(1, 1);
// Phase 12 (W4 / Nm7) — idempotency flag for DisposeAsync. ProxyWorker.StopAsync
// calls our StopAsync explicitly; the DI container then disposes the singleton on
// host shutdown. Without this flag the second pass would Dispose `_lock` twice and
// re-dispose the change registration (both currently safe but symmetry with
// PlcMultiplexer prevents future regression).
// Idempotency flag for DisposeAsync. ProxyWorker.StopAsync calls our StopAsync
// explicitly; the DI container then disposes the singleton on host shutdown. Without
// this flag the second pass would Dispose `_lock` twice and re-dispose the change
// registration (both currently safe but symmetry with PlcMultiplexer prevents future
// regression).
private volatile bool _disposed;
// Current configured port — used to detect changes on hot-reload.
@@ -77,11 +77,11 @@ internal sealed partial class AdminEndpointHost : IAsyncDisposable
// Subscribe to config changes: if AdminPort changes, re-bind.
_optionsChangeRegistration = _optionsMonitor.OnChange(opts =>
{
// Phase 12 (W5 / M1) — short-circuit if disposal has already started. The
// OnChange callback can fire (and the Task.Run can be queued) AFTER StopAsync
// disposed the change registration but BEFORE DI ran DisposeAsync; without
// this guard the lambda would resurrect a fresh Kestrel app on the new port
// after the host already considered admin shut down.
// Short-circuit if disposal has already started. The OnChange callback can
// fire (and the Task.Run can be queued) AFTER StopAsync disposed the change
// registration but BEFORE DI ran DisposeAsync; without this guard the lambda
// would resurrect a fresh Kestrel app on the new port after the host already
// considered admin shut down.
if (_disposed) return;
int newPort = opts.AdminPort;
@@ -6,9 +6,9 @@ namespace Mbproxy.Admin;
/// Reads <see cref="AssemblyInformationalVersionAttribute"/> once at startup and caches the
/// result as a string. Used for the <c>service.version</c> field on the status page.
///
/// <para>Note: <see cref="Assembly.Location"/> is unreliable under single-file publish
/// (Phase 08). We use <c>Assembly.GetExecutingAssembly().GetCustomAttribute&lt;&gt;()</c>
/// which works correctly regardless of publish mode.</para>
/// <para>Note: <see cref="Assembly.Location"/> is unreliable under single-file publish.
/// We use <c>Assembly.GetExecutingAssembly().GetCustomAttribute&lt;&gt;()</c> which works
/// correctly regardless of publish mode.</para>
/// </summary>
internal sealed class AssemblyVersionAccessor
{
+15 -14
View File
@@ -60,9 +60,9 @@ public sealed record PlcPdusStatus(
long RewrittenSlots,
long PartialBcdWarnings,
/// <summary>
/// Phase 12 (W2.22) — count of BCD-rewriter slot decisions where the wire value was
/// not a valid BCD nibble pattern (e.g. <c>0xABCD</c> at a tag address). The slot
/// passes through unrewritten and this counter increments.
/// Count of BCD-rewriter slot decisions where the wire value was not a valid BCD
/// nibble pattern (e.g. <c>0xABCD</c> at a tag address). The slot passes through
/// unrewritten and this counter increments.
/// </summary>
long InvalidBcdWarnings);
@@ -75,15 +75,16 @@ public sealed record FcCounts(
long Other);
/// <summary>
/// Backend connect, exception, and multiplexer telemetry. Phase 9 added
/// <c>InFlight</c>, <c>MaxInFlight</c>, <c>TxIdWraps</c>, <c>DisconnectCascades</c>, and
/// <c>QueueDepth</c>. Phase 10 added the three coalescing counters
/// (<c>CoalescedHitCount</c>, <c>CoalescedMissCount</c>, <c>CoalescedResponseToDeadUpstream</c>);
/// the dashboard-side derived <c>coalescingRatio</c> is intentionally NOT carried on the wire
/// — consumers compute <c>Hit / (Hit + Miss)</c>. Phase 11 added the five cache counters
/// (<c>CacheHitCount</c>, <c>CacheMissCount</c>, <c>CacheInvalidations</c>,
/// <c>CacheEntryCount</c>, <c>CacheBytes</c>); the dashboard-side derived
/// <c>cacheHitRatio</c> is intentionally NOT carried on the wire.
/// Backend connect, exception, and multiplexer telemetry, including the in-flight
/// multiplexer fields (<c>InFlight</c>, <c>MaxInFlight</c>, <c>TxIdWraps</c>,
/// <c>DisconnectCascades</c>, <c>QueueDepth</c>), the read-coalescing counters
/// (<c>CoalescedHitCount</c>, <c>CoalescedMissCount</c>, <c>CoalescedResponseToDeadUpstream</c>),
/// and the response-cache counters (<c>CacheHitCount</c>, <c>CacheMissCount</c>,
/// <c>CacheInvalidations</c>, <c>CacheEntryCount</c>, <c>CacheBytes</c>).
///
/// <para>The dashboard-side derived ratios <c>coalescingRatio</c> and <c>cacheHitRatio</c>
/// are intentionally NOT carried on the wire — consumers compute <c>Hit / (Hit + Miss)</c>
/// from the raw counters.</para>
/// </summary>
public sealed record PlcBackendStatus(
long ConnectsSuccess,
@@ -111,8 +112,8 @@ public sealed record ExceptionCounts(
long Code03,
long Code04,
/// <summary>
/// Phase 12 (W2.22) — backend exceptions whose response code is not 0104 (e.g. 0x06
/// Server Device Busy, 0x0B Gateway Target Failed To Respond, vendor-specific codes).
/// Backend exceptions whose response code is not 0104 (e.g. 0x06 Server Device
/// Busy, 0x0B Gateway Target Failed To Respond, vendor-specific codes).
/// </summary>
long CodeOther);
+11 -11
View File
@@ -5,7 +5,7 @@ namespace Mbproxy.Admin;
/// <summary>
/// Renders a <see cref="StatusResponse"/> as a self-contained HTML page.
///
/// <para>Constraints (from design.md Phase 07):</para>
/// <para>Constraints (see <c>docs/design.md</c> status-page section):</para>
/// <list type="bullet">
/// <item>No external assets (CSS/JS/fonts/favicons) — firewalled networks only.</item>
/// <item><c>&lt;meta http-equiv="refresh" content="5"&gt;</c> for auto-refresh.</item>
@@ -77,16 +77,16 @@ internal static class StatusHtmlRenderer
sb.Append("<th>FC06</th><th>FC16</th><th>FC?</th><th>BCD slots</th>");
sb.Append("<th>Partial BCD</th><th>Invalid BCD</th><th>Ex 01</th><th>Ex 02</th><th>Ex 03</th><th>Ex 04</th><th>Ex ?</th>");
sb.Append("<th>RTT ms</th><th>Bytes in</th><th>Bytes out</th>");
// Phase 9: multiplexer telemetry columns.
// Multiplexer telemetry columns.
sb.Append("<th>In-flight</th><th>Max in-flight</th><th>TxId wraps</th>");
sb.Append("<th>Cascades</th><th>Queue</th>");
// Phase 10: coalescing column. Single cell carries hit / (hit + miss) ratio as
// a percentage plus the raw hit count for context. Kept compact (one cell) to
// Coalescing column. Single cell carries hit / (hit + miss) ratio as a
// percentage plus the raw hit count for context. Kept compact (one cell) to
// stay under the 50 KB page-weight budget.
sb.Append("<th>Coal</th>");
// Phase 11: cache column. Single cell carries hit-ratio percent plus raw hit
// count; an em-dash when no cache-eligible reads have occurred. Page-weight
// budget assertion stays under 50 KB for the 54-PLC fleet.
// Cache column. Single cell carries hit-ratio percent plus raw hit count;
// an em-dash when no cache-eligible reads have occurred. Page-weight budget
// assertion stays under 50 KB for the 54-PLC fleet.
sb.Append("<th>Cache</th>");
sb.Append("</tr></thead><tbody>");
@@ -150,14 +150,14 @@ internal static class StatusHtmlRenderer
sb.Append("<td>").Append(plc.Backend.LastRoundTripMs.ToString("F1")).Append("</td>");
sb.Append("<td>").Append(plc.Bytes.UpstreamIn).Append("</td>");
sb.Append("<td>").Append(plc.Bytes.UpstreamOut).Append("</td>");
// Phase 9: multiplexer telemetry cells.
// Multiplexer telemetry cells.
sb.Append("<td>").Append(plc.Backend.InFlight).Append("</td>");
sb.Append("<td>").Append(plc.Backend.MaxInFlight).Append("</td>");
sb.Append("<td>").Append(plc.Backend.TxIdWraps).Append("</td>");
sb.Append("<td>").Append(plc.Backend.DisconnectCascades).Append("</td>");
sb.Append("<td>").Append(plc.Backend.QueueDepth).Append("</td>");
// Phase 10: coalescing ratio cell — "<pct>% (<hit>)". When no coalesced reads
// have been seen, render an em-dash to keep the cell narrow.
// Coalescing ratio cell — "<pct>% (<hit>)". When no coalesced reads have
// been seen, render an em-dash to keep the cell narrow.
long coalHit = plc.Backend.CoalescedHitCount;
long coalMiss = plc.Backend.CoalescedMissCount;
sb.Append("<td>");
@@ -171,7 +171,7 @@ internal static class StatusHtmlRenderer
sb.Append(pct).Append("% (").Append(coalHit).Append(')');
}
sb.Append("</td>");
// Phase 11: cache ratio cell — same pattern as coalescing.
// Cache ratio cell — same pattern as coalescing.
long cacheHit = plc.Backend.CacheHitCount;
long cacheMiss = plc.Backend.CacheMissCount;
sb.Append("<td>");
@@ -66,10 +66,6 @@ internal sealed class StatusSnapshotBuilder
var activeUpstreams = supervisor?.ActiveUpstreams ?? Array.Empty<UpstreamPipe>();
var clientSnapshots = activeUpstreams
.Select(p => new ClientSnapshot(
// Phase 12 (W3 cleanup) — the second `?.Address.ToString()` was
// unreachable: if RemoteEp is non-null the first ?.ToString() returns
// a string; if it's null the second branch's outer `?.` short-circuits
// identically. Simplified to the equivalent two-branch form.
Remote: p.RemoteEp?.ToString() ?? "?",
ConnectedAtUtc: p.ConnectedAtUtc,
PdusForwarded: p.PdusForwardedCount))
@@ -114,7 +110,6 @@ internal sealed class StatusSnapshotBuilder
CacheBytes: 0,
ResponseDropForFullUpstream: 0);
// Phase 08: ConnectsSuccess / ConnectsFailed are now tracked in ProxyCounters.
long connectsSuccess = counters.ConnectsSuccess;
long connectsFailed = counters.ConnectsFailed;
+2 -2
View File
@@ -13,8 +13,8 @@ namespace Mbproxy.Bcd;
/// Example: 12_345_678 → low=0x5678, high=0x1234.
///
/// Bad-nibble policy: Decode16/Decode32 throw <see cref="FormatException"/>
/// (not a sentinel). The Phase 04 rewrite pipeline catches and surfaces the
/// exception as an mbproxy.rewrite.invalid_bcd warning event.
/// (not a sentinel). The rewrite pipeline catches and surfaces the exception as an
/// mbproxy.rewrite.invalid_bcd warning event.
/// </summary>
internal static class BcdCodec
{
+4 -4
View File
@@ -4,9 +4,9 @@ namespace Mbproxy.Bcd;
/// Immutable description of a single BCD-encoded V-memory tag as seen on the Modbus wire.
/// Width is 16 (one register) or 32 (two registers, CDAB low-word-first).
///
/// <para><b>Phase 11 — <see cref="CacheTtlMs"/></b> is the resolved per-tag response-cache
/// TTL in milliseconds. 0 (the default) means caching is disabled for this tag. Positive
/// values cap upstream staleness; the multi-tag-range read uses <c>min(TTLs)</c> across all
/// <para><b><see cref="CacheTtlMs"/></b> is the resolved per-tag response-cache TTL in
/// milliseconds. 0 (the default) means caching is disabled for this tag. Positive values
/// cap upstream staleness; the multi-tag-range read uses <c>min(TTLs)</c> across all
/// matched tags and treats any 0 in the range as "uncached for the whole read."</para>
/// </summary>
public sealed record BcdTag(ushort Address, byte Width, int CacheTtlMs = 0)
@@ -33,7 +33,7 @@ public sealed record BcdTag(ushort Address, byte Width, int CacheTtlMs = 0)
/// <summary>True when this tag occupies two registers (32-bit BCD).</summary>
public bool IsThirtyTwoBit => Width == 32;
/// <summary>True when this tag opts into the Phase-11 response cache.</summary>
/// <summary>True when this tag opts into the response cache.</summary>
public bool IsCacheable => CacheTtlMs > 0;
/// <summary>
+2 -2
View File
@@ -47,7 +47,7 @@ public sealed class BcdTagMap
=> _map.TryGetValue(address, out tag!);
/// <summary>
/// Phase 11 — resolves the effective cache TTL for an FC03/FC04 read over the range
/// Resolves the effective cache TTL for an FC03/FC04 read over the range
/// [<paramref name="startAddress"/>, <paramref name="startAddress"/> + <paramref name="qty"/>).
///
/// <para>Returns 0 (uncached) when:</para>
@@ -135,7 +135,7 @@ public sealed class BcdTagMap
return false;
}
// Sort ascending by offset so Phase 04 can iterate in wire order.
// Sort ascending by offset so the rewrite pipeline can iterate in wire order.
result.Sort(static (a, b) => a.OffsetWords.CompareTo(b.OffsetWords));
hits = result;
return true;
+11 -13
View File
@@ -34,8 +34,8 @@ public static class BcdTagMapBuilder
=> Build(global, perPlc, perPlcDefaultCacheTtlMs: 0);
/// <summary>
/// Phase 11 overload resolves the effective BCD tag list for one PLC and validates
/// it, additionally folding the per-PLC <paramref name="perPlcDefaultCacheTtlMs"/> into
/// Overload that resolves the effective BCD tag list for one PLC and validates it,
/// additionally folding the per-PLC <paramref name="perPlcDefaultCacheTtlMs"/> into
/// any tag whose explicit <see cref="BcdTagOptions.CacheTtlMs"/> is null.
///
/// <para>Resolution order per tag:</para>
@@ -53,13 +53,11 @@ public static class BcdTagMapBuilder
var errors = new List<BcdError>();
var warnings = new List<BcdWarning>();
// Phase 12 (W2.11) — duplicate-address detection happens BEFORE the working
// dictionary collapses keys. Iterating each list independently catches duplicates
// that would otherwise be silently last-write-wins'd by the dictionary indexer.
// Cross-list collisions (same address in BOTH Global and Add) are the documented
// width-override pattern and must NOT be flagged — only intra-list duplicates fail.
// Without this fix the DuplicateAddress validation error was dead code (the
// post-collapse dict has unique keys by construction).
// Duplicate-address detection happens BEFORE the working dictionary collapses
// keys. Iterating each list independently catches duplicates that would otherwise
// be silently last-write-wins'd by the dictionary indexer. Cross-list collisions
// (same address in BOTH Global and Add) are the documented width-override pattern
// and must NOT be flagged — only intra-list duplicates fail.
static void DetectIntraListDuplicates(
IEnumerable<BcdTagOptions> source, string sourceName, List<BcdError> errors)
{
@@ -119,7 +117,7 @@ public static class BcdTagMapBuilder
continue;
}
// Phase 11 — resolve the effective per-tag cache TTL:
// Resolve the effective per-tag cache TTL:
// explicit per-tag (including 0) wins; otherwise fall back to per-PLC default.
int resolvedTtl = opt.CacheTtlMs ?? perPlcDefaultCacheTtlMs;
if (resolvedTtl < 0) resolvedTtl = 0;
@@ -128,9 +126,9 @@ public static class BcdTagMapBuilder
}
// High-register collision check (only meaningful for 32-bit entries).
// Phase 12 (W2.12) — dedupe symmetric reports. Two 32-bit tags whose pairs collide
// (e.g. (100,W=32) and (101,W=32)) would otherwise produce two BcdErrors — one
// from each direction. Track reported (low,high) pairs so each collision logs once.
// Dedupe symmetric reports. Two 32-bit tags whose pairs collide (e.g. (100,W=32)
// and (101,W=32)) would otherwise produce two BcdErrors — one from each
// direction. Track reported (low,high) pairs so each collision logs once.
var reportedCollisions = new HashSet<(ushort, ushort)>();
foreach (var tag in validated.Values)
{
@@ -48,17 +48,17 @@ internal sealed partial class ConfigReconciler : IDisposable
private readonly ServiceCounters _serviceCounters;
// The supervisor dictionary is set by ProxyWorker after initial startup.
// Phase 12 (W2.3) — ConcurrentDictionary so the per-PLC Add/Remove/Restart task
// continuations inside ApplyUnderLockAsync can mutate it concurrently. The outer Apply
// is serialised by _applySemaphore but the inner Task.WhenAll runs in parallel.
// ConcurrentDictionary so the per-PLC Add/Remove/Restart task continuations inside
// ApplyUnderLockAsync can mutate it concurrently. The outer Apply is serialised by
// _applySemaphore but the inner Task.WhenAll runs in parallel.
private ConcurrentDictionary<string, PlcListenerSupervisor>? _supervisors;
private MbproxyOptions? _currentOptions;
// Phase 12 (W2.1) — live accessor for ReadCoalescingOptions, threaded through Attach
// so PLCs added or restarted via hot-reload honour the current
// Live accessor for ReadCoalescingOptions, threaded through Attach so PLCs added or
// restarted via hot-reload honour the current
// `Mbproxy.Resilience.ReadCoalescing.{Enabled,MaxParties}` values. Without this,
// reconciler-built supervisors silently used the default `new ReadCoalescingOptions()`,
// and a hot-reload of `Enabled = false` did not propagate to those supervisors.
// reconciler-built supervisors would use the default `new ReadCoalescingOptions()`
// and a hot-reload of `Enabled = false` would not propagate to them.
private Func<ReadCoalescingOptions>? _coalescingAccessor;
// ── Debounce + serialisation machinery ───────────────────────────────────────────────
@@ -111,8 +111,8 @@ internal sealed partial class ConfigReconciler : IDisposable
/// <summary>
/// Provides the reconciler with the supervisor dictionary, the initial options snapshot,
/// and (Phase 12 W2.1) the live <see cref="ReadCoalescingOptions"/> accessor that
/// add/restart supervisors must use so a hot-reloaded
/// and the live <see cref="ReadCoalescingOptions"/> accessor that add/restart
/// supervisors must use so a hot-reloaded
/// <c>Mbproxy.Resilience.ReadCoalescing.Enabled</c> propagates to them. Must be called
/// exactly once by <see cref="Proxy.ProxyWorker"/> before any <c>OnChange</c> events
/// can arrive (i.e. immediately after the supervisors are created). Thread-safe: the
@@ -287,7 +287,8 @@ internal sealed partial class ConfigReconciler : IDisposable
await old.DisposeAsync().ConfigureAwait(false);
}
// Build fresh context. Phase 11: pass DefaultCacheTtlMs.
// Build fresh context. Pass DefaultCacheTtlMs so per-PLC default
// caching folds into the resolved tag map.
var result = BcdTagMapBuilder.Build(next.BcdTags, plcNew.BcdTags, plcNew.DefaultCacheTtlMs);
var newCtx = new PerPlcContext
{
@@ -345,8 +346,8 @@ internal sealed partial class ConfigReconciler : IDisposable
// Preserve existing counters so operators see real history.
Counters = supervisor.CurrentCounters,
Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{name}"),
// Phase 11: any reseat (tag-map change) constructs a fresh cache.
// The supervisor disposes the old one inside ReplaceContextAsync.
// Any reseat (tag-map change) constructs a fresh cache. The
// supervisor disposes the old one inside ReplaceContextAsync.
Cache = BuildCacheIfNeeded(newMap, next.Cache),
};
@@ -373,7 +374,8 @@ internal sealed partial class ConfigReconciler : IDisposable
{
try
{
// Phase 11: pass DefaultCacheTtlMs.
// Pass DefaultCacheTtlMs so per-PLC default caching folds into the
// resolved tag map.
var result = BcdTagMapBuilder.Build(next.BcdTags, plcNew.BcdTags, plcNew.DefaultCacheTtlMs);
var newCtx = new PerPlcContext
{
@@ -427,9 +429,9 @@ internal sealed partial class ConfigReconciler : IDisposable
// ── Helpers ───────────────────────────────────────────────────────────────────────────
/// <summary>
/// Phase 11 — constructs a <see cref="ResponseCache"/> only when at least one resolved
/// tag in <paramref name="map"/> opts in (<see cref="BcdTag.CacheTtlMs"/> &gt; 0).
/// Returns <c>null</c> otherwise so the no-cache path is byte-identical to Phase 10.
/// Constructs a <see cref="ResponseCache"/> only when at least one resolved tag in
/// <paramref name="map"/> opts in (<see cref="BcdTag.CacheTtlMs"/> &gt; 0). Returns
/// <c>null</c> otherwise so the no-cache path bypasses cache logic entirely.
/// </summary>
private static ResponseCache? BuildCacheIfNeeded(BcdTagMap map, CacheOptions opts)
{
@@ -78,8 +78,8 @@ public sealed record ReloadPlan(
// Tag-map change → reseat (swap context, keep socket).
// We must build both maps to compare them structurally.
// Compute happens after validation so Build should never return errors here.
// Phase 11: include DefaultCacheTtlMs in the build so a per-PLC default change
// is detected by TagMapsEqual via the per-tag CacheTtlMs delta.
// Include DefaultCacheTtlMs in the build so a per-PLC default change is
// detected by TagMapsEqual via the per-tag CacheTtlMs delta.
var oldMap = BcdTagMapBuilder.Build(current.BcdTags, plcOld.BcdTags, plcOld.DefaultCacheTtlMs).Map;
var newMap = BcdTagMapBuilder.Build(next.BcdTags, plcNew.BcdTags, plcNew.DefaultCacheTtlMs).Map;
@@ -97,8 +97,8 @@ public sealed record ReloadPlan(
/// <summary>
/// Structural equality between two <see cref="BcdTagMap"/> instances: same set of
/// (Address, Width, CacheTtlMs) triples. Order doesn't matter — we compare as sets.
/// Phase 11 includes <see cref="BcdTag.CacheTtlMs"/> in the comparison so a per-tag
/// or per-PLC default TTL change reseats the context (which flushes the cache).
/// Includes <see cref="BcdTag.CacheTtlMs"/> in the comparison so a per-tag or per-PLC
/// default TTL change reseats the context (which flushes the cache).
/// </summary>
private static bool TagMapsEqual(BcdTagMap a, BcdTagMap b)
{
@@ -75,10 +75,10 @@ internal static class ReloadValidator
// ── 4. Per-PLC tag-map build ──────────────────────────────────────────
// BcdTagMapBuilder.Build is the single source of truth for tag-list
// well-formedness; we must not duplicate its validation logic here.
// Phase 12 (W2.10) — also re-check the RESOLVED per-tag CacheTtlMs against
// AllowLongTtl. The raw-input check at section 5 covers explicit per-tag and
// per-PLC-default values, but defensively re-validating the post-fold values
// catches any future fold logic that produces a value above the gate.
// Also re-check the RESOLVED per-tag CacheTtlMs against AllowLongTtl. The raw-
// input check at section 5 covers explicit per-tag and per-PLC-default values,
// but defensively re-validating the post-fold values catches any future fold
// logic that produces a value above the gate.
bool allowLongTtlForResolved = next.Cache.AllowLongTtl;
foreach (var plc in next.Plcs)
{
@@ -98,7 +98,7 @@ internal static class ReloadValidator
}
}
// ── 5. Cache TTL bounds (Phase 11) ────────────────────────────────────
// ── 5. Cache TTL bounds ───────────────────────────────────────────────
// The MbproxyOptionsValidator catches these at schema time too, but ReloadValidator
// is the gate that the hot-reload path consults directly so re-checking here keeps
// both paths internally consistent (and the validator runs against tag-map-resolved
@@ -129,8 +129,8 @@ internal static class ReloadValidator
if (next.Cache.EvictionIntervalMs < 0)
errs.Add($"Cache.EvictionIntervalMs must be >= 0; got {next.Cache.EvictionIntervalMs}.");
// Phase 12 (W2.18) — Connection timeouts must be > 0. A reload that sets any
// of these to 0 or negative would break the runtime; reject the reload as a whole.
// Connection timeouts must be > 0. A reload that sets any of these to 0 or
// negative would break the runtime; reject the reload as a whole.
if (next.Connection.BackendConnectTimeoutMs <= 0)
errs.Add(
$"Connection.BackendConnectTimeoutMs must be > 0; got {next.Connection.BackendConnectTimeoutMs}.");
@@ -29,9 +29,9 @@ internal sealed class EventLogBridge : ILogEventSink
private const int MaxMessageBytes = 32 * 1024; // 32 KB Event Log limit
private readonly bool _enabled;
// Phase 12 (W2.23) — cache the source-exists check at construction so Emit doesn't
// hit the registry on every Error+ log line. A missing source after start requires a
// service restart to pick up; in practice install.ps1 registers it once at install.
// Cache the source-exists check at construction so Emit doesn't hit the registry on
// every Error+ log line. A missing source after start requires a service restart to
// pick up; in practice install.ps1 registers it once at install.
private readonly bool _sourceExists;
public EventLogBridge(bool enabled)
@@ -50,7 +50,7 @@ internal sealed class EventLogBridge : ILogEventSink
if (!_enabled) return;
if (logEvent.Level < LogEventLevel.Error) return;
// Cached at construction (W2.23) — silently swallow if the source isn't registered.
// Cached at construction — silently swallow if the source isn't registered.
// The service account may not be able to create it and we must not crash the logger.
if (!_sourceExists) return;
+12 -14
View File
@@ -10,12 +10,10 @@ internal static class HostingExtensions
{
/// <summary>
/// Registers the <c>"Mbproxy"</c> configuration section, binds it to
/// <see cref="MbproxyOptions"/> via <c>IOptionsMonitor</c>, and registers
/// the schema-level <see cref="MbproxyOptionsValidator"/>.
///
/// Phase 06: also registers <see cref="ServiceCounters"/> (singleton) and
/// <see cref="ConfigReconciler"/> (singleton) so they can be injected into
/// <see cref="Proxy.ProxyWorker"/>.
/// <see cref="MbproxyOptions"/> via <c>IOptionsMonitor</c>, registers the schema-
/// level <see cref="MbproxyOptionsValidator"/>, and registers the singleton
/// <see cref="ServiceCounters"/> and <see cref="ConfigReconciler"/> so they can be
/// injected into <see cref="Proxy.ProxyWorker"/>.
/// </summary>
public static IHostApplicationBuilder AddMbproxyOptions(this IHostApplicationBuilder builder)
{
@@ -28,17 +26,17 @@ internal static class HostingExtensions
Microsoft.Extensions.Options.IValidateOptions<MbproxyOptions>,
MbproxyOptionsValidator>();
// Phase 06: service-wide counters (read by Phase 07 status page).
// Service-wide counters (read by the status page).
builder.Services.AddSingleton<ServiceCounters>();
// Phase 06: hot-reload reconciler (singleton; subscribes to IOptionsMonitor.OnChange).
// Hot-reload reconciler (singleton; subscribes to IOptionsMonitor.OnChange).
builder.Services.AddSingleton<ConfigReconciler>();
return builder;
}
/// <summary>
/// Registers Phase 07 admin endpoint services:
/// Registers the admin endpoint services:
/// <list type="bullet">
/// <item><see cref="AssemblyVersionAccessor"/> (singleton — reads version attribute once).</item>
/// <item><see cref="StatusSnapshotBuilder"/> (singleton — pure orchestration).</item>
@@ -47,8 +45,8 @@ internal static class HostingExtensions
/// Must be called after <see cref="AddMbproxyOptions"/> and after
/// <c>AddHostedService&lt;ProxyWorker&gt;</c> (so ProxyWorker is available via DI).
///
/// <para><b>Phase 12 (W1.5)</b> — <see cref="AdminEndpointHost"/> is no longer registered
/// via <c>AddHostedService</c>. <see cref="Proxy.ProxyWorker"/> drives its lifecycle
/// <para><see cref="AdminEndpointHost"/> is intentionally NOT registered via
/// <c>AddHostedService</c>. <see cref="Proxy.ProxyWorker"/> drives its lifecycle
/// directly so admin start/stop ordering matches the design contract (admin starts
/// after listeners are up; admin stops AFTER the in-flight drain).</para>
/// </summary>
@@ -61,10 +59,10 @@ internal static class HostingExtensions
}
/// <summary>
/// Configures Serilog from the <c>"Serilog"</c> configuration section,
/// with console and rolling-file sinks as defaults.
/// Configures Serilog from the <c>"Serilog"</c> configuration section, with console
/// and rolling-file sinks as defaults.
///
/// <para>Phase 08: when <paramref name="addEventLogBridge"/> is <c>true</c>, the
/// <para>When <paramref name="addEventLogBridge"/> is <c>true</c>, the
/// <see cref="Diagnostics.EventLogBridge"/> is added as a sub-sink for events at
/// <see cref="Serilog.Events.LogEventLevel.Error"/> and above. This flag should only be
/// set when the service is running as a Windows Service — the bridge silently ignores
+12 -14
View File
@@ -8,16 +8,15 @@
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>Mbproxy</RootNamespace>
<AssemblyName>Mbproxy</AssemblyName>
<!-- Phase 08: Assembly version. CI can override via /p:InformationalVersion=... -->
<!-- Assembly version. CI can override via /p:InformationalVersion=... -->
<InformationalVersion>1.0.0</InformationalVersion>
</PropertyGroup>
<!-- Phase 08: single-file self-contained publish (Release only; Debug stays normal for fast iteration).
NOTE: the resulting Mbproxy.exe is ~100 MB because the self-contained publish bundles the full
.NET 10 + ASP.NET Core runtime. This exceeds the original 50 MB target in the phase spec;
the runtime size is a fixed cost of self-contained deployment on .NET 10 with ASP.NET Core.
Operators who need a smaller footprint can use a framework-dependent publish
(dotnet publish -c Release -r win-x64 - -self-contained false /p:PublishSingleFile=true)
<!-- Single-file self-contained publish (Release only; Debug stays normal for fast iteration).
The resulting Mbproxy.exe is ~100 MB because the self-contained publish bundles the full
.NET 10 + ASP.NET Core runtime — fixed cost of self-contained deployment on .NET 10 with
ASP.NET Core. Operators who need a smaller footprint can use a framework-dependent publish
(dotnet publish -c Release -r win-x64 -p:SelfContained=false -p:PublishSingleFile=true)
if the target machine has .NET 10 installed. -->
<PropertyGroup Condition="'$(Configuration)' == 'Release'">
<PublishSingleFile>true</PublishSingleFile>
@@ -27,7 +26,7 @@
</PropertyGroup>
<ItemGroup>
<!-- ASP.NET Core for the Phase 07 Kestrel-hosted admin endpoint. -->
<!-- ASP.NET Core for the Kestrel-hosted admin endpoint. -->
<FrameworkReference Include="Microsoft.AspNetCore.App" />
</ItemGroup>
@@ -50,12 +49,11 @@
</ItemGroup>
<ItemGroup>
<!-- Phase 12 (W2.21) — link the install template as the published appsettings.json
so the binary ships with a fully-commented, usable example config (one PLC, one
BCD tag, all sections present) instead of an empty stub. The .NET configuration
loader supports JSONC (comments) under the default Host.CreateApplicationBuilder
path, so the comments in the template are valid at runtime.
A fresh `dotnet run` from src/Mbproxy is no longer a no-op service. -->
<!-- Link the install template as the published appsettings.json so the binary ships
with a fully-commented, usable example config (one PLC, one BCD tag, all sections
present) instead of an empty stub. The .NET configuration loader supports JSONC
(comments) under the default Host.CreateApplicationBuilder path, so the comments
in the template are valid at runtime. -->
<None Remove="appsettings.json" />
<Content Include="..\..\install\mbproxy.config.template.json"
Link="appsettings.json">
+4 -4
View File
@@ -6,10 +6,10 @@ public sealed class BcdTagOptions
public byte Width { get; init; } // 16 or 32
/// <summary>
/// Phase 11 — optional opt-in to the response cache. Null (default) means
/// "unset" and falls back to the per-PLC <see cref="PlcOptions.DefaultCacheTtlMs"/>;
/// 0 explicitly disables caching for this tag even when the PLC default is non-zero.
/// Positive values cap the staleness window in milliseconds.
/// Optional opt-in to the response cache. Null (default) means "unset" and falls
/// back to the per-PLC <see cref="PlcOptions.DefaultCacheTtlMs"/>; 0 explicitly
/// disables caching for this tag even when the PLC default is non-zero. Positive
/// values cap the staleness window in milliseconds.
/// </summary>
public int? CacheTtlMs { get; init; }
}
+10 -11
View File
@@ -11,16 +11,16 @@ public sealed class MbproxyOptions
public ResilienceOptions Resilience { get; init; } = new();
/// <summary>
/// Phase 11 — service-wide response-cache settings. The cache is opt-in
/// per-tag (<see cref="BcdTagOptions.CacheTtlMs"/>); this section configures the
/// safety knobs that gate / bound the cache.
/// Service-wide response-cache settings. The cache is opt-in per-tag
/// (<see cref="BcdTagOptions.CacheTtlMs"/>); this section configures the safety
/// knobs that gate / bound the cache.
/// </summary>
public CacheOptions Cache { get; init; } = new();
}
/// <summary>
/// Phase 11 — service-wide response-cache knobs. The cache is OFF by default for every
/// tag; this section governs the limits when an operator opts a tag in.
/// Service-wide response-cache knobs. The cache is OFF by default for every tag;
/// this section governs the limits when an operator opts a tag in.
/// </summary>
public sealed class CacheOptions
{
@@ -47,8 +47,8 @@ public sealed class CacheOptions
}
/// <summary>
/// Schema-level validation for <see cref="MbproxyOptions"/>.
/// Business-rule validation (duplicate addresses, port conflicts) is deferred to phase 06.
/// Schema-level validation for <see cref="MbproxyOptions"/>. Business-rule validation
/// (duplicate addresses, port conflicts) is delegated to <see cref="Configuration.ReloadValidator"/>.
/// </summary>
public sealed class MbproxyOptionsValidator : IValidateOptions<MbproxyOptions>
{
@@ -68,7 +68,7 @@ public sealed class MbproxyOptionsValidator : IValidateOptions<MbproxyOptions>
{
var plc = options.Plcs[i];
// Phase 11 — per-PLC default TTL bounds.
// Per-PLC default TTL bounds.
if (plc.DefaultCacheTtlMs < 0)
errors.Add($"Plcs[{i}] ({plc.Name}): DefaultCacheTtlMs must be >= 0.");
else if (plc.DefaultCacheTtlMs > 60_000 && !allowLongTtl)
@@ -94,9 +94,8 @@ public sealed class MbproxyOptionsValidator : IValidateOptions<MbproxyOptions>
if (options.Cache.EvictionIntervalMs < 0)
errors.Add($"Cache.EvictionIntervalMs must be >= 0; got {options.Cache.EvictionIntervalMs}.");
// Phase 12 (W2.18) — Connection timeouts must be strictly positive. A 0 or negative
// value produces a CancelAfter(0) that fires immediately and breaks every backend
// connect/request.
// Connection timeouts must be strictly positive. A 0 or negative value produces
// a CancelAfter(0) that fires immediately and breaks every backend connect/request.
if (options.Connection.BackendConnectTimeoutMs <= 0)
errors.Add(
$"Connection.BackendConnectTimeoutMs must be > 0; got {options.Connection.BackendConnectTimeoutMs}.");
+1 -1
View File
@@ -14,7 +14,7 @@ public sealed class PlcOptions
public PlcBcdOverrides? BcdTags { get; init; }
/// <summary>
/// Phase 11 — per-PLC default cache TTL applied to any tag whose explicit
/// Per-PLC default cache TTL applied to any tag whose explicit
/// <see cref="BcdTagOptions.CacheTtlMs"/> is unset (null). 0 (the default) means
/// "no caching by default at this PLC". Per-tag values always win over the per-PLC
/// default when set; an explicit zero on a tag still disables caching for that tag.
@@ -10,8 +10,8 @@ public sealed class ResilienceOptions
};
/// <summary>
/// Phase 10 — in-flight read coalescing options. Defaults to enabled with a 32-party
/// cap so unconfigured deployments get the de-duplication benefit immediately.
/// In-flight read coalescing options. Defaults to enabled with a 32-party cap so
/// unconfigured deployments get the de-duplication benefit immediately.
/// </summary>
public ReadCoalescingOptions ReadCoalescing { get; init; } = new();
}
@@ -29,10 +29,10 @@ public sealed class RecoveryProfile
}
/// <summary>
/// Phase 10 — knobs for the in-flight read-coalescing feature. The feature attaches
/// late-arriving FC03/FC04 reads of identical <c>(unitId, fc, start, qty)</c> tuples to an
/// already-in-flight peer, fanning out the single backend response to every attached
/// upstream client.
/// Knobs for the in-flight read-coalescing feature. The feature attaches late-arriving
/// FC03/FC04 reads of identical <c>(unitId, fc, start, qty)</c> tuples to an already-
/// in-flight peer, fanning out the single backend response to every attached upstream
/// client.
///
/// <para>Zero post-response staleness — coalescing operates entirely within the in-flight
/// window (microseconds to ~10 ms typical). Once the response is delivered, the coalescing
@@ -41,10 +41,10 @@ public sealed class RecoveryProfile
public sealed class ReadCoalescingOptions
{
/// <summary>
/// Master switch. When <c>false</c>, every FC03/FC04 request takes the Phase-9 path
/// (allocate a fresh proxy TxId and round-trip to the backend). Hot-reloadable via
/// <c>IOptionsMonitor</c>; flipping to <c>false</c> at runtime does not disturb already-
/// coalesced entries — they drain naturally.
/// Master switch. When <c>false</c>, every FC03/FC04 request allocates a fresh
/// proxy TxId and round-trips to the backend without attempting to coalesce.
/// Hot-reloadable via <c>IOptionsMonitor</c>; flipping to <c>false</c> at runtime
/// does not disturb already-coalesced entries — they drain naturally.
/// </summary>
public bool Enabled { get; init; } = true;
+5 -5
View File
@@ -7,14 +7,14 @@ var builder = Host.CreateApplicationBuilder(args);
// Windows Service support; no-op when running under dotnet run / console.
builder.Services.AddWindowsService();
// Phase 08: wire EventLogBridge only when actually running as a Windows Service.
// Wire EventLogBridge only when actually running as a Windows Service.
bool isWindowsService = WindowsServiceHelpers.IsWindowsService();
// Wire up structured config, Serilog, and typed options.
builder.AddMbproxySerilog(addEventLogBridge: isWindowsService);
builder.AddMbproxyOptions();
// PDU pipeline: BcdPduPipeline is stateless (Phase 9: per-call correlation flows through
// PDU pipeline: BcdPduPipeline is stateless (per-call correlation flows through
// PerPlcContext.CurrentRequest set by the multiplexer); registering as singleton is fine
// and avoids repeated construction.
builder.Services.AddSingleton<IPduPipeline, BcdPduPipeline>();
@@ -25,9 +25,9 @@ builder.Services.AddSingleton<IPduPipeline, BcdPduPipeline>();
builder.Services.AddSingleton<ProxyWorker>();
builder.Services.AddHostedService(sp => sp.GetRequiredService<ProxyWorker>());
// Phase 07: admin endpoint (Kestrel read-only status page).
// Phase 12 (W1.5): no longer registered as IHostedService; ProxyWorker drives its
// lifecycle so admin starts after listeners and stops AFTER the in-flight drain.
// Admin endpoint (Kestrel read-only status page). Not registered as IHostedService —
// ProxyWorker drives its lifecycle so admin starts after listeners and stops AFTER the
// in-flight drain.
builder.AddMbproxyAdmin();
await builder.Build().RunAsync();
+21 -23
View File
@@ -4,7 +4,7 @@ namespace Mbproxy.Proxy;
/// <summary>
/// BCD-rewriting PDU pipeline. Registered as the singleton <see cref="IPduPipeline"/>
/// in production (replaces <see cref="NoopPduPipeline"/> from Phase 03).
/// in production.
///
/// FC scope (per design.md):
/// FC03 / FC04 response — decode covered BCD slots from raw nibbles → binary integer.
@@ -15,13 +15,13 @@ namespace Mbproxy.Proxy;
/// MBAP transparency contract: the MBAP length field is NEVER modified. Re-encoded slots
/// are the same byte width as the originals (ushort → ushort), so the PDU length is stable.
///
/// <para><b>Phase 9 — request correlation:</b> FC03/FC04 responses do not carry the
/// original start address. The multiplexer builds an <see cref="Multiplexing.InFlightRequest"/>
/// <para><b>Request correlation:</b> FC03/FC04 responses do not carry the original
/// start address. The multiplexer builds an <see cref="Multiplexing.InFlightRequest"/>
/// on the request path, stores it in its <see cref="Multiplexing.CorrelationMap"/>, and
/// attaches it to the per-call <see cref="PerPlcContext.CurrentRequest"/> on the response
/// path. The rewriter consumes <c>CurrentRequest</c> instead of a per-pair last-request
/// slot, so concurrent responses from different upstream clients each decode against
/// their own request range without cross-talk.</para>
/// attaches it to the per-call <see cref="PerPlcContext.CurrentRequest"/> on the
/// response path. The rewriter consumes <c>CurrentRequest</c>, so concurrent responses
/// from different upstream clients each decode against their own request range without
/// cross-talk.</para>
///
/// <para>This class is stateless. All per-call state arrives via <see cref="PduContext"/>
/// (specifically <see cref="PerPlcContext.CurrentRequest"/> on response). It is safe to
@@ -157,12 +157,12 @@ internal sealed class BcdPduPipeline : IPduPipeline
ushort startAddress = (ushort)((pdu[1] << 8) | pdu[2]);
ushort qty = (ushort)((pdu[3] << 8) | pdu[4]);
// Phase 12 (W2.14) — validate the request is fully sized for `qty` registers
// (each 2 bytes after the byteCount byte). A client claiming qty=10 with only
// 4 bytes of register data would otherwise have its BCD slots silently skipped
// by the per-slot bounds check below — half the request rewritten, half not.
// Returning here passes the malformed PDU through unchanged so the PLC's own
// validator surfaces the protocol error.
// Validate the request is fully sized for `qty` registers (each 2 bytes after
// the byteCount byte). A client claiming qty=10 with only 4 bytes of register
// data would otherwise have its BCD slots silently skipped by the per-slot
// bounds check below — half the request rewritten, half not. Returning here
// passes the malformed PDU through unchanged so the PLC's own validator
// surfaces the protocol error.
if (pdu.Length < 6 + qty * 2)
return;
@@ -210,14 +210,14 @@ internal sealed class BcdPduPipeline : IPduPipeline
ushort clientLow = (ushort)((pdu[lowByteOff] << 8) | pdu[lowByteOff + 1]);
ushort clientHigh = (ushort)((pdu[highByteOff] << 8) | pdu[highByteOff + 1]);
// Phase 12 (W2.13) — validate that BOTH input words are within the
// base-10000-digit range BEFORE reconstructing. Without this guard, a
// client writing (high=9999, low=9999) silently mutates to (high=9998,
// low=9999) because `9999 * 10_000 + 9999 = 99_989_999` is still <= the
// 32-bit BCD ceiling, so Encode32 accepts it and rewrites — losing 1 from
// the high word. The unconventional wire format ("two base-10000 CDAB
// digits", per design.md:123) means each word independently must be
// 0..9999 to round-trip cleanly.
// Validate that BOTH input words are within the base-10000-digit range
// BEFORE reconstructing. Without this guard, a client writing
// (high=9999, low=9999) silently mutates to (high=9998, low=9999)
// because `9999 * 10_000 + 9999 = 99_989_999` is still <= the 32-bit
// BCD ceiling, so Encode32 accepts it and rewrites — losing 1 from the
// high word. The unconventional wire format ("two base-10000 CDAB
// digits", per design.md) means each word independently must be 0..9999
// to round-trip cleanly.
if (clientLow > 9999 || clientHigh > 9999)
{
RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address,
@@ -473,6 +473,4 @@ internal sealed class BcdPduPipeline : IPduPipeline
// already counted this slot on the way out. Incrementing again would double-count.
}
// Phase 12 (W3 cleanup) — HasBadNibble was previously duplicated here; the canonical
// implementation now lives in BcdCodec.HasBadNibble (internal).
}
+4 -4
View File
@@ -4,10 +4,10 @@ namespace Mbproxy.Proxy.Cache;
/// <summary>
/// Hash key for the per-PLC <see cref="ResponseCache"/>. Structurally identical to
/// Phase 10's <see cref="CoalescingKey"/> — both keys discriminate the same dimensions
/// (UnitId, FunctionCode, StartAddress, Quantity), but the two type aliases live in
/// different namespaces so the two phases can evolve independently without one shaping
/// the other's API surface.
/// the read-coalescing <see cref="CoalescingKey"/> — both keys discriminate the same
/// dimensions (UnitId, FunctionCode, StartAddress, Quantity), but the two type aliases
/// live in different namespaces so the cache and the coalescer can evolve independently
/// without one shaping the other's API surface.
///
/// <para><b>Equality semantics:</b> record-struct value equality. FC03 and FC04 produce
/// different keys for the same address (different Modbus tables); different
@@ -1,8 +1,8 @@
namespace Mbproxy.Proxy.Cache;
/// <summary>
/// Source-generated <see cref="LoggerMessage"/> definitions for the Phase-11 response
/// cache. Event names are stable — do not rename without updating <c>docs/design.md</c>'s
/// Source-generated <see cref="LoggerMessage"/> definitions for the response cache.
/// Event names are stable — do not rename without updating <c>docs/design.md</c>'s
/// Logging event-name table.
///
/// <para>Levels are conservative — a busy PLC under steady cache pressure would emit one
@@ -1,7 +1,7 @@
namespace Mbproxy.Proxy.Cache;
/// <summary>
/// Per-PLC opt-in response cache for FC03 / FC04 read responses. Phase 11.
/// Per-PLC opt-in response cache for FC03 / FC04 read responses.
///
/// <para><b>Lifecycle.</b> One instance per PLC, owned by the per-PLC context. The cache
/// is consulted on every FC03/FC04 request before coalescing; populated by the backend
+7 -9
View File
@@ -13,17 +13,15 @@ public enum MbapDirection
}
/// <summary>
/// Per-pair context carried through each PDU pipeline call.
/// Phase 03: carries only <see cref="PlcName"/>.
/// Phase 04 extends this via <see cref="PerPlcContext"/>, which carries the BcdTagMap,
/// counters, and logger. Phase 09 added the per-call <c>CurrentRequest</c> slot to
/// <see cref="PerPlcContext"/> for multiplexer-aware response correlation.
/// Per-pair context carried through each PDU pipeline call. Carries only
/// <see cref="PlcName"/> at the base level; <see cref="PerPlcContext"/> extends it with
/// the BcdTagMap, counters, logger, and per-call <c>CurrentRequest</c> slot for
/// multiplexer-aware response correlation.
/// </summary>
public class PduContext
{
/// <summary>The configured PLC name (from <c>MbproxyOptions.Plcs[i].Name</c>).</summary>
public string PlcName { get; init; } = "";
// Phase 04 adds: BcdTagMap, counters, logger
}
/// <summary>
@@ -31,8 +29,8 @@ public class PduContext
/// Called once per frame in each direction (request and response).
///
/// Implementations must be safe to call concurrently from multiple connection pairs.
/// In Phase 03 the only implementation is <see cref="NoopPduPipeline"/> (pass-through).
/// Phase 04 replaces it with a BCD rewriter registered via DI.
/// Production wires <see cref="BcdPduPipeline"/>; <see cref="NoopPduPipeline"/> is a
/// pass-through fallback used by tests.
/// </summary>
public interface IPduPipeline
{
@@ -42,6 +40,6 @@ public interface IPduPipeline
/// <param name="direction">Whether this is a request or a response frame.</param>
/// <param name="mbapHeader">The 7-byte MBAP header (read-only; includes TxId, UnitId, FC is in pdu[0]).</param>
/// <param name="pdu">The PDU bytes starting at the function code. May be mutated in place.</param>
/// <param name="context">Per-pair context (PLC name; extended in phase 04).</param>
/// <param name="context">Per-pair context (PLC name; extended via <see cref="PerPlcContext"/>).</param>
void Process(MbapDirection direction, ReadOnlySpan<byte> mbapHeader, Span<byte> pdu, PduContext context);
}
@@ -1,14 +1,14 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Source-generated <see cref="LoggerMessage"/> definitions for the Phase-10 read-coalescing
/// Source-generated <see cref="LoggerMessage"/> definitions for the read-coalescing
/// feature. Event names are stable — do not rename without updating docs/design.md's
/// "Logging" event-name table.
///
/// <para>Levels are intentionally conservative — coalescing fires on every overlapping
/// read in a busy fleet (HMIs/historians polling the same screen tags), so the steady-state
/// log volume would be deafening at Information. The counters surface the same data at
/// far lower cost.</para>
/// read in a busy fleet (HMIs/historians polling the same screen tags), so the
/// steady-state log volume would be deafening at Information. The counters surface the
/// same data at far lower cost.</para>
/// </summary>
internal static partial class CoalescingLogEvents
{
@@ -8,9 +8,9 @@ namespace Mbproxy.Proxy.Multiplexing;
/// when the matching response arrives.
///
/// <para>Backed by <see cref="ConcurrentDictionary{TKey, TValue}"/>. The single-writer /
/// single-remover pattern in Phase 9 does not strictly require it — but cascade-on-
/// disconnect walks the map from a separate task and Phase 10 adds upstream-side
/// cancellation paths, so the safer primitive is worth the negligible cost.</para>
/// single-remover pattern does not strictly require it — but cascade-on-disconnect walks
/// the map from a separate task and the coalescing path adds upstream-side cancellation
/// paths, so the safer primitive is worth the negligible cost.</para>
/// </summary>
internal sealed class CorrelationMap
{
@@ -1,16 +1,16 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Per-PLC "in-flight by key" map that powers <b>Phase 10 read coalescing</b>. Holds the
/// currently-in-flight FC03/FC04 requests keyed by their <see cref="CoalescingKey"/> so a
/// Per-PLC "in-flight by key" map that powers read coalescing. Holds the currently-
/// in-flight FC03/FC04 requests keyed by their <see cref="CoalescingKey"/> so a
/// late-arriving request with an identical key can attach to the existing in-flight entry
/// instead of opening a second backend round-trip.
///
/// <para><b>Concurrency model.</b> A single <see cref="object"/> lock serialises every
/// state-touching method. The simpler-lock-over-CAS choice is deliberate (per the phase
/// doc) — the map is per-PLC and the wire rate per PLC is bounded by the ECOM's internal
/// scan cadence (~210 ms per request). The lock-free <c>AddOrUpdate</c> alternative is not
/// worth the read-and-prove-it-correct burden.</para>
/// state-touching method. The simpler-lock-over-CAS choice is deliberate the map is
/// per-PLC and the wire rate per PLC is bounded by the ECOM's internal scan cadence
/// (~210 ms per request). The lock-free <c>AddOrUpdate</c> alternative is not worth the
/// read-and-prove-it-correct burden.</para>
///
/// <para><b>Mutable-list seam.</b> Each entry stores a <see cref="List{InterestedParty}"/>
/// that is also exposed through the parent <see cref="InFlightRequest.InterestedParties"/>
@@ -55,10 +55,6 @@ internal sealed class InFlightByKeyMap
/// already has <paramref name="maxParties"/> attached parties, the next arrival opens
/// a fresh entry (and a fresh backend round-trip). This bounds the response-fanout
/// cost per entry at O(maxParties).</para>
///
/// <para>Phase 12 (W3 cleanup) — was previously declared as <c>bool TryAttachOrCreate</c>
/// but always returned <c>true</c>. The bool was dead; the result is in the
/// <paramref name="wasNew"/> out parameter.</para>
/// </summary>
public void AttachOrCreate(
CoalescingKey key,
@@ -6,10 +6,9 @@ namespace Mbproxy.Proxy.Multiplexing;
/// multiplexer must rewrite the response's MBAP TxId back to <see cref="OriginalTxId"/>
/// before handing the frame to the pipe, so each upstream sees the proxy as transparent.
///
/// <para><b>Phase 9 invariant:</b> exactly one <see cref="InterestedParty"/> per
/// <see cref="InFlightRequest"/>. <b>Phase 10 (read coalescing)</b> reuses this exact
/// shape to fan-out a single backend response to multiple upstream parties. Do not
/// collapse this into a single field on <see cref="InFlightRequest"/>.</para>
/// <para>Read coalescing fans out a single backend response to multiple upstream parties
/// via this record. Do not collapse this into a single field on
/// <see cref="InFlightRequest"/>.</para>
/// </summary>
internal sealed record InterestedParty(UpstreamPipe Pipe, ushort OriginalTxId);
@@ -22,15 +21,12 @@ internal sealed record InterestedParty(UpstreamPipe Pipe, ushort OriginalTxId);
/// <item><description>Provide the BCD rewriter with the originating request's
/// <c>StartAddress</c> / <c>Qty</c> for FC03/FC04 response decoding — the response
/// PDU itself does not carry the start address.</description></item>
/// <item><description>Measure backend round-trip time via <see cref="SentAtUtc"/>
/// (replaces the per-pair stopwatch slot from the 1:1 model).</description></item>
/// <item><description>Measure backend round-trip time via <see cref="SentAtUtc"/>.</description></item>
/// </list>
///
/// <para><b>Phase 9:</b> <see cref="InterestedParties"/> always has exactly one element.
/// The list shape is the load-bearing seam that <b>Phase 10 — read coalescing</b> hooks
/// into to fan out a single PLC response to multiple upstream clients without further
/// refactor of the multiplexer's data model. Reviewer note: do <i>not</i> simplify back
/// to a single <c>UpstreamPipe</c> field.</para>
/// <para>The <see cref="InterestedParties"/> list shape is the load-bearing seam that
/// read coalescing uses to fan out a single PLC response to multiple upstream clients.
/// Reviewer note: do <i>not</i> simplify back to a single <c>UpstreamPipe</c> field.</para>
/// </summary>
internal sealed record InFlightRequest(
byte UnitId,
@@ -8,9 +8,7 @@ namespace Mbproxy.Proxy.Multiplexing;
internal static partial class MultiplexerLogEvents
{
/// <summary>
/// Emitted once per upstream client accept. Replaces the per-pair
/// <c>mbproxy.client.connected</c> event from the 1:1 model (same event name,
/// same property shape — operators' log queries are unchanged).
/// Emitted once per upstream client accept.
/// </summary>
[LoggerMessage(
EventId = 110,
@@ -84,9 +82,7 @@ internal static partial class MultiplexerLogEvents
string remoteEp);
/// <summary>
/// Emitted when the backend connect Polly pipeline fails. Mirrors the existing
/// <c>mbproxy.backend.failed</c> event from the 1:1 model so operators' alerts keep
/// working unchanged after Phase 9.
/// Emitted when the backend connect Polly pipeline fails.
/// </summary>
[LoggerMessage(
EventId = 115,
@@ -48,14 +48,14 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
private readonly ConnectionOptions _connectionOptions;
private readonly IPduPipeline _pipeline;
// Phase 12 (W1.1) — `_ctx` is volatile so a hot-reload reseat can swap it on the running
// `_ctx` is volatile so a hot-reload reseat can swap it on the running
// multiplexer. Each method that uses the context snapshots it into a local at the start
// of the operation so a single PDU sees a consistent (TagMap, Cache) pair even if the
// swap fires mid-PDU. ReplaceContext is the single mutator.
private volatile PerPlcContext _ctx;
private readonly ILogger<PlcMultiplexer> _logger;
private readonly ResiliencePipeline? _backendConnectPipeline;
// Phase 10: live read-coalescing config accessor. The accessor is read per-PDU on the
// Live read-coalescing config accessor. The accessor is read per-PDU on the
// request path so a hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled`
// propagates immediately. Production wires this to
// `() => optionsMonitor.CurrentValue.Resilience.ReadCoalescing`. Tests default to a
@@ -74,8 +74,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
SingleWriter = false,
});
// Attached pipes — Phase 9 needs the list for the status page; Phase 10 will need it for
// coalescing (fan-out). ConcurrentDictionary keyed on UpstreamPipe.Id for O(1) detach.
// Attached pipes — used by the status page and by coalescing fan-out.
// ConcurrentDictionary keyed on UpstreamPipe.Id for O(1) detach.
private readonly ConcurrentDictionary<Guid, UpstreamPipe> _pipes = new();
// Lifecycle plumbing. Backend tasks share a CTS; cascading disconnect cancels it,
@@ -88,10 +88,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
private Task? _backendReaderTask;
private readonly CancellationTokenSource _disposeCts = new();
// Phase 12 (W2.2) — volatile so the disposing thread's write is observed by every
// hot-path reader (OnUpstreamFrameAsync, ReplaceContext, Attach, etc.) without a
// separate fence. On x86/x64 plain reads happen to give acquire-release semantics, so
// this is defense for ARM hosts and future portability.
// Volatile so the disposing thread's write is observed by every hot-path reader
// (OnUpstreamFrameAsync, ReplaceContext, Attach, etc.) without a separate fence.
// On x86/x64 plain reads happen to give acquire-release semantics, so this is
// defense for ARM hosts and future portability.
private volatile bool _disposed;
private Task? _watchdogTask;
@@ -112,8 +112,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
_backendConnectPipeline = backendConnectPipeline;
_coalescingOptions = coalescingOptions ?? (static () => new ReadCoalescingOptions());
// Phase 11 — register the per-PLC cache as the live stats source for the snapshot
// path. Cache may be null when the per-PLC context has not been wired with one
// Register the per-PLC cache as the live stats source for the snapshot path.
// Cache may be null when the per-PLC context has not been wired with one
// (every tag uncached, or unit tests).
if (_ctx.Cache is not null)
_ctx.Counters.SetCacheStatsProvider(new CacheStatsAdapter(_ctx.Cache));
@@ -155,8 +155,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
/// <summary>
/// Phase 12 (W1.1) — atomically swaps the per-PLC context on a running multiplexer.
/// Called by <see cref="Supervision.PlcListenerSupervisor.ReplaceContextAsync"/> when a
/// Atomically swaps the per-PLC context on a running multiplexer. Called by
/// <see cref="Supervision.PlcListenerSupervisor.ReplaceContextAsync"/> when a
/// hot-reload tag-list change is applied to a PLC whose listener is already bound.
///
/// <para>The new context's tag map and (optional) response cache become visible on the
@@ -174,10 +174,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
{
if (_disposed) return;
// Phase 12 (W4 / NM2) — provider FIRST, then _ctx. The status page's snapshot
// path reads `_cacheStatsProvider` independently of `_ctx`. If we swapped `_ctx`
// first, a snapshot taken in the gap between the two writes would still hold the
// OLD adapter wrapping the OLD cache — which the supervisor is about to dispose
// Provider FIRST, then _ctx. The status page's snapshot path reads
// `_cacheStatsProvider` independently of `_ctx`. If we swapped `_ctx` first, a
// snapshot taken in the gap between the two writes would still hold the OLD
// adapter wrapping the OLD cache — which the supervisor is about to dispose
// (`PlcListenerSupervisor.ReplaceContextAsync` runs `oldCache.Dispose()` after we
// return). Setting the provider first means snapshots in the swap window read
// either (old provider, old ctx) or (new provider, new ctx) — both coherent —
@@ -254,9 +254,9 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
_pipes.Clear();
// Phase 12 (W2.5, W2.6) — guard the CTS dispose against a watchdog tick that
// raced past the WaitAsync above (e.g. a slow Task.Delay completion observing
// cancellation late). Also dispose the connect-gate semaphore.
// Guard the CTS dispose against a watchdog tick that raced past the WaitAsync
// above (e.g. a slow Task.Delay completion observing cancellation late). Also
// dispose the connect-gate semaphore.
try { _disposeCts.Dispose(); } catch (ObjectDisposedException) { /* already disposed */ }
try { _connectGate.Dispose(); } catch (ObjectDisposedException) { /* already disposed */ }
}
@@ -336,30 +336,28 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
private async Task TearDownBackendAsync(string reason, bool cascadeUpstreams)
{
// Phase 12 (W1.4) — serialise tear-down vs connect-up via the connect gate. Without
// this, a fresh EnsureBackendConnectedAsync racing with the channel drain below
// could see stranded frames sent on its new socket with old (already-released) TxIds,
// Serialise tear-down vs connect-up via the connect gate. Without this, a fresh
// EnsureBackendConnectedAsync racing with the channel drain below could see
// stranded frames sent on its new socket with old (already-released) TxIds,
// producing orphaned responses that hang upstream peers via the watchdog.
//
// Phase 12 (W4 / NM1) — bound the wait. Without a timeout, a long Polly-wrapped
// EnsureBackendConnectedAsync against an unreachable host can hold the gate for
// the full BackendConnectTimeoutMs * MaxAttempts window, blocking DisposeAsync (and
// therefore ProxyWorker.StopAsync) for that duration. A 2 s teardown deadline
// bounds disposal latency; if the gate is unavailable we proceed best-effort
// without it (the worst-case consequence is one orphaned in-flight cycle on the
// dying backend, which the upstream watchdog will surface as exception 0x0B).
// Bounded wait: a long Polly-wrapped EnsureBackendConnectedAsync against an
// unreachable host can hold the gate for the full BackendConnectTimeoutMs *
// MaxAttempts window, blocking DisposeAsync (and therefore ProxyWorker.StopAsync)
// for that duration. A 2 s teardown deadline bounds disposal latency; if the gate
// is unavailable we proceed best-effort without it (the worst-case consequence is
// one orphaned in-flight cycle on the dying backend, which the upstream watchdog
// will surface as exception 0x0B).
//
// Phase 12 (W5 / m1) — KNOWN RACE on the gate-not-held path: a concurrent
// EnsureBackendConnectedAsync that DOES hold the gate may TryAllocate a TxId
// that collides (after wraparound in the allocator's forward scan) with a TxId
// we're about to release from the channel-drain step below. The double-release
// would mark the new request's slot as free even though it's legitimately
// in-flight, allowing the next allocation to reuse the same slot and
// CorrelationMap.TryAdd to fail (silent request drop). Probability is very low
// (requires gate timeout + new accept landing during cascade + TxId collision in
// a 65,536-slot space); the only consequence is one dropped request that the
// client retries. Documented as accepted best-effort behaviour in
// codereviews/2026-05-14/ReReviewAfterRemediation.md (m1).
// KNOWN RACE on the gate-not-held path: a concurrent EnsureBackendConnectedAsync
// that DOES hold the gate may TryAllocate a TxId that collides (after wraparound
// in the allocator's forward scan) with a TxId we're about to release from the
// channel-drain step below. The double-release would mark the new request's slot
// as free even though it's legitimately in-flight, allowing the next allocation
// to reuse the same slot and CorrelationMap.TryAdd to fail (silent request drop).
// Probability is very low (requires gate timeout + new accept landing during
// cascade + TxId collision in a 65,536-slot space); the only consequence is one
// dropped request that the client retries. Accepted as best-effort behaviour.
bool gateHeld = false;
try
{
@@ -412,8 +410,8 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
_allocator.Release(kvp.Key);
}
// Phase 10 — also drain the in-flight-by-key map so a brand-new identical request
// through the freshly-reconnected backend is treated as a miss (no stale entries
// Also drain the in-flight-by-key map so a brand-new identical request through
// the freshly-reconnected backend is treated as a miss (no stale entries
// outlive the backend they were destined for).
_inFlightByKey.DrainAll();
@@ -437,11 +435,11 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
_ctx.Counters.AddDisconnectCascades(upstreamCount);
}
// Phase 12 (W1.4) — drain any stranded frames left in the outbound channel by
// the writer task that just faulted/cancelled. Released their proxy TxIds back
// to the allocator. By the time we reach this line the writer has stopped
// reading from the channel (cancelled CTS) and the upstream pipes have been
// cascaded (no more enqueues), so the channel state is stable.
// Drain any stranded frames left in the outbound channel by the writer task
// that just faulted/cancelled. Release their proxy TxIds back to the
// allocator. By the time we reach this line the writer has stopped reading
// from the channel (cancelled CTS) and the upstream pipes have been cascaded
// (no more enqueues), so the channel state is stable.
int strandedDropped = 0;
while (_outboundChannel.Reader.TryRead(out byte[]? stranded))
{
@@ -464,7 +462,7 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
finally
{
// Only release if we acquired (W4 / NM1) — best-effort path may have skipped.
// Only release if we acquired — best-effort path may have skipped.
if (gateHeld)
{
try { _connectGate.Release(); }
@@ -499,10 +497,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
catch (Exception ex)
{
// Backend failure — cascade. Phase 12 (W4 / NM5) — skip if disposal is
// already in progress; DisposeAsync runs an explicit TearDown and the
// fire-and-forget here would race against it, hitting a disposed
// _connectGate and producing an unobserved-task exception.
// Backend failure — cascade. Skip if disposal is already in progress;
// DisposeAsync runs an explicit TearDown and the fire-and-forget here would
// race against it, hitting a disposed _connectGate and producing an
// unobserved-task exception.
if (!_disposeCts.IsCancellationRequested)
_ = TearDownBackendAsync($"writer fault: {ex.Message}", cascadeUpstreams: true);
}
@@ -554,10 +552,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
// Free the allocator slot immediately so it can be reused.
_allocator.Release(proxyTxId);
// Phase 10 — for FC03/FC04 reads, also clear the coalescing-by-key entry so
// a brand-new identical request issued AFTER this response is treated as a
// miss (opens a fresh round-trip). The TryRemove is best-effort: a watchdog
// timeout or cascade may have already removed it.
// For FC03/FC04 reads, also clear the coalescing-by-key entry so a
// brand-new identical request issued AFTER this response is treated as a
// miss (opens a fresh round-trip). The TryRemove is best-effort: a
// watchdog timeout or cascade may have already removed it.
if (inFlight.Fc is 0x03 or 0x04)
{
var coalKey = new CoalescingKey(inFlight.UnitId, inFlight.Fc,
@@ -580,16 +578,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
frame.AsSpan(MbapFrame.HeaderSize, pduBodyLen),
responseCtx);
// Phase 11 — post-rewriter cache update:
// Post-rewriter cache update:
// * FC03/FC04 successful responses are stored when the request was
// cache-eligible (resolvedTtlMs > 0).
// * FC06/FC16 successful responses invalidate every cached entry whose
// address range overlaps the write.
//
// Phase 12 (W2.7) — exception bit comes from the post-rewriter buffer
// (the rewriter never touches the FC byte today, but reading from
// inFlight.Fc would lose the exception bit). The base FC for routing
// decisions uses inFlight.Fc — the request side knows what was sent.
// Exception bit comes from the post-rewriter buffer (the rewriter never
// touches the FC byte today, but reading from inFlight.Fc would lose the
// exception bit). The base FC for routing decisions uses inFlight.Fc —
// the request side knows what was sent.
if (_ctx.Cache is { } postCache)
{
byte fcInResponse = frame[MbapFrame.HeaderSize];
@@ -623,16 +621,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
else if (inFlight.Fc is 0x06 or 0x10)
{
// Phase 12 (W2.9) — the design contract "invalidations during a
// recovering listener state are skipped" (design.md:203) is
// upheld IMPLICITLY here: invalidation only fires inside the
// backend reader task when a non-exception FC06/FC16 response
// arrives. A `Recovering` listener has no backend reader (the
// multiplexer is torn down between recovery attempts), so no
// response can land here, so no invalidation. The gating is
// structural, not conditional. If a future change ever produces
// a write response off the live backend, an explicit recovering-
// state check would need to be added.
// The design contract "invalidations during a recovering
// listener state are skipped" is upheld IMPLICITLY here:
// invalidation only fires inside the backend reader task when
// a non-exception FC06/FC16 response arrives. A `Recovering`
// listener has no backend reader (the multiplexer is torn
// down between recovery attempts), so no response can land
// here, so no invalidation. The gating is structural, not
// conditional. If a future change ever produces a write
// response off the live backend, an explicit recovering-state
// check would need to be added.
int invalidated = postCache.Invalidate(
inFlight.UnitId, inFlight.StartAddress, inFlight.Qty);
if (invalidated > 0)
@@ -647,23 +645,23 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
// Fan out to each interested party with their original TxId restored.
// Phase 9: always exactly one party. Phase 10: N parties (read coalescing).
// Note: the InFlightByKey TryRemove above (for FC03/FC04) guarantees no
// Without coalescing there is exactly one party; with coalescing there
// are N. The InFlightByKey TryRemove above (for FC03/FC04) guarantees no
// further attaches can occur — the parties list is now a stable snapshot.
//
// Phase 12 (W1.3) — non-blocking fan-out via `TrySendResponse`. The
// single backend reader task must NEVER `await` a per-upstream channel
// write: a wedged upstream (full bounded response channel) would otherwise
// stall the reader and starve every other client on this PLC. A drop here
// is recorded via `responseDropForFullUpstream`; the wedged upstream loses
// its own response and will be reaped by its own socket-close path.
// Non-blocking fan-out via `TrySendResponse`. The single backend reader
// task must NEVER `await` a per-upstream channel write: a wedged upstream
// (full bounded response channel) would otherwise stall the reader and
// starve every other client on this PLC. A drop here is recorded via
// `responseDropForFullUpstream`; the wedged upstream loses its own
// response and will be reaped by its own socket-close path.
foreach (var party in inFlight.InterestedParties)
{
if (!party.Pipe.IsAlive)
{
// Phase 10 — record the dead-upstream skip only for FC03/FC04 (the
// only function codes that take the coalescing path). For non-
// coalescing FCs this branch is silent — the Phase-9 behaviour.
// Record the dead-upstream skip only for FC03/FC04 (the only
// function codes that take the coalescing path). For
// non-coalescing FCs this branch is silent.
if (inFlight.Fc is 0x03 or 0x04
&& inFlight.InterestedParties.Count > 1)
{
@@ -675,10 +673,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
continue;
}
// The frame buffer is private to this iteration; if there are multiple
// parties (Phase 10), each gets its own copy with its own original TxId
// patched in. Phase 9 always has Count == 1, so the single-buffer path
// is the common case; we copy to keep Phase-10 forward compatibility.
// The frame buffer is private to this iteration; if there are
// multiple coalesced parties, each gets its own copy with its own
// original TxId patched in. The single-party case reuses the buffer
// directly as the common-case fast path.
byte[] outFrame = inFlight.InterestedParties.Count == 1
? frame
: (byte[])frame.Clone();
@@ -692,17 +690,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
else
{
// Phase 12 (W6) — count outbound bytes per delivered party.
// With coalescing, one backend response fans out to N parties and
// produces N × frame.Length bytes leaving the proxy upstream-side.
// Count outbound bytes per delivered party. With coalescing, one
// backend response fans out to N parties and produces
// N × frame.Length bytes leaving the proxy upstream-side.
_ctx.Counters.AddBytes(up: 0, down: outFrame.Length);
}
}
}
// Reader exited cleanly — backend closed by remote. Cascade.
// Phase 12 (W4 / NM5) — skip if dispose is already in progress (see writer-side
// comment above for rationale).
// Reader exited cleanly — backend closed by remote. Cascade. Skip if
// dispose is already in progress (see writer-side comment above).
if (!_disposeCts.IsCancellationRequested)
_ = TearDownBackendAsync("backend reader EOF", cascadeUpstreams: true);
}
@@ -730,16 +727,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
out ushort originalTxId, out _, out _, out byte unitId))
return;
// Phase 12 (W6) — count inbound bytes from the upstream client. Surfaces in
// bytes.upstreamIn on the status page. Counted ONCE per parsed frame regardless
// of subsequent routing (cache hit, coalesce, backend round-trip, exception).
// Count inbound bytes from the upstream client. Surfaces in bytes.upstreamIn on
// the status page. Counted ONCE per parsed frame regardless of subsequent
// routing (cache hit, coalesce, backend round-trip, exception).
_ctx.Counters.AddBytes(up: frame.Length, down: 0);
// Parse the PDU FC + start/qty. FC03/FC04 reads use start/qty for the coalescing key
// and (Phase 11) for the cache lookup. FC06 writes carry [addr][value]; we treat qty
// as 1 for invalidation. FC16 carries [start][qty][byteCount]...; qty is the write
// span used for cache invalidation. Phase 11: FC06/FC16 start/qty drive cache
// invalidation by overlap rather than exact key.
// Parse the PDU FC + start/qty. FC03/FC04 reads use start/qty for the coalescing
// key and for the cache lookup. FC06 writes carry [addr][value]; we treat qty as
// 1 for invalidation. FC16 carries [start][qty][byteCount]...; qty is the write
// span used for cache invalidation. FC06/FC16 start/qty drive cache invalidation
// by overlap rather than exact key.
int pduOffset = MbapFrame.HeaderSize;
byte fcByte = frame.Length > pduOffset ? frame[pduOffset] : (byte)0;
ushort startAddr = 0;
@@ -763,12 +760,12 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
}
// Phase 11 — response-cache path. Cache check happens BEFORE coalescing AND before
// we attempt to bring up the backend connection. A hit short-circuits everything,
// including the EnsureBackendConnectedAsync call — operators with all reads cached
// and the backend down still get served (the cache survives backend disconnects per
// the design contract). The cache only fires for FC03/FC04 and only when the read
// range's resolved TTL > 0.
// Response-cache path. Cache check happens BEFORE coalescing AND before we
// attempt to bring up the backend connection. A hit short-circuits everything,
// including the EnsureBackendConnectedAsync call — operators with all reads
// cached and the backend down still get served (the cache survives backend
// disconnects per the design contract). The cache only fires for FC03/FC04 and
// only when the read range's resolved TTL > 0.
int resolvedCacheTtlMs = 0;
if (fcByte is 0x03 or 0x04 && _ctx.Cache is { } responseCache)
{
@@ -783,7 +780,7 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
byte[] hitFrame = BuildCacheHitFrame(originalTxId, unitId, cached.PduBytes);
await pipe.SendResponseAsync(hitFrame, ct).ConfigureAwait(false);
// Phase 12 (W6) — outbound bytes for cache-hit response.
// Outbound bytes for cache-hit response.
_ctx.Counters.AddBytes(up: 0, down: hitFrame.Length);
return;
}
@@ -800,16 +797,15 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
// Ensure backend is connected. Failure here means we cannot service the request;
// close the upstream pipe (consistent with the 1:1 model's behaviour on connect
// failure).
// close the upstream pipe.
if (!await EnsureBackendConnectedAsync(ct).ConfigureAwait(false))
{
try { await pipe.DisposeAsync().ConfigureAwait(false); } catch { /* best effort */ }
return;
}
// Phase 10 — read-coalescing path. Only FC03/FC04 are coalescable; only when the
// feature is enabled in the live config. If the late-arriving request matches an
// Read-coalescing path. Only FC03/FC04 are coalescable; only when the feature
// is enabled in the live config. If the late-arriving request matches an
// already-in-flight peer, we attach to the existing entry and skip the backend
// round-trip entirely. The existing entry's response will fan out to both parties.
var coalescingOpts = _coalescingOptions();
@@ -818,14 +814,14 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
var key = new CoalescingKey(unitId, fcByte, startAddr, qty);
var newParty = new InterestedParty(pipe, originalTxId);
// The factory does the Phase-9 work: allocate a proxy TxId, build the
// InFlightRequest with a mutable List<InterestedParty>, add to the correlation
// map. We deliberately do NOT enqueue to the outbound channel inside the
// factory — that's done outside the InFlightByKey lock to keep the lock
// scope tight and to avoid holding the lock across an async send.
// The factory allocates a proxy TxId, builds the InFlightRequest with a
// mutable List<InterestedParty>, and adds to the correlation map. We
// deliberately do NOT enqueue to the outbound channel inside the factory —
// that's done outside the InFlightByKey lock to keep the lock scope tight
// and to avoid holding the lock across an async send.
//
// proxyTxIdForSend / inFlightForSend communicate the factory's allocation back
// out of the lock so the post-lock code can finish the send.
// proxyTxIdForSend / inFlightForSend communicate the factory's allocation
// back out of the lock so the post-lock code can finish the send.
ushort proxyTxIdForSend = 0;
InFlightRequest? inFlightForSend = null;
@@ -898,40 +894,38 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
if (inFlightForSend is null)
{
// Phase 12 (W1.2) — the factory hit the allocator-saturation path or a
// duplicate-key race and stored a stub `InFlightRequest` under `key`. Late
// attachers may have joined the stub between the factory call and this
// cleanup; we must deliver the saturation exception to ALL of them, not just
// the leader, otherwise the late attachers wait forever for a response that
// The factory hit the allocator-saturation path or a duplicate-key race
// and stored a stub `InFlightRequest` under `key`. Late attachers may
// have joined the stub between the factory call and this cleanup; we
// must deliver the saturation exception to ALL of them, not just the
// leader, otherwise the late attachers wait forever for a response that
// never comes (the stub has no proxy TxId, so no backend round-trip will
// ever fire).
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
if (_inFlightByKey.TryRemove(key, out var stub))
{
// Phase 12 (W4 / Nm1) — non-blocking delivery via TrySendResponse.
// Previously this loop awaited SendResponseAsync per party, which would
// serialise on a wedged late-attacher's full bounded channel and stall
// delivery to its peers. Same doctrine as the W1.3 backend-reader fix:
// the per-PLC fan-out path must never await per-pipe writes.
// Non-blocking delivery via TrySendResponse — the per-PLC fan-out
// path must never await per-pipe writes (a wedged late-attacher's
// full bounded channel would otherwise stall delivery to its peers).
foreach (var party in stub.InterestedParties)
{
byte[] excFrame = BuildExceptionFrame(party.OriginalTxId, unitId, fcByte, exceptionCode: 4);
if (!party.Pipe.TrySendResponse(excFrame))
_ctx.Counters.IncrementResponseDropForFullUpstream();
else
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
}
}
else
{
// The stub was already removed by another path (extremely unlikely, but
// defensive). Surface the exception to the original requester.
// The stub was already removed by another path (extremely unlikely,
// but defensive). Surface the exception to the original requester.
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
if (!pipe.TrySendResponse(excFrame))
_ctx.Counters.IncrementResponseDropForFullUpstream();
else
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
}
return;
}
@@ -962,16 +956,16 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
return;
}
// Non-coalescing path (FC06/FC16 writes, FC03/04 with coalescing disabled, or any
// other FC). This is the Phase-9 path verbatim — every request gets its own proxy
// TxId and its own backend round-trip.
// Non-coalescing path (FC06/FC16 writes, FC03/04 with coalescing disabled, or
// any other FC). Every request gets its own proxy TxId and its own backend
// round-trip.
if (!_allocator.TryAllocate(out ushort proxyTxIdFc))
{
MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
return;
}
@@ -993,10 +987,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
return;
}
// Phase 10 — even when the coalescing path is bypassed (e.g. coalescing disabled
// for FC03/04), we still report the request as a Miss so Hit + Miss = total
// FC03/FC04 requests across snapshots. FC06/FC16 are not counted here (they are
// not coalescable in any sense).
// Even when the coalescing path is bypassed (e.g. coalescing disabled for
// FC03/04), we still report the request as a Miss so Hit + Miss = total
// FC03/FC04 requests across snapshots. FC06/FC16 are not counted here (they
// are not coalescable in any sense).
if (fcByte is 0x03 or 0x04)
_ctx.Counters.IncrementCoalescedMiss();
@@ -1037,12 +1031,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
/// Modbus exception (code 0x0B / Gateway Target Device Failed To Respond) to each
/// interested party with the original TxId restored.
///
/// <para><b>Why this exists.</b> In the 1:1 connection model, a lost response would
/// fault the dedicated backend socket and the upstream pair would close. The multiplexed
/// model needs an explicit per-request timer because a single missing or mis-routed
/// response would otherwise leak a correlation entry forever and hang the upstream
/// pipe indefinitely. Real-world causes: PLC drops a response, network packet loss,
/// backend that mis-echoes MBAP TxIds.</para>
/// <para><b>Why this exists.</b> In a multiplexed connection model a single missing
/// or mis-routed response would otherwise leak a correlation entry forever and hang
/// the upstream pipe indefinitely. Real-world causes: PLC drops a response, network
/// packet loss, backend that mis-echoes MBAP TxIds.</para>
/// </summary>
private async Task RunRequestTimeoutWatchdogAsync(CancellationToken ct)
{
@@ -1070,10 +1062,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
_allocator.Release(proxyTxId);
// Phase 10 — also clear the coalescing-by-key entry. A late attach that
// raced in just before the watchdog claim will still receive the 0x0B
// exception via this entry's InterestedParties list (List<T> mutations
// happen before fan-out begins).
// Also clear the coalescing-by-key entry. A late attach that raced
// in just before the watchdog claim will still receive the 0x0B
// exception via this entry's InterestedParties list (List<T>
// mutations happen before fan-out begins).
if (req.Fc is 0x03 or 0x04)
{
var coalKey = new CoalescingKey(req.UnitId, req.Fc, req.StartAddress, req.Qty);
@@ -1097,7 +1089,7 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
try
{
await party.Pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length); // W6
_ctx.Counters.AddBytes(up: 0, down: excFrame.Length);
}
catch
{
@@ -1150,10 +1142,10 @@ internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvi
}
/// <summary>
/// Phase 11 — builds an MBAP-framed response from cached PDU bytes for the given
/// upstream party. The cache stores POST-rewriter PDU bodies (no MBAP); each hit
/// stamps a fresh MBAP header carrying the requesting party's original TxId so the
/// response looks indistinguishable from a fresh backend reply.
/// Builds an MBAP-framed response from cached PDU bytes for the given upstream
/// party. The cache stores POST-rewriter PDU bodies (no MBAP); each hit stamps a
/// fresh MBAP header carrying the requesting party's original TxId so the response
/// looks indistinguishable from a fresh backend reply.
/// </summary>
private static byte[] BuildCacheHitFrame(ushort originalTxId, byte unitId, byte[] cachedPdu)
{
@@ -49,12 +49,11 @@ internal sealed partial class UpstreamPipe : IAsyncDisposable
// Internal CTS lets the multiplexer signal "drop this pipe now" without waiting for
// the upstream socket to close cleanly.
private readonly CancellationTokenSource _cts = new();
// Phase 12 (W2.2) — volatile so writes from DisposeAsync are observed by IsAlive /
// TrySendResponse on other threads without a fence.
// Volatile so writes from DisposeAsync are observed by IsAlive / TrySendResponse on
// other threads without a fence.
private volatile bool _disposed;
// Phase 9: per-pipe forwarded-PDU counter (replaces the per-pair counter from the
// 1:1 model). Read by the status page.
// Per-pipe forwarded-PDU counter. Read by the status page.
private long _pdusForwardedCount;
/// <summary>Stable identity for status-page reporting and cascade cleanup.</summary>
@@ -227,11 +226,11 @@ internal sealed partial class UpstreamPipe : IAsyncDisposable
}
/// <summary>
/// Phase 12 (W1.3) — non-blocking response enqueue. Returns <c>true</c> when the frame
/// was queued for delivery, <c>false</c> when the pipe is dead OR the response channel
/// is full. Used by the per-PLC backend reader's fan-out loop so a single wedged
/// upstream cannot stall responses to peers sharing the same backend socket — without
/// this, a full <c>_responseChannel</c> on one pipe would block the reader task.
/// Non-blocking response enqueue. Returns <c>true</c> when the frame was queued for
/// delivery, <c>false</c> when the pipe is dead OR the response channel is full.
/// Used by the per-PLC backend reader's fan-out loop so a single wedged upstream
/// cannot stall responses to peers sharing the same backend socket — without this, a
/// full <c>_responseChannel</c> on one pipe would block the reader task.
///
/// <para>A <c>false</c> return indicates the frame is the multiplexer's responsibility
/// to drop and (optionally) account for via a counter. The wedged upstream's socket
+2 -3
View File
@@ -2,8 +2,8 @@ namespace Mbproxy.Proxy;
/// <summary>
/// No-op PDU pipeline: passes every frame through byte-for-byte without rewriting.
/// Registered as the <see cref="IPduPipeline"/> singleton in Phase 03.
/// Phase 04 replaces this registration with BcdPduPipeline.
/// Used by tests and fallback paths; production wires
/// <see cref="BcdPduPipeline"/> as the <see cref="IPduPipeline"/> singleton.
/// </summary>
internal sealed class NoopPduPipeline : IPduPipeline
{
@@ -14,6 +14,5 @@ internal sealed class NoopPduPipeline : IPduPipeline
PduContext context)
{
// Intentional no-op: bytes forwarded unmodified.
// Phase 04: replace this registration with BcdPduPipeline.
}
}
+15 -16
View File
@@ -14,20 +14,20 @@ namespace Mbproxy.Proxy;
/// served by the same <see cref="Multiplexing.PlcMultiplexer"/>; all mutable state is
/// accessed through <see cref="ProxyCounters"/> which uses Interlocked for thread-safety.
///
/// <para><b>Phase 9 — request correlation:</b> the multiplexer sets <see cref="CurrentRequest"/>
/// <para><b>Request correlation:</b> the multiplexer sets <see cref="CurrentRequest"/>
/// before calling the pipeline on each direction. On the request path the pipeline can
/// peek at the future correlation entry it just enqueued; on the response path the pipeline
/// uses the request's <c>StartAddress</c>/<c>Qty</c> to decode FC03/FC04 BCD slots. Different
/// in-flight responses use different <see cref="InFlightRequest"/> instances, so there is no
/// cross-talk between concurrent multiplexed requests.</para>
/// peek at the future correlation entry it just enqueued; on the response path the
/// pipeline uses the request's <c>StartAddress</c>/<c>Qty</c> to decode FC03/FC04 BCD
/// slots. Different in-flight responses use different <see cref="InFlightRequest"/>
/// instances, so there is no cross-talk between concurrent multiplexed requests.</para>
///
/// <para><b>Concurrency:</b> a single <see cref="PerPlcContext"/> instance is shared across
/// the per-upstream read tasks (which call the pipeline on the request path) and the
/// single backend reader task (which calls the pipeline on the response path). Because the
/// per-call <see cref="CurrentRequest"/> would be racy if mutated on the shared context,
/// the multiplexer constructs a lightweight per-call clone (<see cref="WithCurrentRequest"/>)
/// for each pipeline invocation. The shared mutable state — the tag map, counters, logger —
/// is read-only or Interlocked.</para>
/// <para><b>Concurrency:</b> a single <see cref="PerPlcContext"/> instance is shared
/// across the per-upstream read tasks (which call the pipeline on the request path) and
/// the single backend reader task (which calls the pipeline on the response path).
/// Because the per-call <see cref="CurrentRequest"/> would be racy if mutated on the
/// shared context, the multiplexer constructs a lightweight per-call clone
/// (<see cref="WithCurrentRequest"/>) for each pipeline invocation. The shared mutable
/// state — the tag map, counters, logger — is read-only or Interlocked.</para>
/// </summary>
internal class PerPlcContext : PduContext
{
@@ -46,10 +46,9 @@ internal class PerPlcContext : PduContext
internal InFlightRequest? CurrentRequest { get; init; }
/// <summary>
/// Phase 11 — optional per-PLC response cache. <c>null</c> on contexts that opt out
/// (every BCD tag has <see cref="BcdTag.CacheTtlMs"/> = 0) or in unit tests that don't
/// exercise the cache. The multiplexer constructs and disposes the cache alongside
/// itself.
/// Optional per-PLC response cache. <c>null</c> on contexts that opt out (every BCD
/// tag has <see cref="BcdTag.CacheTtlMs"/> = 0) or in unit tests that don't exercise
/// the cache. The multiplexer constructs and disposes the cache alongside itself.
/// </summary>
internal ResponseCache? Cache { get; init; }
+13 -15
View File
@@ -11,15 +11,13 @@ namespace Mbproxy.Proxy;
/// Owns one <see cref="TcpListener"/> bound to a PLC's configured listen port and one
/// <see cref="PlcMultiplexer"/> that owns the single backend connection to the PLC.
///
/// <para><b>Phase 9 — TxId multiplexing:</b> the listener no longer pairs each upstream
/// socket with a dedicated backend socket. Instead, every accepted upstream is wrapped
/// in an <see cref="UpstreamPipe"/> and handed to the multiplexer. The multiplexer holds
/// at most one TCP connection to the PLC, eliminating the H2-ECOM100's 4-concurrent-client
/// cap from the upstream side.</para>
/// <para>Every accepted upstream is wrapped in an <see cref="UpstreamPipe"/> and handed
/// to the multiplexer, which TxId-multiplexes them onto a single backend socket — this
/// eliminates the H2-ECOM100's 4-concurrent-client cap from the upstream side.</para>
///
/// <para>The listener's accept loop is otherwise unchanged. <see cref="StartAsync"/>
/// binds the socket; <see cref="RunAsync"/> runs until cancelled or the listener faults;
/// <see cref="DisposeAsync"/> tears down both the listener and the multiplexer.</para>
/// <para><see cref="StartAsync"/> binds the socket; <see cref="RunAsync"/> runs until
/// cancelled or the listener faults; <see cref="DisposeAsync"/> tears down both the
/// listener and the multiplexer.</para>
/// </summary>
internal sealed partial class PlcListener : IAsyncDisposable
{
@@ -49,9 +47,9 @@ internal sealed partial class PlcListener : IAsyncDisposable
=> _multiplexer?.AttachedPipes ?? Array.Empty<UpstreamPipe>();
/// <summary>
/// Phase 12 (W1.1) — exposes the running multiplexer so a hot-reload reseat can swap
/// the per-PLC context on the live instance. <c>null</c> between StopAsync and a fresh
/// start; callers must null-check.
/// Exposes the running multiplexer so a hot-reload reseat can swap the per-PLC
/// context on the live instance. <c>null</c> between StopAsync and a fresh start;
/// callers must null-check.
/// </summary>
internal PlcMultiplexer? Multiplexer => _multiplexer;
@@ -89,10 +87,10 @@ internal sealed partial class PlcListener : IAsyncDisposable
_listener.Start();
LogBound(_listenerLogger, _plc.Name, _plc.ListenPort);
// The multiplexer needs a PerPlcContext to share the BCD tag map and counters with
// the pipeline. If the caller (typically a test or pre-Phase-6 startup path) didn't
// supply one, construct a minimal context that exposes only the PlcName so the
// multiplexer + a noop/passthrough pipeline still round-trip frames correctly.
// The multiplexer needs a PerPlcContext to share the BCD tag map and counters
// with the pipeline. If the caller (typically a test) didn't supply one,
// construct a minimal context that exposes only the PlcName so the multiplexer
// + a noop/passthrough pipeline still round-trip frames correctly.
var ctx = _perPlcContext ?? new PerPlcContext
{
PlcName = _plc.Name,
+67 -66
View File
@@ -1,13 +1,11 @@
namespace Mbproxy.Proxy;
/// <summary>
/// Immutable snapshot of per-PLC counters. Consumed by Phase 07's status page.
/// Immutable snapshot of per-PLC counters. Consumed by the status page.
/// All fields are point-in-time reads; no ordering guarantees across fields.
///
/// <para><b>Backwards-compat policy (see docs/kpi.md):</b> fields are <i>added</i>, never
/// renamed or removed. Phase 9 appended <c>InFlightCount</c>, <c>MaxInFlight</c>,
/// <c>TxIdWraps</c>, <c>BackendDisconnectCascades</c>, and <c>BackendQueueDepth</c> for
/// the TxId-multiplexer telemetry surface (Tier 1.6 in docs/kpi.md).</para>
/// renamed or removed.</para>
/// </summary>
public sealed record CounterSnapshot(
long PdusForwarded,
@@ -53,82 +51,85 @@ public sealed record CounterSnapshot(
long ConnectsFailed,
/// <summary>
/// Number of Modbus requests currently in flight on this PLC's multiplexed backend
/// connection (point-in-time snapshot of the correlation map size). Phase 9.
/// connection (point-in-time snapshot of the correlation map size).
/// </summary>
long InFlightCount,
/// <summary>
/// Peak <see cref="InFlightCount"/> observed since the multiplexer was constructed.
/// Updated via <see cref="Interlocked"/> CAS so concurrent in-flight increments do not
/// lose the high-water mark. Phase 9.
/// Updated via <see cref="Interlocked"/> CAS so concurrent in-flight increments do
/// not lose the high-water mark.
/// </summary>
long MaxInFlight,
/// <summary>
/// Number of times the per-PLC TxId allocator's rolling cursor has wrapped
/// 0xFFFF → 0x0000. A non-zero value is benign; a sudden burst suggests extreme
/// in-flight churn. Phase 9.
/// in-flight churn.
/// </summary>
long TxIdWraps,
/// <summary>
/// Cumulative count of upstream pipes closed as a side effect of a backend disconnect.
/// Each backend reconnect cycle adds the number of attached upstream clients at the
/// time of the disconnect. Phase 9.
/// Cumulative count of upstream pipes closed as a side effect of a backend
/// disconnect. Each backend reconnect cycle adds the number of attached upstream
/// clients at the time of the disconnect.
/// </summary>
long BackendDisconnectCascades,
/// <summary>
/// Current depth of the per-PLC outbound channel feeding the backend writer task
/// (frames queued, not yet on the wire). A sustained non-zero value indicates the
/// backend is slower than upstream demand. Phase 9.
/// backend is slower than upstream demand.
/// </summary>
long BackendQueueDepth,
/// <summary>
/// Phase 10 — cumulative count of FC03/FC04 requests that attached to an already-in-flight
/// peer instead of opening a fresh backend round-trip. <c>CoalescedHitCount + CoalescedMissCount</c>
/// equals total FC03/FC04 requests seen by the multiplexer.
/// Cumulative count of FC03/FC04 requests that attached to an already-in-flight
/// peer instead of opening a fresh backend round-trip.
/// <c>CoalescedHitCount + CoalescedMissCount</c> equals total FC03/FC04 requests
/// seen by the multiplexer.
/// </summary>
long CoalescedHitCount,
/// <summary>
/// Phase 10 — cumulative count of FC03/FC04 requests that opened a fresh in-flight entry
/// (no matching peer was in flight, or the matching peer had reached its <c>MaxParties</c>
/// cap). With <c>ReadCoalescing.Enabled = false</c>, every FC03/FC04 request becomes a miss.
/// Cumulative count of FC03/FC04 requests that opened a fresh in-flight entry (no
/// matching peer was in flight, or the matching peer had reached its
/// <c>MaxParties</c> cap). With <c>ReadCoalescing.Enabled = false</c>, every
/// FC03/FC04 request becomes a miss.
/// </summary>
long CoalescedMissCount,
/// <summary>
/// Phase 10 — count of coalesced response fan-outs that were skipped because the
/// attached upstream pipe had already disconnected. A spike is a churn indicator; the
/// metric itself is informational (Tier 2 in <c>docs/kpi.md</c>).
/// Count of coalesced response fan-outs that were skipped because the attached
/// upstream pipe had already disconnected. A spike is a churn indicator; the metric
/// itself is informational (Tier 2 in <c>docs/kpi.md</c>).
/// </summary>
long CoalescedResponseToDeadUpstream,
/// <summary>
/// Phase 11 — cumulative count of FC03/FC04 requests served from the response cache.
/// <c>CacheHitCount + CacheMissCount</c> equals total FC03/FC04 requests whose resolved
/// TTL was &gt; 0 (cache-eligible). Reads against tags with TTL = 0 increment neither.
/// Cumulative count of FC03/FC04 requests served from the response cache.
/// <c>CacheHitCount + CacheMissCount</c> equals total FC03/FC04 requests whose
/// resolved TTL was &gt; 0 (cache-eligible). Reads against tags with TTL = 0
/// increment neither.
/// </summary>
long CacheHitCount,
/// <summary>
/// Phase 11 — cumulative count of cache-eligible FC03/FC04 requests that fell through
/// to coalescing / backend (no fresh entry was present or the entry had expired).
/// Cumulative count of cache-eligible FC03/FC04 requests that fell through to
/// coalescing / backend (no fresh entry was present or the entry had expired).
/// </summary>
long CacheMissCount,
/// <summary>
/// Phase 11 — cumulative count of cache entries invalidated by overlapping FC06/FC16
/// write responses. A high rate suggests caching is fighting writes; consider lower
/// TTLs on cache-overlapping tags.
/// Cumulative count of cache entries invalidated by overlapping FC06/FC16 write
/// responses. A high rate suggests caching is fighting writes; consider lower TTLs
/// on cache-overlapping tags.
/// </summary>
long CacheInvalidations,
/// <summary>
/// Phase 11 — point-in-time snapshot of the per-PLC <see cref="Cache.ResponseCache"/>
/// entry count. Read on the snapshot path; 0 when no cache is wired.
/// Point-in-time snapshot of the per-PLC <see cref="Cache.ResponseCache"/> entry
/// count. Read on the snapshot path; 0 when no cache is wired.
/// </summary>
long CacheEntryCount,
/// <summary>
/// Phase 11 — point-in-time approximation of cached PDU bytes for this PLC. Sum of
/// Point-in-time approximation of cached PDU bytes for this PLC. Sum of
/// <see cref="Cache.CacheEntry.Length"/> across entries. Read on the snapshot path.
/// </summary>
long CacheBytes,
/// <summary>
/// Phase 12 (W1.3) — cumulative count of backend response frames the per-PLC reader
/// task dropped because the destination upstream pipe's bounded response channel was
/// full. A non-zero value indicates one or more upstream clients are not draining their
/// Cumulative count of backend response frames the per-PLC reader task dropped
/// because the destination upstream pipe's bounded response channel was full. A
/// non-zero value indicates one or more upstream clients are not draining their
/// socket fast enough to keep up with the backend; the wedged client loses its own
/// responses but its peers on the same PLC continue to receive theirs.
/// </summary>
@@ -163,34 +164,34 @@ internal sealed class ProxyCounters
private long _connectsSuccess;
private long _connectsFailed;
// Phase 9 multiplexer telemetry.
// Multiplexer telemetry.
private long _maxInFlight;
private long _backendDisconnectCascades;
// Phase 10 — coalescing counters. Hit + Miss = total FC03/FC04 requests.
// Coalescing counters. Hit + Miss = total FC03/FC04 requests.
private long _coalescedHitCount;
private long _coalescedMissCount;
private long _coalescedResponseToDeadUpstream;
// Phase 11 — response-cache counters. Hit + Miss = total cache-eligible FC03/FC04.
// Response-cache counters. Hit + Miss = total cache-eligible FC03/FC04.
private long _cacheHitCount;
private long _cacheMissCount;
private long _cacheInvalidations;
// Phase 12 (W1.3) — backend-reader fan-out drop counter. Increments when the reader
// task tried to enqueue a response to an upstream pipe whose bounded response channel
// was full. Without the non-blocking enqueue this would deadlock the reader; with it
// we drop and account.
// Backend-reader fan-out drop counter. Increments when the reader task tried to
// enqueue a response to an upstream pipe whose bounded response channel was full.
// Without the non-blocking enqueue this would deadlock the reader; with it we drop
// and account.
private long _responseDropForFullUpstream;
// Phase 11 — live cache state pulled from a per-PLC ResponseCache on each snapshot.
// The multiplexer registers a single provider via SetCacheStatsProvider so the status
// Live cache state pulled from a per-PLC ResponseCache on each snapshot. The
// multiplexer registers a single provider via SetCacheStatsProvider so the status
// page sees current entry-count / bytes without a separate poll.
private volatile ICacheStatsProvider? _cacheStatsProvider;
// Phase 9: live state pulled from the multiplexer's allocator/map/queue on each
// snapshot. The multiplexer registers a single provider via SetMultiplexProvider.
// We use a volatile reference for lock-free read on the snapshot path.
// Live state pulled from the multiplexer's allocator/map/queue on each snapshot.
// The multiplexer registers a single provider via SetMultiplexProvider. We use a
// volatile reference for lock-free read on the snapshot path.
private volatile IMultiplexCountersProvider? _multiplexProvider;
// LastBindError is a string (not a long); accessed via volatile field on ProxyCounters
// but actually stored on the supervisor. We expose it here for snapshot parity.
@@ -269,61 +270,61 @@ internal sealed class ProxyCounters
=> Interlocked.Increment(ref _connectsFailed);
/// <summary>
/// Records <paramref name="n"/> upstream pipes closed by a backend disconnect cascade.
/// Phase 9.
/// Records <paramref name="n"/> upstream pipes closed by a backend disconnect
/// cascade.
/// </summary>
public void AddDisconnectCascades(int n)
=> Interlocked.Add(ref _backendDisconnectCascades, n);
/// <summary>
/// Phase 10 — records one FC03/FC04 request that attached to an already-in-flight peer.
/// Records one FC03/FC04 request that attached to an already-in-flight peer.
/// </summary>
public void IncrementCoalescedHit()
=> Interlocked.Increment(ref _coalescedHitCount);
/// <summary>
/// Phase 10 — records one FC03/FC04 request that opened a fresh in-flight entry
/// (no matching peer was in flight, or the matching peer had reached MaxParties).
/// Records one FC03/FC04 request that opened a fresh in-flight entry (no matching
/// peer was in flight, or the matching peer had reached MaxParties).
/// </summary>
public void IncrementCoalescedMiss()
=> Interlocked.Increment(ref _coalescedMissCount);
/// <summary>
/// Phase 10 — records one coalesced response fan-out that was skipped because the
/// attached upstream pipe had already disconnected. Informational only.
/// Records one coalesced response fan-out that was skipped because the attached
/// upstream pipe had already disconnected. Informational only.
/// </summary>
public void IncrementCoalescedResponseToDeadUpstream()
=> Interlocked.Increment(ref _coalescedResponseToDeadUpstream);
/// <summary>Phase 11 — records one FC03/FC04 cache hit.</summary>
/// <summary>Records one FC03/FC04 cache hit.</summary>
public void IncrementCacheHit()
=> Interlocked.Increment(ref _cacheHitCount);
/// <summary>Phase 11 — records one cache-eligible FC03/FC04 read that missed.</summary>
/// <summary>Records one cache-eligible FC03/FC04 read that missed.</summary>
public void IncrementCacheMiss()
=> Interlocked.Increment(ref _cacheMissCount);
/// <summary>Phase 11 — records <paramref name="n"/> cache entries invalidated by a write.</summary>
/// <summary>Records <paramref name="n"/> cache entries invalidated by a write.</summary>
public void AddCacheInvalidations(int n)
=> Interlocked.Add(ref _cacheInvalidations, n);
/// <summary>
/// Phase 12 (W1.3) — records one backend response frame dropped because the destination
/// upstream pipe's response channel was full.
/// Records one backend response frame dropped because the destination upstream
/// pipe's response channel was full.
/// </summary>
public void IncrementResponseDropForFullUpstream()
=> Interlocked.Increment(ref _responseDropForFullUpstream);
/// <summary>
/// Phase 11 — wires the per-PLC <see cref="Cache.ResponseCache"/> as the live stats
/// source for the snapshot path. Pass <c>null</c> to detach during disposal.
/// Wires the per-PLC <see cref="Cache.ResponseCache"/> as the live stats source for
/// the snapshot path. Pass <c>null</c> to detach during disposal.
/// </summary>
internal void SetCacheStatsProvider(ICacheStatsProvider? provider)
=> _cacheStatsProvider = provider;
/// <summary>
/// CAS-updates the peak in-flight high-water mark. Called on every successful
/// allocation by the multiplexer. Phase 9.
/// allocation by the multiplexer.
/// </summary>
public void ObserveInFlight(int currentInFlight)
{
@@ -341,7 +342,7 @@ internal sealed class ProxyCounters
/// Wires the live multiplexer telemetry source into this counter set. Called by
/// <see cref="Mbproxy.Proxy.Multiplexing.PlcMultiplexer"/> at construction time so
/// the status page's <see cref="Snapshot"/> can include live in-flight / queue-depth
/// values without polling the multiplexer separately. Phase 9.
/// values without polling the multiplexer separately.
/// </summary>
internal void SetMultiplexProvider(IMultiplexCountersProvider? provider)
=> _multiplexProvider = provider;
@@ -454,7 +455,7 @@ internal sealed class ProxyCounters
/// and registered with <see cref="ProxyCounters.SetMultiplexProvider"/> so
/// <see cref="ProxyCounters.Snapshot"/> can include live mux telemetry without holding
/// a direct reference to the multiplexer (which would couple counter snapshots to the
/// connection layer's lifecycle). Phase 9.
/// connection layer's lifecycle).
/// </summary>
internal interface IMultiplexCountersProvider
{
@@ -469,8 +470,8 @@ internal interface IMultiplexCountersProvider
}
/// <summary>
/// Phase 11 — read-only window into the per-PLC <see cref="Cache.ResponseCache"/>'s live
/// state for the snapshot path. The multiplexer wires this on cache construction so the
/// Read-only window into the per-PLC <see cref="Cache.ResponseCache"/>'s live state
/// for the snapshot path. The multiplexer wires this on cache construction so the
/// status page sees live counts without holding a direct reference to the cache.
/// </summary>
internal interface ICacheStatsProvider
+52 -59
View File
@@ -24,8 +24,8 @@ namespace Mbproxy.Proxy;
/// log <c>mbproxy.startup.ready</c> with bound/configured counts.</item>
/// </list>
///
/// Phase 06: passes the supervisor dictionary to <see cref="ConfigReconciler.Attach"/>
/// after initial startup so hot-reload changes are applied by the reconciler.
/// Passes the supervisor dictionary to <see cref="ConfigReconciler.Attach"/> after
/// initial startup so hot-reload changes are applied by the reconciler.
///
/// Stop: cancels all supervisors in parallel with a 5-second hard deadline.
/// </summary>
@@ -36,30 +36,30 @@ internal sealed partial class ProxyWorker : BackgroundService
private readonly ILogger<ProxyWorker> _logger;
private readonly ILoggerFactory _loggerFactory;
private readonly ConfigReconciler _reconciler;
// Phase 12 (W1.5) — admin endpoint is no longer IHostedService; ProxyWorker drives its
// Admin endpoint is not registered as IHostedService; ProxyWorker drives its
// lifecycle directly so the design's "drain THEN stop admin" ordering is honoured.
//
// Resolved LAZILY (in ExecuteAsync) rather than in the constructor because the DI graph
// is circular: AdminEndpointHost → StatusSnapshotBuilder → ProxyWorker. A constructor
// GetService<AdminEndpointHost>() during ProxyWorker's own construction returns null
// silently. Lazy resolution sidesteps the cycle — by the time ExecuteAsync runs the DI
// container is fully built.
// Resolved LAZILY (in ExecuteAsync) rather than in the constructor because the DI
// graph is circular: AdminEndpointHost → StatusSnapshotBuilder → ProxyWorker. A
// constructor GetService<AdminEndpointHost>() during ProxyWorker's own construction
// returns null silently. Lazy resolution sidesteps the cycle — by the time
// ExecuteAsync runs the DI container is fully built.
private readonly IServiceProvider _services;
private AdminEndpointHost? _admin;
// Phase 06: supervisors are now managed jointly by ProxyWorker (initial bootstrap)
// and ConfigReconciler (subsequent hot-reload changes). The dictionary is shared
// via ConfigReconciler.Attach() after initial startup.
// Supervisors are managed jointly by ProxyWorker (initial bootstrap) and
// ConfigReconciler (subsequent hot-reload changes). The dictionary is shared via
// ConfigReconciler.Attach() after initial startup.
//
// Phase 12 (W2.3) — ConcurrentDictionary because ConfigReconciler mutates this from
// parallel Task.WhenAll continuations (Add/Remove/Restart paths). The outer Apply is
// serialised by a semaphore but the inner per-PLC tasks run concurrently. Status-page
// reads via IReadOnlyDictionary still work without locking.
// ConcurrentDictionary because ConfigReconciler mutates this from parallel
// Task.WhenAll continuations (Add/Remove/Restart paths). The outer Apply is
// serialised by a semaphore but the inner per-PLC tasks run concurrently.
// Status-page reads via IReadOnlyDictionary still work without locking.
private readonly ConcurrentDictionary<string, PlcListenerSupervisor> _supervisors =
new(StringComparer.Ordinal);
/// <summary>
/// Read-only view of the live supervisor dictionary. Consumed by Phase 07's
/// Read-only view of the live supervisor dictionary. Consumed by
/// <see cref="Admin.StatusSnapshotBuilder"/> to enumerate per-PLC state.
/// The caller should read this on the status-page path only (not the hot path).
/// </summary>
@@ -79,7 +79,7 @@ internal sealed partial class ProxyWorker : BackgroundService
_loggerFactory = loggerFactory;
_reconciler = reconciler;
_services = services;
// Phase 12 (W1.5) — admin endpoint resolved lazily in ExecuteAsync (see field comment).
// Admin endpoint resolved lazily in ExecuteAsync (see field comment).
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
@@ -107,11 +107,11 @@ internal sealed partial class ProxyWorker : BackgroundService
continue;
}
// Phase 11 — construct a per-PLC response cache only when at least one
// resolved tag opts in (CacheTtlMs > 0). Skipping cache construction for a
// PLC with no cacheable tags keeps the no-cache path free of the eviction
// timer and the per-call resolution cost, preserving "default behaviour =
// Phase 10 unchanged" when no operator has opted any tag in.
// Construct a per-PLC response cache only when at least one resolved tag
// opts in (CacheTtlMs > 0). Skipping cache construction for a PLC with no
// cacheable tags keeps the no-cache path free of the eviction timer and the
// per-call resolution cost, preserving the "no caching" default behaviour
// when no operator has opted any tag in.
var cache = HasAnyCacheableTag(result.Map)
? new ResponseCache(opts.Cache.MaxEntriesPerPlc, opts.Cache.EvictionIntervalMs)
: null;
@@ -144,9 +144,9 @@ internal sealed partial class ProxyWorker : BackgroundService
resilienceOpts.ListenerRecovery,
_loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plc.Name}"));
// Phase 10 — give the supervisor a live accessor for ReadCoalescingOptions
// so a hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled` propagates
// to the multiplexer's per-PDU coalescing decision.
// Give the supervisor a live accessor for ReadCoalescingOptions so a
// hot-reload of `Mbproxy.Resilience.ReadCoalescing.Enabled` propagates to
// the multiplexer's per-PDU coalescing decision.
Func<ReadCoalescingOptions> coalescingAccessor =
() => _options.CurrentValue.Resilience.ReadCoalescing;
@@ -166,13 +166,13 @@ internal sealed partial class ProxyWorker : BackgroundService
_supervisors[plc.Name] = supervisor;
}
// ── Phase 06: wire reconciler BEFORE starting supervisors ─────────────────
// ── Wire reconciler BEFORE starting supervisors ──────────────────────────
// Attach hands the reconciler the authoritative supervisor dictionary and the
// initial options snapshot. The reconciler won't process OnChange events until
// after this call — the brief window between Attach and first supervisor start
// is safe because the channel signal only enqueues; apply runs asynchronously.
// Phase 12 (W2.1) — also pass the live coalescing accessor so reconciler-built
// supervisors (add/restart paths) honour hot-reloaded ReadCoalescing values.
// Pass the live coalescing accessor so reconciler-built supervisors
// (add/restart paths) honour hot-reloaded ReadCoalescing values.
Func<ReadCoalescingOptions> reconcilerCoalescingAccessor =
() => _options.CurrentValue.Resilience.ReadCoalescing;
_reconciler.Attach(_supervisors, opts, reconcilerCoalescingAccessor);
@@ -213,10 +213,10 @@ internal sealed partial class ProxyWorker : BackgroundService
int boundCount = _supervisors.Values.Count(s => s.Snapshot().State == SupervisorState.Bound);
LogStartupReady(_logger, boundCount, plcsConfigured);
// Phase 12 (W1.5) — start the admin endpoint AFTER listeners are bound so the
// status page can never observe the service in a "no PLCs configured yet" state.
// The admin endpoint is no longer registered as IHostedService (the host's reverse
// stop order would tear it down BEFORE drain). ProxyWorker drives both ends.
// Start the admin endpoint AFTER listeners are bound so the status page can
// never observe the service in a "no PLCs configured yet" state. The admin
// endpoint is not registered as IHostedService (the host's reverse stop order
// would tear it down BEFORE drain) ProxyWorker drives both ends.
//
// Resolution happens here, not in the constructor — the DI graph is circular
// (admin → StatusSnapshotBuilder → ProxyWorker) and a constructor-time lookup
@@ -235,10 +235,9 @@ internal sealed partial class ProxyWorker : BackgroundService
}
else
{
// Phase 12 (W4 / Nm6) — surface the absence. The previous IHostedService
// registration would have hard-errored in DI if AddMbproxyAdmin() was missing
// from Program.cs; the W1.5 lazy lookup returns null silently. A single warning
// makes a botched composition observable without blocking startup.
// Surface the absence. The lazy lookup returns null silently if
// AddMbproxyAdmin() is missing from Program.cs; a single warning makes a
// botched composition observable without blocking startup.
_logger.LogWarning(
"Admin endpoint not registered (AddMbproxyAdmin() missing from composition). " +
"Status page will be unavailable; service continues without it.");
@@ -250,8 +249,7 @@ internal sealed partial class ProxyWorker : BackgroundService
}
/// <summary>
/// Phase 12 (W1.5) — graceful shutdown sequence (replaces the deleted
/// <c>ShutdownCoordinator</c>):
/// Graceful shutdown sequence:
/// <list type="number">
/// <item>Cancel <see cref="ExecuteAsync"/> via <c>base.StopAsync</c>.</item>
/// <item><b>Snapshot</b> per-PLC in-flight counts BEFORE stopping supervisors —
@@ -263,10 +261,7 @@ internal sealed partial class ProxyWorker : BackgroundService
/// stop is the actual drain — it cancels the listener, which exits its
/// accept loop, which disposes the multiplexer, which cascades all attached
/// pipes. There is no separate "drain in-flight" phase because there is
/// nothing to drain that wouldn't be killed by the supervisor stop itself
/// (the original Phase-08 ShutdownCoordinator's drain loop had this same
/// shape and was structurally always-zero — call out from
/// codereviews/2026-05-14/ReReviewAfterRemediation.md NC1).</item>
/// nothing to drain that wouldn't be killed by the supervisor stop itself.</item>
/// <item>Stop the admin endpoint LAST so the status page survives the supervisor
/// stop phase and operators can observe the live state right up to shutdown.</item>
/// <item>Dispose every supervisor to release sockets, channels, and watchdog timers.</item>
@@ -277,16 +272,15 @@ internal sealed partial class ProxyWorker : BackgroundService
/// </summary>
public override async Task StopAsync(CancellationToken cancellationToken)
{
// Phase 12 (W5 / m2) — snapshot in-flight BEFORE base.StopAsync so the field
// matches its name: "the count at the moment the host signalled stop", not "the
// count at the moment we got around to computing it." `base.StopAsync` cancels the
// ExecuteAsync stoppingToken; in the milliseconds before it returns, in-flight
// requests whose responses arrive will be removed from _correlation and the
// watchdog can clear stale entries — the count would otherwise drift downward.
// Snapshot in-flight BEFORE base.StopAsync so the field matches its name: "the
// count at the moment the host signalled stop", not "the count at the moment we
// got around to computing it." `base.StopAsync` cancels the ExecuteAsync
// stoppingToken; in the milliseconds before it returns, in-flight requests
// whose responses arrive will be removed from _correlation and the watchdog can
// clear stale entries — the count would otherwise drift downward.
//
// Phase 12 (W4 / NC1) — must run BEFORE supervisor stop too: after
// supervisor.StopAsync, multiplexers are disposed and CountInFlight returns 0
// unconditionally (the original ShutdownCoordinator had the same defect).
// Must run BEFORE supervisor stop too: after supervisor.StopAsync, multiplexers
// are disposed and CountInFlight returns 0 unconditionally.
int inFlightAtCancel = CountInFlight();
// Cancel ExecuteAsync first.
@@ -294,9 +288,9 @@ internal sealed partial class ProxyWorker : BackgroundService
var sw = Stopwatch.StartNew();
// Phase 12 (W2.20) — supervisor stop deadline read from the live config so a
// hot-reloaded GracefulShutdownTimeoutMs is honoured. Supervisor stop is the
// drain: cancelling the supervisor cancels the listener, which exits accept, which
// Supervisor stop deadline read from the live config so a hot-reloaded
// GracefulShutdownTimeoutMs is honoured. Supervisor stop is the drain:
// cancelling the supervisor cancels the listener, which exits accept, which
// disposes the multiplexer, which cascades all attached pipes.
int gracefulMs = _options.CurrentValue.Connection.GracefulShutdownTimeoutMs;
@@ -352,11 +346,11 @@ internal sealed partial class ProxyWorker : BackgroundService
// ── Logging ───────────────────────────────────────────────────────────────────────────
/// <summary>
/// Phase 11 — returns <c>true</c> when at least one BcdTag in the resolved map has a
/// positive <see cref="BcdTag.CacheTtlMs"/>. A PLC with no cacheable tags skips the
/// Returns <c>true</c> when at least one BcdTag in the resolved map has a positive
/// <see cref="BcdTag.CacheTtlMs"/>. A PLC with no cacheable tags skips the
/// <see cref="Mbproxy.Proxy.Cache.ResponseCache"/> entirely (no eviction timer, no
/// per-call cache resolution cost), so the default-OFF deployment is byte-identical
/// to a Phase-10 deployment.
/// per-call cache resolution cost), so the default-OFF deployment runs the
/// no-cache code path.
/// </summary>
private static bool HasAnyCacheableTag(BcdTagMap map)
{
@@ -375,7 +369,6 @@ internal sealed partial class ProxyWorker : BackgroundService
Message = "Failed to bind listener: Plc={Plc} Port={Port} Reason={Reason}")]
private static partial void LogBindFailed(ILogger logger, string plc, int port, string reason);
// Phase 12 (W1.5) — moved here from the deleted ShutdownCoordinator.
[LoggerMessage(EventId = 80, EventName = "mbproxy.shutdown.complete",
Level = LogLevel.Information,
Message = "Graceful shutdown complete: InFlightAtCancel={InFlightAtCancel} ElapsedMs={ElapsedMs}")]
@@ -46,15 +46,15 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
private volatile string? _lastBindError;
private int _recoveryAttempts; // Interlocked
// Phase 07: current active listener for status-page pair enumeration.
// Current active listener for status-page pair enumeration.
private volatile PlcListener? _currentListener;
// Phase 06: _perPlcContext is now mutable so ReplaceContextAsync can swap it.
// Access from the accept loop (RunAsync) and from ReplaceContextAsync must be
// coherent; we use a volatile reference so the accept loop always reads the latest
// context without locking. The PlcListener created on each Polly attempt holds
// its own copy of the context at construction time; existing in-flight connections
// keep their old reference until they complete.
// _perPlcContext is mutable so ReplaceContextAsync can swap it. Access from the accept
// loop (RunAsync) and from ReplaceContextAsync must be coherent; we use a volatile
// reference so the accept loop always reads the latest context without locking. The
// PlcListener created on each Polly attempt holds its own copy of the context at
// construction time; existing in-flight connections keep their old reference until they
// complete.
private volatile PerPlcContext? _currentContext;
/// <summary>
@@ -67,16 +67,15 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
private bool _disposed;
// Phase 12 (W2.15) — completes when the supervisor has transitioned out of Stopped
// for the first time (reached Bound or Recovering). Replaces the previous busy-poll
// implementation in WaitForInitialBindAttemptAsync, which raced fast Stopped→Bound→
// Stopped transitions and never exited if the supervisor task threw inside Polly.
// Completes when the supervisor has transitioned out of Stopped for the first time
// (reached Bound or Recovering). Used by WaitForInitialBindAttemptAsync to avoid
// racing fast Stopped→Bound→Stopped transitions or hanging if the supervisor task
// throws inside Polly.
//
// Phase 12 (W4 / NM4) — non-readonly so StartAsync can re-arm it for a re-Started
// supervisor. Without re-arming, a restart-after-stop scenario would have
// WaitForInitialBindAttemptAsync return immediately on the previous run's signal,
// never observing the new run's bind status. No production caller currently re-Starts,
// but the supervisor's state machine should be consistent.
// Non-readonly so StartAsync can re-arm it for a re-Started supervisor. Without
// re-arming, a restart-after-stop scenario would have WaitForInitialBindAttemptAsync
// return immediately on the previous run's signal, never observing the new run's
// bind status.
private TaskCompletionSource _firstAttemptCompleted = new(
TaskCreationOptions.RunContinuationsAsynchronously);
@@ -104,7 +103,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
_multiplexerLogger = multiplexerLogger;
_pipeLogger = pipeLogger;
_perPlcContext = perPlcContext;
_currentContext = perPlcContext; // Phase 06: live context slot
_currentContext = perPlcContext; // live context slot
_recoveryPipeline = recoveryPipeline;
_logger = logger;
_backendConnectPipeline = backendConnectPipeline;
@@ -121,7 +120,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
/// <summary>
/// Live collection of active <see cref="UpstreamPipe"/> instances attached to this
/// PLC's multiplexer. Returns an empty collection when the listener is not bound.
/// Consumed by Phase 07's status page (renamed from <c>ActivePairs</c> in Phase 9).
/// Consumed by the status page.
/// </summary>
public IReadOnlyCollection<UpstreamPipe> ActiveUpstreams
=> _currentListener?.ActiveUpstreams ?? Array.Empty<UpstreamPipe>();
@@ -137,26 +136,25 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
/// </summary>
public Task StartAsync(CancellationToken ct)
{
// Phase 12 (W2.16) — refuse to re-Start an already-running or already-disposed
// supervisor. After Stop the state machine returns to Stopped and StartAsync
// can re-arm; W4/NM3+NM4 below ensure the per-Start state (CTS, TCS) is fresh
// each time so no leak or stale signal carries across cycles.
// Refuse to re-Start an already-running or already-disposed supervisor. After
// Stop the state machine returns to Stopped and StartAsync can re-arm; the per-
// Start state (CTS, TCS) is refreshed below so no leak or stale signal carries
// across cycles.
if (_disposed)
throw new ObjectDisposedException(nameof(PlcListenerSupervisor));
if (_state != SupervisorState.Stopped || !_supervisorTask.IsCompleted)
throw new InvalidOperationException(
$"Supervisor for Plc='{_plc.Name}' has already been started.");
// Phase 12 (W4 / NM3) — dispose the previous CTS before reassigning. The original
// code overwrote _supervisorCts unconditionally, leaking the prior CTS on every
// re-Start cycle (and any registrations linked to it). Idempotent: ObjectDisposed
// catch covers the very-first-Start case where the field-init CTS is still fresh.
// Dispose the previous CTS before reassigning so a re-Start cycle does not leak
// the prior CTS (and any registrations linked to it). Idempotent: the
// ObjectDisposed catch covers the very-first-Start case where the field-init CTS
// is still fresh.
try { _supervisorCts.Dispose(); } catch (ObjectDisposedException) { /* fresh */ }
_supervisorCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
// Phase 12 (W4 / NM4) — re-arm the first-attempt TCS so a re-Started supervisor
// doesn't immediately observe the previous run's signal in
// WaitForInitialBindAttemptAsync.
// Re-arm the first-attempt TCS so a re-Started supervisor doesn't immediately
// observe the previous run's signal in WaitForInitialBindAttemptAsync.
_firstAttemptCompleted = new TaskCompletionSource(
TaskCreationOptions.RunContinuationsAsynchronously);
@@ -170,10 +168,10 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
/// <see cref="SupervisorState.Recovering"/>).
/// Returns immediately if the supervisor is already past that point.
///
/// <para><b>Phase 12 (W2.15)</b> — backed by a <see cref="TaskCompletionSource"/> set
/// when the supervisor task first transitions out of <see cref="SupervisorState.Stopped"/>.
/// Replaces the previous 10 ms busy-poll which raced fast bind+stop sequences and could
/// hang if the supervisor task threw before any state write happened.</para>
/// <para>Backed by a <see cref="TaskCompletionSource"/> set when the supervisor task
/// first transitions out of <see cref="SupervisorState.Stopped"/>. This avoids both
/// racing fast bind+stop sequences and hanging if the supervisor task throws before
/// any state write happens.</para>
/// </summary>
public async Task WaitForInitialBindAttemptAsync(CancellationToken ct)
{
@@ -184,7 +182,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
}
catch (OperationCanceledException)
{
// Caller cancelled; not a fault — same observable behaviour as the prior poll.
// Caller cancelled; not a fault.
}
}
@@ -221,8 +219,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
/// <summary>
/// Returns a point-in-time snapshot of this supervisor's state.
///
/// <para><b>Phase 12 (W2.17)</b> — reads the three observable fields under a single
/// lock so the status page can never report inconsistent triples like
/// <para>Reads the three observable fields under a single lock so the status page
/// can never report inconsistent triples like
/// <c>(State=Bound, LastBindError=&lt;previous&gt;, RecoveryAttempts&gt;0)</c>. The
/// supervisor task uses <see cref="TransitionTo"/> which takes the same lock, so a
/// snapshot reads a transition-consistent view.</para>
@@ -241,9 +239,9 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
private readonly object _snapshotLock = new();
/// <summary>
/// Phase 12 (W2.17) — atomic three-field transition. State, lastBindError, and
/// (optionally) the recoveryAttempts increment all happen under one lock so a
/// concurrent <see cref="Snapshot"/> never sees a half-applied transition.
/// Atomic three-field transition. State, lastBindError, and (optionally) the
/// recoveryAttempts increment all happen under one lock so a concurrent
/// <see cref="Snapshot"/> never sees a half-applied transition.
/// </summary>
private void TransitionTo(SupervisorState newState, string? lastBindError, bool incrementRecoveryAttempt)
{
@@ -258,15 +256,10 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
/// <summary>
/// Atomically swaps the per-PLC context (tag map + optional response cache) on the
/// running listener AND its live multiplexer.
///
/// <para><b>Phase 12 (W1.1)</b> — previously this method only updated the supervisor's
/// <c>_currentContext</c> slot, which meant the running <see cref="PlcMultiplexer"/>
/// kept using the OLD context (it captured the reference at construction). A reload
/// only became visible on the next listener fault. Now the swap propagates into the
/// running mux via <see cref="PlcMultiplexer.ReplaceContext"/>, so the very next PDU
/// sees the new tag map / new cache. Counters are preserved (the new context carries
/// the same <c>ProxyCounters</c> instance) so operator history is not reset.</para>
/// running listener AND its live multiplexer. The swap propagates into the running
/// mux via <see cref="PlcMultiplexer.ReplaceContext"/>, so the very next PDU sees
/// the new tag map / new cache. Counters are preserved (the new context carries the
/// same <c>ProxyCounters</c> instance) so operator history is not reset.
///
/// <para><b>Old cache lifecycle</b>: the supervisor disposes the outgoing context's
/// cache AFTER the multiplexer has been swapped to the new context. By that point no
@@ -281,16 +274,16 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
// subsequent fault recovery) will pick up newCtx through this slot.
_currentContext = newCtx;
// Phase 12 (W1.1) — push the swap into the running multiplexer so existing
// connections see the new tag map / new cache on their next PDU. _currentListener
// may be null between Polly retry attempts; in that case the next listener built
// inside the Polly loop will pick up newCtx through _currentContext above.
// Push the swap into the running multiplexer so existing connections see the new
// tag map / new cache on their next PDU. _currentListener may be null between
// Polly retry attempts; in that case the next listener built inside the Polly loop
// will pick up newCtx through _currentContext above.
_currentListener?.Multiplexer?.ReplaceContext(newCtx);
// Phase 12 (W1.1 + W2.8) — drop the outgoing cache AFTER the swap so the running
// multiplexer can no longer reach it. Clear() snapshots the entry count for the
// mbproxy.cache.flushed log event before disposing the cache (which stops the
// eviction loop and releases the timer).
// Drop the outgoing cache AFTER the swap so the running multiplexer can no longer
// reach it. Clear() snapshots the entry count for the mbproxy.cache.flushed log
// event before disposing the cache (which stops the eviction loop and releases
// the timer).
if (oldCache is not null && !ReferenceEquals(oldCache, newCtx.Cache))
{
int dropped = oldCache.Clear();
@@ -318,11 +311,11 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
// A faulted listener's TcpListener socket must be disposed before
// re-binding. We create a new PlcListener on each attempt.
//
// Phase 06: use _currentContext (volatile) so that a ReplaceContextAsync
// call between Polly retry attempts is picked up here. Each listener
// captures the context at construction time; existing in-flight pairs
// keep their own reference. See ReplaceContextAsync for the transition
// window documentation.
// Use _currentContext (volatile) so that a ReplaceContextAsync call
// between Polly retry attempts is picked up here. Each listener captures
// the context at construction time; existing in-flight pairs keep their
// own reference. See ReplaceContextAsync for the transition window
// documentation.
var listener = new PlcListener(
_plc,
_connectionOptions,
@@ -334,7 +327,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
_backendConnectPipeline,
_coalescingOptions);
// Phase 07: expose the current listener for status-page pair enumeration.
// Expose the current listener for status-page pair enumeration.
_currentListener = listener;
try
@@ -351,10 +344,10 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
string truncated = Truncate(bindEx.Message, 256);
TransitionTo(SupervisorState.Recovering, truncated, incrementRecoveryAttempt: true);
// Phase 12 (W2.15) — signal the first transition out of Stopped.
// Signal the first transition out of Stopped.
_firstAttemptCompleted.TrySetResult();
// Also update the per-PLC counters if available (Phase 07 reads these).
// Also update the per-PLC counters if available (status page reads these).
_currentContext?.Counters.IncrementRecoveryAttempt(truncated);
LogBindFailed(_logger, _plc.Name, _plc.ListenPort, truncated);
@@ -379,7 +372,7 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
// Clear the last bind error on a successful bind.
TransitionTo(SupervisorState.Bound, lastBindError: null, incrementRecoveryAttempt: false);
_currentContext?.Counters.ClearLastBindError();
// Phase 12 (W2.15) — signal the first transition out of Stopped.
// Signal the first transition out of Stopped.
_firstAttemptCompleted.TrySetResult();
// ── Run the accept loop ──────────────────────────────────────────
@@ -407,9 +400,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
string truncated = Truncate(runEx.Message, 256);
TransitionTo(SupervisorState.Recovering, truncated, incrementRecoveryAttempt: true);
// Phase 12 (W2.15) — also signal first-attempt-completed in case the
// very first listener.RunAsync faulted before the bind-success path
// signalled it.
// Also signal first-attempt-completed in case the very first
// listener.RunAsync faulted before the bind-success path signalled it.
_firstAttemptCompleted.TrySetResult();
// Also update the per-PLC counters if available.
@@ -457,16 +449,16 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
_state = SupervisorState.Stopped;
}
_currentListener = null;
// Phase 12 (W2.15) — defensive: if RunSupervisorAsync exits before any bind
// attempt fired (e.g. construction-time fault), unblock any awaiting
// Defensive: if RunSupervisorAsync exits before any bind attempt fired
// (e.g. construction-time fault), unblock any awaiting
// WaitForInitialBindAttemptAsync caller so it doesn't hang.
_firstAttemptCompleted.TrySetResult();
}
}
/// <summary>
/// Phase 12 (W2 cleanup) — single helper for the truncate-exception-message pattern
/// previously copy-pasted across three call sites.
/// Single helper for the truncate-exception-message pattern shared across the
/// supervisor's bind/run/end recovery paths.
/// </summary>
private static string Truncate(string s, int max) => s.Length > max ? s[..max] : s;
@@ -487,8 +479,8 @@ internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
// Best-effort cleanup.
}
// Phase 11: dispose the response cache (if any) — its eviction timer would
// otherwise outlive the supervisor.
// Dispose the response cache (if any) — its eviction timer would otherwise
// outlive the supervisor.
_currentContext?.Cache?.Dispose();
_supervisorCts.Dispose();
@@ -26,14 +26,14 @@ public enum SupervisorState
}
/// <summary>
/// Immutable point-in-time snapshot of a supervisor's state. Consumed by Phase 07's
/// status page via <see cref="PlcListenerSupervisor.Snapshot"/>.
/// Immutable point-in-time snapshot of a supervisor's state. Consumed by the status
/// page via <see cref="PlcListenerSupervisor.Snapshot"/>.
///
/// <para><b>RecoveryAttempts semantics</b>: this counter <em>accumulates over the lifetime
/// of the supervisor</em> and is never reset. Operators reading the status page should
/// interpret it as "how many times has this listener faulted or failed to bind since
/// the service started" — useful for detecting port-flapping or repeated OS network
/// resets. Phase 07 surfaces it as-is.</para>
/// resets.</para>
/// </summary>
/// <param name="State">Current state of the supervisor.</param>
/// <param name="LastBindError">
+1 -1
View File
@@ -2,7 +2,7 @@ namespace Mbproxy;
/// <summary>
/// Service-wide counters for the mbproxy host. Tracks reload accept/reject counts and
/// timestamps so Phase 07's status page can surface them without coupling to the reconciler.
/// timestamps so the status page can surface them without coupling to the reconciler.
///
/// <para>Constructed once at DI startup and shared as a singleton. All writes are via
/// dedicated methods that use <see cref="Interlocked"/> so reads from the status page
@@ -330,10 +330,10 @@ public sealed class AdminEndpointTests
System.IO.File.Move(tmp, path, overwrite: true);
}
// ── Phase 12 (W3 test gap) — non-GET methods rejected ──────────────────
// ── non-GET methods rejected ─────────────────────────────────────────
/// <summary>
/// W3 — verifies the admin endpoint rejects non-GET methods (POST / PUT / DELETE)
/// Verifies the admin endpoint rejects non-GET methods (POST / PUT / DELETE)
/// with HTTP 405 Method Not Allowed. The design intentionally exposes only `GET /`
/// and `GET /status.json`; this test guards against an accidental MapPost/Map* being
/// added later.
@@ -10,8 +10,8 @@ namespace Mbproxy.Tests.Bcd;
/// NOTE on allocation profile:
/// BcdCodec is a purely static class operating on value types (ushort, int, tuples).
/// It allocates only when constructing exception objects (the error path), never on
/// the success path. TryGet / hot-path decode callers in Phase 04 will be
/// allocation-free for valid BCD registers.
/// the success path. TryGet / hot-path decode callers are allocation-free for valid
/// BCD registers.
/// </summary>
[Trait("Category", "Unit")]
public sealed class BcdCodecTests
@@ -45,8 +45,8 @@ public sealed class BcdCodecTests
}
/// <summary>
/// Phase 12 (W3 test gap #11) — locks the boundary contract for the `(uint)value > Max16`
/// range check. `int.MinValue` cast to `uint` becomes `0x80000000`, which is well above
/// Locks the boundary contract for the `(uint)value > Max16` range check.
/// `int.MinValue` cast to `uint` becomes `0x80000000`, which is well above
/// `Max16` (= 9999), so the throw fires cleanly without arithmetic surprise. Prevents
/// regressions if the bounds check is ever rewritten with a two-sided int comparison
/// that would underflow on extreme negatives.
@@ -99,9 +99,9 @@ public sealed class BcdTagMapBuilderTests
}
/// <summary>
/// Phase 12 (W2.11) — duplicates within Global itself are now detected
/// pre-collapse and produce a DuplicateAddress error. (Before W2.11 the input
/// dictionary silently collapsed to last-write-wins, leaving the validator dead.)
/// Duplicates within Global itself are detected pre-collapse and produce a
/// DuplicateAddress error. (A naive input dictionary would silently collapse
/// to last-write-wins, leaving the validator dead.)
/// </summary>
[Fact]
public void Build_DuplicateAddressInGlobal_ReturnsDuplicateAddressError()
@@ -122,9 +122,9 @@ public sealed class BcdTagMapBuilderTests
}
/// <summary>
/// Phase 12 (W2.11) — duplicates within the per-PLC Add list itself are now detected
/// pre-collapse. (Cross-list collisions Global vs Add remain the legitimate width-
/// override pattern and are NOT errors — see the next test.)
/// Duplicates within the per-PLC Add list itself are detected pre-collapse.
/// (Cross-list collisions Global vs Add remain the legitimate width-override
/// pattern and are NOT errors — see the next test.)
/// </summary>
[Fact]
public void Build_DuplicateAddress_Within_AddList_ReturnsDuplicateAddressError()
@@ -147,9 +147,9 @@ public sealed class BcdTagMapBuilderTests
}
/// <summary>
/// Phase 12 (W2.11) — same-address entries appearing in BOTH Global AND Add are
/// the documented width-override pattern (design.md "Hybrid tag resolution"). They
/// must NOT be flagged as duplicates; Add wins.
/// Same-address entries appearing in BOTH Global AND Add are the documented
/// width-override pattern (design.md "Hybrid tag resolution"). They must NOT
/// be flagged as duplicates; Add wins.
/// </summary>
[Fact]
public void Build_AddOverridesGlobalAtSameAddress_NoDuplicateError_AddWins()
@@ -282,12 +282,12 @@ public sealed class ConfigReconcilerTests : IAsyncDisposable
}
/// <summary>
/// Phase 12 (W3 test gap #16) — stress-test the W2.3 ConcurrentDictionary fix and the
/// W2.1 coalescing-accessor wiring. Many concurrent Apply calls drive add/remove of
/// many distinct PLCs; without W2.3's ConcurrentDictionary the inner Task.WhenAll
/// continuations would corrupt the dictionary and crash with KeyNotFoundException or
/// ArgumentException. The test asserts: all applies complete, no exceptions are
/// thrown, and the reload counter is exactly the apply count.
/// Stress-tests the live supervisor dictionary and the coalescing-accessor wiring.
/// Many concurrent Apply calls drive add/remove of many distinct PLCs; the inner
/// Task.WhenAll continuations must not corrupt the dictionary or crash with
/// KeyNotFoundException or ArgumentException. The test asserts: all applies
/// complete, no exceptions are thrown, and the reload counter is exactly the
/// apply count.
/// </summary>
[Fact(Timeout = 30_000)]
public async Task Apply_ManyConcurrentReloads_With_PlcChurn_NoCorruption()
@@ -305,7 +305,7 @@ public sealed class ConfigReconcilerTests : IAsyncDisposable
// Build 8 different option snapshots, each a different PLC roster.
// Each Apply will trigger Add+Remove churn against the live supervisor dict —
// exactly the path that W2.3's ConcurrentDictionary was needed for.
// exactly the path that the ConcurrentDictionary guards against corruption.
const int snapshots = 8;
const int plcsPerSnapshot = 4;
var snaps = new MbproxyOptions[snapshots];
@@ -324,8 +324,8 @@ public sealed class ConfigReconcilerTests : IAsyncDisposable
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(25));
// Fire 16 concurrent applies cycling through the 8 snapshots so each is
// submitted twice. Inner per-PLC Task.WhenAll continuations from W2.3 will run
// in parallel and stress-test the dictionary mutation safety.
// submitted twice. Inner per-PLC Task.WhenAll continuations run in parallel
// and stress-test the dictionary mutation safety.
var tasks = Enumerable.Range(0, 16)
.Select(i => Task.Run(() => reconciler.ApplyAsync(snaps[i % snapshots], cts.Token), cts.Token))
.ToArray();
@@ -320,10 +320,10 @@ public sealed class HotReloadE2ETests : IAsyncLifetime
await host.StopAsync(stopCts.Token);
}
// ── Phase 12 (W3 test gap) — cache flush on tag-list reload ─────────────────────────
// ── cache flush on tag-list reload ──────────────────────────────────────────────────
/// <summary>
/// W2.8 / W3 — verifies that a tag-list reload for a PLC with a cacheable tag emits
/// Verifies that a tag-list reload for a PLC with a cacheable tag emits
/// <c>mbproxy.cache.flushed</c>. The cache count is 0 (no real backend to populate
/// it), but the event must still fire — it's the operator's signal that the in-memory
/// cache state was reset by a config reload.
@@ -363,13 +363,13 @@ public sealed class HotReloadE2ETests : IAsyncLifetime
await host.StopAsync(stopCts.Token);
}
// ── Phase 12 (W3 test gap #10) — ReadCoalescing.Enabled hot-reload flip ─────────────
// ── ReadCoalescing.Enabled hot-reload flip ──────────────────────────────────────────
/// <summary>
/// W3 — verifies that flipping <c>Mbproxy.Resilience.ReadCoalescing.Enabled</c> at
/// Verifies that flipping <c>Mbproxy.Resilience.ReadCoalescing.Enabled</c> at
/// runtime via hot-reload propagates to the live <see cref="IOptionsMonitor{T}"/>
/// snapshot. The W2.1 fix wires the accessor through to add/restart supervisors;
/// the multiplexer reads it per-PDU. Proving the IOptionsMonitor sees the new value
/// snapshot. The accessor is wired through to add/restart supervisors; the
/// multiplexer reads it per-PDU. Proving the IOptionsMonitor sees the new value
/// is sufficient — the per-PDU read path is unit-tested at the multiplexer level.
/// </summary>
[Fact(Timeout = 8_000)]
@@ -156,10 +156,10 @@ public sealed class ReloadValidatorTests
Assert.Contains(errors, e => e.Contains("non-empty"));
}
// ── Phase 12 (W2.10) — Cache.AllowLongTtl gate ──────────────────────────────────────
// ── Cache.AllowLongTtl gate ─────────────────────────────────────────────────────────
/// <summary>
/// W2 — per-tag CacheTtlMs > 60_000 without Cache.AllowLongTtl is rejected.
/// Per-tag CacheTtlMs > 60_000 without Cache.AllowLongTtl is rejected.
/// </summary>
[Fact]
public void Validate_PerTagCacheTtl_Above60s_Without_AllowLongTtl_Fails()
@@ -181,7 +181,7 @@ public sealed class ReloadValidatorTests
}
/// <summary>
/// W2 — same value passes when AllowLongTtl is true (operator opt-in).
/// Same value passes when AllowLongTtl is true (operator opt-in).
/// </summary>
[Fact]
public void Validate_PerTagCacheTtl_Above60s_With_AllowLongTtl_Passes()
@@ -203,9 +203,9 @@ public sealed class ReloadValidatorTests
}
/// <summary>
/// W2 — per-PLC DefaultCacheTtlMs > 60_000 inherited by a tag with null CacheTtlMs is
/// caught by the resolved-value check even if the per-PLC default check itself passes
/// (it doesn't, but this validates the defensive resolved re-check from W2.10).
/// Per-PLC DefaultCacheTtlMs > 60_000 inherited by a tag with null CacheTtlMs is
/// caught by the resolved-value check even if the per-PLC default check itself
/// passes (it doesn't, but this validates the defensive resolved re-check).
/// </summary>
[Fact]
public void Validate_ResolvedTtl_FromPerPlcDefault_AboveCap_Fails()
@@ -233,7 +233,7 @@ public sealed class ReloadValidatorTests
Assert.Contains(errors, e => e.Contains("60_000"));
}
// ── Phase 12 (W2.18) — ConnectionOptions validation ─────────────────────────────────
// ── ConnectionOptions validation ────────────────────────────────────────────────────
[Fact]
public void Validate_ZeroBackendConnectTimeoutMs_Fails()
@@ -92,7 +92,7 @@ internal static class TestHostBuilderExtensions
builder.Services.AddSerilog(serilogLogger, dispose: false);
builder.AddMbproxyOptions();
// Phase 03: register the no-op pipeline and ProxyWorker (replaces HeartbeatWorker).
// Register the no-op pipeline and ProxyWorker.
builder.Services.AddSingleton<IPduPipeline, NoopPduPipeline>();
builder.Services.AddHostedService<ProxyWorker>();
@@ -1,7 +1,6 @@
<!-- xunit version: v3 (xunit.v3 3.2.2) — chosen because a stable release exists on NuGet as of 2026-05-13 -->
<!-- NModbus 3.0.83 — chosen for small footprint, net10.0 compatibility, and synchronous/async FC03/FC16 API
that maps directly to the Modbus PDU function codes used in smoke and e2e tests.
Added in Phase 01 as the Modbus TCP client for all simulator-backed tests. -->
<!-- xunit v3 (xunit.v3 3.2.2). NModbus 3.0.83 — small footprint, net10.0 compatibility, and
synchronous/async FC03/FC16 API that maps directly to the Modbus PDU function codes used
in smoke and e2e tests. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
@@ -16,14 +15,13 @@
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="18.5.1" />
<!-- xunit v3: stable as of 2026-05-13 -->
<PackageReference Include="xunit.v3" Version="3.2.2" />
<PackageReference Include="xunit.runner.visualstudio" Version="3.1.5">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="Shouldly" Version="4.3.0" />
<!-- NModbus: Modbus TCP client for simulator smoke tests and e2e tests (Phase 01+) -->
<!-- NModbus: Modbus TCP client for simulator smoke tests and e2e tests. -->
<PackageReference Include="NModbus" Version="3.0.83" />
</ItemGroup>
@@ -31,13 +29,12 @@
<ProjectReference Include="..\..\src\Mbproxy\Mbproxy.csproj" />
</ItemGroup>
<!-- Phase 12 (W2.21) — the linked appsettings.json from Mbproxy.csproj propagates to
the test bin via the project reference. Tests build their own in-memory
configurations and must not pick up the shipped template's example PLCs. The
Target deletes the inherited file from both the build output AND the publish
payload (W4 / T3 — adds Publish so a `dotnet publish` against this csproj for a
packaged self-test would not leak the template's example PLCs into the published
bundle). -->
<!-- The linked appsettings.json from Mbproxy.csproj propagates to the test bin via the
project reference. Tests build their own in-memory configurations and must not pick
up the shipped template's example PLCs. This target deletes the inherited file from
both the build output AND the publish payload so a `dotnet publish` against this
csproj for a packaged self-test would not leak the template's example PLCs into the
published bundle. -->
<Target Name="RemoveInheritedAppsettings" AfterTargets="Build;Publish">
<Delete Files="$(OutputPath)appsettings.json" Condition="Exists('$(OutputPath)appsettings.json')" />
<Delete Files="$(PublishDir)appsettings.json" Condition="'$(PublishDir)' != '' AND Exists('$(PublishDir)appsettings.json')" />
@@ -41,9 +41,8 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Phase 9: the rewriter consumes <see cref="PerPlcContext.CurrentRequest"/> rather
/// than a per-pair last-request slot. Tests build a synthetic <see cref="InFlightRequest"/>
/// to drive response decoding.
/// The rewriter consumes <see cref="PerPlcContext.CurrentRequest"/>. Tests build a
/// synthetic <see cref="InFlightRequest"/> to drive response decoding.
/// </summary>
private static InFlightRequest MakeInFlight(byte fc, ushort startAddress, ushort qty)
=> new(
@@ -51,9 +50,8 @@ public sealed class BcdPduPipelineTests
Fc: fc,
StartAddress: startAddress,
Qty: qty,
// Phase 9: always exactly one party. We don't have a real UpstreamPipe in
// pipeline unit tests; the rewriter never dereferences the party list, so a
// null-forgiving placeholder is safe.
// We don't have a real UpstreamPipe in pipeline unit tests; the rewriter
// never dereferences the party list, so an empty placeholder is safe.
InterestedParties: Array.Empty<InterestedParty>(),
SentAtUtc: DateTimeOffset.UtcNow);
@@ -107,9 +105,9 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Simulate sending an FC03/04 request then reading the response.
/// Phase 9: builds an <see cref="InFlightRequest"/> matching the request and attaches
/// it to the response-call context (replacing the per-pair last-request slot).
/// Simulate sending an FC03/04 request then reading the response. Builds an
/// <see cref="InFlightRequest"/> matching the request and attaches it to the
/// response-call context.
/// </summary>
private void SendRequestThenProcessResponse(
PerPlcContext ctx,
@@ -361,11 +359,11 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Phase 12 (W2.13) — a client writing a 32-bit BCD value where either word exceeds
/// 9999 must NOT be silently mutated by the `high*10000+low` reconstruction. Validation
/// rejects the slot, increments invalidBcdWarnings, and passes the raw bytes through.
/// Without W2.13 the codec would accept e.g. (high=9999, low=9999) → 99_989_999 →
/// re-encode as (high=9998, low=9999), silently losing 1 from the high word.
/// A client writing a 32-bit BCD value where either word exceeds 9999 must NOT be
/// silently mutated by the `high*10000+low` reconstruction. Validation rejects the
/// slot, increments invalidBcdWarnings, and passes the raw bytes through. (Otherwise
/// the codec would accept e.g. (high=9999, low=9999) → 99_989_999 → re-encode as
/// (high=9998, low=9999), silently losing 1 from the high word.)
/// </summary>
[Fact]
public void FC16_32Bit_ClientHighOrLowAbove9999_PassesThroughRaw_WithInvalidBcdWarning()
@@ -383,10 +381,10 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Phase 12 (W2.14) — a malformed FC16 request that claims qty=N but ships fewer than
/// 6+N*2 bytes must NOT be partially rewritten. Without W2.14 each individual slot's
/// per-slot bounds check would skip the OOB slot, leaving early slots rewritten and late
/// slots untouched (a half-rewritten request reaching the PLC).
/// A malformed FC16 request that claims qty=N but ships fewer than 6+N*2 bytes must
/// NOT be partially rewritten. Without the up-front length check, each individual
/// slot's per-slot bounds check would skip the OOB slot, leaving early slots rewritten
/// and late slots untouched (a half-rewritten request reaching the PLC).
/// </summary>
[Fact]
public void FC16_TruncatedRegisterData_PassesThroughRaw_NoPartialRewrite()
@@ -405,12 +403,11 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Phase 12 (W3 test gap #15) — DL205/DL260 caps FC03/FC04 reads at qty=128 (above
/// Modbus spec's 125; documented in DL260/dl205.md). The proxy must NOT truncate the
/// qty field — a request with qty &gt; 128 at non-BCD addresses must pass through
/// unchanged so the PLC's own validator returns exception 03 to the client. This is
/// the transparent-pass-through contract for FCs and addresses the rewriter doesn't
/// own.
/// DL205/DL260 caps FC03/FC04 reads at qty=128 (above Modbus spec's 125; documented
/// in DL260/dl205.md). The proxy must NOT truncate the qty field — a request with
/// qty &gt; 128 at non-BCD addresses must pass through unchanged so the PLC's own
/// validator returns exception 03 to the client. This is the transparent-pass-through
/// contract for FCs and addresses the rewriter doesn't own.
/// </summary>
[Fact]
public void FC03_Request_QtyAbove128_AtNonBcdAddress_PassesThroughUnchanged()
@@ -426,10 +423,9 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Phase 12 (W3 test gap) — symmetric inverse of the existing partial-overlap test:
/// the write range starts ON the high register of a 32-bit pair (low word is BEFORE
/// the write range). Must also be passed through raw with a partial warning, not
/// half-rewritten.
/// Symmetric inverse of the existing partial-overlap test: the write range starts ON
/// the high register of a 32-bit pair (low word is BEFORE the write range). Must also
/// be passed through raw with a partial warning, not half-rewritten.
/// </summary>
[Fact]
public void FC16_WriteStartsOnHighWord_Of32BitPair_PassesThroughRaw_WithPartialWarning()
@@ -448,10 +444,9 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Phase 12 (W3 test gap) — mixed slots in a single FC03 read: a 16-bit BCD tag, a
/// 32-bit BCD pair, and an unconfigured register. Each slot should be handled
/// independently — the 16-bit and 32-bit rewritten, the unconfigured register passed
/// through unchanged.
/// Mixed slots in a single FC03 read: a 16-bit BCD tag, a 32-bit BCD pair, and an
/// unconfigured register. Each slot should be handled independently — the 16-bit and
/// 32-bit rewritten, the unconfigured register passed through unchanged.
/// </summary>
[Fact]
public void FC03_Mixed_16Bit_32Bit_AndNonBcd_InOneRead_OnlyConfiguredSlotsRewritten()
@@ -488,9 +483,9 @@ public sealed class BcdPduPipelineTests
}
/// <summary>
/// Phase 12 (W3 test gap) — FC16 response handling. The response carries no register
/// values (just an echo of [fc][start][qty]) so the rewriter must pass it through
/// unchanged regardless of tag-map content.
/// FC16 response handling. The response carries no register values (just an echo of
/// [fc][start][qty]) so the rewriter must pass it through unchanged regardless of
/// tag-map content.
/// </summary>
[Fact]
public void FC16_Response_PassesThroughUnchanged_RegardlessOfTagMap()
@@ -15,18 +15,16 @@ using Xunit;
namespace Mbproxy.Tests.Proxy.Cache;
/// <summary>
/// End-to-end coverage of the Phase-11 response cache against the pymodbus DL205
/// simulator.
/// End-to-end coverage of the response cache against the pymodbus DL205 simulator.
///
/// <para><b>pymodbus 3.13 simulator quirk.</b> Like Phase 9 and Phase 10, these tests
/// serialise reads in the simulator-backed cases. The Phase-11 cache's behavioural
/// guarantee (a TTL-bounded cache hit returns the cached value without backend traffic)
/// is independent of the simulator's known concurrent-MBAP-frame bug — sequential reads
/// keep the sim in single-PDU mode, which is its known-good envelope.</para>
/// <para><b>pymodbus 3.13 simulator quirk.</b> These tests serialise reads in the
/// simulator-backed cases. The cache's behavioural guarantee (a TTL-bounded cache hit
/// returns the cached value without backend traffic) is independent of the simulator's
/// known concurrent-MBAP-frame bug — sequential reads keep the sim in single-PDU mode,
/// which is its known-good envelope.</para>
///
/// <para>The headline assertion lives here: 10 reads at 100 ms intervals with a 1 s TTL
/// must result in EXACTLY 1 backend round-trip. If this test fails, Phase 11 does not
/// ship — see <c>11-response-cache.md</c>.</para>
/// must result in EXACTLY 1 backend round-trip.</para>
/// </summary>
[Collection(nameof(Mbproxy.Tests.Sim.DL205SimulatorCollection))]
[Trait("Category", "E2E")]
@@ -153,8 +151,8 @@ public sealed class ResponseCacheE2ETests
/// <summary>
/// Mandatory regression. With no cache config anywhere (default deployment shape),
/// behaviour must be byte-identical to Phase 10. Sequential reads through the same
/// client produce one backend round-trip each — no elision.
/// behaviour must be byte-identical to the non-cached path: sequential reads through
/// the same client produce one backend round-trip each — no elision.
/// </summary>
[Fact(Timeout = 5_000)]
public async Task Cache_DisabledByDefault_BehaviourIs_ByteIdenticalTo_Phase10()
@@ -165,7 +163,7 @@ public sealed class ResponseCacheE2ETests
int adminPort = PickFreePort();
var config = MakeBaseConfig(proxyPort);
config["Mbproxy:AdminPort"] = adminPort.ToString();
// No Cache section, no CacheTtlMs on any tag — pure Phase-10 behaviour.
// No Cache section, no CacheTtlMs on any tag — non-cached behaviour.
config["Mbproxy:BcdTags:Global:0:Address"] = "1072";
config["Mbproxy:BcdTags:Global:0:Width"] = "16";
@@ -465,7 +465,8 @@ public sealed class ResponseCacheMultiplexerTests
public async Task UncachedReads_BehaveIdentically_ToPhase10()
{
// Regression guard: PerPlcContext with Cache = null must behave byte-identically
// to Phase 10 — every FC03 read produces a backend round-trip (coalescing aside).
// to the non-cached path — every FC03 read produces a backend round-trip
// (coalescing aside).
int backendPort = PickFreePort();
await using var backend = new StubBackend(backendPort);
@@ -49,12 +49,12 @@ public sealed class MultiplexerE2ETests
// ── E2E 1: Five simultaneous upstream clients (connection-cap lift) ──────────────
/// <summary>
/// Headline test for Phase 9: prove that the multiplexer accepts the 5th upstream
/// client on the same proxy port — pre-Phase-9's 1:1 model would have failed at
/// backend connect (H2-ECOM100 cap = 4). Each client's request is serialised behind
/// the previous client's response so the pymodbus 3.13 simulator's concurrent-frame
/// bug never triggers; the multiplexer's connection ceiling, not its under-concurrency
/// behaviour, is what this test proves.
/// Headline test: prove that the multiplexer accepts the 5th upstream client on the
/// same proxy port — a 1:1 model would have failed at backend connect (H2-ECOM100
/// cap = 4). Each client's request is serialised behind the previous client's response
/// so the pymodbus 3.13 simulator's concurrent-frame bug never triggers; the
/// multiplexer's connection ceiling, not its under-concurrency behaviour, is what
/// this test proves.
/// </summary>
[Fact(Timeout = 5_000)]
public async Task E2E_FiveSimultaneousClients_AllReadHR1072_AllGetDecoded_1234()
@@ -82,8 +82,9 @@ public sealed class MultiplexerE2ETests
await using var hd = new AsyncHostDispose(host);
await Task.Delay(200, TestContext.Current.CancellationToken);
// Open five simultaneous TCP connections to the proxy first (each would have used
// a dedicated backend socket pre-Phase-9, blowing through the 4-client cap).
// Open five simultaneous TCP connections to the proxy first (under a 1:1 model
// each would have needed a dedicated backend socket, blowing through the
// 4-client cap).
var clients = new TcpClient[5];
try
{
@@ -380,8 +380,8 @@ public sealed class PlcMultiplexerTests
try
{
// Both clients use the same upstream TxId 0x0007 — the proxy must hand out
// distinct proxy TxIds on the backend wire. Phase 10: reads target DIFFERENT
// addresses so coalescing does not fuse them into a single backend request.
// distinct proxy TxIds on the backend wire. Reads target DIFFERENT addresses
// so coalescing does not fuse them into a single backend request.
await c1.SendAsync(BuildFc03ReadFrame(0x0007, 0, 1), SocketFlags.None);
await c2.SendAsync(BuildFc03ReadFrame(0x0007, 10, 1), SocketFlags.None);
@@ -625,12 +625,12 @@ public sealed class PlcMultiplexerTests
}
}
// ── Phase 12 Wave-1 regression tests ──────────────────────────────────────
// ── ReplaceContext live-swap regression tests ────────────────────────────────
/// <summary>
/// W1.1 — verifies that <see cref="PlcMultiplexer.ReplaceContext"/> swaps the live
/// per-PLC context on the running multiplexer, so the very next PDU's BCD rewriter
/// uses the new tag map (not the captured-at-construction map). Before W1.1 this
/// Verifies that <see cref="PlcMultiplexer.ReplaceContext"/> swaps the live per-PLC
/// context on the running multiplexer, so the very next PDU's BCD rewriter uses the
/// new tag map (not the captured-at-construction map). Without the live swap this
/// scenario would silently keep using the old map until the listener faulted and the
/// supervisor's Polly loop reconstructed everything.
/// </summary>
@@ -682,11 +682,11 @@ public sealed class PlcMultiplexerTests
}
/// <summary>
/// W1.1 — verifies that swapping in a fresh response cache via <see cref="PlcMultiplexer.ReplaceContext"/>
/// makes the running multiplexer consult the NEW cache for subsequent reads, not the
/// old cache that was disposed by the supervisor. Without W1.1 the running mux would
/// keep its constructor-captured cache reference and either return stale entries or
/// hit a disposed cache.
/// Verifies that swapping in a fresh response cache via
/// <see cref="PlcMultiplexer.ReplaceContext"/> makes the running multiplexer consult
/// the NEW cache for subsequent reads, not the old cache that was disposed by the
/// supervisor. Without the live swap the running mux would keep its constructor-
/// captured cache reference and either return stale entries or hit a disposed cache.
/// </summary>
[Fact]
public async Task ReplaceContext_NewCache_NextReadGoesToBackend_NotOldCache()
@@ -757,7 +757,7 @@ public sealed class PlcMultiplexerTests
}
}
// ── Phase 12 (W3 final-tier race tests) ──────────────────────────────────
// ── Final-tier race tests ─────────────────────────────────────────────────
/// <summary>
/// Reflection helper — drains the multiplexer's TxIdAllocator by calling
@@ -781,10 +781,10 @@ public sealed class PlcMultiplexerTests
}
/// <summary>
/// W3 #5 — TxId allocator saturation propagates as a Modbus exception 04 to the
/// upstream client (no hang, no crash). The 16-bit TxId space (65,536 slots) is
/// pre-saturated via reflection so the next request hits the
/// <c>!_allocator.TryAllocate</c> branch in <c>OnUpstreamFrameAsync</c> immediately.
/// TxId allocator saturation propagates as a Modbus exception 04 to the upstream
/// client (no hang, no crash). The 16-bit TxId space (65,536 slots) is pre-saturated
/// via reflection so the next request hits the <c>!_allocator.TryAllocate</c> branch
/// in <c>OnUpstreamFrameAsync</c> immediately.
/// </summary>
[Fact]
public async Task TxIdAllocator_Saturated_NextRequest_GetsException04_WithOriginalTxId()
@@ -835,11 +835,11 @@ public sealed class PlcMultiplexerTests
}
/// <summary>
/// W3 #6 — under TxId saturation, two concurrent identical FC03 reads must BOTH
/// receive exception 04 (one as the leader directly, the other either via a
/// coalesced fan-out from the W1.2 cleanup OR via its own independent saturation
/// path — either timing produces the same observable contract). Validates that no
/// pipe hangs forever waiting for a backend response that would never arrive.
/// Under TxId saturation, two concurrent identical FC03 reads must BOTH receive
/// exception 04 (one as the leader directly, the other either via a coalesced
/// fan-out from the saturation cleanup OR via its own independent saturation path —
/// either timing produces the same observable contract). Validates that no pipe
/// hangs forever waiting for a backend response that would never arrive.
/// </summary>
[Fact]
public async Task TxIdAllocator_Saturated_TwoConcurrentIdenticalReads_BothPipesGetException04()
@@ -877,7 +877,7 @@ public sealed class PlcMultiplexerTests
var rspA = await ReadOneFrameAsync(cA, TestContext.Current.CancellationToken);
var rspB = await ReadOneFrameAsync(cB, TestContext.Current.CancellationToken);
// Both must be exception 04 with the original TxId echoed — the W1.2 contract
// Both must be exception 04 with the original TxId echoed — the contract
// is "no late attacher hangs."
foreach (var (rsp, expectedTxId, label) in new[] {
(rspA, txA, "A"), (rspB, txB, "B") })
@@ -898,14 +898,14 @@ public sealed class PlcMultiplexerTests
}
/// <summary>
/// W3 #7 — backend-reader head-of-line block. One upstream pipe is wedged by the
/// test holding its socket-receive side without reading. The W1.3 fix routes the
/// fan-out through <c>TrySendResponse</c> so the per-PLC backend reader cannot be
/// stalled by a wedged pipe; responses to a healthy peer must keep flowing and the
/// wedged pipe's <c>responseDropForFullUpstream</c> counter must increment.
/// Backend-reader head-of-line block guard. One upstream pipe is wedged by the test
/// holding its socket-receive side without reading. The fan-out is routed through
/// <c>TrySendResponse</c> so the per-PLC backend reader cannot be stalled by a
/// wedged pipe; responses to a healthy peer must keep flowing and the wedged pipe's
/// <c>responseDropForFullUpstream</c> counter must increment.
///
/// <para>Pre-W1.3 the synchronous <c>await SendResponseAsync</c> inside the reader
/// would block on the wedged pipe's full bounded channel and starve every peer.</para>
/// <para>A synchronous <c>await SendResponseAsync</c> inside the reader would block
/// on the wedged pipe's full bounded channel and starve every peer.</para>
/// </summary>
[Fact]
public async Task SlowUpstream_DoesNotStallPeerResponses_DropCounterIncrements()
@@ -943,8 +943,8 @@ public sealed class PlcMultiplexerTests
await cB.SendAsync(BuildFc03ReadFrame(txB, 0, 1), SocketFlags.None);
// B's response must arrive within a few hundred ms even with A wedged. If
// the W1.3 fix were missing, the reader would be blocked on A's channel and
// B would time out.
// the non-blocking enqueue path were missing, the reader would be blocked on
// A's channel and B would time out.
var rspB = await ReadOneFrameAsync(cB, TestContext.Current.CancellationToken)
.WaitAsync(TimeSpan.FromSeconds(3), TestContext.Current.CancellationToken);
ushort echoB = (ushort)((rspB[0] << 8) | rspB[1]);
@@ -972,7 +972,7 @@ public sealed class PlcMultiplexerTests
}
/// <summary>
/// W3 #8 — watchdog↔response race. The W1 design uses claim-then-dispatch:
/// Watchdog↔response race. The design uses claim-then-dispatch:
/// <c>CorrelationMap.TryRemove</c> is the single source of truth, so exactly ONE
/// of (response delivered, watchdog timeout) wins for any given proxy TxId. This
/// test exercises the race window directly: a stub backend that responds at almost
@@ -992,13 +992,13 @@ public sealed class PlcMultiplexerTests
// = 400 the tick is 100 ms. Configure the backend to delay 350-450 ms for each
// request so some land before, some after the timeout.
int backendPort = PickFreePort();
// Phase 12 (W4 / T2) — deterministic alternation rather than seeded Random. Random
// with a fixed seed is not stable across .NET major versions (Microsoft has changed
// the implementation, e.g. legacy → Xoshiro128 in .NET 6), so a runtime upgrade
// could land all samples on one side of the watchdog deadline and break the
// "both branches must fire" assertion below. Counter-based alternation guarantees
// 15 fast (350 ms, beats watchdog) and 15 slow (450 ms, loses to watchdog) responses
// across 30 iterations, regardless of runtime.
// Deterministic alternation rather than seeded Random. Random with a fixed seed is
// not stable across .NET major versions (Microsoft has changed the implementation,
// e.g. legacy → Xoshiro128 in .NET 6), so a runtime upgrade could land all samples
// on one side of the watchdog deadline and break the "both branches must fire"
// assertion below. Counter-based alternation guarantees 15 fast (350 ms, beats
// watchdog) and 15 slow (450 ms, loses to watchdog) responses across 30 iterations,
// regardless of runtime.
int reqCount = 0;
var slowBackend = new SlowResponseBackend(backendPort, () =>
{
@@ -1062,13 +1062,13 @@ public sealed class PlcMultiplexerTests
}
/// <summary>
/// W3 #9 — cascade racing with new accepts. Stress-test: while the backend is repeatedly
/// Cascade racing with new accepts. Stress-test: while the backend is repeatedly
/// killed and resurrected (forcing repeated cascade cycles), new upstream clients
/// connect and disconnect concurrently. The contract verified is the
/// no-crash-under-churn property: the multiplexer must survive arbitrary interleavings
/// of teardown and new-pipe-attach without throwing into the host or leaking sockets.
///
/// <para>The originally-flagged race window — a new pipe added between
/// <para>The race window — a new pipe added between
/// <c>_pipes.Values.ToArray()</c> and <c>_pipes.Clear()</c> in <c>TearDownBackendAsync</c>
/// — leaves the new pipe alive but orphaned from <c>_pipes</c>. Its read loop will
/// receive normal traffic until the next cascade or its socket closes. This test
@@ -1165,7 +1165,7 @@ public sealed class PlcMultiplexerTests
/// <summary>
/// Backend stub that delays each response by a caller-supplied amount. Used by the
/// W3 #8 watchdog race test.
/// watchdog-vs-response race test.
/// </summary>
private sealed class SlowResponseBackend : IAsyncDisposable
{
@@ -10,9 +10,9 @@ namespace Mbproxy.Tests.Proxy.Multiplexing;
/// <summary>
/// Verifies that <see cref="BcdPduPipeline"/> correlates FC03/FC04 responses through
/// <see cref="PerPlcContext.CurrentRequest"/> (Phase 9) rather than the pre-Phase-9
/// per-pair last-request slot. Concurrent in-flight requests from different upstream
/// clients must decode against their own request range without cross-talk.
/// <see cref="PerPlcContext.CurrentRequest"/>. Concurrent in-flight requests from
/// different upstream clients must decode against their own request range without
/// cross-talk.
/// </summary>
[Trait("Category", "Unit")]
public sealed class RewriterCorrelationTests
@@ -9,8 +9,8 @@ namespace Mbproxy.Tests.Proxy.Multiplexing;
/// <summary>
/// Unit tests for <see cref="UpstreamPipe"/>'s response-channel contract — particularly
/// the Phase 12 (W1.3) <see cref="UpstreamPipe.TrySendResponse"/> non-blocking enqueue
/// added so the per-PLC backend reader cannot be stalled by one slow upstream client.
/// the <see cref="UpstreamPipe.TrySendResponse"/> non-blocking enqueue, which exists
/// so the per-PLC backend reader cannot be stalled by one slow upstream client.
/// </summary>
[Trait("Category", "Unit")]
public sealed class UpstreamPipeTests
@@ -40,7 +40,7 @@ public sealed class UpstreamPipeTests
// ── Tests ─────────────────────────────────────────────────────────────────
/// <summary>
/// W1.3 — when no write-loop is draining the response channel, repeated
/// When no write-loop is draining the response channel, repeated
/// <see cref="UpstreamPipe.TrySendResponse"/> calls must succeed up to the channel's
/// bounded capacity and return <c>false</c> on every subsequent call without blocking.
/// This is the non-blocking contract the per-PLC backend reader relies on.
@@ -80,7 +80,7 @@ public sealed class UpstreamPipeTests
}
/// <summary>
/// W1.3 — once the pipe has been disposed, <see cref="UpstreamPipe.TrySendResponse"/>
/// Once the pipe has been disposed, <see cref="UpstreamPipe.TrySendResponse"/>
/// returns <c>false</c> regardless of channel state, never throws.
/// </summary>
[Fact]
@@ -51,13 +51,12 @@ public sealed class ProxyForwardingTests
}
// ── 2a. FC03 read HR1072 — with BCD configured → decoded 1234 ──────────────────────
// Replaced Phase 03 placeholder: Forward_FC03_HR1072_Returns_RawBCD_0x1234
[Fact(Timeout = 5_000)]
public async Task Forward_FC03_HR1072_Returns_Decoded_1234()
{
// Phase 04: BcdPduPipeline is active. When BCD tag 1072 (width=16) is configured,
// the proxy decodes the raw 0x1234 nibbles and the client receives binary 1234.
// BcdPduPipeline is active. When BCD tag 1072 (width=16) is configured, the proxy
// decodes the raw 0x1234 nibbles and the client receives binary 1234.
if (_sim.SkipReason is not null)
Assert.Skip(_sim.SkipReason);
@@ -230,9 +229,9 @@ public sealed class ProxyForwardingTests
public async Task BackendConnectFailure_ClosesUpstreamCleanly()
{
// Point the proxy at port 1 on loopback — guaranteed unreachable.
// After Phase 9 the multiplexer lazily connects to the backend on the first
// upstream PDU, so we have to actually send a request before the proxy attempts
// the (failing) backend connect that closes the upstream.
// The multiplexer lazily connects to the backend on the first upstream PDU, so
// we have to actually send a request before the proxy attempts the (failing)
// backend connect that closes the upstream.
const int badBackendPort = 1;
const int backendTimeoutMs = 500; // short timeout for test speed
@@ -352,7 +351,7 @@ public sealed class ProxyForwardingTests
new Serilog.LoggerConfiguration().MinimumLevel.Fatal().CreateLogger(),
dispose: false);
builder.AddMbproxyOptions();
// BCD rewriter pipeline — used by the Phase 04 tests in this file.
// BCD rewriter pipeline — used by the BCD-decode tests in this file.
builder.Services.AddSingleton<IPduPipeline, BcdPduPipeline>();
builder.Services.AddHostedService<ProxyWorker>();
return builder.Build();
@@ -12,11 +12,10 @@ using Xunit;
namespace Mbproxy.Tests.Proxy.Supervision;
/// <summary>
/// Integration tests for the backend-connect Polly retry path. Phase 9 moved backend
/// connect ownership from <c>PlcConnectionPair.CreateAsync</c> into
/// <see cref="PlcMultiplexer"/>. These tests exercise the same Polly pipeline by driving
/// upstream-to-multiplexer frames against a bad/intermittent backend and observing the
/// resulting connect-success/connect-failed counters.
/// Integration tests for the backend-connect Polly retry path. Backend connect
/// ownership lives in <see cref="PlcMultiplexer"/>. These tests exercise the Polly
/// pipeline by driving upstream-to-multiplexer frames against a bad/intermittent
/// backend and observing the resulting connect-success/connect-failed counters.
/// </summary>
[Trait("Category", "Unit")]
public sealed class BackendConnectRetryTests
@@ -176,13 +176,13 @@ public sealed class SupervisorTests
// ── Test 4: runtime fault triggers recovery ──────────────────────────────────────────
/// <summary>
/// Phase 12 (W3 test gap #4) — replaces the previous placeholder. Genuinely faults
/// the running listener mid-life by stopping its underlying <see cref="TcpListener"/>
/// via reflection (the only externally-observable hook to force the accept loop's
/// <see cref="Socket.AcceptAsync"/> to throw <see cref="ObjectDisposedException"/>).
/// The supervisor must observe the fault, transition to <see cref="SupervisorState.Recovering"/>,
/// and re-bind on the next Polly attempt — emitting one
/// <c>mbproxy.listener.recovered</c> event and bumping <c>RecoveryAttempts</c>.
/// Genuinely faults the running listener mid-life by stopping its underlying
/// <see cref="TcpListener"/> via reflection (the only externally-observable hook
/// to force the accept loop's <see cref="Socket.AcceptAsync"/> to throw
/// <see cref="ObjectDisposedException"/>). The supervisor must observe the fault,
/// transition to <see cref="SupervisorState.Recovering"/>, and re-bind on the next
/// Polly attempt — emitting one <c>mbproxy.listener.recovered</c> event and bumping
/// <c>RecoveryAttempts</c>.
/// </summary>
[Fact]
public async Task Supervisor_RuntimeFault_OnRunningListener_RecoversAndRebinds()
@@ -67,8 +67,8 @@ public sealed class SimulatorSmokeTests
/// <summary>
/// Reads holding register 1072 via FC03 and expects raw BCD value
/// <c>0x1234</c> (4660 decimal). This register represents decimal 1234 stored as
/// BCD nibbles. Phase 04's e2e test will read the same register through the proxy
/// and assert binary 1234 — proving the proxy rewrote the response.
/// BCD nibbles. The end-to-end rewriter test reads the same register through the
/// proxy and asserts binary 1234 — proving the proxy rewrote the response.
/// </summary>
[Fact(Timeout = 5_000)]
public async Task Simulator_FC03_ReturnsBCD_RawValueAtHR1072_0x1234()