mbproxy: initial commit through Phase 9 (TxId multiplexing)

Adds the mbproxy service end-to-end. Phases 00-08 implement the
production-ready single-listener / 1:1-backend transparent Modbus TCP
proxy with bidirectional BCD rewriting for the ~54-PLC DL205/DL260
fleet. Phase 9 replaces the connection layer with a single backend
socket per PLC plus MBAP TxId rewriting, lifting the H2-ECOM100's
4-concurrent-client cap as an operational ceiling.

Phase 9 additions of note:
- PlcMultiplexer + UpstreamPipe + TxIdAllocator + CorrelationMap
- InFlightRequest with IReadOnlyList<InterestedParty> (load-bearing
  for Phase 10 read coalescing — do not collapse to a single field)
- Per-request watchdog: surfaces Modbus exception 0x0B to upstream
  on BackendRequestTimeoutMs, defending against lost responses,
  dead-PLC paths, and pymodbus 3.13.0's concurrent-multiplexed-
  request bug (its ServerRequestHandler.last_pdu state race)
- Status DTO + HTML gain inFlight / maxInFlight / txIdWraps /
  disconnectCascades / queueDepth (Tier 1.6 in docs/kpi.md)

Tests: 263 unit + 38 E2E. Multiplexer correctness under truly
concurrent backend traffic is proved against a stub backend in
PlcMultiplexerTests; MultiplexerE2ETests paces requests so pymodbus
3.13's single-PDU framer stays in known-good mode.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-14 01:49:35 -04:00
parent 2e937228a0
commit 56eee3c563
105 changed files with 18430 additions and 0 deletions
@@ -0,0 +1,225 @@
using System.Text.Json;
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Options;
using Mbproxy.Options;
namespace Mbproxy.Admin;
/// <summary>
/// Hosted service that owns the Kestrel-backed admin HTTP endpoint.
///
/// <para>Lifecycle:</para>
/// <list type="bullet">
///   <item><see cref="StartAsync"/> builds a <see cref="WebApplication"/> bound to
///         <c>Mbproxy.AdminPort</c> and starts it non-blocking.</item>
///   <item>If the bind fails (port in use, etc.), logs <c>mbproxy.admin.bind.failed</c>
///         at Error, disposes the half-built app and continues — the proxy listeners
///         are unaffected.</item>
///   <item>If <c>AdminPort</c> changes via hot-reload, the current app is stopped and a
///         new one is started on the new port. Other config changes are ignored here.
///         A re-bind that is still queued when the host stops is abandoned.</item>
///   <item><see cref="StopAsync"/> shuts down the current Kestrel app with a 2 s deadline.</item>
/// </list>
///
/// <para>Routes: exactly two — <c>GET /</c> (HTML) and <c>GET /status.json</c> (JSON).</para>
/// </summary>
internal sealed partial class AdminEndpointHost : IHostedService, IAsyncDisposable
{
    private readonly IOptionsMonitor<MbproxyOptions> _optionsMonitor;
    private readonly StatusSnapshotBuilder _builder;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<AdminEndpointHost> _logger;

    // The currently-running Kestrel app; null when stopped or when bind failed.
    private WebApplication? _app;

    // Protects concurrent Start/Stop calls (hot-reload + StopAsync racing).
    private readonly SemaphoreSlim _lock = new(1, 1);

    // Current configured port — used to detect changes on hot-reload.
    private int _currentPort;

    // Subscription token for IOptionsMonitor.OnChange.
    private IDisposable? _optionsChangeRegistration;

    // Set once StopAsync/DisposeAsync has begun. A hot-reload re-bind that was queued
    // before shutdown checks this so it cannot resurrect the endpoint afterwards.
    private volatile bool _stopped;

    public AdminEndpointHost(
        IOptionsMonitor<MbproxyOptions> optionsMonitor,
        StatusSnapshotBuilder builder,
        ILoggerFactory loggerFactory)
    {
        _optionsMonitor = optionsMonitor;
        _builder = builder;
        _loggerFactory = loggerFactory;
        _logger = loggerFactory.CreateLogger<AdminEndpointHost>();
    }

    /// <summary>
    /// Starts the admin endpoint on the currently configured port and subscribes to
    /// option changes so that an <c>AdminPort</c> change triggers a re-bind.
    /// </summary>
    /// <param name="cancellationToken">Flowed into the Kestrel start; cancellation propagates.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        _stopped = false;
        _currentPort = _optionsMonitor.CurrentValue.AdminPort;
        await StartAppAsync(_currentPort, cancellationToken).ConfigureAwait(false);

        // Subscribe to config changes: if AdminPort changes, re-bind.
        _optionsChangeRegistration = _optionsMonitor.OnChange(opts =>
        {
            int newPort = opts.AdminPort;
            if (newPort == _currentPort) return; // Only care about AdminPort changes.

            // Fire-and-forget: re-bind is async; we can't await in OnChange.
            _ = Task.Run(() => RebindAsync(newPort));
        });
    }

    /// <summary>
    /// Unsubscribes from option changes and stops the running app (if any) under the lock.
    /// </summary>
    /// <param name="cancellationToken">Bounds the wait for the internal lock.</param>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        // Flag first, so a re-bind already waiting on the lock sees it even if our own
        // wait below is cancelled.
        _stopped = true;
        _optionsChangeRegistration?.Dispose();
        _optionsChangeRegistration = null;

        await _lock.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            await StopCurrentAppAsync().ConfigureAwait(false);
        }
        finally
        {
            _lock.Release();
        }
    }

    // ── Internal helpers ─────────────────────────────────────────────────────

    /// <summary>
    /// Hot-reload path: swaps the running app for one bound to <paramref name="newPort"/>.
    /// Does nothing when the host has been stopped/disposed or the port is already current.
    /// </summary>
    private async Task RebindAsync(int newPort)
    {
        try
        {
            await _lock.WaitAsync().ConfigureAwait(false);
        }
        catch (ObjectDisposedException)
        {
            return; // Host was disposed before this queued re-bind got to run.
        }

        try
        {
            if (_stopped || newPort == _currentPort) return; // double-check under lock

            await StopCurrentAppAsync().ConfigureAwait(false);
            _currentPort = newPort;
            await StartAppAsync(newPort, CancellationToken.None).ConfigureAwait(false);

            // DisposeAsync does not take the lock; if it ran while we were re-binding,
            // tear the new app down again instead of leaving it orphaned.
            if (_stopped)
            {
                await StopCurrentAppAsync().ConfigureAwait(false);
            }
        }
        finally
        {
            try
            {
                _lock.Release();
            }
            catch (ObjectDisposedException)
            {
                // Host was disposed while we held the lock; nothing left to release.
            }
        }
    }

    /// <summary>
    /// Builds and starts a Kestrel <see cref="WebApplication"/> on <paramref name="port"/>.
    /// On bind failure, logs the error, disposes the unstarted app and sets
    /// <c>_app = null</c> — does NOT throw. Cancellation is rethrown after cleanup.
    /// Caller must hold <c>_lock</c> or be in a single-threaded context (StartAsync).
    /// </summary>
    private async Task StartAppAsync(int port, CancellationToken ct)
    {
        WebApplication? app = null;
        try
        {
            // Use CreateSlimBuilder with explicit args (empty) to avoid inheriting
            // process-level environment variables like ASPNETCORE_URLS.
            var builder = WebApplication.CreateSlimBuilder(new WebApplicationOptions
            {
                Args = [],
            });

            // Suppress Kestrel/ASP.NET Core built-in logging; forward to the outer host's
            // logger factory so that admin-endpoint errors appear in the proxy's log stream.
            builder.Logging.ClearProviders();
            builder.Logging.AddProvider(new ForwardingLoggerProvider(_loggerFactory));

            // Explicit Kestrel listen — overrides any ASPNETCORE_URLS that leaked in.
            builder.WebHost.UseKestrel(k =>
            {
                k.Listen(System.Net.IPAddress.Any, port);
            });

            app = builder.Build();

            // ── Routes ───────────────────────────────────────────────────────
            app.MapGet("/", (HttpContext ctx) =>
            {
                var snapshot = _builder.Build();
                string html = StatusHtmlRenderer.Render(snapshot);
                return Results.Content(html, "text/html; charset=utf-8");
            });

            app.MapGet("/status.json", (HttpContext ctx) =>
            {
                var snapshot = _builder.Build();
                string json = JsonSerializer.Serialize(snapshot, StatusJsonContext.Default.StatusResponse);
                return Results.Content(json, "application/json");
            });

            await app.StartAsync(ct).ConfigureAwait(false);
            _app = app;
            LogAdminStarted(_logger, port);
        }
        catch (Exception ex)
        {
            _app = null;

            // The app was built but is not serving; release its service provider and
            // Kestrel resources rather than leaking one instance per failed bind.
            if (app is not null)
            {
                try
                {
                    await app.DisposeAsync().ConfigureAwait(false);
                }
                catch
                {
                    // Best-effort cleanup of an app that never started.
                }
            }

            if (ex is OperationCanceledException) throw;

            // Bind failed — log and continue. Proxy listeners are unaffected.
            LogAdminBindFailed(_logger, port, ex.Message);
        }
    }

    /// <summary>
    /// Stops the current <see cref="WebApplication"/> with a 2 s deadline, then disposes it.
    /// </summary>
    private async Task StopCurrentAppAsync()
    {
        if (_app is null) return;

        var app = _app;
        _app = null;
        try
        {
            using var stopCts = new CancellationTokenSource(TimeSpan.FromSeconds(2));
            await app.StopAsync(stopCts.Token).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort.
        }

        await app.DisposeAsync().ConfigureAwait(false);
    }

    // ── IAsyncDisposable ─────────────────────────────────────────────────────

    /// <summary>
    /// Releases the options subscription, any app that is still referenced, and the lock.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        _stopped = true;
        _optionsChangeRegistration?.Dispose();
        _optionsChangeRegistration = null;

        if (_app is { } app)
        {
            _app = null;
            await app.DisposeAsync().ConfigureAwait(false);
        }

        // Disposed last; RebindAsync tolerates the resulting ObjectDisposedException.
        _lock.Dispose();
    }

    // ── Logging ──────────────────────────────────────────────────────────────

    [LoggerMessage(EventId = 70, EventName = "mbproxy.admin.started",
        Level = LogLevel.Information,
        Message = "Admin endpoint started on port {Port}")]
    private static partial void LogAdminStarted(ILogger logger, int port);

    [LoggerMessage(EventId = 71, EventName = "mbproxy.admin.bind.failed",
        Level = LogLevel.Error,
        Message = "Admin endpoint bind failed — admin page will be unavailable: Port={Port} Reason={Reason}")]
    private static partial void LogAdminBindFailed(ILogger logger, int port, string reason);

    // ── Inner logger provider (forwards Kestrel/ASP.NET logs to the proxy's factory) ────

    private sealed class ForwardingLoggerProvider : ILoggerProvider
    {
        private readonly ILoggerFactory _factory;

        public ForwardingLoggerProvider(ILoggerFactory factory) => _factory = factory;

        public ILogger CreateLogger(string categoryName) => _factory.CreateLogger(categoryName);

        // The forwarded factory is owned by the outer host, so there is nothing to release.
        public void Dispose() { }
    }
}
@@ -0,0 +1,24 @@
using System.Reflection;
namespace Mbproxy.Admin;
/// <summary>
/// Captures the executing assembly's <see cref="AssemblyInformationalVersionAttribute"/>
/// value when the accessor is constructed and exposes it as a string. Used for the
/// <c>service.version</c> field on the status page.
///
/// <para>Note: <see cref="Assembly.Location"/> is unreliable under single-file publish
/// (Phase 08), so the attribute is read via
/// <c>Assembly.GetExecutingAssembly().GetCustomAttribute&lt;&gt;()</c>, which works
/// regardless of publish mode.</para>
/// </summary>
internal sealed class AssemblyVersionAccessor
{
    // Reported when the assembly carries no informational-version attribute.
    private const string FallbackVersion = "0.0.0";

    /// <summary>Reads the attribute once and stores the result in <see cref="Version"/>.</summary>
    public AssemblyVersionAccessor()
    {
        AssemblyInformationalVersionAttribute? attribute =
            Assembly.GetExecutingAssembly()
                .GetCustomAttribute<AssemblyInformationalVersionAttribute>();

        Version = attribute?.InformationalVersion ?? FallbackVersion;
    }

    /// <summary>
    /// The cached informational version string, e.g. <c>"1.2.3+gitsha"</c>.
    /// Falls back to <c>"0.0.0"</c> when the attribute is absent (e.g., unit-test host).
    /// </summary>
    public string Version { get; }
}
+106
View File
@@ -0,0 +1,106 @@
using System.Text.Json.Serialization;
namespace Mbproxy.Admin;
// ── Wire DTOs for GET /status.json ───────────────────────────────────────────
// Field names must match design.md "Status page" tables EXACTLY (camelCase via
// JsonKnownNamingPolicy.CamelCase on the source-gen context).
/// <summary>
/// Top-level response envelope for <c>GET /status.json</c>.
/// </summary>
/// <param name="Service">Service-wide identity and config-reload counters.</param>
/// <param name="Listeners">Bound-versus-configured listener totals.</param>
/// <param name="Plcs">One status row per PLC.</param>
public sealed record StatusResponse(
ServiceFields Service,
ListenersAggregate Listeners,
IReadOnlyList<PlcStatus> Plcs);
/// <summary>Service-wide identity and reload counters.</summary>
/// <param name="UptimeSeconds">Service uptime in seconds.</param>
/// <param name="Version">Service version string.</param>
/// <param name="ConfigLastReloadUtc">Time of the most recent config reload; nullable.</param>
/// <param name="ConfigReloadCount">Config reload counter.</param>
/// <param name="ConfigReloadRejectedCount">Rejected config reload counter.</param>
public sealed record ServiceFields(
long UptimeSeconds,
string Version,
DateTimeOffset? ConfigLastReloadUtc,
int ConfigReloadCount,
int ConfigReloadRejectedCount);
/// <summary>Aggregate listener state across all configured PLCs.</summary>
/// <param name="Bound">Number of listeners currently bound.</param>
/// <param name="Configured">Number of listeners configured.</param>
public sealed record ListenersAggregate(int Bound, int Configured);
/// <summary>Per-PLC status row.</summary>
/// <param name="Name">PLC name.</param>
/// <param name="Host">PLC host.</param>
/// <param name="ListenPort">Listen port for this PLC.</param>
/// <param name="Listener">Listener state sub-object.</param>
/// <param name="Clients">Connected-clients sub-object.</param>
/// <param name="Pdus">PDU counters sub-object.</param>
/// <param name="Backend">Backend connect, exception, and multiplexer telemetry.</param>
/// <param name="Bytes">Byte-transfer counters.</param>
public sealed record PlcStatus(
string Name,
string Host,
int ListenPort,
PlcListenerStatus Listener,
PlcClientsStatus Clients,
PlcPdusStatus Pdus,
PlcBackendStatus Backend,
PlcBytesStatus Bytes);
/// <summary>Listener state sub-object.</summary>
/// <param name="State">Listener state as a string (e.g. <c>"bound"</c>, <c>"recovering"</c>, <c>"stopped"</c>).</param>
/// <param name="LastBindError">Most recent bind error text; nullable.</param>
/// <param name="RecoveryAttempts">Recovery attempt counter.</param>
public sealed record PlcListenerStatus(
string State,
string? LastBindError,
int RecoveryAttempts);
/// <summary>Connected-clients sub-object.</summary>
/// <param name="Connected">Number of connected clients.</param>
/// <param name="RemoteEndpoints">One snapshot per connected client.</param>
public sealed record PlcClientsStatus(
int Connected,
IReadOnlyList<ClientSnapshot> RemoteEndpoints);
/// <summary>Per-connection-pair snapshot for the status page.</summary>
/// <param name="Remote">Remote endpoint rendered as text.</param>
/// <param name="ConnectedAtUtc">When the connection was established.</param>
/// <param name="PdusForwarded">PDUs forwarded for this connection.</param>
public sealed record ClientSnapshot(
string Remote,
DateTimeOffset ConnectedAtUtc,
long PdusForwarded);
/// <summary>PDU counters sub-object.</summary>
/// <param name="Forwarded">Total PDUs forwarded.</param>
/// <param name="ByFc">Breakdown by Modbus function code.</param>
/// <param name="RewrittenSlots">Rewritten-slot counter (rendered as "BCD slots" on the HTML page).</param>
/// <param name="PartialBcdWarnings">Partial-BCD warning counter.</param>
public sealed record PlcPdusStatus(
long Forwarded,
FcCounts ByFc,
long RewrittenSlots,
long PartialBcdWarnings);
/// <summary>Per-function-code request counts.</summary>
/// <param name="Fc03">Count for function code 03.</param>
/// <param name="Fc04">Count for function code 04.</param>
/// <param name="Fc06">Count for function code 06.</param>
/// <param name="Fc16">Count for function code 16.</param>
/// <param name="Other">Count for every other function code.</param>
public sealed record FcCounts(
long Fc03,
long Fc04,
long Fc06,
long Fc16,
long Other);
/// <summary>
/// Backend connect, exception, and multiplexer telemetry. Phase 9 added
/// <c>InFlight</c>, <c>MaxInFlight</c>, <c>TxIdWraps</c>, <c>DisconnectCascades</c>, and
/// <c>QueueDepth</c> to surface the live state of the per-PLC TxId-multiplexed connection.
/// </summary>
/// <param name="ConnectsSuccess">Successful backend connect counter.</param>
/// <param name="ConnectsFailed">Failed backend connect counter.</param>
/// <param name="ExceptionsByCode">Modbus exception counts by code.</param>
/// <param name="LastRoundTripMs">Most recent round-trip time in milliseconds.</param>
/// <param name="InFlight">In-flight request count.</param>
/// <param name="MaxInFlight">Maximum in-flight value (presumably a high-water mark — TODO confirm).</param>
/// <param name="TxIdWraps">TxId wrap counter.</param>
/// <param name="DisconnectCascades">Backend disconnect cascade counter.</param>
/// <param name="QueueDepth">Backend queue depth.</param>
public sealed record PlcBackendStatus(
long ConnectsSuccess,
long ConnectsFailed,
ExceptionCounts ExceptionsByCode,
double LastRoundTripMs,
long InFlight,
long MaxInFlight,
long TxIdWraps,
long DisconnectCascades,
long QueueDepth);
/// <summary>Modbus exception counts by code.</summary>
/// <param name="Code01">Count for exception code 01.</param>
/// <param name="Code02">Count for exception code 02.</param>
/// <param name="Code03">Count for exception code 03.</param>
/// <param name="Code04">Count for exception code 04.</param>
// NOTE(review): only codes 01–04 have a field here; any other exception code has no
// slot in this DTO — confirm that omission is intentional.
public sealed record ExceptionCounts(
long Code01,
long Code02,
long Code03,
long Code04);
/// <summary>Byte-transfer counters.</summary>
/// <param name="UpstreamIn">Upstream bytes-in counter.</param>
/// <param name="UpstreamOut">Upstream bytes-out counter.</param>
public sealed record PlcBytesStatus(
long UpstreamIn,
long UpstreamOut);
// ── Source-generation context ─────────────────────────────────────────────────
// TreatWarningsAsErrors is on, so the context must include every reachable type.
/// <summary>
/// System.Text.Json source-generation context rooted at <see cref="StatusResponse"/>.
/// Emits compact (non-indented) JSON with camelCase property names.
/// </summary>
[JsonSerializable(typeof(StatusResponse))]
[JsonSourceGenerationOptions(
WriteIndented = false,
PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)]
internal partial class StatusJsonContext : JsonSerializerContext;
@@ -0,0 +1,189 @@
using System.Text;
namespace Mbproxy.Admin;
/// <summary>
/// Renders a <see cref="StatusResponse"/> as a self-contained HTML page.
///
/// <para>Constraints (from design.md Phase 07):</para>
/// <list type="bullet">
///   <item>No external assets (CSS/JS/fonts/favicons) — firewalled networks only.</item>
///   <item><c>&lt;meta http-equiv="refresh" content="5"&gt;</c> for auto-refresh.</item>
///   <item>Page weight ≤ 50 KB for a 54-PLC fleet.</item>
///   <item>Listener state colour-coded: bound=green, recovering=orange, stopped=grey.</item>
///   <item>Connected clients rendered as compact <c>[remote (n PDUs)]</c> list (not nested table).</item>
/// </list>
///
/// <para>The "Last reload" timestamp and the RTT value are formatted with the invariant
/// culture so the page is identical regardless of the host's regional settings.</para>
/// </summary>
internal static class StatusHtmlRenderer
{
    private const string Css = """
        body{font-family:monospace;font-size:13px;margin:1em}
        h1{font-size:1.1em;margin-bottom:.3em}
        .meta{color:#555;margin-bottom:.8em;font-size:12px}
        table{border-collapse:collapse;width:100%}
        th,td{border:1px solid #ccc;padding:3px 6px;white-space:nowrap}
        th{background:#f0f0f0;text-align:left}
        tr:nth-child(even)td{background:#fafafa}
        .bound{color:green;font-weight:bold}
        .recovering{color:darkorange;font-weight:bold}
        .stopped{color:grey}
        .err{font-size:11px;color:#a00}
        .clients{font-size:11px;color:#333}
        """;

    /// <summary>
    /// Renders the status page as a complete HTML document string.
    /// May allocate; intended for the status-page read path only.
    /// </summary>
    /// <param name="status">Snapshot to render.</param>
    /// <returns>A full <c>&lt;!DOCTYPE html&gt;</c> document.</returns>
    public static string Render(StatusResponse status)
    {
        var sb = new StringBuilder(4096);

        sb.Append("<!DOCTYPE html><html lang=\"en\"><head><meta charset=\"utf-8\">");
        sb.Append("<meta http-equiv=\"refresh\" content=\"5\">");
        sb.Append("<title>mbproxy status</title>");
        sb.Append("<style>").Append(Css).Append("</style>");
        sb.Append("</head><body>");

        AppendHeader(sb, status);

        if (status.Plcs.Count == 0)
        {
            sb.Append("<p><em>No PLCs configured.</em></p>");
        }
        else
        {
            AppendPlcTable(sb, status.Plcs);
        }

        sb.Append("</body></html>");
        return sb.ToString();
    }

    // ── Sections ──────────────────────────────────────────────────────────────

    /// <summary>Writes the title and the one-line service summary.</summary>
    private static void AppendHeader(StringBuilder sb, StatusResponse status)
    {
        ServiceFields service = status.Service;

        sb.Append("<h1>mbproxy status</h1>");
        sb.Append("<div class=\"meta\">");
        sb.Append("Version: ").Append(HtmlEncode(service.Version));
        sb.Append(" &nbsp;|&nbsp; Uptime: ").Append(FormatUptime(service.UptimeSeconds));
        sb.Append(" &nbsp;|&nbsp; Listeners: ")
          .Append(status.Listeners.Bound).Append('/').Append(status.Listeners.Configured)
          .Append(" bound");

        if (service.ConfigLastReloadUtc is { } lastReload)
        {
            // Normalise to UTC before appending the literal "Z", and pin the culture so the
            // calendar and digits do not vary with the host's regional settings.
            string stamp = lastReload.UtcDateTime.ToString(
                "yyyy-MM-dd HH:mm:ss",
                System.Globalization.CultureInfo.InvariantCulture) + "Z";
            sb.Append(" &nbsp;|&nbsp; Last reload: ").Append(HtmlEncode(stamp));
        }

        sb.Append(" &nbsp;|&nbsp; Reloads: ").Append(service.ConfigReloadCount);
        if (service.ConfigReloadRejectedCount > 0)
        {
            sb.Append(" (").Append(service.ConfigReloadRejectedCount).Append(" rejected)");
        }

        sb.Append("</div>");
    }

    /// <summary>Writes the PLC table: header row plus one body row per PLC.</summary>
    private static void AppendPlcTable(StringBuilder sb, IReadOnlyList<PlcStatus> plcs)
    {
        sb.Append("<table>");
        sb.Append("<thead><tr>");
        sb.Append("<th>Name</th><th>Host</th><th>Port</th><th>State</th>");
        sb.Append("<th>Clients</th><th>PDUs fwd</th><th>FC03</th><th>FC04</th>");
        sb.Append("<th>FC06</th><th>FC16</th><th>FC?</th><th>BCD slots</th>");
        sb.Append("<th>Partial BCD</th><th>Ex 01</th><th>Ex 02</th><th>Ex 03</th><th>Ex 04</th>");
        sb.Append("<th>RTT ms</th><th>Bytes in</th><th>Bytes out</th>");
        // Phase 9: multiplexer telemetry columns.
        sb.Append("<th>In-flight</th><th>Max in-flight</th><th>TxId wraps</th>");
        sb.Append("<th>Cascades</th><th>Queue</th>");
        sb.Append("</tr></thead><tbody>");

        foreach (var plc in plcs)
        {
            AppendPlcRow(sb, plc);
        }

        sb.Append("</tbody></table>");
    }

    /// <summary>Writes one table row; cell order must match the header row above.</summary>
    private static void AppendPlcRow(StringBuilder sb, PlcStatus plc)
    {
        sb.Append("<tr>");
        sb.Append("<td>").Append(HtmlEncode(plc.Name)).Append("</td>");
        sb.Append("<td>").Append(HtmlEncode(plc.Host)).Append("</td>");
        sb.Append("<td>").Append(plc.ListenPort).Append("</td>");

        // State cell with colour coding; any unrecognised state is styled as "stopped".
        string stateClass = plc.Listener.State switch
        {
            "bound"      => "bound",
            "recovering" => "recovering",
            _            => "stopped",
        };
        sb.Append("<td><span class=\"").Append(stateClass).Append("\">")
          .Append(HtmlEncode(plc.Listener.State)).Append("</span>");
        if (plc.Listener.State == "recovering" && plc.Listener.LastBindError is { } err)
        {
            sb.Append("<br><span class=\"err\">")
              .Append(HtmlEncode(err))
              .Append(" (attempt ").Append(plc.Listener.RecoveryAttempts).Append(")")
              .Append("</span>");
        }
        sb.Append("</td>");

        // Connected clients: the count, then a comma-separated "remote (pdus)" list.
        sb.Append("<td><span class=\"clients\">");
        sb.Append(plc.Clients.Connected);
        if (plc.Clients.RemoteEndpoints.Count > 0)
        {
            sb.Append("<br>");
            string separator = "";
            foreach (var client in plc.Clients.RemoteEndpoints)
            {
                sb.Append(separator)
                  .Append(HtmlEncode(client.Remote))
                  .Append(" (").Append(client.PdusForwarded).Append(')');
                separator = ", ";
            }
        }
        sb.Append("</span></td>");

        // Counter cells.
        AppendCell(sb, plc.Pdus.Forwarded);
        AppendCell(sb, plc.Pdus.ByFc.Fc03);
        AppendCell(sb, plc.Pdus.ByFc.Fc04);
        AppendCell(sb, plc.Pdus.ByFc.Fc06);
        AppendCell(sb, plc.Pdus.ByFc.Fc16);
        AppendCell(sb, plc.Pdus.ByFc.Other);
        AppendCell(sb, plc.Pdus.RewrittenSlots);
        AppendCell(sb, plc.Pdus.PartialBcdWarnings);
        AppendCell(sb, plc.Backend.ExceptionsByCode.Code01);
        AppendCell(sb, plc.Backend.ExceptionsByCode.Code02);
        AppendCell(sb, plc.Backend.ExceptionsByCode.Code03);
        AppendCell(sb, plc.Backend.ExceptionsByCode.Code04);

        // Invariant culture keeps the decimal separator a '.' on every host.
        sb.Append("<td>")
          .Append(plc.Backend.LastRoundTripMs.ToString("F1", System.Globalization.CultureInfo.InvariantCulture))
          .Append("</td>");

        AppendCell(sb, plc.Bytes.UpstreamIn);
        AppendCell(sb, plc.Bytes.UpstreamOut);

        // Phase 9: multiplexer telemetry cells.
        AppendCell(sb, plc.Backend.InFlight);
        AppendCell(sb, plc.Backend.MaxInFlight);
        AppendCell(sb, plc.Backend.TxIdWraps);
        AppendCell(sb, plc.Backend.DisconnectCascades);
        AppendCell(sb, plc.Backend.QueueDepth);

        sb.Append("</tr>");
    }

    // ── Helpers ───────────────────────────────────────────────────────────────

    /// <summary>Writes a single <c>&lt;td&gt;</c> holding an integer counter.</summary>
    private static void AppendCell(StringBuilder sb, long value)
        => sb.Append("<td>").Append(value).Append("</td>");

    /// <summary>
    /// Formats a second count as <c>"Hh MMm SSs"</c>, <c>"Mm SSs"</c> or <c>"Ss"</c>,
    /// depending on magnitude. Hours are not rolled up into days.
    /// </summary>
    private static string FormatUptime(long seconds)
    {
        var ts = TimeSpan.FromSeconds(seconds);
        if (ts.TotalHours >= 1)
            return $"{(int)ts.TotalHours}h {ts.Minutes:D2}m {ts.Seconds:D2}s";
        if (ts.TotalMinutes >= 1)
            return $"{ts.Minutes}m {ts.Seconds:D2}s";
        return $"{seconds}s";
    }

    /// <summary>
    /// Escapes <c>&amp;</c>, <c>&lt;</c>, <c>&gt;</c> and <c>"</c>. Single quotes are left
    /// alone, which is sufficient here because every attribute this renderer emits is
    /// double-quoted.
    /// </summary>
    private static string HtmlEncode(string s)
    {
        // Fast path: no special chars, so return the input without allocating.
        if (!ContainsHtmlSpecial(s)) return s;

        // '&' must be replaced first so the entities added below are not re-escaped.
        return s
            .Replace("&", "&amp;")
            .Replace("<", "&lt;")
            .Replace(">", "&gt;")
            .Replace("\"", "&quot;");
    }

    /// <summary>Returns true if <paramref name="s"/> contains a character that HtmlEncode escapes.</summary>
    private static bool ContainsHtmlSpecial(string s)
    {
        foreach (char c in s)
        {
            if (c is '&' or '<' or '>' or '"') return true;
        }
        return false;
    }
}
@@ -0,0 +1,157 @@
using Mbproxy.Options;
using Mbproxy.Proxy;
using Mbproxy.Proxy.Multiplexing;
using Mbproxy.Proxy.Supervision;
using Microsoft.Extensions.Options;
namespace Mbproxy.Admin;
/// <summary>
/// Assembles the <see cref="StatusResponse"/> served by <c>GET /</c> and
/// <c>GET /status.json</c> from the live state held by the injected singletons.
///
/// <para>Constructed once via DI. <see cref="Build"/> is the only operation; it holds no
/// locks of its own and simply reads whatever each collaborator reports at call time.</para>
/// </summary>
internal sealed class StatusSnapshotBuilder
{
    private readonly IOptionsMonitor<MbproxyOptions> _options;
    private readonly ServiceCounters _serviceCounters;
    private readonly AssemblyVersionAccessor _version;
    private readonly ProxyWorker _proxyWorker;

    public StatusSnapshotBuilder(
        IOptionsMonitor<MbproxyOptions> options,
        ServiceCounters serviceCounters,
        AssemblyVersionAccessor version,
        ProxyWorker proxyWorker)
    {
        _options = options;
        _serviceCounters = serviceCounters;
        _version = version;
        _proxyWorker = proxyWorker;
    }

    /// <summary>
    /// Builds a point-in-time <see cref="StatusResponse"/>: one row per PLC in the current
    /// options, followed by the service-wide fields and the listener totals.
    /// </summary>
    public StatusResponse Build()
    {
        var opts = _options.CurrentValue;
        long uptimeSeconds = (long)(DateTimeOffset.UtcNow - _serviceCounters.StartedAtUtc).TotalSeconds;
        var supervisors = _proxyWorker.Supervisors;

        var rows = new List<PlcStatus>(opts.Plcs.Count);
        int bound = 0;

        foreach (var plc in opts.Plcs)
        {
            // A PLC with no supervisor entry is reported as stopped with all-zero counters.
            supervisors.TryGetValue(plc.Name, out var supervisor);

            SupervisorSnapshot? snap = supervisor?.Snapshot();
            string state = DescribeState(snap);
            if (snap?.State == SupervisorState.Bound)
            {
                bound++;
            }

            // One ClientSnapshot per active upstream connection.
            var clients = new List<ClientSnapshot>();
            foreach (var pipe in supervisor?.ActiveUpstreams ?? Array.Empty<UpstreamPipe>())
            {
                string remote = pipe.RemoteEp?.ToString() ?? pipe.RemoteEp?.Address.ToString() ?? "?";
                clients.Add(new ClientSnapshot(remote, pipe.ConnectedAtUtc, pipe.PdusForwardedCount));
            }

            var counters = supervisor?.CurrentCounters.Snapshot() ?? ZeroCounters();

            var listener = new PlcListenerStatus(
                State: state,
                LastBindError: snap?.LastBindError,
                RecoveryAttempts: snap?.RecoveryAttempts ?? 0);

            var clientStatus = new PlcClientsStatus(
                Connected: clients.Count,
                RemoteEndpoints: clients);

            var pdus = new PlcPdusStatus(
                Forwarded: counters.PdusForwarded,
                ByFc: new FcCounts(
                    counters.Fc03,
                    counters.Fc04,
                    counters.Fc06,
                    counters.Fc16,
                    counters.FcOther),
                RewrittenSlots: counters.RewrittenSlots,
                PartialBcdWarnings: counters.PartialBcdWarnings);

            // Phase 08: ConnectsSuccess / ConnectsFailed come from the same counter snapshot.
            var backend = new PlcBackendStatus(
                ConnectsSuccess: counters.ConnectsSuccess,
                ConnectsFailed: counters.ConnectsFailed,
                ExceptionsByCode: new ExceptionCounts(
                    counters.BackendException01,
                    counters.BackendException02,
                    counters.BackendException03,
                    counters.BackendException04),
                LastRoundTripMs: counters.LastRoundTripMs,
                InFlight: counters.InFlightCount,
                MaxInFlight: counters.MaxInFlight,
                TxIdWraps: counters.TxIdWraps,
                DisconnectCascades: counters.BackendDisconnectCascades,
                QueueDepth: counters.BackendQueueDepth);

            var bytes = new PlcBytesStatus(
                UpstreamIn: counters.BytesUpstreamIn,
                UpstreamOut: counters.BytesUpstreamOut);

            rows.Add(new PlcStatus(
                Name: plc.Name,
                Host: plc.Host,
                ListenPort: plc.ListenPort,
                Listener: listener,
                Clients: clientStatus,
                Pdus: pdus,
                Backend: backend,
                Bytes: bytes));
        }

        var service = new ServiceFields(
            UptimeSeconds: uptimeSeconds,
            Version: _version.Version,
            ConfigLastReloadUtc: _serviceCounters.LastReloadUtc,
            ConfigReloadCount: _serviceCounters.ReloadAppliedCount,
            ConfigReloadRejectedCount: _serviceCounters.ReloadRejectedCount);

        var listeners = new ListenersAggregate(
            Bound: bound,
            Configured: opts.Plcs.Count);

        return new StatusResponse(service, listeners, rows);
    }

    /// <summary>
    /// Maps a supervisor snapshot to the wire state string. A missing snapshot and any
    /// state other than Bound/Recovering are both reported as <c>"stopped"</c>.
    /// </summary>
    private static string DescribeState(SupervisorSnapshot? snap)
    {
        return snap?.State switch
        {
            SupervisorState.Bound      => "bound",
            SupervisorState.Recovering => "recovering",
            _                          => "stopped",
        };
    }

    /// <summary>All-zero counter snapshot used when a PLC has no supervisor.</summary>
    private static CounterSnapshot ZeroCounters()
    {
        return new CounterSnapshot(
            PdusForwarded: 0,
            Fc03: 0,
            Fc04: 0,
            Fc06: 0,
            Fc16: 0,
            FcOther: 0,
            RewrittenSlots: 0,
            PartialBcdWarnings: 0,
            InvalidBcdWarnings: 0,
            BackendException01: 0,
            BackendException02: 0,
            BackendException03: 0,
            BackendException04: 0,
            BackendExceptionOther: 0,
            BytesUpstreamIn: 0,
            BytesUpstreamOut: 0,
            RecoveryAttempts: 0,
            LastBindError: null,
            LastRoundTripMs: 0.0,
            ConnectsSuccess: 0,
            ConnectsFailed: 0,
            InFlightCount: 0,
            MaxInFlight: 0,
            TxIdWraps: 0,
            BackendDisconnectCascades: 0,
            BackendQueueDepth: 0);
    }
}
+111
View File
@@ -0,0 +1,111 @@
namespace Mbproxy.Bcd;
/// <summary>
/// Pure codec for DirectLOGIC BCD register encoding/decoding.
///
/// 16-bit BCD: one register holds 4 BCD digits (0–9999).
///   Wire value 0x1234 decodes to decimal 1234.
///
/// 32-bit BCD (CDAB word order, low-word-first):
///   Register at Address   = low  4 BCD digits (least-significant).
///   Register at Address+1 = high 4 BCD digits (most-significant).
///   Decoded decimal = Decode16(high) * 10_000 + Decode16(low).
///   Example: 12_345_678 → low=0x5678, high=0x1234.
///
/// Bad-nibble policy: Decode16/Decode32 throw <see cref="FormatException"/>
/// (not a sentinel). The Phase 04 rewrite pipeline catches and surfaces the
/// exception as an mbproxy.rewrite.invalid_bcd warning event.
/// </summary>
internal static class BcdCodec
{
    private const int Max16 = 9_999;
    private const int Max32 = 99_999_999;

    // ── Encode ──────────────────────────────────────────────────────────────

    /// <summary>
    /// Encodes a non-negative integer in [0, 9999] to a 16-bit BCD register.
    /// E.g. 1234 → 0x1234.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">value &lt; 0 or value &gt; 9999.</exception>
    public static ushort Encode16(int value)
    {
        // The unsigned cast folds the "negative" and "too large" checks into one compare.
        if ((uint)value > Max16)
        {
            throw new ArgumentOutOfRangeException(nameof(value),
                value, $"BCD-16 value must be in [0, {Max16}]; got {value}.");
        }

        // Peel decimal digits off the low end; each one lands in the next-higher nibble.
        int packed = 0;
        int remaining = value;
        for (int shift = 0; shift < 16; shift += 4)
        {
            packed |= (remaining % 10) << shift;
            remaining /= 10;
        }

        return (ushort)packed;
    }

    /// <summary>
    /// Encodes a non-negative integer in [0, 99_999_999] to a CDAB BCD register pair.
    /// Returns (low, high) where low holds the 4 least-significant BCD digits and
    /// high holds the 4 most-significant BCD digits.
    /// E.g. 12_345_678 → (low: 0x5678, high: 0x1234).
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">value &lt; 0 or value &gt; 99_999_999.</exception>
    public static (ushort low, ushort high) Encode32(int value)
    {
        if ((uint)value > Max32)
        {
            throw new ArgumentOutOfRangeException(nameof(value),
                value, $"BCD-32 value must be in [0, {Max32}]; got {value}.");
        }

        // Quotient = high four decimal digits, remainder = low four.
        int upperDigits = Math.DivRem(value, 10_000, out int lowerDigits);

        ushort lowWord = Encode16(lowerDigits);
        ushort highWord = Encode16(upperDigits);
        return (lowWord, highWord);
    }

    // ── Decode ──────────────────────────────────────────────────────────────

    /// <summary>
    /// Decodes a 16-bit BCD register to a non-negative integer.
    /// E.g. 0x1234 → 1234.
    /// </summary>
    /// <exception cref="FormatException">Any nibble is &gt;= 0xA (not a valid BCD digit).</exception>
    public static int Decode16(ushort raw)
    {
        // Validate the whole word up front so the message always carries the raw value.
        if (HasBadNibble(raw))
        {
            throw new FormatException(
                $"Register value 0x{raw:X4} is not valid BCD: one or more nibbles are >= 0xA.");
        }

        // Walk nibbles most-significant first, accumulating base-10.
        int result = 0;
        for (int shift = 12; shift >= 0; shift -= 4)
        {
            result = result * 10 + ((raw >> shift) & 0xF);
        }

        return result;
    }

    /// <summary>
    /// Decodes a CDAB BCD register pair to a non-negative integer.
    /// <paramref name="low"/> = low 4 BCD digits; <paramref name="high"/> = high 4 BCD digits.
    /// E.g. (low: 0x5678, high: 0x1234) → 12_345_678.
    /// </summary>
    /// <exception cref="FormatException">Either word has a bad nibble.</exception>
    public static int Decode32(ushort low, ushort high)
    {
        // The high word is decoded first, so when both words are invalid the exception
        // reports the high word and the low word is never examined.
        int upper = Decode16(high);
        int lower = Decode16(low);
        return upper * 10_000 + lower;
    }

    // ── Private helpers ─────────────────────────────────────────────────────

    /// <summary>Returns true if any nibble in <paramref name="raw"/> is >= 0xA.</summary>
    private static bool HasBadNibble(ushort raw)
    {
        for (int shift = 0; shift < 16; shift += 4)
        {
            if (((raw >> shift) & 0xF) > 9)
            {
                return true;
            }
        }

        return false;
    }
}
+36
View File
@@ -0,0 +1,36 @@
namespace Mbproxy.Bcd;
/// <summary>
/// Immutable description of a single BCD-encoded V-memory tag as seen on the Modbus wire.
/// Width is 16 (one register) or 32 (two registers, CDAB low-word-first).
/// </summary>
/// <param name="Address">Register address of the tag (the low word for a 32-bit tag).</param>
/// <param name="Width">Tag width in bits. The positional constructor does not validate
/// this; use <see cref="Create"/> to enforce 16 or 32.</param>
public sealed record BcdTag(ushort Address, byte Width)
{
    /// <summary>
    /// Creates a <see cref="BcdTag"/> and validates that Width is 16 or 32.
    /// </summary>
    /// <exception cref="ArgumentException">Width is not 16 or 32.</exception>
    public static BcdTag Create(ushort address, byte width)
    {
        if (width != 16 && width != 32)
        {
            throw new ArgumentException(
                $"BCD tag Width must be 16 or 32; got {width} at address {address}.",
                nameof(width));
        }

        return new BcdTag(address, width);
    }

    /// <summary>True when this tag occupies two registers (32-bit BCD).</summary>
    public bool IsThirtyTwoBit => Width == 32;

    /// <summary>
    /// The address of the high-word register for a 32-bit tag (Address + 1).
    /// Only valid when <see cref="IsThirtyTwoBit"/> is true.
    /// </summary>
    /// <exception cref="InvalidOperationException">Tag is 16-bit.</exception>
    // NOTE(review): for Address == 65535 the cast wraps to 0 (assuming the project compiles
    // unchecked, the C# default) — confirm whether such a tag should be rejected upstream.
    public ushort HighRegister =>
        IsThirtyTwoBit
            ? (ushort)(Address + 1)
            : throw new InvalidOperationException(
                $"HighRegister is only defined for 32-bit BCD tags (Address {Address} is {Width}-bit).");

    /// <summary>
    /// Returns the record's display string.
    /// </summary>
    /// <remarks>
    /// The compiler-synthesized record <c>ToString</c> prints every public property,
    /// including <see cref="HighRegister"/>, whose getter throws for 16-bit tags. That made
    /// formatting a 16-bit tag (in a log message, a test failure, or as part of another
    /// record) throw <see cref="InvalidOperationException"/>. This override keeps the same
    /// layout but only reads <see cref="HighRegister"/> when it is defined.
    /// </remarks>
    public override string ToString()
    {
        string text = $"BcdTag {{ Address = {Address}, Width = {Width}, IsThirtyTwoBit = {IsThirtyTwoBit}";
        if (IsThirtyTwoBit)
        {
            text += $", HighRegister = {HighRegister}";
        }

        return text + " }";
    }
}
+112
View File
@@ -0,0 +1,112 @@
using System.Collections.Frozen;
namespace Mbproxy.Bcd;
/// <summary>
/// A hit returned by <see cref="BcdTagMap.TryGetForRange"/>.
/// <see cref="OffsetWords"/> is the zero-based word offset of the tag's low register
/// within the requested read range [startAddress, startAddress+qty).
/// </summary>
/// <param name="OffsetWords">Tag address minus the range's start address, in registers.
/// Can be negative for a 32-bit tag whose low word precedes the range.</param>
/// <param name="Tag">The BCD tag that intersects the range.</param>
public readonly record struct RangeHit(int OffsetWords, BcdTag Tag);
/// <summary>
/// Immutable, address-keyed lookup of the BCD tags resolved for a single PLC.
/// Lookups that find nothing return without allocating.
/// </summary>
public sealed class BcdTagMap
{
    // ── Empty singleton ──────────────────────────────────────────────────────

    /// <summary>An empty map with no tags. Returned when no tags are configured.</summary>
    public static BcdTagMap Empty { get; } = new(FrozenDictionary<ushort, BcdTag>.Empty);

    // Shared empty result handed out by TryGetForRange whenever nothing matches.
    private static readonly IReadOnlyList<RangeHit> s_emptyHits =
        Array.Empty<RangeHit>();

    // ── State ────────────────────────────────────────────────────────────────

    // Keyed by register address; frozen at construction and never mutated afterwards.
    private readonly FrozenDictionary<ushort, BcdTag> _map;

    internal BcdTagMap(FrozenDictionary<ushort, BcdTag> map)
    {
        _map = map;
    }

    // ── Public API ───────────────────────────────────────────────────────────

    /// <summary>Number of BCD tags in this map.</summary>
    public int Count => _map.Count;

    /// <summary>All tags in the map (for telemetry / status page).</summary>
    public IEnumerable<BcdTag> All => _map.Values;

    /// <summary>
    /// Point lookup by Modbus register address.
    /// Allocation-free regardless of hit or miss.
    /// </summary>
    /// <param name="address">Register address to look up.</param>
    /// <param name="tag">The tag stored under <paramref name="address"/>; only meaningful
    /// when the method returns true.</param>
    /// <returns>True when a tag is registered at exactly <paramref name="address"/>.</returns>
    public bool TryGet(ushort address, out BcdTag tag)
    {
        return _map.TryGetValue(address, out tag!);
    }

    /// <summary>
    /// Returns every BCD tag whose register footprint intersects
    /// [<paramref name="startAddress"/>, <paramref name="startAddress"/> + <paramref name="qty"/>).
    ///
    /// A 16-bit tag at address A occupies [A, A+1); a 32-bit tag occupies [A, A+2).
    /// A tag intersects when its footprint starts before the range ends and ends after
    /// the range starts.
    ///
    /// <see cref="RangeHit.OffsetWords"/> is the zero-based word position of the tag's
    /// low register relative to <paramref name="startAddress"/> (may be negative for a
    /// 32-bit tag whose low word starts before the range, but whose high word is in range).
    ///
    /// Hits are returned sorted ascending by <see cref="RangeHit.OffsetWords"/>.
    /// On the no-hit path this method does not allocate.
    /// </summary>
    /// <param name="startAddress">First register of the range.</param>
    /// <param name="qty">Number of registers in the range; 0 never matches.</param>
    /// <param name="hits">The matching tags, or a shared empty list when none match.</param>
    /// <returns>True when at least one tag intersects the range.</returns>
    public bool TryGetForRange(ushort startAddress, ushort qty,
        out IReadOnlyList<RangeHit> hits)
    {
        hits = s_emptyHits;
        if (_map.Count == 0 || qty == 0)
        {
            return false;
        }

        // Exclusive upper bound, computed in int so start + qty cannot wrap.
        int rangeEnd = startAddress + qty;

        // Created lazily so a scan with no matches stays allocation-free.
        List<RangeHit>? matches = null;

        foreach (BcdTag candidate in _map.Values)
        {
            int low = candidate.Address;
            int footprint = candidate.IsThirtyTwoBit ? 2 : 1;

            // Skip tags lying entirely at/after the range end or entirely before its start.
            if (low >= rangeEnd || low + footprint <= startAddress)
            {
                continue;
            }

            matches ??= new List<RangeHit>(4);
            matches.Add(new RangeHit(low - startAddress, candidate));
        }

        if (matches is null)
        {
            return false;
        }

        // Sort ascending by offset so Phase 04 can iterate in wire order.
        matches.Sort(static (left, right) => left.OffsetWords.CompareTo(right.OffsetWords));
        hits = matches;
        return true;
    }
}
+117
View File
@@ -0,0 +1,117 @@
using System.Collections.Frozen;
using Mbproxy.Options;
namespace Mbproxy.Bcd;
/// <summary>
/// Builds an immutable <see cref="BcdTagMap"/> from global options and optional per-PLC overrides.
///
/// Resolution algorithm (per design.md):
///   1. Start with the global tag list.
///   2. Remove any address present in perPlc.Remove.
///   3. Merge in perPlc.Add entries — if an address exists in the working set the Add entry wins
///      (this is how a per-PLC width override is expressed).
///
/// Validation:
///   - The same address listed more than once inside the global list, or more than once inside
///     one per-PLC Add list → BcdError(DuplicateAddress). An Add entry that replaces a global
///     entry is an override, not a duplicate.
///   - 32-bit high register (Address+1) collides with any other entry → BcdError(OverlappingHighRegister).
///   - Width not 16 or 32 → BcdError(InvalidWidth).
///   - Remove address not found in global → BcdWarning (not an error).
/// </summary>
public static class BcdTagMapBuilder
{
    /// <summary>
    /// Resolves the effective BCD tag list for one PLC and validates it.
    /// </summary>
    /// <param name="global">The global BCD tag list from <c>appsettings.json</c>.</param>
    /// <param name="perPlc">Optional per-PLC overrides (Add + Remove). May be null.</param>
    /// <returns>
    /// A <see cref="ValidationResult"/> carrying the resolved map plus every error and warning
    /// found. Entries with an invalid width are left out of the map; entries involved in a
    /// duplicate or high-register overlap stay in it (last definition wins for duplicates).
    /// Callers should treat non-empty <see cref="ValidationResult.Errors"/> as a fatal
    /// configuration problem.
    /// </returns>
    public static ValidationResult Build(BcdTagListOptions global, PlcBcdOverrides? perPlc)
    {
        var errors = new List<BcdError>();
        var warnings = new List<BcdWarning>();

        // ── Step 1: collect the working set keyed by address ─────────────────
        // The dictionary collapses repeated addresses, so duplicates have to be detected
        // here, while the raw list is still visible. The last definition is kept in the
        // working set (same resolution as before); the error tells the operator to fix it.
        var working = new Dictionary<ushort, BcdTagOptions>(global.Global.Count);
        foreach (var tag in global.Global)
        {
            if (!working.TryAdd(tag.Address, tag))
            {
                errors.Add(DuplicateAddressError(tag.Address, "the global tag list"));
                working[tag.Address] = tag;
            }
        }

        // ── Step 2: apply Remove ─────────────────────────────────────────────
        if (perPlc?.Remove is { } removeList)
        {
            foreach (var addr in removeList)
            {
                if (!working.Remove(addr))
                {
                    warnings.Add(new BcdWarning(
                        $"Remove entry for address {addr} does not match any global tag; " +
                        "the entry is probably stale.", addr));
                }
            }
        }

        // ── Step 3: apply Add (override wins) ────────────────────────────────
        // Replacing a global entry is the intended override mechanism; only a repeat
        // *within* the Add list itself is ambiguous and therefore an error.
        if (perPlc?.Add is { } addList)
        {
            var addedHere = new HashSet<ushort>();
            foreach (var tag in addList)
            {
                if (!addedHere.Add(tag.Address))
                {
                    errors.Add(DuplicateAddressError(tag.Address, "the per-PLC Add list"));
                }

                working[tag.Address] = tag;
            }
        }

        // ── Step 4: validate the resolved list ───────────────────────────────
        var validated = new Dictionary<ushort, BcdTag>(working.Count);
        foreach (var (addr, opt) in working)
        {
            // Width check (defensive — IValidateOptions should have caught this already).
            if (opt.Width != 16 && opt.Width != 32)
            {
                errors.Add(new BcdError(BcdValidationError.InvalidWidth,
                    $"Address {addr}: Width {opt.Width} is not 16 or 32.", addr));
                continue;
            }

            validated[addr] = BcdTag.Create(addr, opt.Width);
        }

        // High-register collision check (only meaningful for 32-bit entries).
        foreach (var tag in validated.Values)
        {
            if (!tag.IsThirtyTwoBit)
            {
                continue;
            }

            ushort highReg = tag.HighRegister;
            if (validated.TryGetValue(highReg, out var collision))
            {
                errors.Add(new BcdError(BcdValidationError.OverlappingHighRegister,
                    $"32-bit BCD tag at address {tag.Address} has its high register " +
                    $"({highReg}) colliding with the entry at address {collision.Address}.",
                    tag.Address));
            }
        }

        // ── Step 5: build the frozen map ──────────────────────────────────────
        // Entries implicated in an OverlappingHighRegister error are still included
        // in the map so that the caller can see all context; the error list tells them
        // the config is invalid and must be corrected before the service is safe to run.
        // (If callers want to exclude bad entries they should check Errors.Count > 0
        // and refuse to start the listener for that PLC.)
        var frozen = validated.ToFrozenDictionary();
        var map = frozen.Count > 0 ? new BcdTagMap(frozen) : BcdTagMap.Empty;
        return new ValidationResult(map, errors, warnings);
    }

    /// <summary>Creates the DuplicateAddress error for an address repeated within one input list.</summary>
    private static BcdError DuplicateAddressError(ushort address, string listName) =>
        new(BcdValidationError.DuplicateAddress,
            $"Address {address} appears more than once in {listName}.", address);
}
@@ -0,0 +1,32 @@
namespace Mbproxy.Bcd;
/// <summary>
/// Discriminates the class of validation failure in a resolved BCD tag list.
/// Carried as <see cref="BcdError.Kind"/>.
/// </summary>
public enum BcdValidationError
{
/// <summary>Two or more entries share the same Modbus register address.</summary>
DuplicateAddress,
/// <summary>
/// A 32-bit entry's high register (Address+1) collides with another entry's address.
/// The error is attributed to the 32-bit entry.
/// </summary>
OverlappingHighRegister,
/// <summary>An entry has a Width that is not 16 or 32.</summary>
InvalidWidth,
}
/// <summary>A hard validation failure that prevents the map from being used.</summary>
/// <param name="Kind">Which validation rule was violated.</param>
/// <param name="Message">Human-readable description of the failure.</param>
/// <param name="Address">Register address the failure is attributed to, when there is one.</param>
public sealed record BcdError(BcdValidationError Kind, string Message, ushort? Address);
/// <summary>A non-fatal advisory that rides along with the map.</summary>
/// <param name="Message">Human-readable description of the advisory.</param>
/// <param name="Address">Register address the advisory refers to, when there is one.</param>
public sealed record BcdWarning(string Message, ushort? Address);
/// <summary>
/// Result of a <see cref="BcdTagMapBuilder.Build"/> call.
/// When <see cref="Errors"/> is non-empty the map must not be trusted: entries rejected
/// for an invalid width are omitted from it, while entries implicated in a high-register
/// overlap are still present. Callers should treat any error as a fatal configuration
/// problem at startup.
/// </summary>
/// <param name="Map">The resolved tag map (<see cref="BcdTagMap.Empty"/> when no entry survived).</param>
/// <param name="Errors">Hard failures; empty when the configuration is valid.</param>
/// <param name="Warnings">Non-fatal advisories, e.g. a Remove entry that matched no global tag.</param>
public sealed record ValidationResult(
BcdTagMap Map,
IReadOnlyList<BcdError> Errors,
IReadOnlyList<BcdWarning> Warnings);
@@ -0,0 +1,463 @@
using System.Threading.Channels;
using Mbproxy.Bcd;
using Mbproxy.Options;
using Mbproxy.Proxy;
using Mbproxy.Proxy.Multiplexing;
using Mbproxy.Proxy.Supervision;
using Microsoft.Extensions.Options;
using PolicyFactory = Mbproxy.Proxy.Supervision.PolicyFactory;
namespace Mbproxy.Configuration;
/// <summary>
/// Subscribes to <see cref="IOptionsMonitor{TOptions}.OnChange"/> and reconciles the
/// running set of <see cref="PlcListenerSupervisor"/> instances against the new
/// <see cref="MbproxyOptions"/> snapshot.
///
/// <para><b>Threading model</b>:
/// <list type="bullet">
/// <item>The <c>OnChange</c> callback is not allowed to block. It enqueues a
/// sentinel to a <see cref="Channel{T}"/> and returns immediately.</item>
/// <item>A dedicated background loop drains the channel, debounces rapid saves
/// (250 ms quiescent window), and then calls <see cref="ApplyAsync"/>.</item>
/// <item><see cref="ApplyAsync"/> is guarded by a <see cref="SemaphoreSlim(1,1)"/>
/// so concurrent reloads are serialised — the second change waits until the
/// first apply finishes. The last change wins.</item>
/// </list>
/// </para>
///
/// <para><b>Debounce rationale</b>: text editors on Windows commonly write via a
/// rename-and-replace pattern, which triggers 2–3 <c>FileSystemWatcher</c> events for
/// a single save. Without debouncing, the reconciler would run 2–3 times per save and
/// could see intermediate half-written files. 250 ms covers every editor pattern observed
/// in practice while adding imperceptible latency for operators.</para>
///
/// <para><b>Partial-apply on error</b>: if one step of the apply sequence throws, the
/// exception is logged at Error and execution continues with the remaining steps. The
/// validator should have caught most preconditions; a runtime exception here is a true
/// bug worth surfacing. The host stays up regardless.</para>
/// </summary>
internal sealed partial class ConfigReconciler : IDisposable
{
// Dependencies
private readonly IOptionsMonitor<MbproxyOptions> _monitor;
private readonly ILoggerFactory _loggerFactory;
private readonly ILogger<ConfigReconciler> _logger;
private readonly ServiceCounters _serviceCounters;
// The supervisor dictionary is set by ProxyWorker (via Attach) after initial startup.
// All mutations made by this class happen inside ApplyAsync, which is serialised by
// the semaphore. Both fields stay null until Attach is called; applies are skipped
// while either is null.
private Dictionary<string, PlcListenerSupervisor>? _supervisors;
private MbproxyOptions? _currentOptions;
// ── Debounce + serialisation machinery ───────────────────────────────────────────────
// The channel carries a bool sentinel meaning "something changed — please check";
// the value itself is ignored. Capacity 1 with DropOldest means a write never blocks
// and any number of pending notifications collapse into one.
// The background loop drains it with a 250 ms quiescent window.
private readonly Channel<bool> _changeSignal =
Channel.CreateBounded<bool>(new BoundedChannelOptions(1)
{
FullMode = BoundedChannelFullMode.DropOldest,
});
// Serialises concurrent ApplyAsync invocations.
// A slow apply will queue the next one, and the last enqueued state wins.
private readonly SemaphoreSlim _applySemaphore = new(1, 1);
// Cancelled by Dispose to stop the debounce loop.
private readonly CancellationTokenSource _disposalCts = new();
// Handle returned by IOptionsMonitor.OnChange; disposing it unsubscribes.
private readonly IDisposable? _changeRegistration;
// The running DebounceLoopAsync task, started in the constructor.
private readonly Task _debounceLoop;
// Debounce window: how long to wait for additional OnChange events before applying.
private static readonly TimeSpan DebounceWindow = TimeSpan.FromMilliseconds(250);
// ── Construction ─────────────────────────────────────────────────────────────────────
/// <summary>
/// Stores the dependencies, subscribes to options-change notifications and starts the
/// background debounce loop. Changes that arrive before <see cref="Attach"/> is called
/// are picked up by the loop but skipped by the apply step.
/// </summary>
/// <param name="monitor">Source of the live <see cref="MbproxyOptions"/> value and change notifications.</param>
/// <param name="loggerFactory">Creates this class's logger and the loggers handed to rebuilt supervisors.</param>
/// <param name="serviceCounters">Receives reload applied / rejected counts.</param>
public ConfigReconciler(
    IOptionsMonitor<MbproxyOptions> monitor,
    ILoggerFactory loggerFactory,
    ServiceCounters serviceCounters)
{
    _monitor = monitor;
    _loggerFactory = loggerFactory;
    _serviceCounters = serviceCounters;
    _logger = loggerFactory.CreateLogger<ConfigReconciler>();

    // The change callback must never block: it only drops a sentinel into the
    // capacity-1 DropOldest channel. A full channel just means a wake-up is already
    // pending, and the loop reads the latest options value when it runs anyway.
    _changeRegistration = _monitor.OnChange(
        (_, _) => _changeSignal.Writer.TryWrite(true));

    // Background loop that debounces the signals and applies the latest options.
    _debounceLoop = Task.Run(() => DebounceLoopAsync(_disposalCts.Token));
}
// ── Wire-up called by ProxyWorker after initial startup ──────────────────────────────
/// <summary>
/// Hands the reconciler the live supervisor dictionary and the options snapshot those
/// supervisors were built from. Intended to be called once by
/// <see cref="Proxy.ProxyWorker"/> right after the supervisors are created.
/// Until both values are set, the apply step returns without doing anything, so a
/// change notification that races with startup is ignored rather than misapplied.
/// </summary>
/// <param name="supervisors">Running supervisors keyed by PLC name; mutated in place by later reloads.</param>
/// <param name="initialOptions">The options the running supervisors correspond to (the diff baseline).</param>
public void Attach(
    Dictionary<string, PlcListenerSupervisor> supervisors,
    MbproxyOptions initialOptions)
{
    (_supervisors, _currentOptions) = (supervisors, initialOptions);
}
// ── ApplyAsync (exposed for tests) ───────────────────────────────────────────────────
/// <summary>
/// Validates <paramref name="next"/>, works out the <see cref="ReloadPlan"/> against the
/// currently applied options and executes it on the running supervisors. Calls are
/// serialised through <c>_applySemaphore</c>, so two reloads never interleave.
///
/// <para>Returns <c>true</c> when the reload was accepted and applied (even if individual
/// steps failed and were logged). Returns <c>false</c> when validation failed or
/// <see cref="Attach"/> has not been called yet — no state was mutated.</para>
/// </summary>
/// <param name="next">The options snapshot to move to.</param>
/// <param name="ct">Cancels both the wait for the semaphore and the apply itself.</param>
public async Task<bool> ApplyAsync(MbproxyOptions next, CancellationToken ct)
{
    await _applySemaphore.WaitAsync(ct).ConfigureAwait(false);

    bool applied;
    try
    {
        applied = await ApplyUnderLockAsync(next, ct).ConfigureAwait(false);
    }
    finally
    {
        // Always hand the slot back, including when the apply throws.
        _applySemaphore.Release();
    }

    return applied;
}
// ── Debounce loop ─────────────────────────────────────────────────────────────────────
/// <summary>
/// Background loop: waits for a change signal, keeps absorbing further signals until
/// none arrives for <c>DebounceWindow</c>, then applies <c>_monitor.CurrentValue</c>.
/// Exceptions from an apply are logged and the loop keeps running; cancellation of
/// <paramref name="ct"/> ends the loop quietly.
/// </summary>
/// <param name="ct">Token cancelled by <see cref="Dispose"/>.</param>
private async Task DebounceLoopAsync(CancellationToken ct)
{
try
{
while (!ct.IsCancellationRequested)
{
// Wait for the first signal.
await _changeSignal.Reader.WaitToReadAsync(ct).ConfigureAwait(false);
// Drain and keep waiting until no new signal arrives for DebounceWindow.
// This merges bursts of 2–3 events from rename-and-replace saves into one apply.
bool gotSignal;
do
{
_changeSignal.Reader.TryRead(out _); // consume the pending signal
// Linked source: fires on disposal (ct) or when the debounce window elapses.
using var debounceCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
debounceCts.CancelAfter(DebounceWindow);
try
{
gotSignal = await _changeSignal.Reader.WaitToReadAsync(debounceCts.Token)
.ConfigureAwait(false);
}
catch (OperationCanceledException) when (!ct.IsCancellationRequested)
{
// Debounce window elapsed with no new signal — good, proceed with apply.
// (A cancellation caused by ct itself is not caught here; it reaches the outer catch.)
gotSignal = false;
}
}
while (gotSignal);
if (ct.IsCancellationRequested) break;
// Snapshot the current options value (IOptionsMonitor always returns the latest).
var next = _monitor.CurrentValue;
try
{
await ApplyAsync(next, ct).ConfigureAwait(false);
}
catch (OperationCanceledException) when (ct.IsCancellationRequested)
{
break;
}
catch (Exception ex)
{
// Keep the loop alive: one failed apply must not stop future reloads.
_logger.LogError(ex, "Unexpected exception in ConfigReconciler debounce loop: {Message}", ex.Message);
}
}
}
catch (OperationCanceledException)
{
// Normal: disposal cancelled the token.
}
}
// ── Core apply logic (runs under _applySemaphore) ─────────────────────────────────────
/// <summary>
/// Core of a reload; runs while the caller holds <c>_applySemaphore</c>.
/// Sequence: validate → compute plan → remove → restart → reseat → add → record success.
/// Each per-PLC step catches and logs its own exceptions so one failing PLC does not
/// stop the rest of the plan from being applied.
/// </summary>
/// <param name="next">The options snapshot to move to.</param>
/// <param name="ct">Cancellation for supervisor stop/start/reseat calls.</param>
/// <returns>
/// <c>false</c> when <see cref="Attach"/> has not been called yet or validation rejected
/// <paramref name="next"/> (nothing was mutated); otherwise <c>true</c>.
/// </returns>
private async Task<bool> ApplyUnderLockAsync(MbproxyOptions next, CancellationToken ct)
{
    // Capture the fields once so the lambdas below work on a stable, non-null reference.
    var supervisors = _supervisors;
    var current = _currentOptions;

    // If Attach() hasn't been called yet, skip (initial startup is still in progress).
    if (supervisors is null || current is null)
    {
        _logger.LogDebug("ConfigReconciler.ApplyAsync called before Attach() — skipping.");
        return false;
    }

    // ── 1. Validate atomically ────────────────────────────────────────────
    if (!ReloadValidator.Validate(next, out var errors))
    {
        string joined = string.Join("; ", errors);
        LogReloadRejected(_logger, joined);
        _serviceCounters.RecordReloadRejected();
        return false;
    }

    // ── 2. Compute the plan ───────────────────────────────────────────────
    var plan = ReloadPlan.Compute(current, next);
    int plcsAdded = plan.ToAdd.Count;
    int plcsRemoved = plan.ToRemove.Count;
    int plcsRestarted = plan.ToRestart.Count;
    int plcsReseated = plan.ToReseat.Count;

    // Compute global tag delta (count of entries that differ).
    int globalTagDelta = ComputeGlobalTagDelta(current.BcdTags, next.BcdTags);

    // NOTE on synchronisation: the per-PLC lambdas below run concurrently once they pass
    // their first await (continuations resume on thread-pool threads), and
    // Dictionary<TKey,TValue> does not support concurrent writers. Every dictionary
    // access made from inside a lambda is therefore wrapped in lock (supervisors).
    // NOTE(review): readers outside this class do not take this lock — confirm whether
    // they can observe the dictionary while a reload is in progress.

    // ── 3. Apply: Remove ─────────────────────────────────────────────────
    if (plan.ToRemove.Count > 0)
    {
        var removeTasks = plan.ToRemove
            .Select(async name =>
            {
                try
                {
                    PlcListenerSupervisor? removed;
                    lock (supervisors)
                    {
                        supervisors.Remove(name, out removed);
                    }

                    if (removed is not null)
                    {
                        await StopAndDisposeAsync(removed, ct).ConfigureAwait(false);
                    }
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Error stopping supervisor for removed PLC '{Plc}': {Message}",
                        name, ex.Message);
                }
            })
            .ToArray();

        await Task.WhenAll(removeTasks).ConfigureAwait(false);
    }

    // ── 4. Apply: Restart (stop + rebuild + start) ───────────────────────
    if (plan.ToRestart.Count > 0)
    {
        var resilienceOpts = next.Resilience;
        var backendPipeline = PolicyFactory.BuildBackendConnect(
            resilienceOpts.BackendConnect,
            _loggerFactory.CreateLogger("Mbproxy.Proxy.BackendConnect"));

        var restartTasks = plan.ToRestart.Select(async entry =>
        {
            var (name, plcNew) = entry;
            try
            {
                // Stop old supervisor.
                PlcListenerSupervisor? old;
                lock (supervisors)
                {
                    supervisors.Remove(name, out old);
                }

                if (old is not null)
                {
                    await StopAndDisposeAsync(old, ct).ConfigureAwait(false);
                }

                // Build fresh context.
                var result = BcdTagMapBuilder.Build(next.BcdTags, plcNew.BcdTags);
                var newCtx = new PerPlcContext
                {
                    PlcName = plcNew.Name,
                    TagMap = result.Map,
                    Counters = new Proxy.ProxyCounters(),
                    Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{plcNew.Name}"),
                };

                // Build and start new supervisor.
                var recoveryPipeline = PolicyFactory.BuildListenerRecovery(
                    resilienceOpts.ListenerRecovery,
                    _loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plcNew.Name}"));
                var newSupervisor = new PlcListenerSupervisor(
                    plcNew,
                    next.Connection,
                    new Proxy.BcdPduPipeline(),
                    _loggerFactory.CreateLogger<Proxy.PlcListener>(),
                    _loggerFactory.CreateLogger<PlcMultiplexer>(),
                    _loggerFactory.CreateLogger($"Mbproxy.Proxy.UpstreamPipe.{plcNew.Name}"),
                    newCtx,
                    recoveryPipeline,
                    _loggerFactory.CreateLogger<PlcListenerSupervisor>(),
                    backendPipeline);

                // This point is reached after awaits, i.e. possibly in parallel with
                // the other restart lambdas — hence the lock.
                lock (supervisors)
                {
                    supervisors[name] = newSupervisor;
                }

                await newSupervisor.StartAsync(ct).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error restarting supervisor for PLC '{Plc}': {Message}",
                    name, ex.Message);
            }
        }).ToArray();

        await Task.WhenAll(restartTasks).ConfigureAwait(false);
    }

    // ── 5. Apply: Reseat (swap tag map, keep listener socket) ────────────
    // Runs sequentially after the parallel steps above have completed, so no lock is needed.
    foreach (var (name, newMap) in plan.ToReseat)
    {
        if (!supervisors.TryGetValue(name, out var supervisor))
        {
            continue;
        }

        try
        {
            var newCtx = new PerPlcContext
            {
                PlcName = name,
                TagMap = newMap,
                // Preserve existing counters so operators see real history.
                Counters = supervisor.CurrentCounters,
                Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{name}"),
            };

            using var reseatCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            reseatCts.CancelAfter(TimeSpan.FromSeconds(5));
            await supervisor.ReplaceContextAsync(newCtx, reseatCts.Token).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error reseating context for PLC '{Plc}': {Message}",
                name, ex.Message);
        }
    }

    // ── 6. Apply: Add new PLCs ────────────────────────────────────────────
    if (plan.ToAdd.Count > 0)
    {
        var resilienceOpts = next.Resilience;
        var backendPipeline = PolicyFactory.BuildBackendConnect(
            resilienceOpts.BackendConnect,
            _loggerFactory.CreateLogger("Mbproxy.Proxy.BackendConnect"));

        var addTasks = plan.ToAdd.Select(async plcNew =>
        {
            try
            {
                var result = BcdTagMapBuilder.Build(next.BcdTags, plcNew.BcdTags);
                var newCtx = new PerPlcContext
                {
                    PlcName = plcNew.Name,
                    TagMap = result.Map,
                    Counters = new Proxy.ProxyCounters(),
                    Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{plcNew.Name}"),
                };

                var recoveryPipeline = PolicyFactory.BuildListenerRecovery(
                    resilienceOpts.ListenerRecovery,
                    _loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plcNew.Name}"));
                var newSupervisor = new PlcListenerSupervisor(
                    plcNew,
                    next.Connection,
                    new Proxy.BcdPduPipeline(),
                    _loggerFactory.CreateLogger<Proxy.PlcListener>(),
                    _loggerFactory.CreateLogger<PlcMultiplexer>(),
                    _loggerFactory.CreateLogger($"Mbproxy.Proxy.UpstreamPipe.{plcNew.Name}"),
                    newCtx,
                    recoveryPipeline,
                    _loggerFactory.CreateLogger<PlcListenerSupervisor>(),
                    backendPipeline);

                lock (supervisors)
                {
                    supervisors[plcNew.Name] = newSupervisor;
                }

                await newSupervisor.StartAsync(ct).ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error adding supervisor for PLC '{Plc}': {Message}",
                    plcNew.Name, ex.Message);
            }
        }).ToArray();

        await Task.WhenAll(addTasks).ConfigureAwait(false);
    }

    // ── 7. Record success ─────────────────────────────────────────────────
    _currentOptions = next;
    var appliedAt = DateTimeOffset.UtcNow;
    _serviceCounters.RecordReloadApplied(appliedAt);
    LogReloadApplied(_logger, plcsAdded, plcsRemoved, plcsRestarted, plcsReseated, globalTagDelta);
    return true;
}

/// <summary>
/// Stops a supervisor with a 10-second budget and then disposes it. Disposal runs even
/// when the stop throws or times out, so a supervisor that has already been taken out of
/// the dictionary is never left undisposed. A stop failure still propagates to the caller.
/// </summary>
private static async Task StopAndDisposeAsync(PlcListenerSupervisor supervisor, CancellationToken ct)
{
    try
    {
        using var stopCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        stopCts.CancelAfter(TimeSpan.FromSeconds(10));
        await supervisor.StopAsync(stopCts.Token).ConfigureAwait(false);
    }
    finally
    {
        await supervisor.DisposeAsync().ConfigureAwait(false);
    }
}
// ── Helpers ───────────────────────────────────────────────────────────────────────────
/// <summary>
/// Counts how many global tag addresses differ between two snapshots: addresses present
/// on only one side (added or removed) plus addresses present on both sides with a
/// different width. Used only for the reload log line.
/// </summary>
/// <remarks>
/// If a list repeats an address, the last entry for that address is the one compared.
/// This keeps a reload from failing with an <see cref="ArgumentException"/> in a
/// statistics helper just because a list contains a repeated address.
/// </remarks>
private static int ComputeGlobalTagDelta(BcdTagListOptions before, BcdTagListOptions after)
{
    var beforeByAddress = before.Global
        .GroupBy(t => t.Address)
        .ToDictionary(g => g.Key, g => g.Last());
    var afterByAddress = after.Global
        .GroupBy(t => t.Address)
        .ToDictionary(g => g.Key, g => g.Last());

    int delta = 0;

    // Removed, or kept with a different width.
    foreach (var (address, oldTag) in beforeByAddress)
    {
        if (!afterByAddress.TryGetValue(address, out var newTag) || oldTag.Width != newTag.Width)
        {
            delta++;
        }
    }

    // Added.
    foreach (var address in afterByAddress.Keys)
    {
        if (!beforeByAddress.ContainsKey(address))
        {
            delta++;
        }
    }

    return delta;
}
// ── IDisposable ───────────────────────────────────────────────────────────────────────
// 0 = live, 1 = Dispose has run. Flipped atomically so repeated or concurrent
// Dispose calls are harmless.
private int _disposeState;

/// <summary>
/// Unsubscribes from option changes, cancels the debounce loop and waits up to two
/// seconds for it to finish. Safe to call more than once.
/// </summary>
/// <remarks>
/// The cancellation source and the apply semaphore are disposed only once the loop has
/// actually exited. If an apply is still running after the two-second wait, they are
/// left for the garbage collector, because disposing the semaphore underneath a running
/// apply would make its <c>Release()</c> throw <see cref="ObjectDisposedException"/>.
/// </remarks>
public void Dispose()
{
    if (Interlocked.Exchange(ref _disposeState, 1) != 0)
    {
        return;
    }

    _changeRegistration?.Dispose();
    _disposalCts.Cancel();

    bool loopExited;
    try
    {
        // Bounded blocking wait: Dispose is synchronous, so this is best effort.
        loopExited = _debounceLoop.Wait(TimeSpan.FromSeconds(2));
    }
    catch (AggregateException)
    {
        // The loop task faulted or was cancelled — either way it is no longer running.
        loopExited = true;
    }

    if (!loopExited)
    {
        return;
    }

    _disposalCts.Dispose();
    _applySemaphore.Dispose();
}
// ── Logging ───────────────────────────────────────────────────────────────────────────
/// <summary>
/// Source-generated logger for event 60 (<c>mbproxy.config.reload.applied</c>, Information):
/// emitted once per accepted reload with the size of each plan bucket and the global tag delta.
/// </summary>
[LoggerMessage(EventId = 60, EventName = "mbproxy.config.reload.applied",
Level = LogLevel.Information,
Message = "Config reload applied — PlcsAdded={PlcsAdded} PlcsRemoved={PlcsRemoved} " +
"PlcsRestarted={PlcsRestarted} PlcsReseated={PlcsReseated} GlobalTagDelta={GlobalTagDelta}")]
private static partial void LogReloadApplied(
ILogger logger, int plcsAdded, int plcsRemoved, int plcsRestarted, int plcsReseated, int globalTagDelta);
/// <summary>
/// Source-generated logger for event 61 (<c>mbproxy.config.reload.rejected</c>, Error):
/// emitted when validation rejects a reload; <paramref name="errors"/> is the joined
/// list of validation messages.
/// </summary>
[LoggerMessage(EventId = 61, EventName = "mbproxy.config.reload.rejected",
Level = LogLevel.Error,
Message = "Config reload rejected — Errors={Errors}")]
private static partial void LogReloadRejected(ILogger logger, string errors);
}
@@ -0,0 +1,113 @@
using Mbproxy.Bcd;
using Mbproxy.Options;
namespace Mbproxy.Configuration;
/// <summary>
/// Immutable description of the difference between two <see cref="MbproxyOptions"/>
/// snapshots, produced by <see cref="Compute"/> (a pure function: it builds tag maps for
/// comparison but changes no state).
///
/// <para><b>A PLC is identified by its <c>Name</c>, not its <c>ListenPort</c>.</b>
/// Changing <c>ListenPort</c> keeps the identity and yields a restart; changing
/// <c>Name</c> yields a remove of the old name plus an add of the new one.</para>
///
/// <para><b>Restart vs. reseat</b>:
/// <list type="bullet">
///   <item><see cref="ToRestart"/> — <c>Host</c>, <c>ListenPort</c> or backend <c>Port</c>
///         differs, so the supervisor has to be stopped and started again.</item>
///   <item><see cref="ToReseat"/> — the network identity is unchanged but the resolved
///         <see cref="BcdTagMap"/> differs (global list or per-PLC overrides changed);
///         only the context needs a new map.</item>
/// </list>
/// </para>
/// </summary>
public sealed record ReloadPlan(
    IReadOnlyList<PlcOptions> ToAdd,
    IReadOnlyList<string> ToRemove,                          // PLC names
    IReadOnlyList<(string Name, PlcOptions New)> ToRestart,  // network identity changed
    IReadOnlyList<(string Name, BcdTagMap NewMap)> ToReseat, // only tag map changed
    ConnectionOptions Connection)
{
    /// <summary>
    /// Works out what has to happen to move from <paramref name="current"/> to
    /// <paramref name="next"/>. Expected to run after
    /// <see cref="ReloadValidator.Validate"/> has accepted <paramref name="next"/>.
    /// </summary>
    public static ReloadPlan Compute(MbproxyOptions current, MbproxyOptions next)
    {
        // Name-keyed views of both snapshots (ordinal, i.e. case-sensitive).
        var before = current.Plcs.ToDictionary(p => p.Name, StringComparer.Ordinal);
        var after = next.Plcs.ToDictionary(p => p.Name, StringComparer.Ordinal);

        // Present only in the new snapshot → add.
        var additions = after
            .Where(pair => !before.ContainsKey(pair.Key))
            .Select(pair => pair.Value)
            .ToList();

        var removals = new List<string>();
        var restarts = new List<(string, PlcOptions)>();
        var reseats = new List<(string, BcdTagMap)>();

        foreach (var (name, oldPlc) in before)
        {
            // Present only in the old snapshot → remove.
            if (!after.TryGetValue(name, out var newPlc))
            {
                removals.Add(name);
                continue;
            }

            // Any endpoint difference needs a full stop/start.
            bool sameEndpoint = oldPlc.Host == newPlc.Host
                                && oldPlc.ListenPort == newPlc.ListenPort
                                && oldPlc.Port == newPlc.Port;
            if (!sameEndpoint)
            {
                restarts.Add((name, newPlc));
                continue;
            }

            // Same endpoint: resolve both tag maps and compare them structurally.
            // Validation has already run, so Build is not expected to report errors here.
            var resolvedBefore = BcdTagMapBuilder.Build(current.BcdTags, oldPlc.BcdTags).Map;
            var resolvedAfter = BcdTagMapBuilder.Build(next.BcdTags, newPlc.BcdTags).Map;
            if (!TagMapsEqual(resolvedBefore, resolvedAfter))
            {
                reseats.Add((name, resolvedAfter));
            }

            // Identical maps: nothing to do for this PLC.
        }

        return new ReloadPlan(additions, removals, restarts, reseats, next.Connection);
    }

    // ── Helpers ───────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Set equality over (Address, Width) pairs: equal counts, and every tag of
    /// <paramref name="a"/> has a same-width counterpart at the same address in
    /// <paramref name="b"/>.
    /// </summary>
    private static bool TagMapsEqual(BcdTagMap a, BcdTagMap b) =>
        a.Count == b.Count
        && a.All.All(tag => b.TryGet(tag.Address, out var other) && tag.Width == other.Width);
}
@@ -0,0 +1,88 @@
using Mbproxy.Bcd;
using Mbproxy.Options;
namespace Mbproxy.Configuration;
/// <summary>
/// Gatekeeper for configuration reloads: inspects an incoming <see cref="MbproxyOptions"/>
/// snapshot before anything is mutated. Cross-PLC rules (unique names, port collisions)
/// are checked here; the well-formedness of each PLC's tag list is delegated to
/// <see cref="BcdTagMapBuilder.Build"/>.
///
/// <para>Usage:</para>
/// <code>
///   if (!ReloadValidator.Validate(next, out var errors))
///       // log errors and abort reload
/// </code>
/// </summary>
internal static class ReloadValidator
{
    /// <summary>
    /// Validates <paramref name="next"/> and collects every problem found.
    ///
    /// <para>Checks, in order:</para>
    /// <list type="number">
    ///   <item>Every PLC name is non-blank and unique (ordinal comparison).</item>
    ///   <item>Every <c>ListenPort</c> lies in [1, 65535] and is unique.</item>
    ///   <item><c>AdminPort</c> lies in [1, 65535] and is not used as a <c>ListenPort</c>.</item>
    ///   <item><see cref="BcdTagMapBuilder.Build"/> reports no errors for any PLC.</item>
    /// </list>
    /// </summary>
    /// <param name="next">The snapshot to validate.</param>
    /// <param name="errors">All problems found; empty when the snapshot is valid.</param>
    /// <returns><c>true</c> when no problem was found.</returns>
    public static bool Validate(MbproxyOptions next, out IReadOnlyList<string> errors)
    {
        var problems = new List<string>();

        // ── 1. PLC name uniqueness ────────────────────────────────────────────
        var namesSoFar = new HashSet<string>(StringComparer.Ordinal);
        for (int index = 0; index < next.Plcs.Count; index++)
        {
            var candidate = next.Plcs[index];

            if (string.IsNullOrWhiteSpace(candidate.Name))
            {
                problems.Add($"Plcs[{index}]: Name must be non-empty.");
                continue;
            }

            if (!namesSoFar.Add(candidate.Name))
            {
                problems.Add($"Plcs[{index}]: Duplicate PLC name '{candidate.Name}'.");
            }
        }

        // ── 2. ListenPort uniqueness and range ────────────────────────────────
        // Maps each in-range port to the first PLC that claimed it.
        var portOwners = new Dictionary<int, string>(next.Plcs.Count);
        foreach (var plc in next.Plcs)
        {
            if (plc.ListenPort is < 1 or > 65535)
            {
                problems.Add($"Plc '{plc.Name}': ListenPort {plc.ListenPort} is out of range [1, 65535].");
                continue;
            }

            if (portOwners.TryGetValue(plc.ListenPort, out var firstOwner))
            {
                problems.Add($"Plc '{plc.Name}': Duplicate ListenPort {plc.ListenPort} " +
                             $"(already used by '{firstOwner}').");
            }
            else
            {
                portOwners.Add(plc.ListenPort, plc.Name);
            }
        }

        // ── 3. AdminPort range and collision ─────────────────────────────────
        int adminPort = next.AdminPort;
        bool adminPortInRange = adminPort >= 1 && adminPort <= 65535;
        if (!adminPortInRange)
        {
            problems.Add($"AdminPort {adminPort} is out of range [1, 65535].");
        }
        else if (portOwners.TryGetValue(adminPort, out string? clashPlc))
        {
            problems.Add($"AdminPort {adminPort} collides with ListenPort of PLC '{clashPlc}'.");
        }

        // ── 4. Per-PLC tag-map build ──────────────────────────────────────────
        // The builder is the single source of truth for tag-list rules; its errors are
        // only relayed here, never re-implemented.
        foreach (var plc in next.Plcs)
        {
            var built = BcdTagMapBuilder.Build(next.BcdTags, plc.BcdTags);
            problems.AddRange(built.Errors.Select(
                err => $"Plc '{plc.Name}': BCD tag map error ({err.Kind}): {err.Message}"));
        }

        errors = problems;
        return problems.Count == 0;
    }
}
@@ -0,0 +1,81 @@
using System.Diagnostics;
using System.Runtime.Versioning;
using Serilog.Core;
using Serilog.Events;
namespace Mbproxy.Diagnostics;
/// <summary>
/// Serilog sink that writes events at level Error and above to the Windows Event Log
/// under source <c>mbproxy</c>.
///
/// <para>The sink does nothing unless it is constructed with <c>enabled: true</c>. The
/// intent is to enable it only when running as a Windows Service
/// (<see cref="Microsoft.Extensions.Hosting.WindowsServices.WindowsServiceHelpers.IsWindowsService"/>),
/// so that <c>dotnet run</c> / tests / interactive launches do not need the Event Log
/// source registration (which requires admin rights).</para>
///
/// <para>The Event Log source <c>mbproxy</c> must be created by <c>install.ps1</c> before
/// the service starts. The bridge does NOT attempt to create the source at runtime — the
/// service account may not hold the required admin rights.</para>
///
/// <para>Messages are truncated to 16,384 UTF-16 characters (32 KB).</para>
///
/// <para><see cref="Emit"/> never throws: every Event Log interaction, including the
/// source-existence probe, runs inside one guarded region. Failures are reported to
/// Serilog's <c>SelfLog</c> only.</para>
/// </summary>
[SupportedOSPlatform("windows")]
internal sealed class EventLogBridge : ILogEventSink
{
    private const string Source = "mbproxy";
    private const string LogName = "Application";
    private const int MaxMessageBytes = 32 * 1024; // 32 KB cap applied to each entry

    private readonly bool _enabled;

    /// <summary>Creates the sink.</summary>
    /// <param name="enabled">When <c>false</c>, <see cref="Emit"/> is a no-op.</param>
    public EventLogBridge(bool enabled)
    {
        _enabled = enabled;
    }

    /// <inheritdoc/>
    public void Emit(LogEvent logEvent)
    {
        if (!_enabled)
        {
            return;
        }

        if (logEvent.Level < LogEventLevel.Error)
        {
            return;
        }

        try
        {
            // The existence probe can itself throw (SecurityException when some event
            // logs cannot be searched by this account), so it must sit inside the guard.
            // A missing source is silently ignored: the service account may not be able
            // to create it and the logger must keep working.
            if (!EventLog.SourceExists(Source))
            {
                return;
            }

            EventLog.WriteEntry(Source, BuildMessage(logEvent), MapEntryType(logEvent.Level));
        }
        catch (Exception ex)
        {
            // Never let an Event Log problem (source not registered, quota exceeded,
            // access denied) crash the application or recurse into the logger.
            // SelfLog is Serilog's own diagnostics channel and is a no-op unless enabled.
            Serilog.Debugging.SelfLog.WriteLine("EventLogBridge failed to write an entry: {0}", ex);
        }
    }

    /// <summary>Renders the event text, appends exception detail and applies the size cap.</summary>
    private static string BuildMessage(LogEvent logEvent)
    {
        string message = logEvent.RenderMessage();

        // Append exception detail when present.
        if (logEvent.Exception is not null)
        {
            message += Environment.NewLine + logEvent.Exception;
        }

        // Truncate to the size cap (two bytes per UTF-16 char), leaving room for "...".
        if (message.Length * 2 > MaxMessageBytes)
        {
            int charLimit = MaxMessageBytes / 2 - 3;

            // Do not cut a surrogate pair in half: drop a dangling high surrogate.
            if (char.IsHighSurrogate(message[charLimit - 1]))
            {
                charLimit--;
            }

            message = message[..charLimit] + "...";
        }

        return message;
    }

    /// <summary>Maps a Serilog level to the Event Log entry type.</summary>
    private static EventLogEntryType MapEntryType(LogEventLevel level) => level switch
    {
        LogEventLevel.Fatal => EventLogEntryType.Error,
        LogEventLevel.Error => EventLogEntryType.Error,
        LogEventLevel.Warning => EventLogEntryType.Warning,
        _ => EventLogEntryType.Information,
    };
}
@@ -0,0 +1,212 @@
using System.Diagnostics;
using Mbproxy.Admin;
using Mbproxy.Options;
using Mbproxy.Proxy;
using Mbproxy.Proxy.Multiplexing;
using Mbproxy.Proxy.Supervision;
using Microsoft.Extensions.Options;
namespace Mbproxy.Diagnostics;
// ── Testability interfaces ────────────────────────────────────────────────────────────────────
/// <summary>
/// Abstraction over a supervisor's stop operation and its multiplexer's in-flight count.
/// Introduced so <see cref="ShutdownCoordinator"/> unit tests can inject fakes
/// without needing a real <see cref="PlcListenerSupervisor"/>.
///
/// <para><b>Phase 9:</b> in-flight tracking is now per-multiplexer (the
/// <see cref="CorrelationMap"/>) rather than per-pair. <see cref="InFlightCount"/>
/// replaces <c>ActivePairs.IsProcessing</c> from the 1:1 model.</para>
/// </summary>
internal interface ISupervisorHandle
{
/// <summary>Asks the underlying supervisor to stop.</summary>
/// <param name="ct">Cancellation for the stop operation.</param>
Task StopAsync(CancellationToken ct);
/// <summary>
/// Current number of in-flight Modbus requests on this PLC's multiplexed backend.
/// Zero if the multiplexer has no in-flight requests (idle).
/// The value is a point-in-time reading and may change immediately after it is read.
/// </summary>
int InFlightCount { get; }
}
/// <summary>
/// Abstraction over the admin endpoint stop operation.
/// Lets <see cref="ShutdownCoordinator"/> be constructed with a fake in place of the
/// real <see cref="AdminEndpointHost"/>.
/// </summary>
internal interface IAdminEndpointHandle
{
/// <summary>
/// Stops the admin endpoint. <see cref="ShutdownCoordinator"/> calls this after the
/// in-flight drain step has finished or timed out.
/// </summary>
/// <param name="ct">Token supplied by the caller to limit how long the stop may take.</param>
Task StopAsync(CancellationToken ct);
}
/// <summary>
/// <see cref="ISupervisorHandle"/> implementation that forwards every member to a real
/// <see cref="PlcListenerSupervisor"/>.
/// </summary>
internal sealed class PlcSupervisorHandle : ISupervisorHandle
{
    private readonly PlcListenerSupervisor _supervisor;

    /// <summary>Wraps <paramref name="supervisor"/>; no other state is kept.</summary>
    public PlcSupervisorHandle(PlcListenerSupervisor supervisor)
    {
        _supervisor = supervisor;
    }

    /// <inheritdoc />
    public Task StopAsync(CancellationToken ct)
    {
        return _supervisor.StopAsync(ct);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Each read takes a fresh counters snapshot (which pulls live values from the
    /// multiplexer's IMultiplexCountersProvider hook), so the value is point-in-time.
    /// </remarks>
    public int InFlightCount => (int)_supervisor.CurrentCounters.Snapshot().InFlightCount;
}
/// <summary>
/// <see cref="IAdminEndpointHandle"/> implementation that forwards to the real
/// <see cref="AdminEndpointHost"/>.
/// </summary>
internal sealed class AdminEndpointHandle : IAdminEndpointHandle
{
    private readonly AdminEndpointHost _host;

    /// <summary>Wraps <paramref name="host"/>; no other state is kept.</summary>
    public AdminEndpointHandle(AdminEndpointHost host)
    {
        _host = host;
    }

    /// <inheritdoc />
    public Task StopAsync(CancellationToken ct)
    {
        return _host.StopAsync(ct);
    }
}
// ── ShutdownCoordinator ───────────────────────────────────────────────────────────────────────
/// <summary>
/// Orchestrates graceful shutdown of the proxy service.
///
/// <para>Shutdown sequence:</para>
/// <list type="number">
///   <item>Stop accepting new upstream connections on all supervisors.</item>
///   <item>Wait for in-flight Modbus requests to drain (polls
///         <see cref="ISupervisorHandle.InFlightCount"/> across all supervisors) until
///         <see cref="ConnectionOptions.GracefulShutdownTimeoutMs"/> expires.</item>
///   <item>Stop the admin endpoint.</item>
///   <item>Log <c>mbproxy.shutdown.complete</c> with <c>InFlightAtCancel</c> and <c>ElapsedMs</c>.</item>
/// </list>
///
/// <para>This type is internal. It is registered in DI as a singleton and wired to
/// <see cref="IHostApplicationLifetime.ApplicationStopping"/> in <c>Program.cs</c>.</para>
/// </summary>
internal sealed partial class ShutdownCoordinator
{
    /// <summary>Time budget handed to the supervisors' <c>StopAsync</c> calls (step 1).</summary>
    private const int SupervisorStopTimeoutMs = 5_000;

    /// <summary>Time budget handed to the admin endpoint's <c>StopAsync</c> call (step 3).</summary>
    private const int AdminStopTimeoutMs = 2_000;

    /// <summary>Pause between two successive in-flight polls while draining (step 2).</summary>
    private const int DrainPollIntervalMs = 10;

    private readonly IReadOnlyList<ISupervisorHandle> _supervisors;
    private readonly IAdminEndpointHandle _adminEndpoint;
    private readonly IOptions<MbproxyOptions> _options;
    private readonly ILogger<ShutdownCoordinator> _logger;

    /// <summary>
    /// Production constructor — wraps concrete types in their adapter handles.
    /// The supervisor sequence is materialised here, so supervisors added to the source
    /// collection after construction are not seen by this coordinator.
    /// </summary>
    public ShutdownCoordinator(
        IEnumerable<PlcListenerSupervisor> supervisors,
        AdminEndpointHost adminEndpoint,
        IOptions<MbproxyOptions> options,
        ILogger<ShutdownCoordinator> logger)
        : this(
            supervisors.Select(s => (ISupervisorHandle)new PlcSupervisorHandle(s)).ToList(),
            new AdminEndpointHandle(adminEndpoint),
            options,
            logger)
    {
    }

    /// <summary>
    /// Testability constructor — accepts abstractions so unit tests can inject fakes.
    /// </summary>
    internal ShutdownCoordinator(
        IReadOnlyList<ISupervisorHandle> supervisors,
        IAdminEndpointHandle adminEndpoint,
        IOptions<MbproxyOptions> options,
        ILogger<ShutdownCoordinator> logger)
    {
        _supervisors = supervisors;
        _adminEndpoint = adminEndpoint;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Runs the graceful shutdown sequence.
    /// </summary>
    /// <param name="timeoutMs">
    /// Override the configured <c>Connection.GracefulShutdownTimeoutMs</c> (use -1 to
    /// read from options, which is the normal runtime path). Tests pass an explicit value.
    /// A negative configured value is treated as 0 (no waiting) so that a bad setting can
    /// neither throw nor turn the drain into an unbounded wait.
    /// </param>
    /// <param name="hostCt">
    /// The host lifetime cancellation token. Not used to gate the drain loop — the
    /// coordinator manages its own deadline so it can log completion regardless.
    /// </param>
    /// <returns>A task that completes once the completion event has been logged.</returns>
    public async Task ShutdownAsync(int timeoutMs = -1, CancellationToken hostCt = default)
    {
        int requested = timeoutMs >= 0
            ? timeoutMs
            : _options.Value.Connection.GracefulShutdownTimeoutMs;

        // CancellationTokenSource rejects negative delays other than -1, and -1 means
        // "never cancel"; neither is acceptable for a shutdown deadline.
        int deadline = Math.Max(0, requested);

        var sw = Stopwatch.StartNew();

        // ── Step 1: stop accepting new connections ────────────────────────────────────
        using var stopCts = new CancellationTokenSource(TimeSpan.FromMilliseconds(SupervisorStopTimeoutMs));
        var stopTasks = _supervisors
            .Select(s => s.StopAsync(stopCts.Token))
            .ToArray();
        try
        {
            await Task.WhenAll(stopTasks).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort: individual supervisor failures must not abort shutdown.
        }

        // ── Step 2: wait for in-flight PDUs to drain ──────────────────────────────────
        int inFlightAtCancel = await DrainAsync(deadline).ConfigureAwait(false);

        // ── Step 3: stop the admin endpoint ──────────────────────────────────────────
        // Admin is stopped AFTER listeners to preserve ordering guarantee:
        // supervisors stop → drain → admin stops.
        try
        {
            using var adminCts = new CancellationTokenSource(TimeSpan.FromMilliseconds(AdminStopTimeoutMs));
            await _adminEndpoint.StopAsync(adminCts.Token).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort.
        }

        // ── Step 4: log completion ────────────────────────────────────────────────────
        LogShutdownComplete(_logger, inFlightAtCancel, sw.ElapsedMilliseconds);
    }

    /// <summary>
    /// Polls the supervisors until nothing is in flight or <paramref name="deadlineMs"/> elapses.
    /// </summary>
    /// <returns>
    /// 0 when the drain completed; otherwise the in-flight total observed after the
    /// deadline expired.
    /// </returns>
    private async Task<int> DrainAsync(int deadlineMs)
    {
        using var drainCts = new CancellationTokenSource(TimeSpan.FromMilliseconds(deadlineMs));
        try
        {
            // The loop can only fall through when the count reaches zero. Expiry of the
            // deadline always surfaces as a cancelled Task.Delay — including when the
            // token was already cancelled before the first delay (deadline of 0) or got
            // cancelled between two polls — so a timed-out drain is never mistaken for a
            // completed one.
            while (CountInFlight(_supervisors) != 0)
            {
                await Task.Delay(DrainPollIntervalMs, drainCts.Token).ConfigureAwait(false);
            }

            return 0;
        }
        catch (OperationCanceledException)
        {
            // Deadline expired — count remaining in-flight and proceed.
            return CountInFlight(_supervisors);
        }
    }

    /// <summary>Sums <see cref="ISupervisorHandle.InFlightCount"/> over all supervisors.</summary>
    private static int CountInFlight(IReadOnlyList<ISupervisorHandle> supervisors)
    {
        int count = 0;
        foreach (var supervisor in supervisors)
        {
            count += supervisor.InFlightCount;
        }

        return count;
    }

    [LoggerMessage(EventId = 80, EventName = "mbproxy.shutdown.complete",
        Level = LogLevel.Information,
        Message = "Graceful shutdown complete: InFlightAtCancel={InFlightAtCancel} ElapsedMs={ElapsedMs}")]
    private static partial void LogShutdownComplete(ILogger logger, int inFlightAtCancel, long elapsedMs);
}
+92
View File
@@ -0,0 +1,92 @@
using Mbproxy.Admin;
using Mbproxy.Configuration;
using Mbproxy.Diagnostics;
using Mbproxy.Options;
using Mbproxy.Proxy;
using Serilog;
namespace Mbproxy;
/// <summary>
/// Builder extensions that group the service registrations used by <c>Program.cs</c>:
/// typed options, the admin endpoint, and Serilog.
/// </summary>
internal static class HostingExtensions
{
    /// <summary>
    /// Registers the <c>"Mbproxy"</c> configuration section, binds it to
    /// <see cref="MbproxyOptions"/> via <c>IOptionsMonitor</c>, and registers
    /// the schema-level <see cref="MbproxyOptionsValidator"/>.
    ///
    /// Phase 06: also registers <see cref="ServiceCounters"/> (singleton) and
    /// <see cref="ConfigReconciler"/> (singleton) so they can be injected into
    /// <see cref="Proxy.ProxyWorker"/>.
    /// </summary>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static IHostApplicationBuilder AddMbproxyOptions(this IHostApplicationBuilder builder)
    {
        var services = builder.Services;

        // Options are bound from the "Mbproxy" section and validated at host start.
        services.AddOptions<MbproxyOptions>()
            .BindConfiguration("Mbproxy")
            .ValidateOnStart();
        services.AddSingleton<
            Microsoft.Extensions.Options.IValidateOptions<MbproxyOptions>,
            MbproxyOptionsValidator>();

        // Phase 06: service-wide counters (read by Phase 07 status page).
        services.AddSingleton<ServiceCounters>();

        // Phase 06: hot-reload reconciler (singleton; subscribes to IOptionsMonitor.OnChange).
        services.AddSingleton<ConfigReconciler>();

        return builder;
    }

    /// <summary>
    /// Registers Phase 07 admin endpoint services:
    /// <list type="bullet">
    ///   <item><see cref="AssemblyVersionAccessor"/> (singleton — reads version attribute once).</item>
    ///   <item><see cref="StatusSnapshotBuilder"/> (singleton — pure orchestration).</item>
    ///   <item><see cref="AdminEndpointHost"/> (hosted service — owns the Kestrel admin server).</item>
    /// </list>
    /// Must be called after <see cref="AddMbproxyOptions"/> and after
    /// <c>AddHostedService&lt;ProxyWorker&gt;</c> (so ProxyWorker is available via DI).
    /// </summary>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static IHostApplicationBuilder AddMbproxyAdmin(this IHostApplicationBuilder builder)
    {
        var services = builder.Services;

        services.AddSingleton<AssemblyVersionAccessor>();
        services.AddSingleton<StatusSnapshotBuilder>();

        // AdminEndpointHost is a singleton first and a hosted service second (same
        // instance), so ShutdownCoordinator can inject it directly without going
        // through the IHostedService collection.
        services.AddSingleton<AdminEndpointHost>();
        services.AddHostedService(sp => sp.GetRequiredService<AdminEndpointHost>());

        return builder;
    }

    /// <summary>
    /// Configures Serilog from the <c>"Serilog"</c> configuration section,
    /// with console and rolling-file sinks as defaults.
    ///
    /// <para>Phase 08: when <paramref name="addEventLogBridge"/> is <c>true</c>, the
    /// <see cref="Diagnostics.EventLogBridge"/> is added as a sub-sink for events at
    /// <see cref="Serilog.Events.LogEventLevel.Error"/> and above. This flag should only be
    /// set when the service is running as a Windows Service — the bridge silently ignores
    /// events when the Event Log source is not registered.</para>
    /// </summary>
    /// <param name="builder">The host builder whose configuration and services are used.</param>
    /// <param name="addEventLogBridge">
    /// Request the Event Log sub-sink; it is only attached when the process is also
    /// running on Windows.
    /// </param>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static IHostApplicationBuilder AddMbproxySerilog(
        this IHostApplicationBuilder builder,
        bool addEventLogBridge = false)
    {
        LoggerConfiguration loggerConfig = new LoggerConfiguration()
            .ReadFrom.Configuration(builder.Configuration);

        if (addEventLogBridge && OperatingSystem.IsWindows())
        {
            var bridge = new EventLogBridge(enabled: true);
            loggerConfig = loggerConfig.WriteTo.Sink(bridge, Serilog.Events.LogEventLevel.Error);
        }

        // The static logger is assigned before AddSerilog so the host picks it up;
        // dispose: true hands its lifetime to the host.
        Log.Logger = loggerConfig.CreateLogger();
        builder.Services.AddSerilog(dispose: true);

        return builder;
    }
}
+57
View File
@@ -0,0 +1,57 @@
<Project Sdk="Microsoft.NET.Sdk.Worker">
<!-- Build definition for the Mbproxy executable: a worker-style host that also pulls in the
ASP.NET Core shared framework for the admin endpoint (see the FrameworkReference below). -->
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<OutputType>Exe</OutputType>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<!-- Every compiler warning fails the build, including nullable-reference warnings. -->
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<RootNamespace>Mbproxy</RootNamespace>
<AssemblyName>Mbproxy</AssemblyName>
<!-- Phase 08: Assembly version. CI can override via /p:InformationalVersion=... -->
<InformationalVersion>1.0.0</InformationalVersion>
</PropertyGroup>
<!-- Phase 08: single-file self-contained publish (Release only; Debug stays normal for fast iteration).
NOTE: the resulting Mbproxy.exe is ~100 MB because the self-contained publish bundles the full
.NET 10 + ASP.NET Core runtime. This exceeds the original 50 MB target in the phase spec;
the runtime size is a fixed cost of self-contained deployment on .NET 10 with ASP.NET Core.
Operators who need a smaller footprint can use a framework-dependent publish
(dotnet publish -c Release -r win-x64 - -self-contained false /p:PublishSingleFile=true)
if the target machine has .NET 10 installed.
The switch above is spelled "- -self-contained" only because an XML comment may not contain
two adjacent hyphens; on the command line type the two hyphens without the space. -->
<PropertyGroup Condition="'$(Configuration)' == 'Release'">
<PublishSingleFile>true</PublishSingleFile>
<SelfContained>true</SelfContained>
<!-- Release builds are pinned to 64-bit Windows; Debug builds carry no runtime identifier. -->
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
<IncludeNativeLibrariesForSelfExtract>true</IncludeNativeLibrariesForSelfExtract>
</PropertyGroup>
<ItemGroup>
<!-- ASP.NET Core for the Phase 07 Kestrel-hosted admin endpoint. -->
<FrameworkReference Include="Microsoft.AspNetCore.App" />
</ItemGroup>
<ItemGroup>
<!-- Microsoft.Extensions.Hosting is already included transitively via
Microsoft.AspNetCore.App — do not re-add it explicitly. -->
<PackageReference Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.8" />
<PackageReference Include="Serilog.Extensions.Hosting" Version="10.0.0" />
<PackageReference Include="Serilog.Settings.Configuration" Version="10.0.0" />
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
<!-- Referenced now so phase 04/05 don't need to touch this csproj; usage is deferred -->
<PackageReference Include="Polly" Version="8.6.6" />
</ItemGroup>
<ItemGroup>
<!-- Allow test project to access internal types (HeartbeatWorker, HostingExtensions, etc.) -->
<InternalsVisibleTo Include="Mbproxy.Tests" />
</ItemGroup>
<ItemGroup>
<!-- appsettings.json is copied next to the built binary whenever the source file is newer. -->
<Content Update="appsettings.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
</Project>
@@ -0,0 +1,12 @@
namespace Mbproxy.Options;
/// <summary>
/// Options node exposed as <c>MbproxyOptions.BcdTags</c>: the service-wide list of BCD tags.
/// </summary>
public sealed class BcdTagListOptions
{
/// <summary>
/// BCD tag definitions declared at the top level. Defaults to an empty list.
/// <c>MbproxyOptionsValidator</c> rejects any entry whose <c>Width</c> is not 16 or 32.
/// </summary>
// NOTE(review): presumably these apply to every PLC unless changed by PlcBcdOverrides — confirm against the tag-map builder.
public IReadOnlyList<BcdTagOptions> Global { get; init; } = [];
}
/// <summary>
/// Per-PLC adjustments to the BCD tag list, exposed as <c>PlcOptions.BcdTags</c>
/// (which is null when a PLC declares no overrides).
/// </summary>
public sealed class PlcBcdOverrides
{
/// <summary>
/// Additional BCD tags for this PLC. Defaults to an empty list.
/// <c>MbproxyOptionsValidator</c> rejects any entry whose <c>Width</c> is not 16 or 32.
/// </summary>
public IReadOnlyList<BcdTagOptions> Add { get; init; } = [];
/// <summary>Defaults to an empty list. Not checked by the schema-level validator.</summary>
// NOTE(review): presumably register addresses of global tags to drop for this PLC — confirm against the tag-map builder.
public IReadOnlyList<ushort> Remove { get; init; } = [];
}
@@ -0,0 +1,7 @@
namespace Mbproxy.Options;
/// <summary>
/// One configured BCD tag: a register address plus the tag's width in bits.
/// </summary>
public sealed class BcdTagOptions
{
/// <summary>Register address of the tag.</summary>
// NOTE(review): for a 32-bit tag the PDU pipeline's comments treat this as the low register of the pair — confirm in BcdTagMap.
public ushort Address { get; init; }
/// <summary>
/// Tag width in bits. Only 16 and 32 are accepted; <c>MbproxyOptionsValidator</c>
/// reports any other value (including the unset default 0) as an error.
/// </summary>
public byte Width { get; init; } // 16 or 32
}
@@ -0,0 +1,12 @@
namespace Mbproxy.Options;
/// <summary>
/// Timeouts, in milliseconds, exposed as <c>MbproxyOptions.Connection</c>.
/// </summary>
public sealed class ConnectionOptions
{
/// <summary>Default: 3000 (3 s).</summary>
// NOTE(review): presumably the time limit for opening the backend socket to the PLC — confirm at the use site.
public int BackendConnectTimeoutMs { get; init; } = 3000;
/// <summary>
/// Per-request watchdog period. Default: 3000 (3 s). Per the Phase 9 notes, when it
/// elapses the proxy surfaces Modbus exception 0x0B to the upstream client.
/// </summary>
public int BackendRequestTimeoutMs { get; init; } = 3000;
/// <summary>
/// Maximum time in milliseconds to wait for in-flight PDUs to complete during
/// graceful shutdown before cancelling them. Default: 10000 (10 s).
/// <c>ShutdownCoordinator</c> uses this as the deadline for its in-flight polling loop
/// when no explicit timeout is passed to <c>ShutdownAsync</c>.
/// </summary>
public int GracefulShutdownTimeoutMs { get; init; } = 10000;
}
@@ -0,0 +1,47 @@
using Microsoft.Extensions.Options;
namespace Mbproxy.Options;
/// <summary>
/// Root options object for the service. <c>HostingExtensions.AddMbproxyOptions</c> binds it
/// from the <c>"Mbproxy"</c> configuration section and validates it at start-up.
/// </summary>
public sealed class MbproxyOptions
{
/// <summary>Service-wide BCD tag list. Never null; defaults to an empty list holder.</summary>
public BcdTagListOptions BcdTags { get; init; } = new();
/// <summary>Configured PLCs. Defaults to an empty list.</summary>
public IReadOnlyList<PlcOptions> Plcs { get; init; } = [];
/// <summary>TCP port the admin HTTP endpoint binds to. Default: 8080.</summary>
public int AdminPort { get; init; } = 8080;
/// <summary>Timeout settings. Never null; defaults come from <see cref="ConnectionOptions"/>.</summary>
public ConnectionOptions Connection { get; init; } = new();
/// <summary>Retry/recovery settings. Never null; defaults come from <see cref="ResilienceOptions"/>.</summary>
public ResilienceOptions Resilience { get; init; } = new();
}
/// <summary>
/// Schema-level validation for <see cref="MbproxyOptions"/>: every configured BCD tag,
/// global or per-PLC, must declare a width of 16 or 32 bits.
/// Business-rule validation (duplicate addresses, port conflicts) is deferred to phase 06.
/// </summary>
public sealed class MbproxyOptionsValidator : IValidateOptions<MbproxyOptions>
{
    /// <summary>
    /// Collects one message per tag with an unsupported width — global tags first, then
    /// each PLC's <c>BcdTags.Add</c> entries in configuration order.
    /// </summary>
    /// <param name="name">Options instance name; not used.</param>
    /// <param name="options">The bound options to inspect.</param>
    /// <returns>Success when no message was collected, otherwise a failure carrying all of them.</returns>
    public ValidateOptionsResult Validate(string? name, MbproxyOptions options)
    {
        List<string> failures = new();

        foreach (var globalTag in options.BcdTags.Global)
        {
            if (IsSupportedWidth(globalTag.Width))
            {
                continue;
            }

            failures.Add($"BcdTags.Global: Address {globalTag.Address} has invalid Width {globalTag.Width}; must be 16 or 32.");
        }

        int plcIndex = -1;
        foreach (var plc in options.Plcs)
        {
            plcIndex++;

            // A PLC without an override section has nothing to check.
            if (plc.BcdTags is null)
            {
                continue;
            }

            foreach (var addedTag in plc.BcdTags.Add)
            {
                if (IsSupportedWidth(addedTag.Width))
                {
                    continue;
                }

                failures.Add($"Plcs[{plcIndex}] ({plc.Name}): BcdTags.Add Address {addedTag.Address} has invalid Width {addedTag.Width}; must be 16 or 32.");
            }
        }

        if (failures.Count == 0)
        {
            return ValidateOptionsResult.Success;
        }

        return ValidateOptionsResult.Fail(failures);
    }

    /// <summary>True for the two tag widths the proxy understands.</summary>
    private static bool IsSupportedWidth(byte width) => width is 16 or 32;
}
+15
View File
@@ -0,0 +1,15 @@
namespace Mbproxy.Options;
/// <summary>
/// Configuration of a single PLC entry in <c>MbproxyOptions.Plcs</c>.
/// </summary>
public sealed class PlcOptions
{
/// <summary>
/// Configured PLC name. Defaults to the empty string. It is included in validation
/// messages and is the documented source of <c>PduContext.PlcName</c>.
/// </summary>
public string Name { get; init; } = "";
// NOTE(review): presumably the local TCP port the proxy listens on for this PLC — confirm in PlcListener.
public int ListenPort { get; init; }
// NOTE(review): presumably the PLC's host name or IP address for the backend connection — confirm at the use site.
public string Host { get; init; } = "";
/// <summary>
/// Backend Modbus TCP port on the PLC. Defaults to 502 (standard Modbus TCP port).
/// </summary>
public int Port { get; init; } = 502;
/// <summary>
/// Optional per-PLC BCD tag overrides; null when the PLC declares none.
/// <c>MbproxyOptionsValidator</c> skips the per-PLC width check when this is null.
/// </summary>
public PlcBcdOverrides? BcdTags { get; init; }
}
@@ -0,0 +1,23 @@
namespace Mbproxy.Options;
/// <summary>
/// Retry and recovery settings exposed as <c>MbproxyOptions.Resilience</c>.
/// Both properties carry non-null defaults.
/// </summary>
public sealed class ResilienceOptions
{
/// <summary>Default: 3 attempts with backoff values 100, 500 and 2000 ms.</summary>
// NOTE(review): presumably governs retries of the backend (PLC) connect — confirm at the use site.
public RetryProfile BackendConnect { get; init; } = new() { MaxAttempts = 3, BackoffMs = [100, 500, 2000] };
/// <summary>
/// Default: initial backoff values 1000, 2000, 5000, 15000 and 30000 ms, then a
/// steady-state value of 30000 ms.
/// </summary>
// NOTE(review): presumably governs how a failed listener is restarted — confirm in the listener supervisor.
public RecoveryProfile ListenerRecovery { get; init; } = new()
{
InitialBackoffMs = [1000, 2000, 5000, 15000, 30000],
SteadyStateMs = 30000,
};
}
/// <summary>
/// A bounded retry description: an attempt count plus a list of backoff values in milliseconds.
/// </summary>
public sealed class RetryProfile
{
/// <summary>Attempt limit. The type-level default is 0; <c>ResilienceOptions</c> supplies 3.</summary>
// NOTE(review): unclear from here whether the first try counts as an attempt — confirm at the use site.
public int MaxAttempts { get; init; }
/// <summary>Backoff values in milliseconds. Defaults to an empty list.</summary>
// NOTE(review): presumably indexed by attempt number — confirm how a list shorter than MaxAttempts is handled.
public IReadOnlyList<int> BackoffMs { get; init; } = [];
}
/// <summary>
/// An open-ended recovery description: a list of initial backoff values followed by a
/// single steady-state value, all in milliseconds.
/// </summary>
public sealed class RecoveryProfile
{
/// <summary>Initial backoff values in milliseconds. Defaults to an empty list.</summary>
public IReadOnlyList<int> InitialBackoffMs { get; init; } = [];
/// <summary>Steady-state value in milliseconds. The type-level default is 0.</summary>
// NOTE(review): presumably the delay used once InitialBackoffMs is exhausted — confirm in the recovery loop.
public int SteadyStateMs { get; init; }
}
+68
View File
@@ -0,0 +1,68 @@
using Mbproxy;
using Mbproxy.Admin;
using Mbproxy.Diagnostics;
using Mbproxy.Options;
using Mbproxy.Proxy;
using Microsoft.Extensions.Hosting.WindowsServices;
using Microsoft.Extensions.Options;
// Composition root: builds the generic host, registers every service, hooks graceful
// shutdown, and runs until the host is stopped. Registration order below is deliberate.
var builder = Host.CreateApplicationBuilder(args);
// Windows Service support; no-op when running under dotnet run / console.
builder.Services.AddWindowsService();
// Phase 08: wire EventLogBridge only when actually running as a Windows Service.
bool isWindowsService = WindowsServiceHelpers.IsWindowsService();
// Wire up structured config, Serilog, and typed options.
builder.AddMbproxySerilog(addEventLogBridge: isWindowsService);
builder.AddMbproxyOptions();
// PDU pipeline: BcdPduPipeline is stateless (Phase 9: per-call correlation flows through
// PerPlcContext.CurrentRequest set by the multiplexer); registering as singleton is fine
// and avoids repeated construction.
builder.Services.AddSingleton<IPduPipeline, BcdPduPipeline>();
// Proxy worker — owns all PlcListeners and logs mbproxy.startup.ready.
// Registered as singleton so StatusSnapshotBuilder can inject ProxyWorker directly
// and access its Supervisors dictionary.
// The hosted-service registration resolves that same singleton instance.
builder.Services.AddSingleton<ProxyWorker>();
builder.Services.AddHostedService(sp => sp.GetRequiredService<ProxyWorker>());
// Phase 07: admin endpoint (Kestrel read-only status page).
// Must stay after the ProxyWorker registration (see AddMbproxyAdmin's documentation).
builder.AddMbproxyAdmin();
// Phase 08: graceful-shutdown coordinator.
// ShutdownCoordinator depends on PlcListenerSupervisor instances via ProxyWorker.Supervisors.
// Registered as a singleton so Program can resolve it after the host is built.
builder.Services.AddSingleton<ShutdownCoordinator>(sp =>
{
var worker = sp.GetRequiredService<ProxyWorker>();
var admin = sp.GetRequiredService<AdminEndpointHost>();
var options = sp.GetRequiredService<IOptions<MbproxyOptions>>();
var logger = sp.GetRequiredService<ILogger<ShutdownCoordinator>>();
// Supervisors is populated after ProxyWorker.StartAsync. The production
// ShutdownCoordinator constructor snapshots the sequence it is given (ToList), so
// this factory must not run before start-up has finished. That holds as long as the
// only resolution is the one inside the ApplicationStopping callback below.
// NOTE(review): resolving ShutdownCoordinator anywhere earlier would capture an
// empty or partial supervisor list — keep it out of other constructors.
return new ShutdownCoordinator(
worker.Supervisors.Values,
admin,
options,
logger);
});
var host = builder.Build();
// Wire ApplicationStopping → ShutdownCoordinator BEFORE hosted services start.
// The callback fires when the host signals stop; it drains in-flight PDUs and stops
// the admin endpoint before the host tears down individual services.
var lifetime = host.Services.GetRequiredService<IHostApplicationLifetime>();
lifetime.ApplicationStopping.Register(() =>
{
// IHostApplicationLifetime callbacks do not support async — block briefly.
// The coordinator manages its own drain deadline so the host is not held indefinitely.
// The coordinator is resolved lazily here (first and only resolution), i.e. at stop time.
var coordinator = host.Services.GetRequiredService<ShutdownCoordinator>();
coordinator.ShutdownAsync().GetAwaiter().GetResult();
});
await host.RunAsync();
+460
View File
@@ -0,0 +1,460 @@
using Mbproxy.Bcd;
namespace Mbproxy.Proxy;
/// <summary>
/// BCD-rewriting PDU pipeline. Registered as the singleton <see cref="IPduPipeline"/>
/// in production (replaces <see cref="NoopPduPipeline"/> from Phase 03).
///
/// FC scope (per design.md):
/// FC03 / FC04 response — decode covered BCD slots from raw nibbles → binary integer.
/// FC06 request — encode binary integer → BCD nibbles.
/// FC16 request — per-register over the configured slots.
/// All other FCs — pass through byte-for-byte.
///
/// MBAP transparency contract: the MBAP length field is NEVER modified. Re-encoded slots
/// are the same byte width as the originals (ushort → ushort), so the PDU length is stable.
///
/// <para><b>Phase 9 — request correlation:</b> FC03/FC04 responses do not carry the
/// original start address. The multiplexer builds an <see cref="Multiplexing.InFlightRequest"/>
/// on the request path, stores it in its <see cref="Multiplexing.CorrelationMap"/>, and
/// attaches it to the per-call <see cref="PerPlcContext.CurrentRequest"/> on the response
/// path. The rewriter consumes <c>CurrentRequest</c> instead of a per-pair last-request
/// slot, so concurrent responses from different upstream clients each decode against
/// their own request range without cross-talk.</para>
///
/// <para>This class is stateless. All per-call state arrives via <see cref="PduContext"/>
/// (specifically <see cref="PerPlcContext.CurrentRequest"/> on response). It is safe to
/// call concurrently from multiple upstream-read tasks and the single backend reader task.</para>
/// </summary>
internal sealed class BcdPduPipeline : IPduPipeline
{
// ── IPduPipeline.Process ─────────────────────────────────────────────────
/// <summary>
/// Entry point for every PDU in either direction: counts the frame, then hands it to the
/// request or response rewriter. Frames are left untouched when the context is not a
/// <see cref="PerPlcContext"/> or the PDU is empty.
/// </summary>
/// <param name="direction">Request-to-backend or response-to-client.</param>
/// <param name="mbapHeader">The MBAP header; not read by this implementation.</param>
/// <param name="pdu">PDU bytes starting at the function code; may be rewritten in place.</param>
/// <param name="context">Per-call context; BCD processing needs a <see cref="PerPlcContext"/>.</param>
public void Process(
    MbapDirection direction,
    ReadOnlySpan<byte> mbapHeader,
    Span<byte> pdu,
    PduContext context)
{
    // PerPlcContext carries the BCD map, counters, and logger. A plain PduContext
    // (e.g. from unit tests) or a PDU without a function code is passed through
    // without counting it.
    if (context is not PerPlcContext plcContext || pdu.IsEmpty)
    {
        return;
    }

    byte functionCode = pdu[0];

    // Counters are bumped once per frame, before any rewriting.
    plcContext.Counters.IncrementPdusForwarded();
    plcContext.Counters.IncrementFcCount(functionCode);

    switch (direction)
    {
        case MbapDirection.RequestToBackend:
            ProcessRequest(functionCode, pdu, plcContext);
            break;

        default:
            // Every other direction value is handled as a response.
            ProcessResponse(functionCode, pdu, plcContext);
            break;
    }
}
// ── Request processing (FC06 / FC16) ────────────────────────────────────
/// <summary>
/// Routes a request PDU to its rewriter. Only FC06 (write single register) and FC16
/// (write multiple registers) are rewritten; every other function code is forwarded
/// unchanged.
/// </summary>
private static void ProcessRequest(byte fc, Span<byte> pdu, PerPlcContext ctx)
{
    if (fc == 0x06)
    {
        ProcessFc06Request(pdu, ctx);
    }
    else if (fc == 0x10)
    {
        ProcessFc16Request(pdu, ctx);
    }

    // All other FCs: transparent pass-through.
}
/// <summary>
/// FC06 Write Single Register request: [fc=06][addrHi][addrLo][valHi][valLo].
/// When the address is a configured 16-bit BCD tag, the client's binary integer is
/// replaced in place by its BCD nibble encoding before the frame goes to the PLC.
/// A write that touches only one register of a 32-bit tag (either half) is a partial
/// overlap: it is logged, counted, and forwarded raw. A value that cannot be encoded
/// is logged as invalid, counted, and forwarded raw as well.
/// </summary>
private static void ProcessFc06Request(Span<byte> pdu, PerPlcContext ctx)
{
    const int RequestLength = 5;
    if (pdu.Length < RequestLength)
    {
        return;
    }

    ushort registerAddress = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(1, 2));
    ushort clientValue = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(3, 2));

    // Point lookup first: is a tag configured at exactly this address?
    if (!ctx.TagMap.TryGet(registerAddress, out var tag))
    {
        // No tag starts here, but this register may still be the HIGH half of a
        // 32-bit tag configured one address lower. A one-register range query
        // reports such a tag with a negative word offset.
        if (!ctx.TagMap.TryGetForRange(registerAddress, 1, out var overlaps) || overlaps.Count <= 0)
        {
            return;
        }

        foreach (var overlap in overlaps)
        {
            if (!overlap.Tag.IsThirtyTwoBit || overlap.OffsetWords >= 0)
            {
                continue;
            }

            ReportPartialWrite(ctx, registerAddress);
            return;
        }

        return;
    }

    if (tag.IsThirtyTwoBit)
    {
        // FC06 carries a single register, so hitting the LOW address of a 32-bit tag
        // is a partial write too. Policy: warn and forward the raw value.
        ReportPartialWrite(ctx, registerAddress);
        return;
    }

    // 16-bit tag: swap the binary integer for its BCD nibbles.
    ushort bcdValue;
    try
    {
        bcdValue = BcdCodec.Encode16(clientValue);
    }
    catch (ArgumentOutOfRangeException)
    {
        // Value is outside [0, 9999] — cannot represent as 4-digit BCD.
        RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, registerAddress, clientValue, "Write");
        ctx.Counters.IncrementInvalidBcd();
        return; // pass through raw
    }

    System.Buffers.Binary.BinaryPrimitives.WriteUInt16BigEndian(pdu.Slice(3, 2), bcdValue);
    ctx.Counters.AddRewrittenSlots(1);

    // Logs and counts a single-register write that covers only half of a 32-bit tag.
    static void ReportPartialWrite(PerPlcContext plc, ushort address)
    {
        RewriterLogEvents.PartialBcd(plc.Logger, plc.PlcName, address, address, 1);
        plc.Counters.IncrementPartialBcd();
    }
}
/// <summary>
/// FC16 Write Multiple Registers request:
///   [fc=10][startHi][startLo][qtyHi][qtyLo][byteCount][reg0Hi][reg0Lo]...
/// Re-encodes binary integers at configured BCD addresses to BCD nibbles, in place.
///
/// <para>For every configured tag that intersects the written range:</para>
/// <list type="bullet">
///   <item>16-bit tag — the register's binary value is replaced by its BCD encoding.</item>
///   <item>32-bit tag fully inside the range — the client supplies the low four decimal
///         digits in the low register and the high four in the high register, each as a
///         binary integer in [0, 9999]; both registers are replaced by BCD.</item>
///   <item>32-bit tag only half inside the range — logged as partial overlap, counted,
///         and left raw.</item>
///   <item>Any register value that cannot be encoded — logged as invalid, counted, and
///         left raw.</item>
/// </list>
/// The PDU length never changes; malformed frames (shorter than their declared data)
/// are skipped register by register without throwing.
/// </summary>
private static void ProcessFc16Request(Span<byte> pdu, PerPlcContext ctx)
{
    // Minimum FC16 request PDU: fc(1) + start(2) + qty(2) + byteCount(1) = 6 bytes.
    // Register data starts right after that header, 2 bytes per register.
    const int dataOffset = 6;

    // Largest value one register of a 32-bit pair may carry: four decimal digits.
    const ushort maxFourDigitWord = 9_999;

    if (pdu.Length < dataOffset)
        return;

    ushort startAddress = (ushort)((pdu[1] << 8) | pdu[2]);
    ushort qty = (ushort)((pdu[3] << 8) | pdu[4]);
    // pdu[5] is the byte count (qty * 2); it is not trusted — every access below is
    // bounds-checked against the real PDU length instead.

    if (!ctx.TagMap.TryGetForRange(startAddress, qty, out var hits))
        return; // no BCD tags in this range

    foreach (var hit in hits)
    {
        int offsetWords = hit.OffsetWords;
        var tag = hit.Tag;

        if (tag.IsThirtyTwoBit)
        {
            // Full 32-bit pair fits if both low (offsetWords) and high (offsetWords+1)
            // are within the [0, qty) range.
            bool lowInRange = offsetWords >= 0 && offsetWords < qty;
            bool highInRange = (offsetWords + 1) >= 0 && (offsetWords + 1) < qty;
            if (!lowInRange || !highInRange)
            {
                // Partial overlap — one of the two registers is outside the write range.
                RewriterLogEvents.PartialBcd(ctx.Logger, ctx.PlcName,
                    tag.Address, startAddress, qty);
                ctx.Counters.IncrementPartialBcd();
                continue;
            }

            int lowByteOff = dataOffset + offsetWords * 2;
            int highByteOff = dataOffset + (offsetWords + 1) * 2;
            if (lowByteOff + 2 > pdu.Length || highByteOff + 2 > pdu.Length)
                continue; // malformed PDU — skip safely

            // Per CDAB layout:
            //   pdu[lowByteOff..+2]  = low register  (low 4 digits of the value)
            //   pdu[highByteOff..+2] = high register (high 4 digits of the value)
            ushort clientLow = (ushort)((pdu[lowByteOff] << 8) | pdu[lowByteOff + 1]);
            ushort clientHigh = (ushort)((pdu[highByteOff] << 8) | pdu[highByteOff + 1]);

            // Each word must itself be a four-digit quantity. Without this check a low
            // word in 10000..65535 would silently carry into the high digits
            // (high * 10000 + low), so the PLC would store a value whose read-back
            // (low = value % 10000, high = value / 10000) differs from what the client
            // wrote. Treat it like any other unencodable value: report and pass raw.
            bool lowOutOfRange = clientLow > maxFourDigitWord;
            if (lowOutOfRange || clientHigh > maxFourDigitWord)
            {
                RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName,
                    lowOutOfRange ? tag.Address : tag.HighRegister,
                    lowOutOfRange ? clientLow : clientHigh,
                    "Write");
                ctx.Counters.IncrementInvalidBcd();
                continue;
            }

            // Recombine the two four-digit halves into the full decimal value.
            int binaryValue = clientHigh * 10_000 + clientLow;

            ushort bcdLow, bcdHigh;
            try
            {
                (bcdLow, bcdHigh) = BcdCodec.Encode32(binaryValue);
            }
            catch (ArgumentOutOfRangeException)
            {
                // Defensive: the codec has the final say on what is encodable.
                RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address,
                    clientLow, "Write");
                ctx.Counters.IncrementInvalidBcd();
                continue;
            }

            pdu[lowByteOff] = (byte)(bcdLow >> 8);
            pdu[lowByteOff + 1] = (byte)(bcdLow & 0xFF);
            pdu[highByteOff] = (byte)(bcdHigh >> 8);
            pdu[highByteOff + 1] = (byte)(bcdHigh & 0xFF);
            ctx.Counters.AddRewrittenSlots(2);
        }
        else
        {
            // 16-bit tag.
            if (offsetWords < 0 || offsetWords >= qty)
                continue; // outside range (shouldn't happen for 16-bit but be defensive)

            int byteOff = dataOffset + offsetWords * 2;
            if (byteOff + 2 > pdu.Length)
                continue; // malformed PDU — skip safely

            ushort clientValue = (ushort)((pdu[byteOff] << 8) | pdu[byteOff + 1]);
            ushort encoded;
            try
            {
                encoded = BcdCodec.Encode16(clientValue);
            }
            catch (ArgumentOutOfRangeException)
            {
                RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address,
                    clientValue, "Write");
                ctx.Counters.IncrementInvalidBcd();
                continue;
            }

            pdu[byteOff] = (byte)(encoded >> 8);
            pdu[byteOff + 1] = (byte)(encoded & 0xFF);
            ctx.Counters.AddRewrittenSlots(1);
        }
    }
}
// ── Response processing (FC03 / FC04) ───────────────────────────────────
/// <summary>
/// Rewrites a response PDU in place on its way back to the upstream client.
/// <list type="bullet">
/// <item>Exception responses (function code with the high bit set) are logged, counted
/// by exception code, and forwarded unchanged.</item>
/// <item>FC03/FC04 read responses have every covered BCD slot decoded from BCD nibbles
/// to a binary integer, using the start address and quantity of the matched request
/// in <c>ctx.CurrentRequest</c>.</item>
/// <item>FC06 echoes are delegated to <c>ProcessFc06Response</c>.</item>
/// <item>FC16 and all other function codes are forwarded unchanged.</item>
/// </list>
/// Slots that cannot be decoded, or that only half-cover a 32-bit tag, are logged,
/// counted, and left raw; the remaining slots are still processed.
/// </summary>
private static void ProcessResponse(byte fc, Span<byte> pdu, PerPlcContext ctx)
{
// Check for Modbus exception response (high bit of FC is set).
if ((fc & 0x80) != 0)
{
// Exception response: [fc|0x80][exceptionCode]
byte originalFc = (byte)(fc & 0x7F);
// A truncated exception frame (no code byte) is reported with code 0.
byte exceptionCode = pdu.Length >= 2 ? pdu[1] : (byte)0;
RewriterLogEvents.ExceptionPassthrough(ctx.Logger, ctx.PlcName, originalFc, exceptionCode);
ctx.Counters.IncrementBackendException(exceptionCode);
return; // pass through raw
}
switch (fc)
{
case 0x03:
case 0x04:
// Handled below.
break;
case 0x06:
// FC06 response echoes [fc][addrHi][addrLo][valHi][valLo].
// Since the proxy re-encoded the request (binary→BCD), the PLC echoes back
// BCD nibbles. The client expects its original binary value. Decode here.
ProcessFc06Response(pdu, ctx);
return;
case 0x10:
// FC16 response: [fc][startHi][startLo][qtyHi][qtyLo] — no register data.
return;
default:
return; // all other FCs pass through
}
// FC03/04 response: [fc][byteCount][reg0Hi][reg0Lo]...
// The start address is NOT in the response — the multiplexer attaches the matched
// InFlightRequest to ctx.CurrentRequest on the response path. Without it (e.g., a
// unit-test fixture invoking the pipeline directly without correlation) we cannot
// decode safely; pass the bytes through.
var currentReq = ctx.CurrentRequest;
if (currentReq is null)
return;
// Only FC03/04 responses should consult start/qty.
// Note: this checks the REQUEST's function code; it is not compared with the
// response's own function code.
if (currentReq.Fc != 0x03 && currentReq.Fc != 0x04)
return;
ushort startAddress = currentReq.StartAddress;
ushort qty = currentReq.Qty;
if (pdu.Length < 2)
return;
int byteCount = pdu[1];
int wordsInResponse = byteCount / 2;
// Sanity: the qty in the request should match the words in the response.
// Use the smaller of the two to stay in bounds.
ushort effectiveQty = (ushort)Math.Min(qty, wordsInResponse);
if (!ctx.TagMap.TryGetForRange(startAddress, effectiveQty, out var hits))
return;
int dataOffset = 2; // pdu[2..] = register data
foreach (var hit in hits)
{
// Word offset of the tag's first register relative to the request start address;
// the range checks below allow for it being negative.
int offsetWords = hit.OffsetWords;
var tag = hit.Tag;
if (tag.IsThirtyTwoBit)
{
// Both registers of the pair must lie inside the words actually returned.
bool lowInRange = offsetWords >= 0 && offsetWords < effectiveQty;
bool highInRange = (offsetWords + 1) >= 0 && (offsetWords + 1) < effectiveQty;
if (!lowInRange || !highInRange)
{
// Partial overlap: logged with the requested qty, not the effective one.
RewriterLogEvents.PartialBcd(ctx.Logger, ctx.PlcName,
tag.Address, startAddress, qty);
ctx.Counters.IncrementPartialBcd();
continue;
}
int lowByteOff = dataOffset + offsetWords * 2;
int highByteOff = dataOffset + (offsetWords + 1) * 2;
// byteCount comes from the wire and may overstate the real PDU length.
if (lowByteOff + 2 > pdu.Length || highByteOff + 2 > pdu.Length)
continue;
// CDAB: Address = low register (low 4 BCD digits), Address+1 = high register
ushort rawLow = (ushort)((pdu[lowByteOff] << 8) | pdu[lowByteOff + 1]);
ushort rawHigh = (ushort)((pdu[highByteOff] << 8) | pdu[highByteOff + 1]);
int decoded;
try
{
decoded = BcdCodec.Decode32(rawLow, rawHigh);
}
catch (FormatException)
{
// Report the offending word: the low register if it holds a non-decimal
// nibble, otherwise the high register. Both registers are left raw.
ushort badRaw = HasBadNibble(rawLow) ? rawLow : rawHigh;
ushort badAddr = HasBadNibble(rawLow) ? tag.Address : tag.HighRegister;
RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, badAddr, badRaw, "Read");
ctx.Counters.IncrementInvalidBcd();
continue;
}
// Write decoded binary value back as a 32-bit value in CDAB layout.
// The client receives low 4 digits at Address and high 4 digits at Address+1.
int decodedLow = decoded % 10_000;
int decodedHigh = decoded / 10_000;
pdu[lowByteOff] = (byte)(decodedLow >> 8);
pdu[lowByteOff + 1] = (byte)(decodedLow & 0xFF);
pdu[highByteOff] = (byte)(decodedHigh >> 8);
pdu[highByteOff + 1] = (byte)(decodedHigh & 0xFF);
ctx.Counters.AddRewrittenSlots(2);
}
else
{
// 16-bit tag.
if (offsetWords < 0 || offsetWords >= effectiveQty)
continue;
int byteOff = dataOffset + offsetWords * 2;
if (byteOff + 2 > pdu.Length)
continue;
ushort raw = (ushort)((pdu[byteOff] << 8) | pdu[byteOff + 1]);
int decoded;
try
{
decoded = BcdCodec.Decode16(raw);
}
catch (FormatException)
{
// Non-decimal nibble: report it and leave this register raw.
RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address, raw, "Read");
ctx.Counters.IncrementInvalidBcd();
continue;
}
// Store the binary value big-endian in the same two bytes.
pdu[byteOff] = (byte)(decoded >> 8);
pdu[byteOff + 1] = (byte)(decoded & 0xFF);
ctx.Counters.AddRewrittenSlots(1);
}
}
}
/// <summary>
/// FC06 response: [fc=06][addrHi][addrLo][valHi][valLo] — the PLC's echo of the register
/// address and the value it wrote. For a configured 16-bit BCD tag the echoed value is
/// decoded from BCD nibbles back to a binary integer, so the client sees the value it
/// sent and library validation (e.g. NModbus echo-check) passes. Echoes for addresses
/// without a tag, or for the low register of a 32-bit tag, are forwarded unchanged.
/// </summary>
/// <remarks>
/// NOTE(review): this path does not know whether the matching request was actually
/// re-encoded. A request whose value could not be encoded is forwarded raw, yet its echo
/// is still run through the decoder here — e.g. a raw 0x2710 looks like valid BCD and
/// would be returned as 2710. Confirm whether that case needs request correlation.
/// </remarks>
private static void ProcessFc06Response(Span<byte> pdu, PerPlcContext ctx)
{
    const int EchoLength = 5;
    if (pdu.Length < EchoLength)
    {
        return;
    }

    ushort registerAddress = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(1, 2));

    // Nothing to undo unless a 16-bit tag sits exactly at this address. A 32-bit tag
    // here means a partial write, which was already warned about on the request path.
    if (!ctx.TagMap.TryGet(registerAddress, out var tag) || tag.IsThirtyTwoBit)
    {
        return;
    }

    ushort echoedRaw = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(3, 2));

    // The PLC echoed BCD nibbles; turn them back into the binary integer.
    int binaryValue;
    try
    {
        binaryValue = BcdCodec.Decode16(echoedRaw);
    }
    catch (FormatException)
    {
        RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, registerAddress, echoedRaw, "Read");
        ctx.Counters.IncrementInvalidBcd();
        return;
    }

    pdu[3] = (byte)(binaryValue >> 8);
    pdu[4] = (byte)(binaryValue & 0xFF);

    // RewrittenSlots is deliberately left alone: the request path already counted this
    // slot, and counting the echo would double-count it.
}
// ── Helpers ──────────────────────────────────────────────────────────────
/// <summary>
/// Tells whether <paramref name="raw"/> contains a non-decimal digit, i.e. whether any of
/// its four nibbles is >= 0xA.
/// </summary>
private static bool HasBadNibble(ushort raw)
{
    // Walk the nibbles from most to least significant.
    for (int shift = 12; shift >= 0; shift -= 4)
    {
        int nibble = (raw >> shift) & 0xF;
        if (nibble >= 0xA)
        {
            return true;
        }
    }

    return false;
}
}
+47
View File
@@ -0,0 +1,47 @@
namespace Mbproxy.Proxy;
/// <summary>
/// Which way a Modbus PDU is travelling when it is handed to the pipeline.
/// </summary>
public enum MbapDirection
{
    /// <summary>Request frame: upstream client → backend PLC.</summary>
    RequestToBackend = 0,

    /// <summary>Response frame: backend PLC → upstream client.</summary>
    ResponseToClient = 1,
}
/// <summary>
/// Base context object handed to every PDU pipeline call.
/// On its own (Phase 03) it carries only <see cref="PlcName"/>. Phase 04 extends it through
/// <see cref="PerPlcContext"/>, which adds the BcdTagMap, counters, and logger; Phase 09 added
/// the per-call <c>CurrentRequest</c> slot to <see cref="PerPlcContext"/> so responses can be
/// correlated with their originating request under multiplexing.
/// </summary>
public class PduContext
{
    /// <summary>
    /// Name of the PLC as configured in <c>MbproxyOptions.Plcs[i].Name</c>.
    /// Defaults to the empty string when not set.
    /// </summary>
    public string PlcName { get; init; } = string.Empty;
}
/// <summary>
/// Hook contract for inspecting and rewriting Modbus PDU bytes inline.
/// Called once per frame in each direction (request and response).
///
/// Implementations must be safe to call concurrently from multiple connection pairs.
/// In Phase 03 the only implementation is <see cref="NoopPduPipeline"/> (pass-through).
/// Phase 04 replaces it with a BCD rewriter registered via DI.
/// </summary>
public interface IPduPipeline
{
/// <summary>
/// Processes a single Modbus PDU. Implementations may mutate <paramref name="pdu"/> in place;
/// the call is synchronous and returns nothing, so any rewrite must be done on the span itself.
/// </summary>
/// <param name="direction">Whether this is a request or a response frame.</param>
/// <param name="mbapHeader">
/// The 7-byte MBAP header (TxId, ProtocolId, Length, UnitId), read-only. The function code is
/// not part of the header — it is <c>pdu[0]</c>.
/// </param>
/// <param name="pdu">The PDU bytes starting at the function code. May be mutated in place.</param>
/// <param name="context">Per-pair context (PLC name; extended in phase 04).</param>
void Process(MbapDirection direction, ReadOnlySpan<byte> mbapHeader, Span<byte> pdu, PduContext context);
}
+60
View File
@@ -0,0 +1,60 @@
namespace Mbproxy.Proxy;
/// <summary>
/// Stateless helpers for reading Modbus Application Protocol (MBAP) headers.
///
/// MBAP frame layout (7-byte header followed by the PDU):
///   [0..1] TxId        (big-endian uint16)
///   [2..3] ProtocolId  (big-endian uint16; always 0 for standard Modbus)
///   [4..5] Length      (big-endian uint16; counts UnitId + PDU bytes)
///   [6]    UnitId
///   [7..]  PDU         (function code + data); (Length - 1) bytes long
///
/// Total frame bytes = 6 (the bytes not counted by Length) + Length
///                   = 7 (header) + (Length - 1) (PDU body).
/// </summary>
internal static class MbapFrame
{
    /// <summary>Size in bytes of the MBAP header (TxId + ProtocolId + Length + UnitId).</summary>
    public const int HeaderSize = 7;

    /// <summary>Largest PDU body allowed (Modbus spec max: 253 bytes).</summary>
    public const int MaxPduBodySize = 253;

    /// <summary>Per-pair buffer size: header (7) + max PDU body (253) = 260 bytes.</summary>
    public const int BufferSize = HeaderSize + MaxPduBodySize;

    /// <summary>
    /// Reads every field of a 7-byte MBAP header from the start of <paramref name="buffer"/>.
    /// Returns <c>false</c> (with all outputs zeroed) when fewer than 7 bytes are supplied.
    /// Neither <paramref name="protocolId"/> nor <paramref name="length"/> is validated —
    /// that is left to the caller (and ultimately to the PLC).
    /// </summary>
    public static bool TryParseHeader(
        ReadOnlySpan<byte> buffer,
        out ushort txId,
        out ushort protocolId,
        out ushort length,
        out byte unitId)
    {
        // Start from the "failed" outputs so the short-buffer path is a plain return.
        txId = 0;
        protocolId = 0;
        length = 0;
        unitId = 0;

        if (buffer.Length < HeaderSize)
            return false;

        txId = BigEndian16(buffer[0], buffer[1]);
        protocolId = BigEndian16(buffer[2], buffer[3]);
        length = BigEndian16(buffer[4], buffer[5]);
        unitId = buffer[6];
        return true;
    }

    /// <summary>
    /// Total on-the-wire frame size for a given MBAP length field:
    /// the 6 bytes preceding UnitId (TxId + ProtocolId + Length) plus the length field itself,
    /// which is the same as the 7-byte header plus a (lengthField - 1)-byte PDU body.
    /// </summary>
    public static int TotalFrameLength(ushort lengthField)
    {
        const int BytesBeforeUnitId = HeaderSize - 1;
        return BytesBeforeUnitId + lengthField;
    }

    /// <summary>Combines a high and low byte into a big-endian 16-bit value.</summary>
    private static ushort BigEndian16(byte high, byte low) => (ushort)((high << 8) | low);
}
@@ -0,0 +1,82 @@
using System.Collections.Concurrent;
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Lookup table from proxy-assigned MBAP TxId to the <see cref="InFlightRequest"/> that is
/// waiting for a backend response under that id. Entries are added on the per-upstream
/// <c>OnFrame</c> path and removed by the backend reader task when the response arrives.
///
/// <para>Storage is a <see cref="ConcurrentDictionary{TKey, TValue}"/>. Phase 9's
/// single-writer / single-remover pattern does not strictly need one, but the
/// cascade-on-disconnect walk runs on a separate task and Phase 10 adds upstream-side
/// cancellation paths, so the safer primitive is worth its negligible cost.</para>
/// </summary>
internal sealed class CorrelationMap
{
    private readonly ConcurrentDictionary<ushort, InFlightRequest> _entries = new();

    /// <summary>
    /// Stores <paramref name="req"/> under <paramref name="proxyTxId"/>. A <c>false</c>
    /// return means the key was already occupied — a programming error, because the
    /// allocator must never hand out a key that is still in flight. Callers should treat
    /// it as a fatal contract violation and drop the upstream connection.
    /// </summary>
    public bool TryAdd(ushort proxyTxId, InFlightRequest req)
    {
        return _entries.TryAdd(proxyTxId, req);
    }

    /// <summary>
    /// Removes and returns the entry stored under <paramref name="proxyTxId"/>. A
    /// <c>false</c> return (no such entry) is normal during cascade cleanup and for
    /// stale responses.
    /// </summary>
    public bool TryRemove(ushort proxyTxId, out InFlightRequest req)
    {
        bool removed = _entries.TryRemove(proxyTxId, out InFlightRequest? found);
        req = found!;
        return removed;
    }

    /// <summary>How many requests are in flight right now.</summary>
    public int Count
    {
        get { return _entries.Count; }
    }

    /// <summary>
    /// Copies all in-flight requests into a detached array. Allocates; meant for
    /// diagnostics (cascade walk on backend disconnect; future drain-on-shutdown).
    /// </summary>
    public IReadOnlyCollection<InFlightRequest> Snapshot()
    {
        // Materialise so the caller holds a stable view unaffected by later adds/removes.
        InFlightRequest[] copy = _entries.Values.ToArray();
        return copy;
    }

    /// <summary>
    /// Removes every entry and returns what was removed. The multiplexer's cascade path
    /// uses this when the backend socket dies, so it can close every interested upstream
    /// pipe and free every allocated proxy TxId.
    /// </summary>
    public IReadOnlyList<KeyValuePair<ushort, InFlightRequest>> DrainAll()
    {
        var drained = new List<KeyValuePair<ushort, InFlightRequest>>(_entries.Count);
        foreach (KeyValuePair<ushort, InFlightRequest> entry in _entries)
        {
            ushort txId = entry.Key;

            // Someone else may have removed it since the enumerator saw it; only report
            // entries this call actually took out.
            if (!_entries.TryRemove(txId, out var removed))
                continue;

            drained.Add(KeyValuePair.Create(txId, removed));
        }

        return drained;
    }

    /// <summary>
    /// Lists the (proxyTxId, request) pairs whose <see cref="InFlightRequest.SentAtUtc"/>
    /// is at or before <paramref name="threshold"/>. Allocates a list; intended only for
    /// the periodic per-request timeout watchdog. Nothing is removed — the caller decides
    /// which entries to time out.
    /// </summary>
    public IReadOnlyList<KeyValuePair<ushort, InFlightRequest>> SnapshotOlderThan(DateTimeOffset threshold)
    {
        return _entries
            .Where(entry => entry.Value.SentAtUtc <= threshold)
            .ToList();
    }
}
@@ -0,0 +1,41 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// One upstream party interested in a single backend round-trip. Carries the upstream
/// pipe to deliver the response to AND the original MBAP TxId that the party sent — the
/// multiplexer must rewrite the response's MBAP TxId back to <see cref="OriginalTxId"/>
/// before handing the frame to the pipe, so each upstream sees the proxy as transparent.
///
/// <para><b>Phase 9 invariant:</b> exactly one <see cref="InterestedParty"/> per
/// <see cref="InFlightRequest"/>. <b>Phase 10 (read coalescing)</b> reuses this exact
/// shape to fan-out a single backend response to multiple upstream parties. Do not
/// collapse this into a single field on <see cref="InFlightRequest"/>.</para>
/// </summary>
/// <param name="Pipe">The upstream pipe that the response frame is delivered to.</param>
/// <param name="OriginalTxId">
/// The MBAP TxId taken from the party's request header; written back into bytes 0..1 of the
/// response frame before it is sent to <paramref name="Pipe"/>.
/// </param>
internal sealed record InterestedParty(UpstreamPipe Pipe, ushort OriginalTxId);
/// <summary>
/// Per-backend-request correlation record. Stored in <see cref="CorrelationMap"/> keyed
/// by the proxy-assigned TxId; looked up by the backend reader task to:
/// <list type="bullet">
///   <item><description>Restore each interested party's original MBAP TxId before forwarding
///     the response upstream (transparent multiplexing contract).</description></item>
///   <item><description>Provide the BCD rewriter with the originating request's
///     <c>StartAddress</c> / <c>Qty</c> for FC03/FC04 response decoding — the response
///     PDU itself does not carry the start address.</description></item>
///   <item><description>Measure backend round-trip time via <see cref="SentAtUtc"/>
///     (replaces the per-pair stopwatch slot from the 1:1 model).</description></item>
/// </list>
///
/// <para><b>Phase 9:</b> <see cref="InterestedParties"/> always has exactly one element.
/// The list shape is the load-bearing seam that <b>Phase 10 — read coalescing</b> hooks
/// into to fan out a single PLC response to multiple upstream clients without further
/// refactor of the multiplexer's data model. Reviewer note: do <i>not</i> simplify back
/// to a single <c>UpstreamPipe</c> field.</para>
/// </summary>
/// <param name="UnitId">Unit id from the request's MBAP header.</param>
/// <param name="Fc">Function code of the request (first PDU byte).</param>
/// <param name="StartAddress">
/// Start register address parsed from the request PDU. The multiplexer fills this in only
/// for FC03/FC04 requests; it is 0 for every other function code.
/// </param>
/// <param name="Qty">
/// Register quantity parsed from the request PDU. Filled in only for FC03/FC04 requests;
/// 0 otherwise.
/// </param>
/// <param name="InterestedParties">Upstream parties that receive the backend response.</param>
/// <param name="SentAtUtc">
/// UTC timestamp taken when the record is created, just before the frame is queued for the
/// backend. Used for the round-trip measurement and compared against a threshold by
/// <see cref="CorrelationMap.SnapshotOlderThan"/>.
/// </param>
internal sealed record InFlightRequest(
byte UnitId,
byte Fc,
ushort StartAddress,
ushort Qty,
IReadOnlyList<InterestedParty> InterestedParties,
DateTimeOffset SentAtUtc);
@@ -0,0 +1,121 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Source-generated <see cref="LoggerMessage"/> definitions for the TxId-multiplexing
/// connection layer. Event names are stable — do not rename without updating
/// docs/design.md's "Logging" event-name table. Event ids in this class run from 110 to 116.
/// </summary>
internal static partial class MultiplexerLogEvents
{
/// <summary>
/// Emitted once per upstream client accept. Replaces the per-pair
/// <c>mbproxy.client.connected</c> event from the 1:1 model (same event name,
/// same property shape — operators' log queries are unchanged).
/// </summary>
[LoggerMessage(
EventId = 110,
EventName = "mbproxy.client.connected",
Level = LogLevel.Information,
Message = "Client connected: Plc={Plc} RemoteEp={RemoteEp}")]
public static partial void ClientConnected(
ILogger logger,
string plc,
string remoteEp);
/// <summary>
/// Emitted when an upstream pipe is closed (clean disconnect, fault, or cascade).
/// <c>Reason</c> is a free-text description of why the pipe closed.
/// </summary>
[LoggerMessage(
EventId = 111,
EventName = "mbproxy.client.disconnected",
Level = LogLevel.Information,
Message = "Client disconnected: Plc={Plc} RemoteEp={RemoteEp} Reason={Reason}")]
public static partial void ClientDisconnected(
ILogger logger,
string plc,
string remoteEp,
string reason);
/// <summary>
/// Emitted when the multiplexer successfully opens its single backend connection to a PLC.
/// </summary>
[LoggerMessage(
EventId = 112,
EventName = "mbproxy.multiplex.backend.connected",
Level = LogLevel.Information,
Message = "Backend multiplex connection up: Plc={Plc} Host={Host} Port={Port}")]
public static partial void BackendConnected(
ILogger logger,
string plc,
string host,
int port);
/// <summary>
/// Emitted when the multiplexer cascades a backend disconnect to all attached upstream
/// clients. <c>UpstreamCount</c> is the number of upstream pipes that were closed and
/// <c>InFlightCount</c> is the number of in-flight requests dropped.
/// </summary>
[LoggerMessage(
EventId = 113,
EventName = "mbproxy.multiplex.backend.disconnected",
Level = LogLevel.Warning,
Message = "Backend multiplex connection down: Plc={Plc} UpstreamCount={UpstreamCount} InFlightCount={InFlightCount} Reason={Reason}")]
public static partial void BackendDisconnected(
ILogger logger,
string plc,
int upstreamCount,
int inFlightCount,
string reason);
/// <summary>
/// Emitted each time the TxId allocator refuses to allocate — every slot in the 16-bit
/// space is currently in flight. The multiplexer responds to the upstream with a
/// Modbus exception (code 04 / Slave Device Failure). Realistically unreachable under
/// normal load (ECOM serializes at ~2-10 ms per request); a stress-only path.
/// </summary>
[LoggerMessage(
EventId = 114,
EventName = "mbproxy.multiplex.saturated",
Level = LogLevel.Error,
Message = "Multiplexer TxId space saturated — returning exception 04 to upstream: Plc={Plc} RemoteEp={RemoteEp}")]
public static partial void Saturated(
ILogger logger,
string plc,
string remoteEp);
/// <summary>
/// Emitted when the backend connect attempt (including any Polly retries) fails. Mirrors
/// the existing <c>mbproxy.backend.failed</c> event from the 1:1 model so operators'
/// alerts keep working unchanged after Phase 9.
/// </summary>
[LoggerMessage(
EventId = 115,
EventName = "mbproxy.backend.failed",
Level = LogLevel.Warning,
Message = "Backend connect failed: Plc={Plc} Reason={Reason}")]
public static partial void BackendFailed(
ILogger logger,
string plc,
string reason);
/// <summary>
/// Emitted when the per-request watchdog times out an in-flight request whose response
/// never arrived within <c>BackendRequestTimeoutMs</c>. The upstream party receives a
/// Modbus exception (code 0x0B / Gateway Target Device Failed To Respond) and the
/// proxy TxId is freed. Causes include: PLC dropped the response, network packet loss,
/// or a backend that echoes the wrong MBAP TxId (e.g. pymodbus 3.13.0's
/// concurrent-multiplexed-request bug).
/// </summary>
[LoggerMessage(
EventId = 116,
EventName = "mbproxy.multiplex.request.timeout",
Level = LogLevel.Warning,
Message = "In-flight request timed out: Plc={Plc} ProxyTxId={ProxyTxId} OriginalTxId={OriginalTxId} Fc={Fc} ElapsedMs={ElapsedMs}")]
public static partial void RequestTimeout(
ILogger logger,
string plc,
ushort proxyTxId,
ushort originalTxId,
byte fc,
long elapsedMs);
}
@@ -0,0 +1,664 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Net.Sockets;
using System.Threading.Channels;
using Mbproxy.Options;
using Polly;
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Owner of the single backend TCP connection to one PLC. Multiplexes many
/// <see cref="UpstreamPipe"/> instances onto that one socket by rewriting MBAP transaction
/// IDs so concurrent in-flight requests from different upstream clients remain
/// distinguishable on the shared wire. The multiplexer:
///
/// <list type="bullet">
/// <item><description>Opens and re-opens the backend socket through a Polly retry pipeline
/// that matches the <see cref="ResilienceOptions.BackendConnect"/> profile.</description></item>
/// <item><description>Runs one backend writer task that drains <see cref="_outboundChannel"/>
/// into the backend socket (single writer; no socket-level synchronisation needed).</description></item>
/// <item><description>Runs one backend reader task that decodes MBAP frames from the backend,
/// looks each frame up in the <see cref="CorrelationMap"/>, restores each interested
/// party's original TxId, and hands the frame to that party's
/// <see cref="UpstreamPipe._responseChannel"/>.</description></item>
/// <item><description>Cascades a backend disconnect by closing every attached pipe and
/// freeing every allocated proxy TxId, then waits for the next upstream request to
/// arrive (which triggers a fresh backend connect via Polly).</description></item>
/// </list>
///
/// <para><b>Threading invariants:</b> a single backend writer touches the backend socket
/// for sends; a single backend reader touches the same socket for receives. Per-upstream
/// read tasks call <see cref="OnUpstreamFrameAsync"/>, which allocates a proxy TxId, queues
/// the request frame into <see cref="_outboundChannel"/>, and returns. Upstream-side writes
/// flow through each pipe's response channel — never directly through this class.</para>
///
/// <para><b>Lifecycle:</b> the multiplexer is created with the backend offline. The first
/// <see cref="OnUpstreamFrameAsync"/> call triggers the backend connect through the Polly
/// pipeline; <see cref="Attach"/> only registers the pipe and does not connect. Subsequent
/// in-flight requests reuse the same socket. <see cref="DisposeAsync"/> tears down the backend
/// socket, the writer/reader tasks, and every attached pipe.</para>
/// </summary>
internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvider
{
private const int OutboundChannelCapacity = 256;
private readonly PlcOptions _plc;
private readonly ConnectionOptions _connectionOptions;
private readonly IPduPipeline _pipeline;
private readonly PerPlcContext _ctx;
private readonly ILogger<PlcMultiplexer> _logger;
private readonly ResiliencePipeline? _backendConnectPipeline;
private readonly TxIdAllocator _allocator = new();
private readonly CorrelationMap _correlation = new();
private readonly Channel<byte[]> _outboundChannel = Channel.CreateBounded<byte[]>(
new BoundedChannelOptions(OutboundChannelCapacity)
{
FullMode = BoundedChannelFullMode.Wait,
SingleReader = true,
SingleWriter = false,
});
// Attached pipes — Phase 9 needs the list for the status page; Phase 10 will need it for
// coalescing (fan-out). ConcurrentDictionary keyed on UpstreamPipe.Id for O(1) detach.
private readonly ConcurrentDictionary<Guid, UpstreamPipe> _pipes = new();
// Lifecycle plumbing. Backend tasks share a CTS; cascading disconnect cancels it,
// which terminates both the writer and reader tasks. The next call to
// EnsureBackendConnectedAsync constructs a fresh CTS and a fresh backend socket.
private readonly object _backendLock = new();
private Socket? _backendSocket;
private CancellationTokenSource? _backendCts;
private Task? _backendWriterTask;
private Task? _backendReaderTask;
private readonly CancellationTokenSource _disposeCts = new();
private bool _disposed;
private Task? _watchdogTask;
/// <summary>
/// Creates a multiplexer for one PLC. The backend connection is not opened here; the
/// constructor only stores its collaborators, publishes itself as the counters' live
/// telemetry source, and starts the per-request timeout watchdog.
/// </summary>
/// <param name="plc">Configuration of the PLC this multiplexer fronts.</param>
/// <param name="connectionOptions">Connection timeouts and related settings.</param>
/// <param name="pipeline">PDU pipeline applied to every request and response.</param>
/// <param name="perPlcContext">Per-PLC context (counters etc.) shared with the pipeline.</param>
/// <param name="logger">Logger for multiplexer events.</param>
/// <param name="backendConnectPipeline">
/// Optional Polly pipeline wrapping the backend connect; when <c>null</c> a single
/// connect attempt is made.
/// </param>
public PlcMultiplexer(
    PlcOptions plc,
    ConnectionOptions connectionOptions,
    IPduPipeline pipeline,
    PerPlcContext perPlcContext,
    ILogger<PlcMultiplexer> logger,
    ResiliencePipeline? backendConnectPipeline = null)
{
    (_plc, _connectionOptions) = (plc, connectionOptions);
    (_pipeline, _ctx) = (pipeline, perPlcContext);
    (_logger, _backendConnectPipeline) = (logger, backendConnectPipeline);

    // From here on the status counters read their multiplex telemetry from this instance.
    _ctx.Counters.SetMultiplexProvider(this);

    // Start the watchdog that scans the correlation map at a fixed interval and times out
    // any in-flight request older than BackendRequestTimeoutMs. It covers lost responses,
    // dead-PLC paths, and backends that mis-echo TxIds (e.g. pymodbus 3.13.0's
    // concurrent-multiplexed-request bug — see test files).
    _watchdogTask = Task.Run(
        () => RunRequestTimeoutWatchdogAsync(_disposeCts.Token),
        CancellationToken.None);
}
// ── IMultiplexCountersProvider ────────────────────────────────────────────
/// <summary>Number of proxy TxIds currently allocated, as reported by the allocator.</summary>
public long InFlightCount
{
    get { return _allocator.InFlightCount; }
}

/// <summary>Number of times the TxId allocator has wrapped, as reported by the allocator.</summary>
public long TxIdWraps
{
    get { return _allocator.WrapCount; }
}

/// <summary>Number of request frames queued for the backend writer but not yet read by it.</summary>
public long BackendQueueDepth
{
    get { return _outboundChannel.Reader.Count; }
}
// ── Public surface ────────────────────────────────────────────────────────
/// <summary>
/// Point-in-time copy of the upstream pipes attached right now. Used by the status page.
/// Each read allocates a new array.
/// </summary>
public IReadOnlyCollection<UpstreamPipe> AttachedPipes
{
    get
    {
        UpstreamPipe[] snapshot = _pipes.Values.ToArray();
        return snapshot;
    }
}
/// <summary>
/// Registers an upstream pipe with this multiplexer. This only records the pipe; the
/// caller must still run the pipe's read+write loops (typically via
/// <see cref="StartPipeAsync"/>), which wire the pipe's OnFrame callback back into
/// <see cref="OnUpstreamFrameAsync"/>.
/// </summary>
/// <exception cref="ObjectDisposedException">The multiplexer has been disposed.</exception>
public void Attach(UpstreamPipe pipe)
{
    if (!_disposed)
    {
        // Keyed by pipe id; re-attaching the same pipe simply overwrites its entry.
        _pipes[pipe.Id] = pipe;
        return;
    }

    throw new ObjectDisposedException(nameof(PlcMultiplexer));
}
/// <summary>
/// Attaches <paramref name="pipe"/>, launches its write loop in the background, and starts
/// its read loop with every received frame routed to <see cref="OnUpstreamFrameAsync"/>.
/// </summary>
/// <returns>
/// The read-loop task. When it finishes the pipe is detached from this multiplexer
/// (but not disposed).
/// </returns>
public Task StartPipeAsync(UpstreamPipe pipe, CancellationToken ct)
{
    Attach(pipe);

    // Fire-and-forget: the write loop ends when the pipe is disposed or its channel
    // completes, and is joined inside the pipe's own DisposeAsync rather than here.
    _ = Task.Run(() => pipe.RunWriteLoopAsync(ct), CancellationToken.None);

    var upstreamReads = pipe.RunReadLoopAsync(
        (upstreamFrame, upstreamCt) => OnUpstreamFrameAsync(pipe, upstreamFrame, upstreamCt),
        ct);

    // Detach once the read loop is over. Disposal is left to the listener (or to the
    // cascade walker), so the pipe is only removed from the registry here.
    _ = upstreamReads.ContinueWith(
        finished =>
        {
            _pipes.TryRemove(pipe.Id, out _);
        },
        TaskScheduler.Default);

    return upstreamReads;
}
/// <summary>
/// Shuts the multiplexer down: unhooks it from the counters, cancels the dispose token,
/// waits briefly for the watchdog, tears down the backend connection (cascading to the
/// upstreams), completes the outbound channel, and disposes every pipe still attached.
/// Calling it again after the first call is a no-op.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    // Unhook the counters provider first so a status snapshot taken during teardown
    // doesn't read live-but-soon-to-be-empty internal state.
    _ctx.Counters.SetMultiplexProvider(null);

    await _disposeCts.CancelAsync().ConfigureAwait(false);

    // Give the watchdog up to two seconds to settle any in-flight log/dispatch so tests
    // asserting on counter state see a quiescent multiplexer. Failures are ignored.
    if (_watchdogTask is { } watchdog)
    {
        try
        {
            await watchdog.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false);
        }
        catch
        {
            /* swallow */
        }
    }

    await TearDownBackendAsync("disposing", cascadeUpstreams: true).ConfigureAwait(false);
    _outboundChannel.Writer.TryComplete();

    // Dispose whatever pipes are still registered (best effort, one at a time).
    foreach (UpstreamPipe remaining in _pipes.Values)
    {
        try
        {
            await remaining.DisposeAsync().ConfigureAwait(false);
        }
        catch
        {
            /* best effort */
        }
    }

    _pipes.Clear();
    _disposeCts.Dispose();
}
// ── Backend connect / teardown ────────────────────────────────────────────
/// <summary>
/// Makes sure the single backend connection is up, connecting (through the optional Polly
/// pipeline) if it is not. Concurrent callers are serialised on <c>_connectGate</c> so only
/// one of them performs the connect; the rest observe the result.
/// </summary>
/// <param name="ct">Cancels waiting for the gate and the connect itself.</param>
/// <returns>
/// <c>true</c> when a live backend connection exists on return; <c>false</c> when the
/// multiplexer is disposed or the connect failed (the failure is logged and counted).
/// </returns>
private async Task<bool> EnsureBackendConnectedAsync(CancellationToken ct)
{
    if (_disposed) return false;

    // Fast path: already connected.
    if (IsBackendUp())
        return true;

    // Serialise concurrent connect attempts from many upstream pipes.
    await _connectGate.WaitAsync(ct).ConfigureAwait(false);
    try
    {
        // Re-check after acquiring the gate: another caller may have connected meanwhile.
        if (IsBackendUp())
            return true;

        Socket? backend = null;
        string? failureReason = null;
        try
        {
            if (_backendConnectPipeline is not null)
            {
                // Each retry attempt gets its own socket: a Socket whose connect attempt
                // failed or was cancelled is not guaranteed to be reusable for another try.
                await _backendConnectPipeline.ExecuteAsync(async attemptToken =>
                {
                    backend = await ConnectOnceAsync(attemptToken).ConfigureAwait(false);
                }, ct).ConfigureAwait(false);
            }
            else
            {
                backend = await ConnectOnceAsync(ct).ConfigureAwait(false);
            }
        }
        catch (Exception ex)
        {
            failureReason = ex is OperationCanceledException
                ? $"Backend connect timed out or cancelled after {_connectionOptions.BackendConnectTimeoutMs} ms"
                : ex.Message;
        }

        if (failureReason is not null || backend is null)
        {
            // backend is null here too if the pipeline returned without ever completing
            // a connect (e.g. a strategy that swallows the failure).
            backend?.Dispose();
            MultiplexerLogEvents.BackendFailed(
                _logger,
                _plc.Name,
                failureReason ?? "Backend connect pipeline completed without establishing a connection");
            _ctx.Counters.IncrementConnectFailed();
            return false;
        }

        Socket connected = backend;

        // The multiplexer may have been disposed while we were connecting. Its teardown has
        // already run, so nothing would ever close this socket — drop it instead of wiring it.
        if (_disposed)
        {
            connected.Dispose();
            return false;
        }

        // Successful connect. Wire up the backend tasks.
        var backendCts = CancellationTokenSource.CreateLinkedTokenSource(_disposeCts.Token);
        lock (_backendLock)
        {
            _backendSocket = connected;
            _backendCts = backendCts;
            _backendWriterTask = Task.Run(() => RunBackendWriterAsync(connected, backendCts.Token), CancellationToken.None);
            _backendReaderTask = Task.Run(() => RunBackendReaderAsync(connected, backendCts.Token), CancellationToken.None);
        }

        _ctx.Counters.IncrementConnectSuccess();
        MultiplexerLogEvents.BackendConnected(_logger, _plc.Name, _plc.Host, _plc.Port);
        return true;
    }
    finally
    {
        _connectGate.Release();
    }

    // True when a connected socket exists and its lifetime token has not been cancelled.
    bool IsBackendUp()
        => _backendSocket is { Connected: true } && _backendCts is { IsCancellationRequested: false };

    // One connect attempt on a brand-new socket, bounded by BackendConnectTimeoutMs.
    // The socket is disposed if the attempt fails, so a failed attempt leaks nothing.
    async Task<Socket> ConnectOnceAsync(CancellationToken attemptCt)
    {
        var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
        { NoDelay = true };
        try
        {
            using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(attemptCt);
            timeoutCts.CancelAfter(_connectionOptions.BackendConnectTimeoutMs);
            await socket.ConnectAsync(_plc.Host, _plc.Port, timeoutCts.Token).ConfigureAwait(false);
            return socket;
        }
        catch
        {
            socket.Dispose();
            throw;
        }
    }
}
private readonly SemaphoreSlim _connectGate = new(1, 1);
/// <summary>
/// Tears down the current backend connection, if any: detaches the socket/CTS/tasks from
/// the instance fields, cancels and closes them, drops every in-flight correlation entry
/// (freeing its proxy TxId), optionally disposes every attached upstream pipe, and waits
/// briefly for the writer and reader tasks to finish.
/// </summary>
/// <param name="reason">Free-text cause, written to the backend-disconnected log event.</param>
/// <param name="cascadeUpstreams">
/// When <c>true</c>, every attached upstream pipe is disposed and counted as a cascade.
/// </param>
private async Task TearDownBackendAsync(string reason, bool cascadeUpstreams)
{
    Socket? oldSocket;
    CancellationTokenSource? oldCts;
    Task? writer, reader;

    // Detach the connection state atomically so a concurrent reconnect starts clean and
    // a second teardown call finds nothing to do.
    lock (_backendLock)
    {
        oldSocket = _backendSocket;
        oldCts = _backendCts;
        writer = _backendWriterTask;
        reader = _backendReaderTask;
        _backendSocket = null;
        _backendCts = null;
        _backendWriterTask = null;
        _backendReaderTask = null;
    }

    if (oldSocket is null && oldCts is null) return;

    try { oldCts?.Cancel(); } catch { /* best effort */ }
    try { oldSocket?.Shutdown(SocketShutdown.Both); } catch { /* already closed */ }
    try { oldSocket?.Dispose(); } catch { /* best effort */ }

    // Drop every in-flight request and hand its proxy TxId back to the allocator.
    // NOTE(review): frames already queued in _outboundChannel are not discarded here;
    // confirm whether a writer started by a later reconnect could send them under proxy
    // TxIds that have been released (and possibly reallocated) by then.
    var dropped = _correlation.DrainAll();
    foreach (var kvp in dropped)
        _allocator.Release(kvp.Key);

    int upstreamCount = 0;
    if (cascadeUpstreams)
    {
        // Per the design doc, ALL attached upstreams cascade on backend disconnect —
        // not only those that had a request in flight.
        upstreamCount = _pipes.Count;

        // Snapshot before disposing: pipe disposal may detach pipes from the dictionary.
        var pipeList = _pipes.Values.ToArray();
        foreach (var pipe in pipeList)
        {
            try { await pipe.DisposeAsync().ConfigureAwait(false); }
            catch { /* best effort */ }
        }
        _pipes.Clear();
        _ctx.Counters.AddDisconnectCascades(upstreamCount);
    }

    // Best-effort join, bounded at two seconds per task.
    try { if (writer is not null) await writer.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }
    try { if (reader is not null) await reader.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }

    oldCts?.Dispose();

    // Stay quiet when nothing was actually affected.
    if (upstreamCount > 0 || dropped.Count > 0)
        MultiplexerLogEvents.BackendDisconnected(_logger, _plc.Name, upstreamCount, dropped.Count, reason);
}
// ── Backend writer / reader tasks ─────────────────────────────────────────
/// <summary>
/// Backend writer loop: takes request frames off the outbound channel and writes each one
/// completely to <paramref name="backend"/>. Ends quietly on cancellation; any other
/// failure triggers a backend teardown that cascades to the upstreams.
/// </summary>
private async Task RunBackendWriterAsync(Socket backend, CancellationToken ct)
{
    try
    {
        await foreach (byte[] outbound in _outboundChannel.Reader.ReadAllAsync(ct).ConfigureAwait(false))
        {
            // SendAsync may accept only part of the buffer, so keep going until the
            // whole frame is on the wire.
            for (int offset = 0; offset < outbound.Length;)
            {
                int written = await backend
                    .SendAsync(outbound.AsMemory(offset, outbound.Length - offset), SocketFlags.None, ct)
                    .ConfigureAwait(false);

                // Zero bytes accepted is treated as a dead connection.
                if (written == 0)
                    throw new SocketException((int)SocketError.ConnectionReset);

                offset += written;
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Cancellation is the normal teardown signal; nothing to do.
    }
    catch (Exception ex)
    {
        // The backend is broken: tear it down and cascade, without blocking this task.
        _ = TearDownBackendAsync($"writer fault: {ex.Message}", cascadeUpstreams: true);
    }
}
/// <summary>
/// Backend reader loop: reads MBAP frames from <paramref name="backend"/>, matches each to
/// its in-flight request by proxy TxId, runs the response through the PDU pipeline, and
/// forwards it to every interested upstream party with that party's original TxId restored.
/// When the loop ends without cancellation (EOF, unparsable header, oversized frame) or
/// faults, the backend is torn down and the disconnect cascades to the upstreams.
/// </summary>
private async Task RunBackendReaderAsync(Socket backend, CancellationToken ct)
{
    byte[] headerBuf = new byte[MbapFrame.HeaderSize];
    try
    {
        while (!ct.IsCancellationRequested)
        {
            if (!await FillAsync(backend, headerBuf, 0, MbapFrame.HeaderSize, ct).ConfigureAwait(false))
                break;

            if (!MbapFrame.TryParseHeader(headerBuf.AsSpan(),
                out ushort proxyTxId, out _, out ushort length, out _))
                break;

            if (length < 1)
            {
                // Degenerate frame — drop.
                // NOTE(review): a length of 0 describes a 6-byte frame, but 7 header bytes
                // have already been consumed, so the next header read presumably starts one
                // byte into the following frame. Confirm whether this should force a
                // teardown like the oversized-frame case below.
                continue;
            }

            int pduBodyLen = length - 1;
            if (pduBodyLen > MbapFrame.MaxPduBodySize)
            {
                // Frame too large — backend is misbehaving; force teardown.
                _logger.LogWarning(
                    "Oversized backend frame: Plc={Plc} PduBody={Body} > Max={Max}",
                    _plc.Name, pduBodyLen, MbapFrame.MaxPduBodySize);
                break;
            }

            byte[] frame = new byte[MbapFrame.HeaderSize + pduBodyLen];
            Buffer.BlockCopy(headerBuf, 0, frame, 0, MbapFrame.HeaderSize);
            if (!await FillAsync(backend, frame, MbapFrame.HeaderSize, pduBodyLen, ct).ConfigureAwait(false))
                break;

            if (!_correlation.TryRemove(proxyTxId, out var inFlight))
            {
                // No correlation entry — either a stale response after cascade, or
                // the PLC sent something unsolicited. Drop the frame.
                continue;
            }

            // Free the allocator slot immediately so it can be reused.
            _allocator.Release(proxyTxId);

            // Feed the round-trip EWMA. SentAtUtc is wall-clock, while the counter takes
            // Stopwatch ticks, so convert the elapsed time using Stopwatch.Frequency.
            TimeSpan roundTrip = DateTimeOffset.UtcNow - inFlight.SentAtUtc;
            long stopwatchTicks = (long)(roundTrip.TotalSeconds * Stopwatch.Frequency);
            if (stopwatchTicks > 0)
                _ctx.Counters.UpdateRoundTripEwma(stopwatchTicks);

            // Apply the BCD rewriter on the response. Build a per-call context clone
            // that carries CurrentRequest so the rewriter can decode FC03/04 slots.
            var responseCtx = _ctx.WithCurrentRequest(inFlight);
            _pipeline.Process(
                MbapDirection.ResponseToClient,
                frame.AsSpan(0, MbapFrame.HeaderSize),
                frame.AsSpan(MbapFrame.HeaderSize, pduBodyLen),
                responseCtx);

            // Fan out to each interested party with their original TxId restored.
            // Phase 9: always exactly one party. Phase 10: N parties (read coalescing).
            foreach (var party in inFlight.InterestedParties)
            {
                if (!party.Pipe.IsAlive)
                    continue;

                // With a single party the frame buffer is handed over as-is; with several
                // (Phase 10) each party gets its own copy so that patching one party's
                // TxId cannot affect a buffer already handed to another.
                byte[] outFrame = inFlight.InterestedParties.Count == 1
                    ? frame
                    : (byte[])frame.Clone();
                outFrame[0] = (byte)(party.OriginalTxId >> 8);
                outFrame[1] = (byte)(party.OriginalTxId & 0xFF);
                await party.Pipe.SendResponseAsync(outFrame, ct).ConfigureAwait(false);
            }
        }

        // Loop ended without cancellation being thrown — treat as backend gone. Cascade.
        _ = TearDownBackendAsync("backend reader EOF", cascadeUpstreams: true);
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception ex)
    {
        _ = TearDownBackendAsync($"reader fault: {ex.Message}", cascadeUpstreams: true);
    }
}
// ── Upstream → multiplexer entry point ────────────────────────────────────
/// <summary>
/// Entry point for one request frame read from an upstream pipe. Ensures the backend is
/// connected, allocates a proxy TxId, records the correlation entry, runs the request
/// through the PDU pipeline, stamps the proxy TxId into the MBAP header, and queues the
/// frame for the backend writer.
/// </summary>
/// <param name="pipe">The upstream pipe the frame came from.</param>
/// <param name="frame">
/// The complete MBAP frame (header + PDU). Mutated in place: the pipeline may rewrite the
/// PDU and bytes 0..1 are overwritten with the proxy TxId.
/// </param>
/// <param name="ct">Cancels the connect, the exception reply, and the enqueue.</param>
private async ValueTask OnUpstreamFrameAsync(UpstreamPipe pipe, byte[] frame, CancellationToken ct)
{
    if (_disposed) return;

    // Ensure backend is connected. Failure here means we cannot service the request;
    // close the upstream pipe (consistent with the 1:1 model's behaviour on connect
    // failure).
    if (!await EnsureBackendConnectedAsync(ct).ConfigureAwait(false))
    {
        try { await pipe.DisposeAsync().ConfigureAwait(false); } catch { /* best effort */ }
        return;
    }

    // A usable frame needs the full header plus at least the function-code byte. Dropping
    // header-only frames here keeps the frame[pduOffset] read below in bounds and avoids
    // allocating a proxy TxId for a frame that can never be sent.
    if (frame.Length <= MbapFrame.HeaderSize)
        return;

    if (!MbapFrame.TryParseHeader(frame.AsSpan(0, MbapFrame.HeaderSize),
        out ushort originalTxId, out _, out _, out byte unitId))
        return;

    int pduOffset = MbapFrame.HeaderSize;
    byte fcByte = frame[pduOffset];

    if (!_allocator.TryAllocate(out ushort proxyTxId))
    {
        MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
        // Synthesize Modbus exception 04 (Slave Device Failure).
        byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
        await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
        return;
    }

    // Capture start/qty for FC03/04 so the response decoder has the correlation it
    // needs; they stay 0 for every other function code.
    ushort startAddr = 0;
    ushort qty = 0;
    if (fcByte is 0x03 or 0x04 && frame.Length >= pduOffset + 5)
    {
        startAddr = (ushort)((frame[pduOffset + 1] << 8) | frame[pduOffset + 2]);
        qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
    }

    var inFlight = new InFlightRequest(
        UnitId: unitId,
        Fc: fcByte,
        StartAddress: startAddr,
        Qty: qty,
        InterestedParties: [new InterestedParty(pipe, originalTxId)],
        SentAtUtc: DateTimeOffset.UtcNow);

    if (!_correlation.TryAdd(proxyTxId, inFlight))
    {
        // Should be impossible: the allocator just guaranteed proxyTxId is free.
        _allocator.Release(proxyTxId);
        _logger.LogError("CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}", proxyTxId);
        return;
    }

    // Peak in-flight tracking.
    _ctx.Counters.ObserveInFlight(_allocator.InFlightCount);

    bool enqueued = false;
    try
    {
        // Apply the BCD rewriter on the request. Use a per-call context with CurrentRequest
        // (the rewriter doesn't currently need it on request, but Phase 10 may).
        var requestCtx = _ctx.WithCurrentRequest(inFlight);
        _pipeline.Process(
            MbapDirection.RequestToBackend,
            frame.AsSpan(0, MbapFrame.HeaderSize),
            frame.AsSpan(MbapFrame.HeaderSize, frame.Length - MbapFrame.HeaderSize),
            requestCtx);

        // Overwrite the MBAP TxId with the proxy TxId.
        frame[0] = (byte)(proxyTxId >> 8);
        frame[1] = (byte)(proxyTxId & 0xFF);

        // Enqueue for the backend writer task.
        await _outboundChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
        enqueued = true;
    }
    catch (ChannelClosedException)
    {
        // Channel completed during shutdown — nothing to send; cleanup happens below.
    }
    finally
    {
        // If the frame never reached the queue (shutdown, cancellation, or a pipeline
        // exception) no response can arrive for it, so free the slot right away.
        // TryRemove guards against a double release if another path already removed
        // the entry.
        if (!enqueued && _correlation.TryRemove(proxyTxId, out _))
            _allocator.Release(proxyTxId);
    }
}
// ── Per-request timeout watchdog ──────────────────────────────────────────
/// <summary>
/// Periodically scans the correlation map for in-flight requests whose response has
/// not arrived within <see cref="ConnectionOptions.BackendRequestTimeoutMs"/>. For each
/// stale entry: removes it from the map, frees its allocator slot, and delivers a
/// Modbus exception (code 0x0B / Gateway Target Device Failed To Respond) to each
/// interested party with the original TxId restored.
///
/// <para><b>Why this exists.</b> In the 1:1 connection model, a lost response would
/// fault the dedicated backend socket and the upstream pair would close. The multiplexed
/// model needs an explicit per-request timer because a single missing or mis-routed
/// response would otherwise leak a correlation entry forever and hang the upstream
/// pipe indefinitely. Real-world causes: PLC drops a response, network packet loss,
/// backend that mis-echoes MBAP TxIds.</para>
/// </summary>
/// <param name="ct">Stops the watchdog loop; cancellation is treated as normal teardown.</param>
private async Task RunRequestTimeoutWatchdogAsync(CancellationToken ct)
{
// Tick at ~quarter of the request timeout for responsive cleanup, but never more
// often than every 100 ms so the watchdog doesn't busy-wake on very small timeouts.
int tickMs = Math.Max(100, _connectionOptions.BackendRequestTimeoutMs / 4);
try
{
while (!ct.IsCancellationRequested)
{
await Task.Delay(tickMs, ct).ConfigureAwait(false);
// Anything sent at or before this instant has exceeded the request timeout.
var threshold = DateTimeOffset.UtcNow.AddMilliseconds(-_connectionOptions.BackendRequestTimeoutMs);
var stale = _correlation.SnapshotOlderThan(threshold);
if (stale.Count == 0) continue;
foreach (var kvp in stale)
{
ushort proxyTxId = kvp.Key;
// Try to claim the entry; if another path (response, cascade) already removed it,
// skip — no work to do.
if (!_correlation.TryRemove(proxyTxId, out var req))
continue;
// The entry is now exclusively ours, so the allocator slot can be returned.
_allocator.Release(proxyTxId);
long elapsedMs = (long)(DateTimeOffset.UtcNow - req.SentAtUtc).TotalMilliseconds;
foreach (var party in req.InterestedParties)
{
// The timeout is logged for every party, including ones whose pipe is already dead.
MultiplexerLogEvents.RequestTimeout(
_logger, _plc.Name, proxyTxId, party.OriginalTxId, req.Fc, elapsedMs);
if (!party.Pipe.IsAlive)
continue;
// Deliver Modbus exception 0x0B (Gateway Target Device Failed To Respond)
// to the upstream client. This lets the client's library raise a clean
// ModbusException rather than hanging on a timeout.
byte[] excFrame = BuildExceptionFrame(party.OriginalTxId, req.UnitId, req.Fc, exceptionCode: 0x0B);
try
{
await party.Pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
}
catch
{
// Best-effort delivery; if the pipe is going down, the client
// discovers the failure through its own socket close path.
}
}
}
}
}
catch (OperationCanceledException)
{
// Normal teardown.
}
catch (Exception ex)
{
// Any other fault ends the watchdog loop; it is logged but not rethrown.
_logger.LogError(ex, "Request-timeout watchdog faulted: Plc={Plc}", _plc.Name);
}
}
// ── Helpers ───────────────────────────────────────────────────────────────
/// <summary>
/// Reads exactly <paramref name="count"/> bytes from <paramref name="socket"/> into
/// <paramref name="buf"/> starting at <paramref name="offset"/>.
/// </summary>
/// <returns>
/// <c>true</c> once all requested bytes have been received; <c>false</c> if the peer
/// closed the connection (a zero-byte receive) before the range was filled.
/// </returns>
private static async Task<bool> FillAsync(
    Socket socket, byte[] buf, int offset, int count, CancellationToken ct)
{
    int filled = 0;
    while (filled < count)
    {
        // Receive into the still-empty tail of the requested range.
        Memory<byte> window = buf.AsMemory(offset + filled, count - filled);
        int got = await socket.ReceiveAsync(window, SocketFlags.None, ct).ConfigureAwait(false);
        if (got == 0)
        {
            return false;
        }

        filled += got;
    }

    return true;
}
/// <summary>
/// Builds a complete Modbus TCP exception response frame: the MBAP header followed by
/// the two-byte exception PDU <c>[fc | 0x80][exceptionCode]</c>.
/// </summary>
/// <param name="originalTxId">Transaction id to place in the MBAP header (big-endian).</param>
/// <param name="unitId">Unit identifier byte of the MBAP header.</param>
/// <param name="fc">Function code of the failed request; the high bit is set in the response.</param>
/// <param name="exceptionCode">Modbus exception code carried in the PDU.</param>
/// <returns>A freshly allocated frame of <c>MbapFrame.HeaderSize + 2</c> bytes.</returns>
private static byte[] BuildExceptionFrame(ushort originalTxId, byte unitId, byte fc, byte exceptionCode)
{
    byte txIdHigh = (byte)(originalTxId >> 8);
    byte txIdLow = (byte)(originalTxId & 0xFF);
    byte exceptionFc = (byte)(fc | 0x80);

    byte[] response = new byte[MbapFrame.HeaderSize + 2];

    // MBAP header.
    response[0] = txIdHigh;
    response[1] = txIdLow;
    response[2] = 0; // ProtocolId high
    response[3] = 0; // ProtocolId low
    response[4] = 0; // Length high
    response[5] = 3; // Length low: UnitId(1) + ExFc(1) + ExCode(1)
    response[6] = unitId;

    // Exception PDU.
    response[7] = exceptionFc;
    response[8] = exceptionCode;

    return response;
}
}
@@ -0,0 +1,142 @@
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// Allocates 16-bit MBAP transaction IDs (proxy TxIds) used to multiplex many upstream
/// clients onto a single shared backend connection per PLC. The allocator tracks which
/// IDs are currently in flight and scans forward from a rolling cursor to find the next
/// free slot, mimicking the natural cadence of Modbus clients while keeping reuse
/// distance maximally large in steady state.
///
/// <para>State is protected by a single <see cref="object"/> lock. Contention is
/// negligible in practice — the allocator is per-PLC and one PLC's wire rate is bounded
/// by the controller's internal scan time (a few ms per request on an H2-ECOM100).
/// The lock is preferred over a lock-free approach for readability and worst-case
/// determinism (Polly retries, cascade cleanup, and saturation paths must not race).</para>
///
/// <para><b>Memory:</b> <c>bool[65536]</c> (~64 KB) per PLC. With ~54 PLCs that is
/// ~3.4 MB total — well within budget for a service that already ships at ~30 MB working
/// set under load.</para>
///
/// <para><b>Wrap counter:</b> increments every time the rolling cursor rolls over
/// 0xFFFF → 0x0000 during a successful allocation scan — whether the roll-over happens
/// because slot 0xFFFF itself was allocated or because the scan skipped in-use slots
/// across the boundary. Frequent wraps indicate either very high churn or extreme
/// in-flight depth and are surfaced as a telemetry signal, not an error.</para>
/// </summary>
internal sealed class TxIdAllocator
{
    // 65,536 slots total — the full uint16 space.
    private const int SlotCount = 65536;

    private readonly object _lock = new();
    private readonly bool[] _inUse = new bool[SlotCount];
    private ushort _next;        // rolling cursor; 0 on construction
    private int _inFlightCount;  // 0..65536
    private long _wrapCount;     // monotonic; never resets

    /// <summary>
    /// Number of currently-in-flight proxy TxIds (i.e., allocated but not yet released).
    /// Read under the same lock that mutates it; the snapshot is a simple atomic read of
    /// an int but we still hold the lock for cross-field consistency with <c>_inUse</c>.
    /// </summary>
    public int InFlightCount
    {
        get
        {
            lock (_lock)
            {
                return _inFlightCount;
            }
        }
    }

    /// <summary>
    /// Number of times the rolling cursor has wrapped 0xFFFF → 0x0000 during a
    /// successful allocation since the allocator was constructed. Read without locking
    /// via <see cref="Interlocked.Read(ref long)"/> for the hot status-page path.
    /// </summary>
    public long WrapCount => Interlocked.Read(ref _wrapCount);

    /// <summary>
    /// Attempts to allocate the next free proxy TxId.
    /// Returns <c>true</c> with <paramref name="id"/> set when an ID was allocated.
    /// Returns <c>false</c> when every slot in the 16-bit space is currently in use;
    /// the caller is responsible for emitting <c>mbproxy.multiplex.saturated</c> and
    /// returning a Modbus exception (code 04 / Slave Device Failure) to the upstream.
    /// </summary>
    /// <param name="id">The allocated proxy TxId on success; <c>0</c> on failure.</param>
    public bool TryAllocate(out ushort id)
    {
        lock (_lock)
        {
            if (_inFlightCount >= SlotCount)
            {
                id = 0;
                return false;
            }

            // Scan forward from _next for the next free slot. _inFlightCount < SlotCount
            // guarantees at least one free slot, so the loop terminates within at most
            // SlotCount iterations even in the pathological full-minus-one case.
            ushort start = _next;
            ushort cursor = start;
            do
            {
                if (!_inUse[cursor])
                {
                    _inUse[cursor] = true;
                    _inFlightCount++;

                    ushort nextCursor = unchecked((ushort)(cursor + 1));

                    // The cursor advanced between 1 and SlotCount positions from 'start'.
                    // Without a roll-over the new position is strictly greater than
                    // 'start'; with one it is less than or equal. This also catches
                    // scans that crossed 0xFFFF → 0x0000 while skipping in-use slots.
                    if (nextCursor <= start)
                        Interlocked.Increment(ref _wrapCount);

                    _next = nextCursor;
                    id = cursor;
                    return true;
                }

                cursor = unchecked((ushort)(cursor + 1));
            }
            while (cursor != start);

            // Defensive: should be unreachable given the InFlightCount check above.
            id = 0;
            return false;
        }
    }

    /// <summary>
    /// Releases a previously-allocated proxy TxId. Releasing an ID that is not currently
    /// allocated is a no-op (defensive: cascade-on-disconnect can call <see cref="Release"/>
    /// after a concurrent timeout path has already done so).
    /// </summary>
    /// <param name="id">The proxy TxId to return to the free pool.</param>
    public void Release(ushort id)
    {
        lock (_lock)
        {
            if (_inUse[id])
            {
                _inUse[id] = false;
                _inFlightCount--;
            }
        }
    }

    /// <summary>
    /// Test-only: returns whether the given proxy TxId is currently marked in use.
    /// Internal so it remains usable from unit tests via InternalsVisibleTo.
    /// </summary>
    internal bool IsAllocated(ushort id)
    {
        lock (_lock)
        {
            return _inUse[id];
        }
    }
}
@@ -0,0 +1,281 @@
using System.Net;
using System.Net.Sockets;
using System.Threading.Channels;
namespace Mbproxy.Proxy.Multiplexing;
/// <summary>
/// One accepted upstream client socket, exposed as an asynchronous frame pipe to the
/// owning <see cref="PlcMultiplexer"/>. The pipe reads complete MBAP frames from the
/// upstream socket and hands each frame to a multiplexer-supplied <c>onFrame</c> callback;
/// it also exposes a write channel that the multiplexer drains to send response frames
/// back to the upstream client.
///
/// <para><b>Lifecycle:</b> constructed by <see cref="PlcListener"/> on accept; attached
/// to the multiplexer; runs its read loop until the upstream socket closes, the pipe is
/// disposed, or the multiplexer cascades a backend disconnect.</para>
///
/// <para><b>Concurrency model:</b> each pipe runs exactly two tasks — a read task and a
/// write task. The read task drives the multiplexer (one frame at a time, which preserves
/// the per-upstream-client one-in-flight invariant); the write task drains
/// <see cref="_responseChannel"/> and writes each frame to the socket. No third task ever
/// touches the socket. <see cref="DisposeAsync"/> may be called from any task, and
/// concurrently; only the first caller performs the teardown.</para>
///
/// <para><b>One-in-flight-per-upstream:</b> the read loop processes frames sequentially.
/// A multi-PDU-pipelined client would still get correct service because the multiplexer
/// can have multiple distinct <c>OnFrame</c> calls outstanding from <i>different</i>
/// upstream pipes; a single upstream cannot multi-PDU-pipeline itself.</para>
/// </summary>
internal sealed partial class UpstreamPipe : IAsyncDisposable
{
    // Capacity 16: enough to buffer responses while the upstream's TCP send buffer drains,
    // small enough that backpressure kicks in on a wedged consumer. Once the pipe is
    // disposed, IsAlive reads false and SendResponseAsync discards frames before they
    // ever enter the channel.
    private const int ResponseChannelCapacity = 16;

    private readonly Socket _upstream;
    private readonly ILogger _logger;
    private readonly string _plcName;

    private readonly Channel<byte[]> _responseChannel = Channel.CreateBounded<byte[]>(
        new BoundedChannelOptions(ResponseChannelCapacity)
        {
            FullMode = BoundedChannelFullMode.Wait,   // backpressure, not drop
            SingleReader = true,
            SingleWriter = false,                     // multiplexer adds; potential future paths too
        });

    // Internal CTS lets the multiplexer signal "drop this pipe now" without waiting for
    // the upstream socket to close cleanly.
    private readonly CancellationTokenSource _cts = new();

    // 0 = live, 1 = teardown claimed. Claimed atomically so that concurrent DisposeAsync
    // callers cannot both run the teardown (which would touch an already-disposed CTS).
    private int _disposeGate;

    // Published for readers of IsAlive on other threads.
    private volatile bool _disposed;

    // Phase 9: per-pipe forwarded-PDU counter (replaces the per-pair counter from the
    // 1:1 model). Read by the status page.
    private long _pdusForwardedCount;

    /// <summary>Stable identity for status-page reporting and cascade cleanup.</summary>
    public Guid Id { get; } = Guid.NewGuid();

    /// <summary>The upstream client's remote endpoint, captured at construction.</summary>
    public IPEndPoint? RemoteEp { get; }

    /// <summary>UTC time at which this pipe was constructed for the accepted socket.</summary>
    public DateTimeOffset ConnectedAtUtc { get; } = DateTimeOffset.UtcNow;

    /// <summary>
    /// Number of request frames read from this upstream and handed to the multiplexer
    /// callback. Incremented by <see cref="RunReadLoopAsync"/> once a frame has been read
    /// in full, immediately before the callback is invoked.
    /// </summary>
    public long PdusForwardedCount => Interlocked.Read(ref _pdusForwardedCount);

    /// <summary>
    /// <c>true</c> until the pipe is disposed or its internal cancellation source is
    /// cancelled. A socket fault inside the read or write loop ends that loop but does
    /// not by itself flip this flag; the owner is expected to dispose the pipe.
    /// </summary>
    public bool IsAlive => !_disposed && !_cts.IsCancellationRequested;

    /// <summary>
    /// Wraps an accepted upstream socket. Enables <c>NoDelay</c> on the socket and logs
    /// the client-connected event.
    /// </summary>
    /// <param name="upstream">The accepted client socket; owned by the pipe from here on.</param>
    /// <param name="plcName">PLC name used in log events.</param>
    /// <param name="logger">Logger for pipe lifecycle and protocol warnings.</param>
    public UpstreamPipe(Socket upstream, string plcName, ILogger logger)
    {
        _upstream = upstream;
        _upstream.NoDelay = true;
        RemoteEp = upstream.RemoteEndPoint as IPEndPoint;
        _plcName = plcName;
        _logger = logger;

        string remoteStr = RemoteEp?.ToString() ?? "?";
        MultiplexerLogEvents.ClientConnected(_logger, _plcName, remoteStr);
    }

    /// <summary>
    /// Runs the read side of the pipe. Reads complete MBAP frames from the upstream
    /// socket and invokes <paramref name="onFrame"/> for each. Returns when:
    /// <list type="bullet">
    ///   <item><description>The upstream closes (a zero-byte receive while reading a frame).</description></item>
    ///   <item><description>The header fails to parse or announces an oversized PDU body.</description></item>
    ///   <item><description>The pipe is disposed or <paramref name="ct"/> is cancelled.</description></item>
    ///   <item><description>The socket faults or is disposed underneath the loop.</description></item>
    /// </list>
    /// Exceptions thrown by <paramref name="onFrame"/> other than cancellation, socket,
    /// and object-disposed exceptions propagate to the caller.
    ///
    /// <para>Each <paramref name="onFrame"/> call receives a fresh <see cref="byte"/>[]
    /// that the loop never reuses, so the callee may retain and mutate it.</para>
    /// </summary>
    /// <param name="onFrame">Callback invoked sequentially with each complete frame.</param>
    /// <param name="ct">External cancellation; linked with the pipe's own cancellation source.</param>
    public async Task RunReadLoopAsync(
        Func<byte[], CancellationToken, ValueTask> onFrame,
        CancellationToken ct)
    {
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct, _cts.Token);
        var token = linked.Token;

        // Only the 7-byte header is buffered here; each frame (header + up to
        // MbapFrame.MaxPduBodySize bytes of PDU body) gets its own array below.
        byte[] headerBuf = new byte[MbapFrame.HeaderSize];

        try
        {
            while (!token.IsCancellationRequested)
            {
                // Read the 7-byte MBAP header.
                if (!await FillAsync(_upstream, headerBuf, 0, MbapFrame.HeaderSize, token).ConfigureAwait(false))
                    return; // EOF — upstream went away.

                if (!MbapFrame.TryParseHeader(headerBuf.AsSpan(),
                        out _, out _, out ushort length, out _))
                    return;

                if (length < 1)
                {
                    // Length field claims no body — forward the header alone via a fresh buffer.
                    byte[] degenerate = new byte[MbapFrame.HeaderSize];
                    Buffer.BlockCopy(headerBuf, 0, degenerate, 0, MbapFrame.HeaderSize);
                    Interlocked.Increment(ref _pdusForwardedCount);
                    await onFrame(degenerate, token).ConfigureAwait(false);
                    continue;
                }

                // The length field counts the unit id (already part of the header) plus the PDU.
                int pduBodyLen = length - 1;
                if (pduBodyLen > MbapFrame.MaxPduBodySize)
                {
                    // Frame too large — close the upstream.
                    _logger.LogWarning(
                        "Oversized upstream frame: Plc={Plc} PduBody={Body} > Max={Max}",
                        _plcName, pduBodyLen, MbapFrame.MaxPduBodySize);
                    return;
                }

                // Allocate a fresh frame buffer per PDU; the multiplexer retains it.
                byte[] frame = new byte[MbapFrame.HeaderSize + pduBodyLen];
                Buffer.BlockCopy(headerBuf, 0, frame, 0, MbapFrame.HeaderSize);

                if (!await FillAsync(_upstream, frame, MbapFrame.HeaderSize, pduBodyLen, token)
                        .ConfigureAwait(false))
                    return;

                Interlocked.Increment(ref _pdusForwardedCount);
                await onFrame(frame, token).ConfigureAwait(false);
            }
        }
        catch (OperationCanceledException)
        {
            // Normal shutdown.
        }
        catch (SocketException)
        {
            // Upstream socket closed by remote end — normal.
        }
        catch (ObjectDisposedException)
        {
            // Socket disposed by write loop or DisposeAsync — normal.
        }
    }

    /// <summary>
    /// Runs the write side of the pipe. Drains <see cref="_responseChannel"/> and writes
    /// each frame to the upstream socket. Returns when the channel completes, the pipe
    /// or <paramref name="ct"/> is cancelled, or the upstream socket fails.
    /// </summary>
    /// <param name="ct">External cancellation; linked with the pipe's own cancellation source.</param>
    public async Task RunWriteLoopAsync(CancellationToken ct)
    {
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct, _cts.Token);
        var token = linked.Token;

        try
        {
            await foreach (var frame in _responseChannel.Reader.ReadAllAsync(token).ConfigureAwait(false))
            {
                await SendAllAsync(_upstream, frame.AsMemory(), token).ConfigureAwait(false);
            }
        }
        catch (OperationCanceledException)
        {
            // Normal shutdown.
        }
        catch (SocketException)
        {
            // Upstream remote closed — normal.
        }
        catch (ObjectDisposedException)
        {
            // Socket disposed elsewhere — normal.
        }
    }

    /// <summary>
    /// Enqueues <paramref name="frame"/> for delivery on the upstream socket. Returns
    /// immediately, dropping the frame, when the pipe is no longer alive. Otherwise waits
    /// for channel capacity; a closed channel or a cancelled <paramref name="ct"/> drops
    /// the frame silently rather than throwing.
    /// </summary>
    /// <param name="frame">Complete response frame to send.</param>
    /// <param name="ct">Cancels the wait for channel capacity.</param>
    public async ValueTask SendResponseAsync(byte[] frame, CancellationToken ct)
    {
        if (!IsAlive)
            return;

        try
        {
            await _responseChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
        }
        catch (ChannelClosedException)
        {
            // Pipe disposed mid-write — drop silently.
        }
        catch (OperationCanceledException)
        {
            // Caller cancelled — drop silently.
        }
    }

    /// <summary>
    /// Closes the pipe: completes the response channel, cancels the read+write loops,
    /// and shuts down and disposes the socket. Idempotent and safe to call concurrently;
    /// callers after the first return immediately without waiting for the teardown.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        // Atomically claim the teardown so exactly one caller runs it.
        if (Interlocked.Exchange(ref _disposeGate, 1) != 0) return;
        _disposed = true;

        try { _responseChannel.Writer.TryComplete(); } catch { /* already complete */ }

        await _cts.CancelAsync().ConfigureAwait(false);

        try { _upstream.Shutdown(SocketShutdown.Both); } catch { /* already closed */ }
        _upstream.Dispose();

        _cts.Dispose();

        string remoteStr = RemoteEp?.ToString() ?? "?";
        MultiplexerLogEvents.ClientDisconnected(_logger, _plcName, remoteStr, "Pipe disposed");
    }

    // ── Low-level I/O helpers ─────────────────────────────────────────────────────

    /// <summary>
    /// Reads exactly <paramref name="count"/> bytes into <paramref name="buf"/> at
    /// <paramref name="offset"/>. Returns <c>false</c> if the peer closes the connection
    /// (zero-byte receive) before the range is filled, whether or not any bytes of the
    /// range had already arrived.
    /// </summary>
    private static async Task<bool> FillAsync(
        Socket socket, byte[] buf, int offset, int count, CancellationToken ct)
    {
        int remaining = count;
        while (remaining > 0)
        {
            int received = await socket.ReceiveAsync(
                buf.AsMemory(offset + (count - remaining), remaining),
                SocketFlags.None,
                ct).ConfigureAwait(false);

            if (received == 0)
                return false;

            remaining -= received;
        }

        return true;
    }

    /// <summary>
    /// Sends all of <paramref name="memory"/>, looping over partial sends. A zero-byte
    /// send is surfaced as a <see cref="SocketException"/> (connection reset).
    /// </summary>
    private static async Task SendAllAsync(Socket socket, Memory<byte> memory, CancellationToken ct)
    {
        while (memory.Length > 0)
        {
            int sent = await socket.SendAsync(memory, SocketFlags.None, ct).ConfigureAwait(false);
            if (sent == 0) throw new SocketException((int)SocketError.ConnectionReset);
            memory = memory[sent..];
        }
    }
}
@@ -0,0 +1,19 @@
namespace Mbproxy.Proxy;
/// <summary>
/// Pass-through <see cref="IPduPipeline"/>: every frame leaves exactly as it arrived,
/// with no rewriting of header or PDU bytes.
/// This is the pipeline registered as the singleton in Phase 03; Phase 04 swaps the
/// registration for BcdPduPipeline.
/// </summary>
internal sealed class NoopPduPipeline : IPduPipeline
{
    /// <summary>
    /// Does nothing. None of the arguments are read and <paramref name="pdu"/> is left
    /// untouched, so the caller forwards the original bytes.
    /// </summary>
    public void Process(
        MbapDirection direction,
        ReadOnlySpan<byte> mbapHeader,
        Span<byte> pdu,
        PduContext context)
    {
        // Deliberately empty — the frame is forwarded unmodified.
        // Phase 04 replaces this registration with BcdPduPipeline.
    }
}
@@ -0,0 +1,60 @@
using Mbproxy.Bcd;
using Mbproxy.Proxy.Multiplexing;
namespace Mbproxy.Proxy;
/// <summary>
/// Per-PLC <see cref="PduContext"/> carrying the resolved BCD tag map, the live
/// counters, and a logger. Being a <see cref="PduContext"/>, it can be handed to any
/// API that expects the base type.
///
/// One instance per configured PLC is constructed at <see cref="ProxyWorker"/> startup
/// and lives for the lifetime of the listener. It is shared across all upstream pipes
/// served by the same <see cref="Multiplexing.PlcMultiplexer"/>; all mutable state is
/// accessed through <see cref="ProxyCounters"/> which uses Interlocked for thread-safety.
///
/// <para><b>Phase 9 — request correlation:</b> the multiplexer supplies
/// <see cref="CurrentRequest"/> for each pipeline call. On the response path the pipeline
/// uses the request's <c>StartAddress</c>/<c>Qty</c> to decode FC03/FC04 BCD slots.
/// Different in-flight requests use different <see cref="InFlightRequest"/> instances, so
/// there is no cross-talk between concurrent multiplexed requests.</para>
///
/// <para><b>Concurrency:</b> the same <see cref="PerPlcContext"/> instance is visible to
/// the per-upstream read tasks (request path) and to the backend reader task (response
/// path). Mutating <see cref="CurrentRequest"/> on that shared instance would race, so
/// the multiplexer instead builds a lightweight per-call copy via
/// <see cref="WithCurrentRequest"/>. The state the copies share — tag map, counters,
/// logger — is read-only or Interlocked.</para>
/// </summary>
internal class PerPlcContext : PduContext
{
    /// <summary>BCD tag map for this PLC; defaults to the empty map.</summary>
    public BcdTagMap TagMap { get; init; } = BcdTagMap.Empty;

    /// <summary>Live counters for this PLC; shared by every per-call copy.</summary>
    public ProxyCounters Counters { get; init; } = new();

    /// <summary>Logger for pipeline diagnostics; defaults to a no-op logger.</summary>
    public ILogger Logger { get; init; } = Microsoft.Extensions.Logging.Abstractions.NullLogger.Instance;

    /// <summary>
    /// Correlation entry for the pipeline call this context was created for, or
    /// <c>null</c> when none was supplied. The BCD rewriter reads this on response to
    /// learn the originating FC03/FC04 start address and quantity (which are not present
    /// in the response PDU).
    /// </summary>
    internal InFlightRequest? CurrentRequest { get; init; }

    /// <summary>
    /// Creates a shallow copy of this context whose <see cref="CurrentRequest"/> is
    /// <paramref name="req"/>. The copy costs one allocation per call and shares the tag
    /// map, counters, and logger with the original, so the shared context itself is
    /// never mutated.
    /// </summary>
    /// <param name="req">The correlation entry for the upcoming pipeline call; may be <c>null</c>.</param>
    /// <returns>A new <see cref="PerPlcContext"/> instance.</returns>
    internal PerPlcContext WithCurrentRequest(InFlightRequest? req)
    {
        var perCall = new PerPlcContext
        {
            PlcName = PlcName,
            TagMap = TagMap,
            Counters = Counters,
            Logger = Logger,
            CurrentRequest = req,
        };

        return perCall;
    }
}
+188
View File
@@ -0,0 +1,188 @@
using System.Collections.Concurrent;
using System.Net;
using System.Net.Sockets;
using Mbproxy.Options;
using Mbproxy.Proxy.Multiplexing;
using Polly;
namespace Mbproxy.Proxy;
/// <summary>
/// Owns one <see cref="TcpListener"/> bound to a PLC's configured listen port and one
/// <see cref="PlcMultiplexer"/> that owns the single backend connection to the PLC.
///
/// <para><b>Phase 9 — TxId multiplexing:</b> the listener no longer pairs each upstream
/// socket with a dedicated backend socket. Instead, every accepted upstream is wrapped
/// in an <see cref="UpstreamPipe"/> and handed to the multiplexer. The multiplexer holds
/// at most one TCP connection to the PLC, eliminating the H2-ECOM100's 4-concurrent-client
/// cap from the upstream side.</para>
///
/// <para>The listener's accept loop is otherwise unchanged. <see cref="StartAsync"/>
/// binds the socket; <see cref="RunAsync"/> runs until cancelled or the listener faults;
/// <see cref="DisposeAsync"/> tears down both the listener and the multiplexer.</para>
/// </summary>
internal sealed partial class PlcListener : IAsyncDisposable
{
    private readonly PlcOptions _plc;
    private readonly ConnectionOptions _connectionOptions;
    private readonly IPduPipeline _pipeline;
    private readonly ILogger<PlcListener> _listenerLogger;
    private readonly ILogger<PlcMultiplexer> _multiplexerLogger;
    private readonly ILogger _pipeLogger;
    private readonly PerPlcContext? _perPlcContext;
    private readonly ResiliencePipeline? _backendConnectPipeline;

    private TcpListener? _listener;
    private PlcMultiplexer? _multiplexer;
    private bool _disposed;

    // Track active pipe-handling tasks so DisposeAsync can wait for them.
    private readonly ConcurrentDictionary<Guid, Task> _pipeTasks = new();

    /// <summary>
    /// Live collection of active <see cref="UpstreamPipe"/> instances for this listener.
    /// Consumed by the status page to report per-client telemetry. Empty when no
    /// multiplexer is currently held (before <see cref="StartAsync"/> or after
    /// <see cref="DisposeAsync"/>).
    /// </summary>
    public IReadOnlyCollection<UpstreamPipe> ActiveUpstreams
        => _multiplexer?.AttachedPipes ?? Array.Empty<UpstreamPipe>();

    /// <summary>Captures the configuration and collaborators; performs no I/O.</summary>
    /// <param name="plc">PLC configuration (name, listen port, backend details).</param>
    /// <param name="connectionOptions">Connection options passed through to the multiplexer.</param>
    /// <param name="pipeline">PDU pipeline passed through to the multiplexer.</param>
    /// <param name="listenerLogger">Logger for listener bind/fault events.</param>
    /// <param name="multiplexerLogger">Logger handed to the multiplexer.</param>
    /// <param name="pipeLogger">Logger handed to each <see cref="UpstreamPipe"/>.</param>
    /// <param name="perPlcContext">Optional shared per-PLC context; a minimal one is built when omitted.</param>
    /// <param name="backendConnectPipeline">Optional resilience pipeline passed through to the multiplexer.</param>
    public PlcListener(
        PlcOptions plc,
        ConnectionOptions connectionOptions,
        IPduPipeline pipeline,
        ILogger<PlcListener> listenerLogger,
        ILogger<PlcMultiplexer> multiplexerLogger,
        ILogger pipeLogger,
        PerPlcContext? perPlcContext = null,
        ResiliencePipeline? backendConnectPipeline = null)
    {
        _plc = plc;
        _connectionOptions = connectionOptions;
        _pipeline = pipeline;
        _listenerLogger = listenerLogger;
        _multiplexerLogger = multiplexerLogger;
        _pipeLogger = pipeLogger;
        _perPlcContext = perPlcContext;
        _backendConnectPipeline = backendConnectPipeline;
    }

    /// <summary>
    /// Binds the listen socket and constructs the multiplexer. Synchronous despite the
    /// name. Throws <see cref="SocketException"/> on bind failure; the caller
    /// (<see cref="Supervision.PlcListenerSupervisor"/>) catches and logs
    /// <c>mbproxy.startup.bind.failed</c>.
    /// </summary>
    public void StartAsync()
    {
        var endpoint = new IPEndPoint(IPAddress.Any, _plc.ListenPort);
        _listener = new TcpListener(endpoint);
        _listener.Start();

        LogBound(_listenerLogger, _plc.Name, _plc.ListenPort);

        // The multiplexer needs a PerPlcContext to share the BCD tag map and counters with
        // the pipeline. If the caller (typically a test or pre-Phase-6 startup path) didn't
        // supply one, construct a minimal context that exposes only the PlcName so the
        // multiplexer + a noop/passthrough pipeline still round-trip frames correctly.
        var ctx = _perPlcContext ?? new PerPlcContext
        {
            PlcName = _plc.Name,
            Logger = _pipeLogger,
        };

        _multiplexer = new PlcMultiplexer(
            _plc,
            _connectionOptions,
            _pipeline,
            ctx,
            _multiplexerLogger,
            _backendConnectPipeline);
    }

    /// <summary>
    /// Runs the accept loop until <paramref name="ct"/> is cancelled or the listener
    /// faults. On accept, wraps the socket in an <see cref="UpstreamPipe"/> and attaches
    /// it to the multiplexer on a background task. A client socket that fails while
    /// being wrapped is closed and skipped; it does not stop the loop.
    /// </summary>
    /// <param name="ct">Cancels the accept loop and is passed to each pipe's run.</param>
    /// <exception cref="InvalidOperationException"><see cref="StartAsync"/> has not been called.</exception>
    public async Task RunAsync(CancellationToken ct)
    {
        if (_listener is null)
            throw new InvalidOperationException("StartAsync must be called before RunAsync.");
        if (_multiplexer is null)
            throw new InvalidOperationException("StartAsync must construct the multiplexer before RunAsync.");

        // Capture in locals: DisposeAsync nulls _multiplexer, and a pipe task scheduled
        // just before disposal must not dereference the cleared field.
        TcpListener listener = _listener;
        PlcMultiplexer multiplexer = _multiplexer;

        try
        {
            while (!ct.IsCancellationRequested)
            {
                Socket upstream = await listener.AcceptSocketAsync(ct).ConfigureAwait(false);

                UpstreamPipe pipe;
                try
                {
                    pipe = new UpstreamPipe(upstream, _plc.Name, _pipeLogger);
                }
                catch (Exception ex) when (ex is SocketException or ObjectDisposedException)
                {
                    // The pipe constructor touches the socket (NoDelay); a client that
                    // reset right after accept must not take the whole listener down.
                    upstream.Dispose();
                    _listenerLogger.LogWarning(
                        "Accepted socket unusable, dropped: Plc={Plc} Reason={Reason}",
                        _plc.Name, ex.Message);
                    continue;
                }

                var pipeTask = Task.Run(async () =>
                {
                    try
                    {
                        await multiplexer.StartPipeAsync(pipe, ct).ConfigureAwait(false);
                    }
                    finally
                    {
                        await pipe.DisposeAsync().ConfigureAwait(false);
                    }
                }, CancellationToken.None);

                // Register first, then attach the removal continuation, so the removal
                // can never run before the entry exists.
                _pipeTasks[pipe.Id] = pipeTask;
                _ = pipeTask.ContinueWith(prev => _pipeTasks.TryRemove(pipe.Id, out _), TaskScheduler.Default);
            }
        }
        catch (OperationCanceledException)
        {
            // Normal shutdown.
        }
        catch (Exception ex)
        {
            // Listener faulted — log and return. The supervisor will restart.
            LogListenerFaulted(_listenerLogger, _plc.Name, _plc.ListenPort, ex.Message);
        }
    }

    // ── IAsyncDisposable ──────────────────────────────────────────────────────────────────

    /// <summary>
    /// Stops the listener, disposes the multiplexer, then waits up to five seconds for
    /// outstanding pipe tasks to finish (best effort). Subsequent calls are no-ops.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;

        _listener?.Stop();

        if (_multiplexer is not null)
        {
            await _multiplexer.DisposeAsync().ConfigureAwait(false);
            _multiplexer = null;
        }

        Task[] snapshot = _pipeTasks.Values.ToArray();
        if (snapshot.Length > 0)
        {
            using var timeout = new CancellationTokenSource(TimeSpan.FromSeconds(5));
            try
            {
                await Task.WhenAll(snapshot)
                    .WaitAsync(timeout.Token)
                    .ConfigureAwait(false);
            }
            catch
            {
                // Best effort.
            }
        }
    }

    // ── Logging ───────────────────────────────────────────────────────────────────────────

    [LoggerMessage(EventId = 20, EventName = "mbproxy.startup.bind",
        Level = LogLevel.Information, Message = "Listener bound: Plc={Plc} Port={Port}")]
    private static partial void LogBound(ILogger logger, string plc, int port);

    [LoggerMessage(EventId = 22, EventName = "mbproxy.listener.faulted",
        Level = LogLevel.Error, Message = "Listener faulted: Plc={Plc} Port={Port} Reason={Reason}")]
    private static partial void LogListenerFaulted(ILogger logger, string plc, int port, string reason);
}
+336
View File
@@ -0,0 +1,336 @@
namespace Mbproxy.Proxy;
/// <summary>
/// Immutable snapshot of per-PLC counters. Consumed by Phase 07's status page.
/// All fields are point-in-time reads; no ordering guarantees across fields.
///
/// <para><b>Backwards-compat policy (see docs/kpi.md):</b> fields are <i>added</i>, never
/// renamed or removed. Phase 9 appended <c>InFlightCount</c>, <c>MaxInFlight</c>,
/// <c>TxIdWraps</c>, <c>BackendDisconnectCascades</c>, and <c>BackendQueueDepth</c> for
/// the TxId-multiplexer telemetry surface (Tier 1.6 in docs/kpi.md).</para>
/// </summary>
/// <param name="PdusForwarded">Count of PDUs forwarded.</param>
/// <param name="Fc03">Count of PDUs with function code 0x03.</param>
/// <param name="Fc04">Count of PDUs with function code 0x04.</param>
/// <param name="Fc06">Count of PDUs with function code 0x06.</param>
/// <param name="Fc16">Count of PDUs with function code 0x10 (decimal 16).</param>
/// <param name="FcOther">Count of PDUs with a function code not tracked individually (presumably everything other than 03/04/06/16 — confirm against the counter's default case).</param>
/// <param name="RewrittenSlots">Presumably the number of register slots rewritten by the BCD pipeline — confirm against the rewriter.</param>
/// <param name="PartialBcdWarnings">Presumably the number of partial-BCD warnings raised by the rewriter — confirm against the rewriter.</param>
/// <param name="InvalidBcdWarnings">Presumably the number of invalid-BCD warnings raised by the rewriter — confirm against the rewriter.</param>
/// <param name="BackendException01">Presumably backend responses carrying Modbus exception code 01 — confirm.</param>
/// <param name="BackendException02">Presumably backend responses carrying Modbus exception code 02 — confirm.</param>
/// <param name="BackendException03">Presumably backend responses carrying Modbus exception code 03 — confirm.</param>
/// <param name="BackendException04">Presumably backend responses carrying Modbus exception code 04 — confirm.</param>
/// <param name="BackendExceptionOther">Presumably backend responses carrying any other Modbus exception code — confirm.</param>
/// <param name="BytesUpstreamIn">Presumably bytes received from upstream clients — confirm direction against the incrementing code.</param>
/// <param name="BytesUpstreamOut">Presumably bytes sent to upstream clients — confirm direction against the incrementing code.</param>
/// <param name="RecoveryAttempts">
/// Total number of failed listener bind attempts over the lifetime of the supervisor.
/// Accumulates; never resets. See <see cref="SupervisorSnapshot.RecoveryAttempts"/> doc.
/// </param>
/// <param name="LastBindError">
/// Most recent bind failure message (up to 256 chars); <c>null</c> if the listener
/// has never failed to bind.
/// </param>
/// <param name="LastRoundTripMs">
/// EWMA of recent backend round-trip times in milliseconds (α = 0.2).
/// Zero when no successful round-trips have been observed yet.
/// Stored internally as fixed-point microseconds (long * 1000) for Interlocked
/// compatibility; converted to double ms on snapshot.
/// </param>
/// <param name="ConnectsSuccess">
/// Number of backend connections successfully established (Polly final success).
/// </param>
/// <param name="ConnectsFailed">
/// Number of backend connections that failed on all Polly attempts.
/// </param>
/// <param name="InFlightCount">
/// Number of Modbus requests currently in flight on this PLC's multiplexed backend
/// connection (point-in-time snapshot of the correlation map size). Phase 9.
/// </param>
/// <param name="MaxInFlight">
/// Peak <see cref="InFlightCount"/> observed since the multiplexer was constructed.
/// Updated via <see cref="Interlocked"/> CAS so concurrent in-flight increments do not
/// lose the high-water mark. Phase 9.
/// </param>
/// <param name="TxIdWraps">
/// Number of times the per-PLC TxId allocator's rolling cursor has wrapped
/// 0xFFFF → 0x0000. A non-zero value is benign; a sudden burst suggests extreme
/// in-flight churn. Phase 9.
/// </param>
/// <param name="BackendDisconnectCascades">
/// Cumulative count of upstream pipes closed as a side effect of a backend disconnect.
/// Each backend reconnect cycle adds the number of attached upstream clients at the
/// time of the disconnect. Phase 9.
/// </param>
/// <param name="BackendQueueDepth">
/// Current depth of the per-PLC outbound channel feeding the backend writer task
/// (frames queued, not yet on the wire). A sustained non-zero value indicates the
/// backend is slower than upstream demand. Phase 9.
/// </param>
public sealed record CounterSnapshot(
long PdusForwarded,
long Fc03,
long Fc04,
long Fc06,
long Fc16,
long FcOther,
long RewrittenSlots,
long PartialBcdWarnings,
long InvalidBcdWarnings,
long BackendException01,
long BackendException02,
long BackendException03,
long BackendException04,
long BackendExceptionOther,
long BytesUpstreamIn,
long BytesUpstreamOut,
long RecoveryAttempts,
string? LastBindError,
double LastRoundTripMs,
long ConnectsSuccess,
long ConnectsFailed,
long InFlightCount,
long MaxInFlight,
long TxIdWraps,
long BackendDisconnectCascades,
long BackendQueueDepth);
/// <summary>
/// Thread-safe per-PLC counters backed by <see cref="System.Threading.Interlocked"/> longs.
/// All increment methods are allocation-free (no boxing, no heap traffic on the hot path).
/// <see cref="Snapshot"/> may allocate (record construction) — it is off-path (status page only).
/// </summary>
internal sealed class ProxyCounters
{
    /// <summary>Maximum number of characters of a bind error message kept for the snapshot.</summary>
    private const int MaxBindErrorLength = 256;

    /// <summary>EWMA smoothing factor (alpha): weight given to the newest round-trip sample.</summary>
    private const double EwmaSampleWeight = 0.2;

    /// <summary>Weight kept by the previous EWMA value (1 - alpha).</summary>
    private const double EwmaHistoryWeight = 0.8;

    // ── Hot-path fields (Interlocked longs) ─────────────────────────────────
    private long _pdusForwarded;
    private long _fc03;
    private long _fc04;
    private long _fc06;
    private long _fc16;
    private long _fcOther;
    private long _rewrittenSlots;
    private long _partialBcdWarnings;
    private long _invalidBcdWarnings;
    private long _backendException01;
    private long _backendException02;
    private long _backendException03;
    private long _backendException04;
    private long _backendExceptionOther;
    private long _bytesUpstreamIn;
    private long _bytesUpstreamOut;
    private long _recoveryAttempts;
    private long _connectsSuccess;
    private long _connectsFailed;

    // Phase 9 multiplexer telemetry.
    private long _maxInFlight;
    private long _backendDisconnectCascades;

    // Phase 9: live state pulled from the multiplexer's allocator/map/queue on each
    // snapshot. The multiplexer registers a single provider via SetMultiplexProvider.
    // We use a volatile reference for lock-free read on the snapshot path.
    private volatile IMultiplexCountersProvider? _multiplexProvider;

    // Last bind error message, or null once ClearLastBindError has run (or before any
    // failure was recorded). It is a string rather than a long, so it is published through
    // a volatile field instead of Interlocked. Written by IncrementRecoveryAttempt and
    // ClearLastBindError; read by Snapshot.
    private volatile string? _lastBindError;

    // EWMA round-trip time, stored as whole microseconds in a long so it can be updated
    // with Interlocked.CompareExchange. Snapshot divides by 1000 to report milliseconds.
    // The smoothing factor alpha = 0.2 gives a half-life of ~3 samples (responds quickly
    // to changes without being noisy).
    // 0 = no samples observed yet.
    private long _lastRoundTripUsEwma;

    // ── Increment methods ────────────────────────────────────────────────────

    /// <summary>Counts one forwarded PDU.</summary>
    public void IncrementPdusForwarded()
        => Interlocked.Increment(ref _pdusForwarded);

    /// <summary>
    /// Counts one request by Modbus function code. FC 0x03, 0x04, 0x06 and 0x10 have
    /// dedicated counters; every other code is counted under "other".
    /// </summary>
    /// <param name="fc">Function code byte of the PDU.</param>
    public void IncrementFcCount(byte fc)
    {
        switch (fc)
        {
            case 0x03: Interlocked.Increment(ref _fc03); break;
            case 0x04: Interlocked.Increment(ref _fc04); break;
            case 0x06: Interlocked.Increment(ref _fc06); break;
            case 0x10: Interlocked.Increment(ref _fc16); break;
            default: Interlocked.Increment(ref _fcOther); break;
        }
    }

    /// <summary>Adds <paramref name="n"/> to the rewritten-slot counter.</summary>
    public void AddRewrittenSlots(int n)
        => Interlocked.Add(ref _rewrittenSlots, n);

    /// <summary>Counts one partial-BCD warning.</summary>
    public void IncrementPartialBcd()
        => Interlocked.Increment(ref _partialBcdWarnings);

    /// <summary>Counts one invalid-BCD warning.</summary>
    public void IncrementInvalidBcd()
        => Interlocked.Increment(ref _invalidBcdWarnings);

    /// <summary>
    /// Increments the backend-exception counter for the given Modbus exception code.
    /// Codes 1-4 map to individual counters; anything else goes to "Other".
    /// </summary>
    /// <param name="code">Modbus exception code.</param>
    public void IncrementBackendException(byte code)
    {
        switch (code)
        {
            case 1: Interlocked.Increment(ref _backendException01); break;
            case 2: Interlocked.Increment(ref _backendException02); break;
            case 3: Interlocked.Increment(ref _backendException03); break;
            case 4: Interlocked.Increment(ref _backendException04); break;
            default: Interlocked.Increment(ref _backendExceptionOther); break;
        }
    }

    /// <summary>
    /// Adds byte counts for both upstream directions. Each counter is updated atomically,
    /// but the two updates are independent: a concurrent <see cref="Snapshot"/> may observe
    /// one without the other.
    /// </summary>
    /// <param name="up">Bytes added to the upstream-in counter.</param>
    /// <param name="down">Bytes added to the upstream-out counter.</param>
    public void AddBytes(long up, long down)
    {
        Interlocked.Add(ref _bytesUpstreamIn, up);
        Interlocked.Add(ref _bytesUpstreamOut, down);
    }

    /// <summary>
    /// Records one successful backend TCP connect (Polly pipeline returned success).
    /// </summary>
    public void IncrementConnectSuccess()
        => Interlocked.Increment(ref _connectsSuccess);

    /// <summary>
    /// Records one failed backend TCP connect (all Polly attempts exhausted).
    /// </summary>
    public void IncrementConnectFailed()
        => Interlocked.Increment(ref _connectsFailed);

    /// <summary>
    /// Records <paramref name="n"/> upstream pipes closed by a backend disconnect cascade.
    /// Phase 9.
    /// </summary>
    public void AddDisconnectCascades(int n)
        => Interlocked.Add(ref _backendDisconnectCascades, n);

    /// <summary>
    /// CAS-updates the peak in-flight high-water mark. Called on every successful
    /// allocation by the multiplexer. Phase 9.
    /// </summary>
    /// <param name="currentInFlight">In-flight count observed by the caller.</param>
    public void ObserveInFlight(int currentInFlight)
    {
        long sample = currentInFlight;
        long old;
        do
        {
            old = Interlocked.Read(ref _maxInFlight);

            // Another thread already published an equal or higher peak; nothing to do.
            if (sample <= old)
            {
                return;
            }
        }
        while (Interlocked.CompareExchange(ref _maxInFlight, sample, old) != old);
    }

    /// <summary>
    /// Wires the live multiplexer telemetry source into this counter set. Called by
    /// <see cref="Mbproxy.Proxy.Multiplexing.PlcMultiplexer"/> at construction time so
    /// the status page's <see cref="Snapshot"/> can include live in-flight / queue-depth
    /// values without polling the multiplexer separately. Phase 9.
    /// </summary>
    /// <param name="provider">Provider to read from, or <c>null</c> to detach.</param>
    internal void SetMultiplexProvider(IMultiplexCountersProvider? provider)
        => _multiplexProvider = provider;

    /// <summary>
    /// Increments the recovery-attempt counter and records the bind error message
    /// (truncated to 256 chars). Called by the supervisor on each failed bind.
    /// </summary>
    /// <param name="errorMessage">Error text to retain for the snapshot.</param>
    public void IncrementRecoveryAttempt(string errorMessage)
    {
        Interlocked.Increment(ref _recoveryAttempts);
        _lastBindError = errorMessage.Length > MaxBindErrorLength
            ? errorMessage[..MaxBindErrorLength]
            : errorMessage;
    }

    /// <summary>
    /// Clears the last bind error after a successful bind.
    /// </summary>
    public void ClearLastBindError()
    {
        _lastBindError = null;
    }

    /// <summary>
    /// Updates the EWMA round-trip estimate with a new sample.
    /// Uses alpha = 0.2: new_ewma = 0.2 * sample + 0.8 * old_ewma.
    /// <paramref name="elapsedTicks"/> is an interval measured in
    /// <see cref="System.Diagnostics.Stopwatch"/> ticks (converted with
    /// <see cref="System.Diagnostics.Stopwatch.Frequency"/>).
    /// Thread-safe via CAS loop on a whole-microsecond long.
    /// </summary>
    /// <param name="elapsedTicks">
    /// Elapsed stopwatch ticks for one round-trip. Negative values are ignored.
    /// </param>
    public void UpdateRoundTripEwma(long elapsedTicks)
    {
        // A negative interval is not a meaningful round-trip; ignore it rather than
        // letting it pull the average below zero.
        if (elapsedTicks < 0)
        {
            return;
        }

        // Convert ticks to milliseconds.
        double sampleMs = (double)elapsedTicks / System.Diagnostics.Stopwatch.Frequency * 1000.0;

        // Store whole microseconds (milliseconds * 1000, truncated) so the value fits a
        // long for CAS. 1 µs resolution is ample for Modbus round-trips (1-100 ms range).
        long sampleUs = (long)(sampleMs * 1000.0);

        long old, newVal;
        do
        {
            old = Interlocked.Read(ref _lastRoundTripUsEwma);

            // 0 is the "no sample yet" sentinel: seed with the first sample,
            // otherwise blend the sample into the running average.
            newVal = old == 0
                ? sampleUs
                : (long)(EwmaSampleWeight * sampleUs + EwmaHistoryWeight * old);
        }
        while (Interlocked.CompareExchange(ref _lastRoundTripUsEwma, newVal, old) != old);
    }

    // ── Snapshot (off hot-path, may allocate) ────────────────────────────────

    /// <summary>
    /// Returns a point-in-time snapshot of all counters.
    /// Each field is read atomically via <see cref="Interlocked.Read"/>; the snapshot as a
    /// whole is not taken under a lock, so fields may reflect slightly different instants.
    /// Live multiplexer values are 0 when no provider has been registered.
    /// May allocate (record construction); intended for the status-page path only.
    /// </summary>
    public CounterSnapshot Snapshot()
    {
        // Read the volatile provider reference once so all three live values come from
        // the same provider instance.
        var provider = _multiplexProvider;
        long inFlightNow = provider?.InFlightCount ?? 0;
        long txWraps = provider?.TxIdWraps ?? 0;
        long queueDepth = provider?.BackendQueueDepth ?? 0;

        return new(
            PdusForwarded: Interlocked.Read(ref _pdusForwarded),
            Fc03: Interlocked.Read(ref _fc03),
            Fc04: Interlocked.Read(ref _fc04),
            Fc06: Interlocked.Read(ref _fc06),
            Fc16: Interlocked.Read(ref _fc16),
            FcOther: Interlocked.Read(ref _fcOther),
            RewrittenSlots: Interlocked.Read(ref _rewrittenSlots),
            PartialBcdWarnings: Interlocked.Read(ref _partialBcdWarnings),
            InvalidBcdWarnings: Interlocked.Read(ref _invalidBcdWarnings),
            BackendException01: Interlocked.Read(ref _backendException01),
            BackendException02: Interlocked.Read(ref _backendException02),
            BackendException03: Interlocked.Read(ref _backendException03),
            BackendException04: Interlocked.Read(ref _backendException04),
            BackendExceptionOther: Interlocked.Read(ref _backendExceptionOther),
            BytesUpstreamIn: Interlocked.Read(ref _bytesUpstreamIn),
            BytesUpstreamOut: Interlocked.Read(ref _bytesUpstreamOut),
            RecoveryAttempts: Interlocked.Read(ref _recoveryAttempts),
            LastBindError: _lastBindError,
            // Stored value is microseconds; report milliseconds.
            LastRoundTripMs: Interlocked.Read(ref _lastRoundTripUsEwma) / 1000.0,
            ConnectsSuccess: Interlocked.Read(ref _connectsSuccess),
            ConnectsFailed: Interlocked.Read(ref _connectsFailed),
            InFlightCount: inFlightNow,
            MaxInFlight: Interlocked.Read(ref _maxInFlight),
            TxIdWraps: txWraps,
            BackendDisconnectCascades: Interlocked.Read(ref _backendDisconnectCascades),
            BackendQueueDepth: queueDepth);
    }
}
/// <summary>
/// Read-only window into the per-PLC multiplexer's live state (allocator counts,
/// outbound-queue depth). Implemented by <see cref="Mbproxy.Proxy.Multiplexing.PlcMultiplexer"/>
/// and registered with <see cref="ProxyCounters.SetMultiplexProvider"/> so
/// <see cref="ProxyCounters.Snapshot"/> can include live mux telemetry without holding
/// a direct reference to the multiplexer (which would couple counter snapshots to the
/// connection layer's lifecycle). Phase 9.
/// </summary>
/// <remarks>
/// <see cref="ProxyCounters.Snapshot"/> reads each property once per snapshot and reports 0
/// for all three when no provider is registered.
/// </remarks>
internal interface IMultiplexCountersProvider
{
/// <summary>Number of currently-in-flight requests on the backend socket.</summary>
long InFlightCount { get; }
/// <summary>Cumulative 0xFFFF → 0x0000 wrap events from the TxId allocator.</summary>
long TxIdWraps { get; }
/// <summary>Current depth of the outbound channel (frames queued for the backend writer).</summary>
long BackendQueueDepth { get; }
}
+218
View File
@@ -0,0 +1,218 @@
using Mbproxy.Bcd;
using Mbproxy.Configuration;
using Mbproxy.Options;
using Mbproxy.Proxy.Multiplexing;
using Mbproxy.Proxy.Supervision;
using Microsoft.Extensions.Options;
using Polly;
namespace Mbproxy.Proxy;
/// <summary>
/// <see cref="BackgroundService"/> that owns all <see cref="PlcListenerSupervisor"/> instances.
///
/// Startup posture (matches design doc "eager, continue on per-port failure"):
/// <list type="number">
///   <item>Enumerate <see cref="MbproxyOptions.Plcs"/> and build one supervisor per PLC.</item>
///   <item>Start all supervisors in parallel. Each supervisor attempts to bind immediately
///         and enters the Polly recovery loop if the bind fails.</item>
///   <item>After all supervisors have completed their first bind attempt (reached
///         <see cref="SupervisorState.Bound"/> or <see cref="SupervisorState.Recovering"/>),
///         log <c>mbproxy.startup.ready</c> with bound/configured counts.</item>
/// </list>
///
/// Phase 06: passes the supervisor dictionary to <see cref="ConfigReconciler.Attach"/>
/// once the supervisors are built and before they are started, so hot-reload changes are
/// applied by the reconciler.
///
/// Stop: cancels all supervisors in parallel with a 5-second hard deadline.
/// </summary>
internal sealed partial class ProxyWorker : BackgroundService
{
    /// <summary>Upper bound on how long startup waits for every supervisor's first bind attempt.</summary>
    private static readonly TimeSpan InitialBindWaitTimeout = TimeSpan.FromSeconds(30);

    /// <summary>Hard deadline for stopping all supervisors during shutdown.</summary>
    private static readonly TimeSpan SupervisorStopTimeout = TimeSpan.FromSeconds(5);

    private readonly IOptionsMonitor<MbproxyOptions> _options;
    private readonly IPduPipeline _pipeline;
    private readonly ILogger<ProxyWorker> _logger;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ConfigReconciler _reconciler;

    // Phase 06: supervisors are now managed jointly by ProxyWorker (initial bootstrap)
    // and ConfigReconciler (subsequent hot-reload changes). The dictionary is shared
    // via ConfigReconciler.Attach() once it has been populated, before the supervisors start.
    private readonly Dictionary<string, PlcListenerSupervisor> _supervisors = new(StringComparer.Ordinal);

    /// <summary>
    /// Read-only view of the live supervisor dictionary. Consumed by Phase 07's
    /// <see cref="Admin.StatusSnapshotBuilder"/> to enumerate per-PLC state.
    /// The caller should read this on the status-page path only (not the hot path).
    /// </summary>
    internal IReadOnlyDictionary<string, PlcListenerSupervisor> Supervisors => _supervisors;

    /// <summary>Stores the injected dependencies; no work is done until <see cref="ExecuteAsync"/>.</summary>
    public ProxyWorker(
        IOptionsMonitor<MbproxyOptions> options,
        IPduPipeline pipeline,
        ILogger<ProxyWorker> logger,
        ILoggerFactory loggerFactory,
        ConfigReconciler reconciler)
    {
        _options = options;
        _pipeline = pipeline;
        _logger = logger;
        _loggerFactory = loggerFactory;
        _reconciler = reconciler;
    }

    /// <summary>
    /// Builds per-PLC contexts and supervisors from the current options, attaches the
    /// reconciler, starts every supervisor, logs <c>mbproxy.startup.ready</c> once the first
    /// bind attempts have finished (or the readiness deadline has passed), then idles until
    /// <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    /// <param name="stoppingToken">Cancelled by the host when the service is stopping.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var opts = _options.CurrentValue;
        int plcsConfigured = opts.Plcs.Count;

        // ── 1. Build per-PLC BCD tag maps ────────────────────────────────────────────
        var plcContexts = new Dictionary<string, PerPlcContext>(opts.Plcs.Count, StringComparer.Ordinal);
        foreach (var plc in opts.Plcs)
        {
            var result = BcdTagMapBuilder.Build(opts.BcdTags, plc.BcdTags);
            foreach (var warn in result.Warnings)
                _logger.LogWarning("[{Plc}] BCD tag map warning: {Message}", plc.Name, warn.Message);

            if (result.Errors.Count > 0)
            {
                foreach (var err in result.Errors)
                    _logger.LogError("[{Plc}] BCD tag map error ({Kind}): {Message}",
                        plc.Name, err.Kind, err.Message);
                _logger.LogError("Skipping listener for PLC '{Plc}' due to BCD tag map errors.", plc.Name);
                continue;
            }

            plcContexts[plc.Name] = new PerPlcContext
            {
                PlcName = plc.Name,
                TagMap = result.Map,
                Counters = new ProxyCounters(),
                Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{plc.Name}"),
            };
        }

        // ── 2. Build Polly pipelines once ─────────────────────────────────────────────
        // The backend-connect pipeline is built from ResilienceOptions and reused across
        // all PLCs; listener-recovery pipelines are built per supervisor in step 3.
        var resilienceOpts = opts.Resilience;
        var backendPipeline = PolicyFactory.BuildBackendConnect(
            resilienceOpts.BackendConnect,
            _loggerFactory.CreateLogger("Mbproxy.Proxy.BackendConnect"));

        // ── 3. Build supervisors ──────────────────────────────────────────────────────
        foreach (var plc in opts.Plcs)
        {
            if (!plcContexts.TryGetValue(plc.Name, out var perPlcContext))
                continue; // BCD map failed — skip this PLC.

            // Each supervisor gets its own recovery pipeline (with its own logger scope).
            var recoveryPipeline = PolicyFactory.BuildListenerRecovery(
                resilienceOpts.ListenerRecovery,
                _loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plc.Name}"));

            var supervisor = new PlcListenerSupervisor(
                plc,
                opts.Connection,
                _pipeline,
                _loggerFactory.CreateLogger<PlcListener>(),
                _loggerFactory.CreateLogger<PlcMultiplexer>(),
                _loggerFactory.CreateLogger($"Mbproxy.Proxy.UpstreamPipe.{plc.Name}"),
                perPlcContext,
                recoveryPipeline,
                _loggerFactory.CreateLogger<PlcListenerSupervisor>(),
                backendPipeline);
            _supervisors[plc.Name] = supervisor;
        }

        // ── Phase 06: wire reconciler BEFORE starting supervisors ─────────────────
        // Attach hands the reconciler the authoritative supervisor dictionary and the
        // initial options snapshot. The reconciler won't process OnChange events until
        // after this call — the brief window between Attach and first supervisor start
        // is safe because the channel signal only enqueues; apply runs asynchronously.
        _reconciler.Attach(_supervisors, opts);

        if (_supervisors.Count == 0)
        {
            LogStartupReady(_logger, 0, plcsConfigured);
            await Task.Delay(Timeout.Infinite, stoppingToken).ConfigureAwait(false);
            return;
        }

        // ── 4. Start all supervisors in parallel ──────────────────────────────────────
        var startTasks = _supervisors.Values
            .Select(s => s.StartAsync(stoppingToken))
            .ToArray();
        await Task.WhenAll(startTasks).ConfigureAwait(false);

        // ── 5. Wait for every supervisor to complete its first bind attempt ───────────
        // "Ready" = every supervisor has transitioned out of Stopped (i.e. reached
        // Bound or Recovering from its first attempt).
        using var readyCts = new CancellationTokenSource(InitialBindWaitTimeout);
        using var readyLinked = CancellationTokenSource.CreateLinkedTokenSource(
            readyCts.Token, stoppingToken);
        var waitTasks = _supervisors.Values
            .Select(s => s.WaitForInitialBindAttemptAsync(readyLinked.Token))
            .ToArray();
        try
        {
            await Task.WhenAll(waitTasks).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (!stoppingToken.IsCancellationRequested)
        {
            // Only the readiness deadline fired. Carry on and report whatever is bound.
            // When the host is stopping instead, the filter lets the cancellation
            // propagate so no misleading "ready" event is logged during shutdown.
            _logger.LogWarning(
                "Timed out after {TimeoutSeconds}s waiting for initial listener bind attempts; reporting readiness with the listeners bound so far.",
                InitialBindWaitTimeout.TotalSeconds);
        }

        int boundCount = _supervisors.Values.Count(s => s.Snapshot().State == SupervisorState.Bound);
        LogStartupReady(_logger, boundCount, plcsConfigured);

        // ── 6. Keep the worker alive until the host signals stop ─────────────────────
        // Supervisors run their own background loops; ExecuteAsync just waits.
        await Task.Delay(Timeout.Infinite, stoppingToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Stops <see cref="ExecuteAsync"/>, then stops all supervisors in parallel under a
    /// 5-second deadline, disposes each one, and clears the dictionary. Failures while
    /// stopping or disposing a supervisor are logged and do not abort shutdown.
    /// </summary>
    /// <param name="cancellationToken">Host shutdown token; also bounds the supervisor stop wait.</param>
    public override async Task StopAsync(CancellationToken cancellationToken)
    {
        // Cancel ExecuteAsync first.
        await base.StopAsync(cancellationToken).ConfigureAwait(false);

        // Stop all supervisors in parallel with a 5-second hard deadline.
        using var stopCts = new CancellationTokenSource(SupervisorStopTimeout);
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(
            stopCts.Token, cancellationToken);
        var stopTasks = _supervisors.Values
            .Select(s => s.StopAsync(linked.Token))
            .ToArray();
        try
        {
            await Task.WhenAll(stopTasks).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Best effort — don't let individual supervisor failures block shutdown,
            // but leave a trace so the failure is diagnosable.
            _logger.LogWarning(ex, "One or more PLC listener supervisors failed to stop cleanly; continuing shutdown.");
        }

        foreach (var supervisor in _supervisors.Values)
        {
            try
            {
                await supervisor.DisposeAsync().ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // One failed disposal must not prevent the remaining supervisors
                // from being disposed.
                _logger.LogWarning(ex, "Failed to dispose supervisor for Plc={Plc}; continuing shutdown.", supervisor.PlcName);
            }
        }

        _supervisors.Clear();
    }

    // ── Logging ───────────────────────────────────────────────────────────────────────────

    [LoggerMessage(EventId = 1, EventName = "mbproxy.startup.ready",
        Level = LogLevel.Information,
        Message = "mbproxy service ready — ListenersBound={ListenersBound} PlcsConfigured={PlcsConfigured}")]
    private static partial void LogStartupReady(ILogger logger, int listenersBound, int plcsConfigured);

    [LoggerMessage(EventId = 21, EventName = "mbproxy.startup.bind.failed",
        Level = LogLevel.Error,
        Message = "Failed to bind listener: Plc={Plc} Port={Port} Reason={Reason}")]
    private static partial void LogBindFailed(ILogger logger, string plc, int port, string reason);
}
@@ -0,0 +1,56 @@
namespace Mbproxy.Proxy;
/// <summary>
/// Source-generated <see cref="LoggerMessage"/> definitions for the BCD rewriter pipeline.
/// All event names are stable — do not rename without updating docs/design.md.
/// </summary>
internal static partial class RewriterLogEvents
{
/// <summary>
/// Emitted when a 32-bit BCD pair is only partially covered by the read/write range.
/// The raw bytes are passed through unchanged; the client or PLC sees the original nibbles.
/// Logged at Warning as event 30 (<c>mbproxy.rewrite.partial_bcd</c>).
/// </summary>
/// <param name="logger">Logger the event is written to.</param>
/// <param name="plcName">Value for the <c>PlcName</c> placeholder.</param>
/// <param name="address">Value for the <c>Address</c> placeholder.</param>
/// <param name="clientStart">Value for the <c>ClientStart</c> placeholder.</param>
/// <param name="clientQty">Value for the <c>ClientQty</c> placeholder.</param>
[LoggerMessage(
EventId = 30,
EventName = "mbproxy.rewrite.partial_bcd",
Level = LogLevel.Warning,
Message = "Partial BCD overlap — passing through raw: Plc={PlcName} Address={Address} ClientStart={ClientStart} ClientQty={ClientQty}")]
public static partial void PartialBcd(
ILogger logger,
string plcName,
ushort address,
ushort clientStart,
ushort clientQty);
/// <summary>
/// Emitted when a register value at a configured BCD address contains a nibble >= 0xA
/// (i.e. not a valid BCD digit). The raw bytes are passed through unchanged.
/// Direction is "Read" (response from PLC) or "Write" (request from client).
/// Logged at Warning as event 31 (<c>mbproxy.rewrite.invalid_bcd</c>).
/// </summary>
/// <param name="logger">Logger the event is written to.</param>
/// <param name="plcName">Value for the <c>PlcName</c> placeholder.</param>
/// <param name="address">Value for the <c>Address</c> placeholder.</param>
/// <param name="rawValue">Value for the <c>RawValue</c> placeholder, formatted as four hex digits.</param>
/// <param name="direction">Value for the <c>Direction</c> placeholder.</param>
[LoggerMessage(
EventId = 31,
EventName = "mbproxy.rewrite.invalid_bcd",
Level = LogLevel.Warning,
Message = "Invalid BCD nibble — passing through raw: Plc={PlcName} Address={Address} RawValue=0x{RawValue:X4} Direction={Direction}")]
public static partial void InvalidBcd(
ILogger logger,
string plcName,
ushort address,
ushort rawValue,
string direction);
/// <summary>
/// Emitted when the PLC returns a Modbus exception response (high bit set on FC byte).
/// The frame is forwarded verbatim to the client.
/// Logged at Information as event 32 (<c>mbproxy.exception.passthrough</c>).
/// </summary>
/// <param name="logger">Logger the event is written to.</param>
/// <param name="plcName">Value for the <c>PlcName</c> placeholder.</param>
/// <param name="fc">Value for the <c>Fc</c> placeholder, formatted as two hex digits.</param>
/// <param name="exceptionCode">Value for the <c>ExceptionCode</c> placeholder.</param>
[LoggerMessage(
EventId = 32,
EventName = "mbproxy.exception.passthrough",
Level = LogLevel.Information,
Message = "Modbus exception forwarded: Plc={PlcName} Fc=0x{Fc:X2} ExceptionCode={ExceptionCode}")]
public static partial void ExceptionPassthrough(
ILogger logger,
string plcName,
byte fc,
byte exceptionCode);
}
@@ -0,0 +1,404 @@
using Mbproxy.Options;
using Mbproxy.Proxy.Multiplexing;
using Polly;
namespace Mbproxy.Proxy.Supervision;
/// <summary>
/// Wraps one <see cref="PlcListener"/> in a Polly-backed recovery loop.
///
/// <para><b>State machine</b>:
/// <list type="bullet">
/// <item><description><b>Bound</b>: listener is accepting connections; <see cref="PlcListener.RunAsync"/> is awaiting.</description></item>
/// <item><description><b>Recovering</b>: bind failed or RunAsync faulted; in Polly's delay window before the next attempt.</description></item>
/// <item><description><b>Stopped</b>: terminal. <see cref="StopAsync"/> was called; no further retries.</description></item>
/// </list>
/// </para>
///
/// <para><b>RecoveryAttempts</b>: the counter accumulates over the lifetime of the
/// supervisor. It is never reset after a successful re-bind so operators can see
/// "this listener has flapped N times since the service started." See also
/// <see cref="SupervisorSnapshot"/> doc comment.</para>
///
/// <para>The supervisor does NOT swallow exceptions from <see cref="PlcListener.RunAsync"/>
/// except <see cref="OperationCanceledException"/>. Every other fault is logged at Warning
/// with the exception message so operators can see WHY the listener was restarted.</para>
/// </summary>
internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
{
private readonly PlcOptions _plc;
private readonly ConnectionOptions _connectionOptions;
private readonly IPduPipeline _pipeline;
private readonly ILogger<PlcListener> _listenerLogger;
private readonly ILogger<PlcMultiplexer> _multiplexerLogger;
private readonly ILogger _pipeLogger;
private readonly PerPlcContext? _perPlcContext;
private readonly ResiliencePipeline _recoveryPipeline;
private readonly ILogger<PlcListenerSupervisor> _logger;
private readonly ResiliencePipeline? _backendConnectPipeline;
// ── Mutable state ────────────────────────────────────────────────────────────────────
// Volatile so Snapshot() reads are coherent without locking.
private volatile SupervisorState _state = SupervisorState.Stopped;
private volatile string? _lastBindError;
private int _recoveryAttempts; // Interlocked
// Phase 07: current active listener for status-page pair enumeration.
private volatile PlcListener? _currentListener;
// Phase 06: the live context is held in _currentContext (not the readonly _perPlcContext) so ReplaceContextAsync can swap it.
// Access from the accept loop (RunAsync) and from ReplaceContextAsync must be
// coherent; we use a volatile reference so the accept loop always reads the latest
// context without locking. The PlcListener created on each Polly attempt holds
// its own copy of the context at construction time; existing in-flight connections
// keep their old reference until they complete.
private volatile PerPlcContext? _currentContext;
/// <summary>
/// Per-supervisor CTS: cancelling it stops both the Polly delay and the inner
/// <see cref="PlcListener.RunAsync"/> loop.
/// </summary>
private CancellationTokenSource _supervisorCts = new();
private Task _supervisorTask = Task.CompletedTask;
private bool _disposed;
// ── Public surface ────────────────────────────────────────────────────────────────────
public string PlcName => _plc.Name;
/// <summary>
/// Captures the collaborators needed to build and supervise listeners for one PLC.
/// Nothing is bound or started here; see <see cref="StartAsync"/>.
/// </summary>
/// <param name="plc">Options of the PLC this supervisor serves.</param>
/// <param name="connectionOptions">Connection options passed to every listener created.</param>
/// <param name="pipeline">PDU pipeline passed to every listener created.</param>
/// <param name="listenerLogger">Logger passed to each <see cref="PlcListener"/>.</param>
/// <param name="multiplexerLogger">Multiplexer logger passed to each <see cref="PlcListener"/>.</param>
/// <param name="pipeLogger">Upstream-pipe logger passed to each <see cref="PlcListener"/>.</param>
/// <param name="perPlcContext">Initial per-PLC context; also seeds the swappable live slot.</param>
/// <param name="recoveryPipeline">Resilience pipeline wrapping the bind-and-run cycle.</param>
/// <param name="logger">Logger for the supervisor's own events.</param>
/// <param name="backendConnectPipeline">Optional backend-connect pipeline passed to each listener.</param>
public PlcListenerSupervisor(
    PlcOptions plc,
    ConnectionOptions connectionOptions,
    IPduPipeline pipeline,
    ILogger<PlcListener> listenerLogger,
    ILogger<PlcMultiplexer> multiplexerLogger,
    ILogger pipeLogger,
    PerPlcContext? perPlcContext,
    ResiliencePipeline recoveryPipeline,
    ILogger<PlcListenerSupervisor> logger,
    ResiliencePipeline? backendConnectPipeline = null)
{
    // Static configuration and shared services.
    (_plc, _connectionOptions, _pipeline) = (plc, connectionOptions, pipeline);

    // Loggers.
    (_listenerLogger, _multiplexerLogger, _pipeLogger, _logger) =
        (listenerLogger, multiplexerLogger, pipeLogger, logger);

    // Resilience pipelines.
    (_recoveryPipeline, _backendConnectPipeline) = (recoveryPipeline, backendConnectPipeline);

    // Per-PLC context: the construction-time value plus the live slot (Phase 06).
    _perPlcContext = perPlcContext;
    _currentContext = perPlcContext;
}
/// <summary>
/// The <see cref="ProxyCounters"/> of the live per-PLC context. Read by
/// <see cref="Configuration.ConfigReconciler"/> when it builds a reseat context, so the
/// counters carry over a tag-map swap. When there is no live context (or it has no
/// counters) a brand-new <see cref="ProxyCounters"/> is returned on each access.
/// </summary>
public ProxyCounters CurrentCounters
{
    get
    {
        // Single volatile read of the live context slot.
        var liveContext = _currentContext;
        if (liveContext?.Counters is { } counters)
        {
            return counters;
        }

        return new ProxyCounters();
    }
}
/// <summary>
/// The <see cref="UpstreamPipe"/> instances currently reported by the active listener, or
/// an empty collection when no listener is current. Consumed by Phase 07's status page
/// (renamed from <c>ActivePairs</c> in Phase 9).
/// </summary>
public IReadOnlyCollection<UpstreamPipe> ActiveUpstreams
{
    get
    {
        // Single volatile read of the current-listener slot.
        var activeListener = _currentListener;
        if (activeListener is null)
        {
            return Array.Empty<UpstreamPipe>();
        }

        return activeListener.ActiveUpstreams ?? Array.Empty<UpstreamPipe>();
    }
}
/// <summary>
/// Launches the supervisor task. The task tries to bind immediately; if binding
/// fails it enters the Polly recovery loop. The method returns as soon as the
/// background task is started (it does NOT wait for the listener to reach
/// <see cref="SupervisorState.Bound"/>).
///
/// <para>Call <see cref="WaitForInitialBindAttemptAsync"/> after this to block until the
/// supervisor has transitioned out of <see cref="SupervisorState.Stopped"/>.</para>
/// </summary>
/// <param name="ct">
/// Parent token; cancelling it cancels the supervisor's own linked token as well.
/// </param>
/// <returns>An already-completed task.</returns>
public Task StartAsync(CancellationToken ct)
{
    var previousCts = _supervisorCts;
    var previousTask = _supervisorTask;

    var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    _supervisorCts = linkedCts;

    // Release the CTS being replaced (the field-initializer placeholder, or one from an
    // earlier run) — but only when no supervisor task can still be observing its token.
    if (previousTask.IsCompleted)
    {
        previousCts.Dispose();
    }

    // Capture the token now. Reading _supervisorCts inside the lambda would happen later
    // on a pool thread, by which time the field may have been replaced or disposed.
    CancellationToken supervisorToken = linkedCts.Token;
    _supervisorTask = Task.Run(() => RunSupervisorAsync(supervisorToken), CancellationToken.None);
    return Task.CompletedTask;
}
/// <summary>
/// Polls every 10 ms until the supervisor leaves <see cref="SupervisorState.Stopped"/>
/// (i.e. its first bind attempt ended in <see cref="SupervisorState.Bound"/> or
/// <see cref="SupervisorState.Recovering"/>), the supervisor task has completed, or
/// <paramref name="ct"/> is cancelled. Returns at once when any of these already holds.
/// </summary>
/// <param name="ct">Cancels the wait; cancellation during a poll delay surfaces as an exception.</param>
public async Task WaitForInitialBindAttemptAsync(CancellationToken ct)
{
    while (true)
    {
        if (_state != SupervisorState.Stopped)
        {
            return; // First bind attempt has produced a state.
        }

        if (ct.IsCancellationRequested)
        {
            return; // Caller gave up before the poll delay started.
        }

        if (_supervisorTask.IsCompleted)
        {
            return; // Supervisor is not running; the state will not change.
        }

        await Task.Delay(10, ct).ConfigureAwait(false);
    }
}
/// <summary>
/// Requests shutdown: marks the supervisor <see cref="SupervisorState.Stopped"/>, cancels
/// the supervisor token (which interrupts either the recovery delay or the running
/// <see cref="PlcListener.RunAsync"/> loop), then waits for the background task to finish
/// or for <paramref name="ct"/> to fire, whichever comes first.
/// </summary>
/// <param name="ct">Bounds the wait for the background task.</param>
public async Task StopAsync(CancellationToken ct)
{
    _state = SupervisorState.Stopped;
    await _supervisorCts.CancelAsync().ConfigureAwait(false);

    var backgroundTask = _supervisorTask;
    try
    {
        await backgroundTask.WaitAsync(ct).ConfigureAwait(false);
    }
    catch (Exception)
    {
        // Deliberately swallowed in both cases:
        //  - OperationCanceledException: ct fired before the task completed; the supervisor
        //    task will terminate asynchronously, which is acceptable at shutdown.
        //  - anything else: the supervisor task faulted, which RunSupervisorAsync has
        //    already logged.
    }
}
/// <summary>
/// Captures the supervisor's current state, last bind error and cumulative recovery
/// attempt count into a <see cref="SupervisorSnapshot"/>. The three values are read one
/// after another without a lock.
/// </summary>
public SupervisorSnapshot Snapshot()
{
    var stateNow = _state;
    var lastErrorNow = _lastBindError;

    // CompareExchange(0, 0) never changes the value; it is used purely as an atomic read.
    int attemptsNow = Interlocked.CompareExchange(ref _recoveryAttempts, 0, 0);

    return new SupervisorSnapshot(
        State: stateNow,
        LastBindError: lastErrorNow,
        RecoveryAttempts: attemptsNow);
}
/// <summary>
/// Atomically swaps the per-PLC context (tag map) without restarting the listener.
///
/// <para><b>Transition window</b>: there is a brief overlap where the old
/// <see cref="PlcListener"/> is running its accept loop with the old context while the
/// new context reference is being written. The volatile write ensures that the very
/// next <c>PlcListener</c> constructed inside the Polly loop (on any subsequent fault
/// recovery) picks up <paramref name="newCtx"/>. Existing in-flight upstream pipes
/// served by the current multiplexer keep their reference to the context captured at
/// multiplexer construction time; they finish on the old map. New connections after
/// this call use the new map. This is the correct design — partial-BCD rewrites
/// mid-request would be worse than a one-request gap.</para>
///
/// <para>NOTE(review): this method only writes the supervisor's context slot, which is
/// read when a <c>PlcListener</c> is constructed. Whether connections accepted by the
/// already-running listener see the new map depends on <c>PlcListener</c> /
/// <c>PlcMultiplexer</c> internals not visible here — confirm the "new connections"
/// statement above against those classes.</para>
///
/// <para>This method is intentionally lightweight: it performs only the volatile write
/// and returns immediately. The <paramref name="ct"/> parameter is present for API
/// symmetry with start/stop and to accommodate future async expansion.</para>
/// </summary>
/// <param name="newCtx">Context to publish as the live per-PLC context.</param>
/// <param name="ct">Currently unused.</param>
/// <returns>An already-completed task.</returns>
public Task ReplaceContextAsync(PerPlcContext newCtx, CancellationToken ct)
{
// Volatile write: the next PlcListener created in RunSupervisorAsync will see
// the new context. The accept loop itself does not hold a direct reference to
// _currentContext — it was captured at PlcListener construction time.
_currentContext = newCtx;
return Task.CompletedTask;
}
// ── Supervisor loop ───────────────────────────────────────────────────────────────────
/// <summary>
/// Body of the supervisor background task. Executes the bind-and-accept cycle inside
/// <c>_recoveryPipeline</c>: each execution constructs a fresh <see cref="PlcListener"/>
/// from the current context, binds it, publishes it as the current listener, and runs its
/// accept loop. Bind failures, runtime faults and unexpected clean exits all dispose the
/// listener, bump the recovery counters, set <see cref="SupervisorState.Recovering"/> and
/// throw so the pipeline can schedule the next attempt. On exit (cancellation or an
/// exception escaping the pipeline) the state is set to <see cref="SupervisorState.Stopped"/>
/// and the current-listener slot is cleared.
/// </summary>
/// <param name="ct">Supervisor token; passed to the recovery pipeline and on to the listener.</param>
private async Task RunSupervisorAsync(CancellationToken ct)
{
// True until the first successful bind; selects the "bound" vs "recovered" log event.
bool firstBind = true;
try
{
// The recovery pipeline wraps the entire try-bind-and-run block.
// When RunAsync returns or throws, the pipeline delays and retries.
// Cancellation of ct exits the pipeline with OperationCanceledException.
await _recoveryPipeline.ExecuteAsync(async token =>
{
// ── Instantiate a fresh listener ─────────────────────────────────
// A faulted listener's TcpListener socket must be disposed before
// re-binding. We create a new PlcListener on each attempt.
//
// Phase 06: use _currentContext (volatile) so that a ReplaceContextAsync
// call between Polly retry attempts is picked up here. Each listener
// captures the context at construction time; existing in-flight pairs
// keep their own reference. See ReplaceContextAsync for the transition
// window documentation.
var listener = new PlcListener(
_plc,
_connectionOptions,
_pipeline,
_listenerLogger,
_multiplexerLogger,
_pipeLogger,
_currentContext,
_backendConnectPipeline);
// Phase 07: expose the current listener for status-page pair enumeration.
_currentListener = listener;
try
{
// ── Bind ─────────────────────────────────────────────────────
// NOTE(review): the result of StartAsync() is not awaited. This is only correct
// if PlcListener.StartAsync binds synchronously (e.g. returns void) — confirm.
listener.StartAsync();
}
catch (Exception bindEx)
{
// Dispose the listener before entering the recovery delay
// so the socket is released and the port can be reused.
_currentListener = null;
await listener.DisposeAsync().ConfigureAwait(false);
Interlocked.Increment(ref _recoveryAttempts);
string reason = bindEx.Message;
string truncated = reason.Length > 256 ? reason[..256] : reason;
_lastBindError = truncated;
_state = SupervisorState.Recovering;
// Also update the per-PLC counters if available (Phase 07 reads these).
_currentContext?.Counters.IncrementRecoveryAttempt(truncated);
LogBindFailed(_logger, _plc.Name, _plc.ListenPort, truncated);
// Re-throw so the Polly pipeline can delay and retry.
throw;
}
// ── Bind succeeded ───────────────────────────────────────────────
if (firstBind)
{
firstBind = false;
LogBound(_logger, _plc.Name, _plc.ListenPort);
}
else
{
// Re-bind after a recovery — emit the "recovered" event once.
int totalAttempts = Interlocked.CompareExchange(ref _recoveryAttempts, 0, 0);
LogListenerRecovered(_logger, _plc.Name, _plc.ListenPort, totalAttempts);
}
// Clear the last bind error on a successful bind.
_lastBindError = null;
_currentContext?.Counters.ClearLastBindError();
_state = SupervisorState.Bound;
// ── Run the accept loop ──────────────────────────────────────────
// RunAsync returns when: (a) token is cancelled (normal shutdown),
// (b) the listener faults (OS reclaims port, transient network reset).
// In both cases we fall through to the Polly retry handler.
try
{
await listener.RunAsync(token).ConfigureAwait(false);
}
catch (OperationCanceledException)
{
// Normal shutdown path — do not enter recovery loop.
// NOTE(review): this catches every OperationCanceledException, including one not
// caused by `token`; no recovery counters are updated on this path.
_currentListener = null;
await listener.DisposeAsync().ConfigureAwait(false);
throw; // Propagate to exit the Polly pipeline.
}
catch (Exception runEx)
{
// Listener faulted at runtime (port stolen, OS network reset, etc.).
// Log at Warning — operators must see WHY the listener was restarted.
LogListenerFaulted(_logger, _plc.Name, _plc.ListenPort, runEx, runEx.Message);
_currentListener = null;
await listener.DisposeAsync().ConfigureAwait(false);
Interlocked.Increment(ref _recoveryAttempts);
string truncated = runEx.Message.Length > 256 ? runEx.Message[..256] : runEx.Message;
_lastBindError = truncated;
_state = SupervisorState.Recovering;
// Also update the per-PLC counters if available.
_currentContext?.Counters.IncrementRecoveryAttempt(truncated);
// Re-throw so Polly can delay and retry.
throw;
}
// RunAsync returned normally (token was cancelled or listener closed).
// If we got here without an exception, the loop ended cleanly.
_currentListener = null;
await listener.DisposeAsync().ConfigureAwait(false);
// If cancellation is requested, throw so Polly exits cleanly.
token.ThrowIfCancellationRequested();
// Otherwise (listener closed without cancellation — e.g., OS event),
// treat as a fault and re-enter recovery.
Interlocked.Increment(ref _recoveryAttempts);
const string unexpectedEnd = "Listener accept loop ended unexpectedly";
_lastBindError = unexpectedEnd;
_state = SupervisorState.Recovering;
_currentContext?.Counters.IncrementRecoveryAttempt(unexpectedEnd);
LogListenerEnded(_logger, _plc.Name, _plc.ListenPort);
// Thrown purely to hand control back to the recovery pipeline for another attempt.
throw new InvalidOperationException(unexpectedEnd);
}, ct).ConfigureAwait(false);
}
catch (OperationCanceledException)
{
// Normal: StopAsync cancelled the token.
}
catch (Exception ex)
{
// Polly pipeline exhausted (should not happen for listener recovery since
// MaxRetryAttempts = int.MaxValue) or an unexpected fault.
_logger.LogError(ex, "Supervisor for Plc={Plc} exited unexpectedly: {Message}",
_plc.Name, ex.Message);
}
finally
{
// Whatever the exit path, the supervisor is no longer running a listener.
_state = SupervisorState.Stopped;
_currentListener = null;
}
}
// ── IAsyncDisposable ─────────────────────────────────────────────────────────────────
/// <summary>
/// Stops the supervisor and then disposes its cancellation source. The stop call is
/// given a token that cancels after five seconds, and any exception it throws is
/// swallowed. Calls made after the first one return immediately.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (!_disposed)
    {
        _disposed = true;

        // Deadline token handed to StopAsync; disposed when this scope ends.
        using var shutdownDeadline = new CancellationTokenSource(TimeSpan.FromSeconds(5));
        try
        {
            await StopAsync(shutdownDeadline.Token).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort cleanup.
        }

        _supervisorCts.Dispose();
    }
}
// ── Logging ───────────────────────────────────────────────────────────────────────────
/// <summary>
/// Source-generated logger for event 40 (<c>mbproxy.startup.bind</c>, Information):
/// reports the PLC name and port of a bound listener.
/// </summary>
[LoggerMessage(EventId = 40, EventName = "mbproxy.startup.bind",
Level = LogLevel.Information,
Message = "Listener bound: Plc={Plc} Port={Port}")]
private static partial void LogBound(ILogger logger, string plc, int port);
/// <summary>
/// Source-generated logger for event 41 (<c>mbproxy.startup.bind.failed</c>, Error):
/// reports a failed bind together with the caller-supplied reason text.
/// </summary>
[LoggerMessage(EventId = 41, EventName = "mbproxy.startup.bind.failed",
Level = LogLevel.Error,
Message = "Failed to bind listener: Plc={Plc} Port={Port} Reason={Reason}")]
private static partial void LogBindFailed(ILogger logger, string plc, int port, string reason);
/// <summary>
/// Source-generated logger for event 42 (<c>mbproxy.listener.recovered</c>, Information):
/// reports that the listener is bound again; <paramref name="attemptCount"/> is the
/// supervisor's recovery-attempt counter at the time of the call.
/// </summary>
[LoggerMessage(EventId = 42, EventName = "mbproxy.listener.recovered",
Level = LogLevel.Information,
Message = "Listener recovered: Plc={Plc} Port={Port} AttemptCount={AttemptCount}")]
private static partial void LogListenerRecovered(ILogger logger, string plc, int port, int attemptCount);
/// <summary>
/// Source-generated logger for event 43 (<c>mbproxy.listener.faulted</c>, Warning):
/// used when the accept loop throws a non-cancellation exception. The exception is
/// attached to the log entry and its message is passed separately as
/// <paramref name="reason"/> for the <c>Reason</c> placeholder.
/// </summary>
[LoggerMessage(EventId = 43, EventName = "mbproxy.listener.faulted",
Level = LogLevel.Warning,
Message = "Listener faulted (will recover): Plc={Plc} Port={Port} Reason={Reason}")]
private static partial void LogListenerFaulted(ILogger logger, string plc, int port, Exception ex, string reason);
/// <summary>
/// Source-generated logger for event 44 (<c>mbproxy.listener.ended</c>, Warning):
/// used when the accept loop returns without an exception and without cancellation
/// having been requested.
/// </summary>
[LoggerMessage(EventId = 44, EventName = "mbproxy.listener.ended",
Level = LogLevel.Warning,
Message = "Listener accept loop ended unexpectedly (will recover): Plc={Plc} Port={Port}")]
private static partial void LogListenerEnded(ILogger logger, string plc, int port);
}
@@ -0,0 +1,125 @@
using System.Net.Sockets;
using Mbproxy.Options;
using Polly;
using Polly.Retry;
namespace Mbproxy.Proxy.Supervision;
/// <summary>
/// Builds Polly v8 <see cref="ResiliencePipeline"/> instances from the typed resilience
/// configuration (<see cref="RetryProfile"/> and <see cref="RecoveryProfile"/>).
///
/// <para>Each factory method reads its profile when it is called; the returned pipeline
/// is intended to be built once and then reused across operations.</para>
/// </summary>
internal static class PolicyFactory
{
    // ── Network errors that are safe to retry on backend connect ────────────────────────
    // Only these SocketError values are treated as transient; every other socket error
    // (and every non-socket exception) is surfaced to the caller without a retry.
    private static readonly HashSet<SocketError> RetryableSocketErrors =
    [
        SocketError.ConnectionRefused,
        SocketError.TimedOut,
        SocketError.HostUnreachable,
        SocketError.NetworkUnreachable,
    ];

    /// <summary>
    /// Builds a retry pipeline for backend (PLC) TCP connect attempts.
    ///
    /// <para>Retries only on <see cref="SocketException"/> with a
    /// <see cref="SocketError"/> in <see cref="RetryableSocketErrors"/>. Does NOT retry
    /// <see cref="ArgumentException"/>, <see cref="OperationCanceledException"/>, or any
    /// non-network exception.</para>
    ///
    /// <para>The delay sequence is taken directly from <see cref="RetryProfile.BackoffMs"/>;
    /// element [i] is the delay before retry i (0-based). If the retry index exceeds the
    /// list, the last element is used; an empty list means no delay.</para>
    ///
    /// <para><see cref="RetryProfile.MaxAttempts"/> counts the first attempt. A value of 1
    /// (or anything lower, which is clamped to 1) means "no retries": in that case the
    /// method returns <see cref="ResiliencePipeline.Empty"/>, which simply runs the
    /// callback once, because Polly rejects a retry strategy with zero retry attempts.</para>
    ///
    /// <para>After all attempts are exhausted, the pipeline re-throws the last exception
    /// so the caller can log <c>mbproxy.backend.failed</c> and close the upstream socket.</para>
    /// </summary>
    /// <param name="profile">Attempt count and backoff schedule for backend connects.</param>
    /// <param name="logger">Receives a Debug entry before each retry.</param>
    /// <returns>A pipeline that performs at most <c>MaxAttempts</c> executions.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="profile"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public static ResiliencePipeline BuildBackendConnect(RetryProfile profile, ILogger logger)
    {
        ArgumentNullException.ThrowIfNull(profile);
        ArgumentNullException.ThrowIfNull(logger);

        // MaxAttempts includes the first attempt; Polly's MaxRetryAttempts does not.
        int maxAttempts = Math.Max(1, profile.MaxAttempts);
        int maxRetries = maxAttempts - 1;

        // RetryStrategyOptions.MaxRetryAttempts is validated as >= 1 when the pipeline is
        // built, so a single-attempt profile must not add a retry strategy at all.
        if (maxRetries < 1)
        {
            return ResiliencePipeline.Empty;
        }

        var backoffMs = profile.BackoffMs;

        return new ResiliencePipelineBuilder()
            .AddRetry(new RetryStrategyOptions
            {
                MaxRetryAttempts = maxRetries,
                ShouldHandle = new PredicateBuilder()
                    .Handle<SocketException>(ex => RetryableSocketErrors.Contains(ex.SocketErrorCode)),
                DelayGenerator = args =>
                {
                    int idx = args.AttemptNumber; // 0 = first retry, i.e. after attempt 0
                    // Clamp to the last element if we exceed the list.
                    int ms = backoffMs.Count > 0
                        ? backoffMs[Math.Min(idx, backoffMs.Count - 1)]
                        : 0;
                    return new ValueTask<TimeSpan?>(TimeSpan.FromMilliseconds(ms));
                },
                OnRetry = args =>
                {
                    logger.LogDebug(
                        "Backend connect retry {Attempt}/{Max}: {Error}",
                        args.AttemptNumber + 1,
                        maxRetries,
                        args.Outcome.Exception?.Message);
                    return ValueTask.CompletedTask;
                },
            })
            .Build();
    }

    /// <summary>
    /// Builds an infinite-retry pipeline for listener bind recovery.
    ///
    /// <para>The delay sequence is:
    /// <list type="bullet">
    /// <item><description>Retries 0 .. (InitialBackoffMs.Count - 1) use the initial backoff list.</description></item>
    /// <item><description>All subsequent retries use <see cref="RecoveryProfile.SteadyStateMs"/>.</description></item>
    /// </list>
    /// Every exception except <see cref="OperationCanceledException"/> is retried, and the
    /// retry budget is <see cref="int.MaxValue"/>, so in practice cancellation is the only
    /// way out of the pipeline.</para>
    /// </summary>
    /// <param name="profile">Initial backoff schedule and steady-state delay.</param>
    /// <param name="logger">Receives a Debug entry before each retry.</param>
    /// <returns>A pipeline that keeps retrying until cancelled.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="profile"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public static ResiliencePipeline BuildListenerRecovery(RecoveryProfile profile, ILogger logger)
    {
        ArgumentNullException.ThrowIfNull(profile);
        ArgumentNullException.ThrowIfNull(logger);

        var initialMs = profile.InitialBackoffMs;
        int steadyMs = profile.SteadyStateMs;

        return new ResiliencePipelineBuilder()
            .AddRetry(new RetryStrategyOptions
            {
                // int.MaxValue makes the pipeline retry indefinitely; cancellation
                // is the only exit path.
                MaxRetryAttempts = int.MaxValue,
                ShouldHandle = new PredicateBuilder().Handle<Exception>(
                    ex => ex is not OperationCanceledException),
                DelayGenerator = args =>
                {
                    // args.AttemptNumber is the zero-based index of the retry
                    // (0 = first retry, after the first failed attempt).
                    int idx = args.AttemptNumber;
                    int ms = idx < initialMs.Count
                        ? initialMs[idx]
                        : steadyMs;
                    return new ValueTask<TimeSpan?>(TimeSpan.FromMilliseconds(ms));
                },
                OnRetry = args =>
                {
                    logger.LogDebug(
                        "Listener recovery attempt {Attempt}: {Error}",
                        args.AttemptNumber + 1,
                        args.Outcome.Exception?.Message);
                    return ValueTask.CompletedTask;
                },
            })
            .Build();
    }
}
@@ -0,0 +1,50 @@
namespace Mbproxy.Proxy.Supervision;
/// <summary>
/// Lifecycle states reported by <see cref="PlcListenerSupervisor"/>.
/// </summary>
public enum SupervisorState
{
    /// <summary>
    /// The listener holds its port and the accept loop is running. Reached whenever
    /// <see cref="PlcListener.StartAsync"/> succeeds, whether on the first attempt or
    /// after one or more recovery attempts.
    /// </summary>
    Bound = 0,

    /// <summary>
    /// The listener is currently not bound and the supervisor is waiting out the next
    /// Polly retry delay before trying again. Entered after a failed bind, at startup
    /// or at runtime.
    /// </summary>
    Recovering = 1,

    /// <summary>
    /// Terminal state: <see cref="PlcListenerSupervisor.StopAsync"/> was called, the
    /// supervisor task has been cancelled, and no further retries will happen.
    /// </summary>
    Stopped = 2,
}
/// <summary>
/// Immutable point-in-time snapshot of a supervisor's state. Consumed by Phase 07's
/// status page via <see cref="PlcListenerSupervisor.Snapshot"/>.
///
/// <para><b>RecoveryAttempts semantics</b>: this counter <em>accumulates over the lifetime
/// of the supervisor</em> and is never reset. Operators reading the status page should
/// interpret it as "how many times has this listener faulted or failed to bind since
/// the service started" — useful for detecting port-flapping or repeated OS network
/// resets. Phase 07 surfaces it as-is.</para>
/// </summary>
/// <param name="State">State of the supervisor at the moment the snapshot was taken.</param>
/// <param name="LastBindError">
/// Most recent bind or listener failure message (truncated to 256 chars), or <c>null</c>
/// when there is none to report.
/// NOTE(review): the supervisor clears its last-bind-error field after each successful
/// bind, so this is presumably <c>null</c> whenever the listener is currently healthy,
/// not only when it has never failed — confirm against
/// <see cref="PlcListenerSupervisor.Snapshot"/>.
/// </param>
/// <param name="RecoveryAttempts">
/// Total number of failed binds and runtime listener faults over the lifetime of this
/// supervisor. Accumulates; never resets to 0.
/// </param>
public sealed record SupervisorSnapshot(
SupervisorState State,
string? LastBindError,
int RecoveryAttempts);
+57
View File
@@ -0,0 +1,57 @@
namespace Mbproxy;
/// <summary>
/// Host-wide reload statistics for mbproxy: how many configuration reloads were applied
/// or rejected, and when the last one was applied. Kept separate from the reconciler so
/// Phase 07's status page can read the numbers without depending on it.
///
/// <para>Intended to be created once and shared as a singleton. Every mutation goes
/// through an <see cref="Interlocked"/> operation, so each individual value can be read
/// from another thread without taking a lock.</para>
/// </summary>
public sealed class ServiceCounters
{
    // UtcTicks of the most recently applied reload. Zero is the "never reloaded"
    // sentinel: a real timestamp's UtcTicks is only zero for DateTimeOffset.MinValue.
    private long _lastReloadUtcTicks;

    // Running totals; written with Interlocked.Increment.
    private int _reloadAppliedCount;
    private int _reloadRejectedCount;

    /// <summary>Creates the counters and stamps <see cref="StartedAtUtc"/> with the current UTC time.</summary>
    public ServiceCounters()
    {
        StartedAtUtc = DateTimeOffset.UtcNow;
    }

    /// <summary>Moment this instance was constructed; stands in for the service start time.</summary>
    public DateTimeOffset StartedAtUtc { get; }

    /// <summary>
    /// UTC time of the most recently applied hot-reload, or <c>null</c> when no reload
    /// has been applied since this instance was created.
    /// </summary>
    public DateTimeOffset? LastReloadUtc
    {
        get
        {
            long observedTicks = Interlocked.Read(ref _lastReloadUtcTicks);
            if (observedTicks == 0)
            {
                return null;
            }

            return new DateTimeOffset(observedTicks, TimeSpan.Zero);
        }
    }

    /// <summary>Number of configuration reloads applied since service start.</summary>
    public int ReloadAppliedCount
    {
        get { return Volatile.Read(ref _reloadAppliedCount); }
    }

    /// <summary>Number of configuration reloads rejected since service start.</summary>
    public int ReloadRejectedCount
    {
        get { return Volatile.Read(ref _reloadRejectedCount); }
    }

    /// <summary>
    /// Registers one applied reload: increments <see cref="ReloadAppliedCount"/> and then
    /// stores <paramref name="timestamp"/> (as UTC ticks) for <see cref="LastReloadUtc"/>.
    /// </summary>
    /// <param name="timestamp">When the reload was applied.</param>
    public void RecordReloadApplied(DateTimeOffset timestamp)
    {
        Interlocked.Increment(ref _reloadAppliedCount);
        Interlocked.Exchange(ref _lastReloadUtcTicks, timestamp.UtcTicks);
    }

    /// <summary>Registers one rejected reload by incrementing <see cref="ReloadRejectedCount"/>.</summary>
    public void RecordReloadRejected()
    {
        Interlocked.Increment(ref _reloadRejectedCount);
    }
}
+50
View File
@@ -0,0 +1,50 @@
{
"Mbproxy": {
"BcdTags": {
"Global": []
},
"Plcs": [],
"AdminPort": 8080,
"Connection": {
"BackendConnectTimeoutMs": 3000,
"BackendRequestTimeoutMs": 3000
},
"Resilience": {
"BackendConnect": {
"MaxAttempts": 3,
"BackoffMs": [ 100, 500, 2000 ]
},
"ListenerRecovery": {
"InitialBackoffMs": [ 1000, 2000, 5000, 15000, 30000 ],
"SteadyStateMs": 30000
}
}
},
"Serilog": {
"Using": [ "Serilog.Sinks.Console", "Serilog.Sinks.File" ],
"MinimumLevel": {
"Default": "Information",
"Override": {
"Microsoft": "Warning",
"System": "Warning"
}
},
"WriteTo": [
{
"Name": "Console",
"Args": {
"outputTemplate": "[{Timestamp:HH:mm:ss} {Level:u3}] {Message:lj} {Properties:j}{NewLine}{Exception}"
}
},
{
"Name": "File",
"Args": {
"path": "C:\\ProgramData\\mbproxy\\logs\\mbproxy-.log",
"rollingInterval": "Day",
"retainedFileCountLimit": 30,
"outputTemplate": "[{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} {Level:u3}] {Message:lj} {Properties:j}{NewLine}{Exception}"
}
}
]
}
}