mbproxy: initial commit through Phase 9 (TxId multiplexing)
Adds the mbproxy service end-to-end. Phases 00-08 implement the production-ready single-listener / 1:1-backend transparent Modbus TCP proxy with bidirectional BCD rewriting for the ~54-PLC DL205/DL260 fleet. Phase 9 replaces the connection layer with a single backend socket per PLC plus MBAP TxId rewriting, lifting the H2-ECOM100's 4-concurrent-client cap as an operational ceiling.

Phase 9 additions of note:
- PlcMultiplexer + UpstreamPipe + TxIdAllocator + CorrelationMap
- InFlightRequest with IReadOnlyList<InterestedParty> (load-bearing for Phase 10 read coalescing — do not collapse to a single field)
- Per-request watchdog: surfaces Modbus exception 0x0B to upstream on BackendRequestTimeoutMs, defending against lost responses, dead-PLC paths, and pymodbus 3.13.0's concurrent-multiplexed-request bug (its ServerRequestHandler.last_pdu state race)
- Status DTO + HTML gain inFlight / maxInFlight / txIdWraps / disconnectCascades / queueDepth (Tier 1.6 in docs/kpi.md)

Tests: 263 unit + 38 E2E. Multiplexer correctness under truly concurrent backend traffic is proved against a stub backend in PlcMultiplexerTests; MultiplexerE2ETests paces requests so pymodbus 3.13's single-PDU framer stays in known-good mode.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,225 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.AspNetCore.Builder;
|
||||
using Microsoft.AspNetCore.Hosting;
|
||||
using Microsoft.AspNetCore.Http;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Mbproxy.Options;
|
||||
|
||||
namespace Mbproxy.Admin;
|
||||
|
||||
/// <summary>
/// Hosted service that owns the Kestrel-backed admin HTTP endpoint.
///
/// <para>Lifecycle:</para>
/// <list type="bullet">
///   <item><see cref="StartAsync"/> builds a <see cref="WebApplication"/> bound to
///         <c>Mbproxy.AdminPort</c> and starts it non-blocking.</item>
///   <item>If the bind fails (port in use, etc.), logs <c>mbproxy.admin.bind.failed</c>
///         at Error and continues — the proxy listeners are unaffected.</item>
///   <item>If <c>AdminPort</c> changes via hot-reload, the current app is stopped and a
///         new one is started on the new port. Other config changes are ignored here.</item>
///   <item><see cref="StopAsync"/> shuts down the current Kestrel app with a 2 s deadline.
///         A re-bind that was queued before the stop does not restart the endpoint.</item>
/// </list>
///
/// <para>Routes: exactly two — <c>GET /</c> (HTML) and <c>GET /status.json</c> (JSON).</para>
/// </summary>
internal sealed partial class AdminEndpointHost : IHostedService, IAsyncDisposable
{
    private readonly IOptionsMonitor<MbproxyOptions> _optionsMonitor;
    private readonly StatusSnapshotBuilder _builder;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ILogger<AdminEndpointHost> _logger;

    // The currently-running Kestrel app; null when stopped or when bind failed.
    private WebApplication? _app;

    // Protects concurrent Start/Stop calls (hot-reload + StopAsync racing).
    private readonly SemaphoreSlim _lock = new(1, 1);

    // Current configured port — used to detect changes on hot-reload.
    private int _currentPort;

    // Subscription token for IOptionsMonitor.OnChange.
    private IDisposable? _optionsChangeRegistration;

    // Set once StopAsync/DisposeAsync has run. A hot-reload re-bind that was already
    // queued checks this under _lock so it cannot bring the endpoint back after shutdown.
    private volatile bool _stopped;

    public AdminEndpointHost(
        IOptionsMonitor<MbproxyOptions> optionsMonitor,
        StatusSnapshotBuilder builder,
        ILoggerFactory loggerFactory)
    {
        _optionsMonitor = optionsMonitor;
        _builder = builder;
        _loggerFactory = loggerFactory;
        _logger = loggerFactory.CreateLogger<AdminEndpointHost>();
    }

    /// <summary>
    /// Starts the admin endpoint on the configured port and subscribes to option changes
    /// so that a changed <c>AdminPort</c> triggers a re-bind.
    /// </summary>
    /// <param name="cancellationToken">Flowed into the initial Kestrel start.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        _stopped = false;
        _currentPort = _optionsMonitor.CurrentValue.AdminPort;

        await StartAppAsync(_currentPort, cancellationToken).ConfigureAwait(false);

        // Subscribe to config changes: if AdminPort changes, re-bind.
        _optionsChangeRegistration = _optionsMonitor.OnChange(opts =>
        {
            int newPort = opts.AdminPort;
            if (newPort == _currentPort)
            {
                return; // Only care about AdminPort changes.
            }

            // Fire-and-forget: re-bind is async; we can't await in OnChange.
            _ = Task.Run(() => RebindAsync(newPort));
        });
    }

    /// <summary>
    /// Unsubscribes from option changes and stops the running app (if any) under the lock.
    /// </summary>
    /// <param name="cancellationToken">Bounds the wait for the internal lock.</param>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        _optionsChangeRegistration?.Dispose();
        _optionsChangeRegistration = null;

        await _lock.WaitAsync(cancellationToken).ConfigureAwait(false);
        try
        {
            // Flag first, while holding the lock, so any re-bind still waiting on the
            // lock sees it and exits instead of starting a fresh app after shutdown.
            _stopped = true;
            await StopCurrentAppAsync().ConfigureAwait(false);
        }
        finally
        {
            _lock.Release();
        }
    }

    // ── Internal helpers ─────────────────────────────────────────────────────

    /// <summary>
    /// Hot-reload path: stops the current app and starts a new one on
    /// <paramref name="newPort"/>. Runs on the thread pool; never throws to its caller
    /// for the disposed-lock case because nobody observes the returned task.
    /// </summary>
    private async Task RebindAsync(int newPort)
    {
        try
        {
            await _lock.WaitAsync().ConfigureAwait(false);
            try
            {
                // Double-check under lock: the host may have stopped, or an earlier
                // re-bind may already have moved to this port.
                if (_stopped || newPort == _currentPort)
                {
                    return;
                }

                await StopCurrentAppAsync().ConfigureAwait(false);
                _currentPort = newPort;
                await StartAppAsync(newPort, CancellationToken.None).ConfigureAwait(false);
            }
            finally
            {
                _lock.Release();
            }
        }
        catch (ObjectDisposedException)
        {
            // The host was disposed while this re-bind was queued; nothing left to do.
        }
    }

    /// <summary>
    /// Builds and starts a Kestrel <see cref="WebApplication"/> on <paramref name="port"/>.
    /// On bind failure, logs the error, disposes the half-built app and sets
    /// <c>_app = null</c> — does NOT throw. Cancellation is rethrown after cleanup.
    /// Caller must hold <c>_lock</c> or be in a single-threaded context (StartAsync).
    /// </summary>
    private async Task StartAppAsync(int port, CancellationToken ct)
    {
        WebApplication? app = null;
        try
        {
            // Use CreateSlimBuilder with explicit args (empty) to avoid inheriting
            // process-level environment variables like ASPNETCORE_URLS.
            var builder = WebApplication.CreateSlimBuilder(new WebApplicationOptions
            {
                Args = [],
            });

            // Suppress Kestrel/ASP.NET Core built-in logging; forward to the outer host's
            // logger factory so that admin-endpoint errors appear in the proxy's log stream.
            builder.Logging.ClearProviders();
            builder.Logging.AddProvider(new ForwardingLoggerProvider(_loggerFactory));

            // Explicit Kestrel listen — overrides any ASPNETCORE_URLS that leaked in.
            builder.WebHost.UseKestrel(k =>
            {
                k.Listen(System.Net.IPAddress.Any, port);
            });

            app = builder.Build();

            // ── Routes ───────────────────────────────────────────────────────
            app.MapGet("/", (HttpContext ctx) =>
            {
                var snapshot = _builder.Build();
                string html = StatusHtmlRenderer.Render(snapshot);
                return Results.Content(html, "text/html; charset=utf-8");
            });

            app.MapGet("/status.json", (HttpContext ctx) =>
            {
                var snapshot = _builder.Build();
                string json = JsonSerializer.Serialize(snapshot, StatusJsonContext.Default.StatusResponse);
                return Results.Content(json, "application/json");
            });

            await app.StartAsync(ct).ConfigureAwait(false);
            _app = app;

            LogAdminStarted(_logger, port);
        }
        catch (OperationCanceledException)
        {
            // Cancellation still propagates, but the built app must not be leaked.
            await DisposeQuietlyAsync(app).ConfigureAwait(false);
            throw;
        }
        catch (Exception ex)
        {
            // Bind failed — log and continue. Proxy listeners are unaffected.
            LogAdminBindFailed(_logger, port, ex.Message);
            _app = null;
            await DisposeQuietlyAsync(app).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Stops the current <see cref="WebApplication"/> with a 2 s deadline, then disposes it.
    /// </summary>
    private async Task StopCurrentAppAsync()
    {
        if (_app is not { } app)
        {
            return;
        }

        _app = null;

        try
        {
            using var stopCts = new CancellationTokenSource(TimeSpan.FromSeconds(2));
            await app.StopAsync(stopCts.Token).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort: a slow or failing graceful stop must not block disposal.
        }

        await app.DisposeAsync().ConfigureAwait(false);
    }

    /// <summary>Disposes an app that never became the current one, ignoring failures.</summary>
    private static async Task DisposeQuietlyAsync(WebApplication? app)
    {
        if (app is null)
        {
            return;
        }

        try
        {
            await app.DisposeAsync().ConfigureAwait(false);
        }
        catch
        {
            // Best-effort: the start failure that led here is what gets reported.
        }
    }

    // ── IAsyncDisposable ─────────────────────────────────────────────────────

    /// <summary>
    /// Releases the option-change subscription, the running app (if still present) and
    /// finally the lock. The lock goes last so nothing is disposed out from under it.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        _optionsChangeRegistration?.Dispose();
        _optionsChangeRegistration = null;
        _stopped = true;

        if (_app is { } app)
        {
            _app = null;
            await app.DisposeAsync().ConfigureAwait(false);
        }

        _lock.Dispose();
    }

    // ── Logging ──────────────────────────────────────────────────────────────

    [LoggerMessage(EventId = 70, EventName = "mbproxy.admin.started",
        Level = LogLevel.Information,
        Message = "Admin endpoint started on port {Port}")]
    private static partial void LogAdminStarted(ILogger logger, int port);

    [LoggerMessage(EventId = 71, EventName = "mbproxy.admin.bind.failed",
        Level = LogLevel.Error,
        Message = "Admin endpoint bind failed — admin page will be unavailable: Port={Port} Reason={Reason}")]
    private static partial void LogAdminBindFailed(ILogger logger, int port, string reason);

    // ── Inner logger provider (forwards Kestrel/ASP.NET logs to the proxy's factory) ────

    private sealed class ForwardingLoggerProvider : ILoggerProvider
    {
        private readonly ILoggerFactory _factory;

        public ForwardingLoggerProvider(ILoggerFactory factory) => _factory = factory;

        public ILogger CreateLogger(string categoryName) => _factory.CreateLogger(categoryName);

        // The wrapped factory is owned by the outer host, so there is nothing to release.
        public void Dispose() { }
    }
}
|
||||
@@ -0,0 +1,24 @@
|
||||
using System.Reflection;
|
||||
|
||||
namespace Mbproxy.Admin;
|
||||
|
||||
/// <summary>
/// Reads <see cref="AssemblyInformationalVersionAttribute"/> once at construction and caches
/// the result as a string. Used for the <c>service.version</c> field on the status page.
///
/// <para>Note: <see cref="Assembly.Location"/> is unreliable under single-file publish
/// (Phase 08). We use <c>Assembly.GetExecutingAssembly().GetCustomAttribute&lt;T&gt;()</c>
/// which works correctly regardless of publish mode.</para>
/// </summary>
internal sealed class AssemblyVersionAccessor
{
    /// <summary>
    /// The cached informational version string, e.g. <c>"1.2.3+gitsha"</c>.
    /// Falls back to <c>"0.0.0"</c> when the attribute is absent (e.g., unit-test host).
    /// </summary>
    public string Version { get; } =
        Assembly.GetExecutingAssembly()
            .GetCustomAttribute<AssemblyInformationalVersionAttribute>()
            ?.InformationalVersion
        ?? "0.0.0";
}
|
||||
@@ -0,0 +1,106 @@
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Mbproxy.Admin;
|
||||
|
||||
// ── Wire DTOs for GET /status.json ───────────────────────────────────────────
|
||||
// Field names must match design.md "Status page" tables EXACTLY (camelCase via
|
||||
// JsonKnownNamingPolicy.CamelCase on the source-gen context).
|
||||
|
||||
/// <summary>
/// Top-level response envelope for <c>GET /status.json</c>.
/// </summary>
/// <param name="Service">Service-wide identity and reload counters.</param>
/// <param name="Listeners">Aggregate bound/configured listener counts.</param>
/// <param name="Plcs">One status row per PLC.</param>
public sealed record StatusResponse(
    ServiceFields Service,
    ListenersAggregate Listeners,
    IReadOnlyList<PlcStatus> Plcs);
|
||||
|
||||
/// <summary>Service-wide identity and reload counters.</summary>
/// <param name="UptimeSeconds">Whole seconds the service has been running.</param>
/// <param name="Version">Informational version string of the service.</param>
/// <param name="ConfigLastReloadUtc">Time of the last configuration reload; may be <c>null</c>.</param>
/// <param name="ConfigReloadCount">Number of configuration reloads that were applied.</param>
/// <param name="ConfigReloadRejectedCount">Number of configuration reloads that were rejected.</param>
public sealed record ServiceFields(
    long UptimeSeconds,
    string Version,
    DateTimeOffset? ConfigLastReloadUtc,
    int ConfigReloadCount,
    int ConfigReloadRejectedCount);
|
||||
|
||||
/// <summary>Aggregate listener state across all configured PLCs.</summary>
/// <param name="Bound">Number of listeners currently bound.</param>
/// <param name="Configured">Number of PLCs present in configuration.</param>
public sealed record ListenersAggregate(int Bound, int Configured);
|
||||
|
||||
/// <summary>Per-PLC status row.</summary>
/// <param name="Name">Configured PLC name.</param>
/// <param name="Host">Configured PLC host.</param>
/// <param name="ListenPort">Configured proxy listen port for this PLC.</param>
/// <param name="Listener">Listener state sub-object.</param>
/// <param name="Clients">Connected-clients sub-object.</param>
/// <param name="Pdus">PDU counters sub-object.</param>
/// <param name="Backend">Backend connect, exception and multiplexer telemetry.</param>
/// <param name="Bytes">Byte-transfer counters.</param>
public sealed record PlcStatus(
    string Name,
    string Host,
    int ListenPort,
    PlcListenerStatus Listener,
    PlcClientsStatus Clients,
    PlcPdusStatus Pdus,
    PlcBackendStatus Backend,
    PlcBytesStatus Bytes);
|
||||
|
||||
/// <summary>Listener state sub-object.</summary>
/// <param name="State">Listener state label; the status page recognises
/// <c>"bound"</c> and <c>"recovering"</c> and styles anything else as stopped.</param>
/// <param name="LastBindError">Most recent bind error text, or <c>null</c> when there is none.</param>
/// <param name="RecoveryAttempts">Number of recovery attempts reported for the listener.</param>
public sealed record PlcListenerStatus(
    string State,
    string? LastBindError,
    int RecoveryAttempts);
|
||||
|
||||
/// <summary>Connected-clients sub-object.</summary>
/// <param name="Connected">Number of connected upstream clients.</param>
/// <param name="RemoteEndpoints">Per-client snapshots for the connected clients.</param>
public sealed record PlcClientsStatus(
    int Connected,
    IReadOnlyList<ClientSnapshot> RemoteEndpoints);
|
||||
|
||||
/// <summary>Per-connection-pair snapshot for the status page.</summary>
/// <param name="Remote">Text form of the client's remote endpoint.</param>
/// <param name="ConnectedAtUtc">When the client connected.</param>
/// <param name="PdusForwarded">PDUs forwarded on this connection.</param>
public sealed record ClientSnapshot(
    string Remote,
    DateTimeOffset ConnectedAtUtc,
    long PdusForwarded);
|
||||
|
||||
/// <summary>PDU counters sub-object.</summary>
/// <param name="Forwarded">Total PDUs forwarded.</param>
/// <param name="ByFc">Breakdown of requests by Modbus function code.</param>
/// <param name="RewrittenSlots">Count of register slots rewritten by the BCD pipeline.</param>
/// <param name="PartialBcdWarnings">Count of partial-BCD warnings raised.</param>
public sealed record PlcPdusStatus(
    long Forwarded,
    FcCounts ByFc,
    long RewrittenSlots,
    long PartialBcdWarnings);
|
||||
|
||||
/// <summary>Per-function-code request counts.</summary>
/// <param name="Fc03">Requests with function code 03.</param>
/// <param name="Fc04">Requests with function code 04.</param>
/// <param name="Fc06">Requests with function code 06.</param>
/// <param name="Fc16">Requests with function code 16.</param>
/// <param name="Other">Requests with any other function code.</param>
public sealed record FcCounts(
    long Fc03,
    long Fc04,
    long Fc06,
    long Fc16,
    long Other);
|
||||
|
||||
/// <summary>
/// Backend connect, exception, and multiplexer telemetry. Phase 9 added
/// <c>InFlight</c>, <c>MaxInFlight</c>, <c>TxIdWraps</c>, <c>DisconnectCascades</c>, and
/// <c>QueueDepth</c> to surface the live state of the per-PLC TxId-multiplexed connection.
/// </summary>
/// <param name="ConnectsSuccess">Successful backend connects.</param>
/// <param name="ConnectsFailed">Failed backend connects.</param>
/// <param name="ExceptionsByCode">Modbus exception counts by code.</param>
/// <param name="LastRoundTripMs">Most recent backend round-trip time, in milliseconds.</param>
/// <param name="InFlight">Requests currently in flight.</param>
/// <param name="MaxInFlight">Maximum in-flight value reported by the counters.</param>
/// <param name="TxIdWraps">Number of TxId wrap-arounds.</param>
/// <param name="DisconnectCascades">Number of backend disconnect cascades.</param>
/// <param name="QueueDepth">Backend queue depth.</param>
public sealed record PlcBackendStatus(
    long ConnectsSuccess,
    long ConnectsFailed,
    ExceptionCounts ExceptionsByCode,
    double LastRoundTripMs,
    long InFlight,
    long MaxInFlight,
    long TxIdWraps,
    long DisconnectCascades,
    long QueueDepth);
|
||||
|
||||
/// <summary>Modbus exception counts by code.</summary>
/// <param name="Code01">Responses carrying exception code 01.</param>
/// <param name="Code02">Responses carrying exception code 02.</param>
/// <param name="Code03">Responses carrying exception code 03.</param>
/// <param name="Code04">Responses carrying exception code 04.</param>
public sealed record ExceptionCounts(
    long Code01,
    long Code02,
    long Code03,
    long Code04);
|
||||
|
||||
/// <summary>Byte-transfer counters.</summary>
/// <param name="UpstreamIn">Upstream inbound byte count.</param>
/// <param name="UpstreamOut">Upstream outbound byte count.</param>
public sealed record PlcBytesStatus(
    long UpstreamIn,
    long UpstreamOut);
|
||||
|
||||
// ── Source-generation context ─────────────────────────────────────────────────
|
||||
// TreatWarningsAsErrors is on, so the context must include every reachable type.
|
||||
|
||||
/// <summary>
/// System.Text.Json source-generation context for <see cref="StatusResponse"/>.
/// Output is compact (not indented) with camelCase property names.
/// </summary>
[JsonSerializable(typeof(StatusResponse))]
[JsonSourceGenerationOptions(
    WriteIndented = false,
    PropertyNamingPolicy = JsonKnownNamingPolicy.CamelCase)]
internal partial class StatusJsonContext : JsonSerializerContext;
|
||||
@@ -0,0 +1,189 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Mbproxy.Admin;
|
||||
|
||||
/// <summary>
/// Renders a <see cref="StatusResponse"/> as a self-contained HTML page.
///
/// <para>Constraints (from design.md Phase 07):</para>
/// <list type="bullet">
///   <item>No external assets (CSS/JS/fonts/favicons) — firewalled networks only.</item>
///   <item><c>&lt;meta http-equiv="refresh" content="5"&gt;</c> for auto-refresh.</item>
///   <item>Page weight ≤ 50 KB for a 54-PLC fleet.</item>
///   <item>Listener state colour-coded: bound=green, recovering=orange, stopped=grey.</item>
///   <item>Connected clients rendered as compact <c>[remote (n PDUs)]</c> list (not nested table).</item>
/// </list>
/// </summary>
internal static class StatusHtmlRenderer
{
    private const string Css = """
        body{font-family:monospace;font-size:13px;margin:1em}
        h1{font-size:1.1em;margin-bottom:.3em}
        .meta{color:#555;margin-bottom:.8em;font-size:12px}
        table{border-collapse:collapse;width:100%}
        th,td{border:1px solid #ccc;padding:3px 6px;white-space:nowrap}
        th{background:#f0f0f0;text-align:left}
        tr:nth-child(even)td{background:#fafafa}
        .bound{color:green;font-weight:bold}
        .recovering{color:darkorange;font-weight:bold}
        .stopped{color:grey}
        .err{font-size:11px;color:#a00}
        .clients{font-size:11px;color:#333}
        """;

    /// <summary>
    /// Renders the status page as a complete HTML document string.
    /// May allocate; intended for the status-page read path only.
    /// </summary>
    /// <param name="status">Snapshot to render.</param>
    /// <returns>A full HTML document; all text taken from the snapshot is HTML-escaped.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="status"/> is <c>null</c>.</exception>
    public static string Render(StatusResponse status)
    {
        ArgumentNullException.ThrowIfNull(status);

        var sb = new StringBuilder(4096);

        sb.Append("<!DOCTYPE html><html lang=\"en\"><head><meta charset=\"utf-8\">");
        sb.Append("<meta http-equiv=\"refresh\" content=\"5\">");
        sb.Append("<title>mbproxy status</title>");
        sb.Append("<style>").Append(Css).Append("</style>");
        sb.Append("</head><body>");

        AppendHeader(sb, status);

        if (status.Plcs.Count == 0)
        {
            sb.Append("<p><em>No PLCs configured.</em></p>");
        }
        else
        {
            AppendPlcTable(sb, status.Plcs);
        }

        sb.Append("</body></html>");
        return sb.ToString();
    }

    // ── Sections ──────────────────────────────────────────────────────────────

    /// <summary>Writes the title and the one-line service summary.</summary>
    private static void AppendHeader(StringBuilder sb, StatusResponse status)
    {
        var service = status.Service;

        sb.Append("<h1>mbproxy status</h1>");
        sb.Append("<div class=\"meta\">");
        sb.Append("Version: ").Append(HtmlEncode(service.Version));
        sb.Append(" | Uptime: ").Append(FormatUptime(service.UptimeSeconds));
        sb.Append(" | Listeners: ")
          .Append(status.Listeners.Bound).Append('/').Append(status.Listeners.Configured)
          .Append(" bound");

        if (service.ConfigLastReloadUtc is { } lastReload)
        {
            // Convert to UTC before formatting so the trailing "Z" is truthful even when
            // the stored value carries a non-zero offset; invariant culture keeps the
            // ':' separators and Gregorian year regardless of the host's locale.
            string stamp = lastReload.UtcDateTime.ToString(
                "yyyy-MM-dd HH:mm:ss",
                System.Globalization.CultureInfo.InvariantCulture) + "Z";
            sb.Append(" | Last reload: ").Append(HtmlEncode(stamp));
        }

        sb.Append(" | Reloads: ").Append(service.ConfigReloadCount);
        if (service.ConfigReloadRejectedCount > 0)
        {
            sb.Append(" (").Append(service.ConfigReloadRejectedCount).Append(" rejected)");
        }

        sb.Append("</div>");
    }

    /// <summary>Writes the PLC table: header row followed by one row per PLC.</summary>
    private static void AppendPlcTable(StringBuilder sb, IReadOnlyList<PlcStatus> plcs)
    {
        sb.Append("<table>");
        sb.Append("<thead><tr>");
        sb.Append("<th>Name</th><th>Host</th><th>Port</th><th>State</th>");
        sb.Append("<th>Clients</th><th>PDUs fwd</th><th>FC03</th><th>FC04</th>");
        sb.Append("<th>FC06</th><th>FC16</th><th>FC?</th><th>BCD slots</th>");
        sb.Append("<th>Partial BCD</th><th>Ex 01</th><th>Ex 02</th><th>Ex 03</th><th>Ex 04</th>");
        sb.Append("<th>RTT ms</th><th>Bytes in</th><th>Bytes out</th>");
        // Phase 9: multiplexer telemetry columns.
        sb.Append("<th>In-flight</th><th>Max in-flight</th><th>TxId wraps</th>");
        sb.Append("<th>Cascades</th><th>Queue</th>");
        sb.Append("</tr></thead><tbody>");

        foreach (var plc in plcs)
        {
            AppendPlcRow(sb, plc);
        }

        sb.Append("</tbody></table>");
    }

    /// <summary>Writes a single PLC row; cell order must match the header row.</summary>
    private static void AppendPlcRow(StringBuilder sb, PlcStatus plc)
    {
        sb.Append("<tr>");
        sb.Append("<td>").Append(HtmlEncode(plc.Name)).Append("</td>");
        sb.Append("<td>").Append(HtmlEncode(plc.Host)).Append("</td>");
        sb.Append("<td>").Append(plc.ListenPort).Append("</td>");

        // State cell with colour coding. The class name comes from a fixed set, so an
        // unexpected state string can never end up inside the attribute.
        string stateClass = plc.Listener.State switch
        {
            "bound" => "bound",
            "recovering" => "recovering",
            _ => "stopped",
        };
        sb.Append("<td><span class=\"").Append(stateClass).Append("\">")
          .Append(HtmlEncode(plc.Listener.State)).Append("</span>");
        if (plc.Listener.State == "recovering" && plc.Listener.LastBindError is { } err)
        {
            sb.Append("<br><span class=\"err\">")
              .Append(HtmlEncode(err))
              .Append(" (attempt ").Append(plc.Listener.RecoveryAttempts).Append(")")
              .Append("</span>");
        }
        sb.Append("</td>");

        // Connected clients: count, then a comma-separated "remote (pdus)" list.
        sb.Append("<td><span class=\"clients\">");
        sb.Append(plc.Clients.Connected);
        if (plc.Clients.RemoteEndpoints.Count > 0)
        {
            sb.Append("<br>");
            bool first = true;
            foreach (var client in plc.Clients.RemoteEndpoints)
            {
                if (!first)
                {
                    sb.Append(", ");
                }

                sb.Append(HtmlEncode(client.Remote))
                  .Append(" (").Append(client.PdusForwarded).Append(')');
                first = false;
            }
        }
        sb.Append("</span></td>");

        // Counter cells
        sb.Append("<td>").Append(plc.Pdus.Forwarded).Append("</td>");
        sb.Append("<td>").Append(plc.Pdus.ByFc.Fc03).Append("</td>");
        sb.Append("<td>").Append(plc.Pdus.ByFc.Fc04).Append("</td>");
        sb.Append("<td>").Append(plc.Pdus.ByFc.Fc06).Append("</td>");
        sb.Append("<td>").Append(plc.Pdus.ByFc.Fc16).Append("</td>");
        sb.Append("<td>").Append(plc.Pdus.ByFc.Other).Append("</td>");
        sb.Append("<td>").Append(plc.Pdus.RewrittenSlots).Append("</td>");
        sb.Append("<td>").Append(plc.Pdus.PartialBcdWarnings).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.ExceptionsByCode.Code01).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.ExceptionsByCode.Code02).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.ExceptionsByCode.Code03).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.ExceptionsByCode.Code04).Append("</td>");
        // Invariant culture: always a '.' decimal separator, whatever the host locale.
        sb.Append("<td>")
          .Append(plc.Backend.LastRoundTripMs.ToString("F1", System.Globalization.CultureInfo.InvariantCulture))
          .Append("</td>");
        sb.Append("<td>").Append(plc.Bytes.UpstreamIn).Append("</td>");
        sb.Append("<td>").Append(plc.Bytes.UpstreamOut).Append("</td>");
        // Phase 9: multiplexer telemetry cells.
        sb.Append("<td>").Append(plc.Backend.InFlight).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.MaxInFlight).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.TxIdWraps).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.DisconnectCascades).Append("</td>");
        sb.Append("<td>").Append(plc.Backend.QueueDepth).Append("</td>");
        sb.Append("</tr>");
    }

    // ── Helpers ───────────────────────────────────────────────────────────────

    /// <summary>
    /// Formats a second count as <c>"Hh MMm SSs"</c>, <c>"Mm SSs"</c> or <c>"Ns"</c>
    /// depending on magnitude. Hours are not rolled into days.
    /// </summary>
    private static string FormatUptime(long seconds)
    {
        var ts = TimeSpan.FromSeconds(seconds);
        if (ts.TotalHours >= 1)
        {
            return $"{(int)ts.TotalHours}h {ts.Minutes:D2}m {ts.Seconds:D2}s";
        }

        if (ts.TotalMinutes >= 1)
        {
            return $"{ts.Minutes}m {ts.Seconds:D2}s";
        }

        return $"{seconds}s";
    }

    /// <summary>
    /// Escapes the four characters that are significant in HTML text and double-quoted
    /// attribute values. A <c>null</c> or empty input yields an empty string.
    /// </summary>
    private static string HtmlEncode(string? s)
    {
        if (string.IsNullOrEmpty(s))
        {
            return string.Empty;
        }

        // Fast path: no special chars.
        if (!ContainsHtmlSpecial(s))
        {
            return s;
        }

        // '&' must be replaced first, otherwise the ampersands introduced by the
        // later replacements would themselves be escaped.
        return s
            .Replace("&", "&amp;")
            .Replace("<", "&lt;")
            .Replace(">", "&gt;")
            .Replace("\"", "&quot;");
    }

    /// <summary>Returns true when <paramref name="s"/> contains a character that needs escaping.</summary>
    private static bool ContainsHtmlSpecial(string s)
    {
        foreach (char c in s)
        {
            if (c is '&' or '<' or '>' or '"')
            {
                return true;
            }
        }

        return false;
    }
}
|
||||
@@ -0,0 +1,157 @@
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy;
|
||||
using Mbproxy.Proxy.Multiplexing;
|
||||
using Mbproxy.Proxy.Supervision;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace Mbproxy.Admin;
|
||||
|
||||
/// <summary>
/// Pure orchestration: reads live state from injected singletons and builds a
/// <see cref="StatusResponse"/> for <c>GET /</c> and <c>GET /status.json</c>.
///
/// <para>No I/O; no side effects. Constructed once via DI; <see cref="Build"/> is the
/// only operation and may be called on any thread at any time.</para>
/// </summary>
internal sealed class StatusSnapshotBuilder
{
    private readonly IOptionsMonitor<MbproxyOptions> _options;
    private readonly ServiceCounters _serviceCounters;
    private readonly AssemblyVersionAccessor _version;
    private readonly ProxyWorker _proxyWorker;

    public StatusSnapshotBuilder(
        IOptionsMonitor<MbproxyOptions> options,
        ServiceCounters serviceCounters,
        AssemblyVersionAccessor version,
        ProxyWorker proxyWorker)
    {
        _options = options;
        _serviceCounters = serviceCounters;
        _version = version;
        _proxyWorker = proxyWorker;
    }

    /// <summary>
    /// Builds a point-in-time <see cref="StatusResponse"/>.
    /// Each counter is read atomically; no locks are held across the build.
    /// </summary>
    /// <returns>
    /// One row per configured PLC, in configuration order. A PLC without a supervisor is
    /// reported as <c>"stopped"</c> with zeroed counters and no clients.
    /// </returns>
    public StatusResponse Build()
    {
        var opts = _options.CurrentValue;
        var now = DateTimeOffset.UtcNow;
        var started = _serviceCounters.StartedAtUtc;
        var uptime = (long)(now - started).TotalSeconds;
        var supervisors = _proxyWorker.Supervisors;

        // ── Build per-PLC status rows ─────────────────────────────────────────
        var plcStatuses = new List<PlcStatus>(opts.Plcs.Count);
        int boundCount = 0;

        foreach (var plc in opts.Plcs)
        {
            // A miss leaves supervisor null; every read below tolerates that.
            supervisors.TryGetValue(plc.Name, out var supervisor);

            // Supervisor state
            SupervisorSnapshot? snap = supervisor?.Snapshot();
            string stateStr = snap?.State switch
            {
                SupervisorState.Bound => "bound",
                SupervisorState.Recovering => "recovering",
                _ => "stopped",
            };
            if (snap?.State == SupervisorState.Bound)
            {
                boundCount++;
            }

            // Per-client snapshots
            var activeUpstreams = supervisor?.ActiveUpstreams ?? Array.Empty<UpstreamPipe>();
            var clientSnapshots = activeUpstreams
                .Select(p => new ClientSnapshot(
                    Remote: p.RemoteEp?.ToString() ?? "?",
                    ConnectedAtUtc: p.ConnectedAtUtc,
                    PdusForwarded: p.PdusForwardedCount))
                .ToList();

            // Counter snapshot
            var counters = supervisor?.CurrentCounters.Snapshot() ?? CreateEmptyCounters();

            plcStatuses.Add(new PlcStatus(
                Name: plc.Name,
                Host: plc.Host,
                ListenPort: plc.ListenPort,
                Listener: new PlcListenerStatus(
                    State: stateStr,
                    LastBindError: snap?.LastBindError,
                    RecoveryAttempts: snap?.RecoveryAttempts ?? 0),
                Clients: new PlcClientsStatus(
                    Connected: clientSnapshots.Count,
                    RemoteEndpoints: clientSnapshots),
                Pdus: new PlcPdusStatus(
                    Forwarded: counters.PdusForwarded,
                    ByFc: new FcCounts(counters.Fc03, counters.Fc04, counters.Fc06, counters.Fc16, counters.FcOther),
                    RewrittenSlots: counters.RewrittenSlots,
                    PartialBcdWarnings: counters.PartialBcdWarnings),
                Backend: new PlcBackendStatus(
                    // Phase 08: ConnectsSuccess / ConnectsFailed are now tracked in ProxyCounters.
                    ConnectsSuccess: counters.ConnectsSuccess,
                    ConnectsFailed: counters.ConnectsFailed,
                    ExceptionsByCode: new ExceptionCounts(
                        counters.BackendException01,
                        counters.BackendException02,
                        counters.BackendException03,
                        counters.BackendException04),
                    LastRoundTripMs: counters.LastRoundTripMs,
                    InFlight: counters.InFlightCount,
                    MaxInFlight: counters.MaxInFlight,
                    TxIdWraps: counters.TxIdWraps,
                    DisconnectCascades: counters.BackendDisconnectCascades,
                    QueueDepth: counters.BackendQueueDepth),
                Bytes: new PlcBytesStatus(
                    UpstreamIn: counters.BytesUpstreamIn,
                    UpstreamOut: counters.BytesUpstreamOut)));
        }

        // ── Service-wide fields ───────────────────────────────────────────────
        var service = new ServiceFields(
            UptimeSeconds: uptime,
            Version: _version.Version,
            ConfigLastReloadUtc: _serviceCounters.LastReloadUtc,
            ConfigReloadCount: _serviceCounters.ReloadAppliedCount,
            ConfigReloadRejectedCount: _serviceCounters.ReloadRejectedCount);

        var listeners = new ListenersAggregate(
            Bound: boundCount,
            Configured: opts.Plcs.Count);

        return new StatusResponse(service, listeners, plcStatuses);
    }

    /// <summary>
    /// All-zero counter snapshot used for a configured PLC that has no supervisor.
    /// </summary>
    private static CounterSnapshot CreateEmptyCounters() => new CounterSnapshot(
        PdusForwarded: 0,
        Fc03: 0,
        Fc04: 0,
        Fc06: 0,
        Fc16: 0,
        FcOther: 0,
        RewrittenSlots: 0,
        PartialBcdWarnings: 0,
        InvalidBcdWarnings: 0,
        BackendException01: 0,
        BackendException02: 0,
        BackendException03: 0,
        BackendException04: 0,
        BackendExceptionOther: 0,
        BytesUpstreamIn: 0,
        BytesUpstreamOut: 0,
        RecoveryAttempts: 0,
        LastBindError: null,
        LastRoundTripMs: 0.0,
        ConnectsSuccess: 0,
        ConnectsFailed: 0,
        InFlightCount: 0,
        MaxInFlight: 0,
        TxIdWraps: 0,
        BackendDisconnectCascades: 0,
        BackendQueueDepth: 0);
}
|
||||
@@ -0,0 +1,111 @@
|
||||
namespace Mbproxy.Bcd;
|
||||
|
||||
/// <summary>
|
||||
/// Pure, allocation-free codec for DirectLOGIC BCD register encoding/decoding.
|
||||
///
|
||||
/// 16-bit BCD: one register holds 4 BCD digits (0–9999).
|
||||
/// Wire value 0x1234 decodes to decimal 1234.
|
||||
///
|
||||
/// 32-bit BCD (CDAB word order, low-word-first):
|
||||
/// Register at Address = low 4 BCD digits (least-significant).
|
||||
/// Register at Address+1 = high 4 BCD digits (most-significant).
|
||||
/// Decoded decimal = Decode16(high) * 10_000 + Decode16(low).
|
||||
/// Example: 12_345_678 → low=0x5678, high=0x1234.
|
||||
///
|
||||
/// Bad-nibble policy: Decode16/Decode32 throw <see cref="FormatException"/>
|
||||
/// (not a sentinel). The Phase 04 rewrite pipeline catches and surfaces the
|
||||
/// exception as an mbproxy.rewrite.invalid_bcd warning event.
|
||||
/// </summary>
|
||||
internal static class BcdCodec
|
||||
{
|
||||
private const int Max16 = 9_999;
|
||||
private const int Max32 = 99_999_999;
|
||||
|
||||
// ── Encode ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
|
||||
/// Encodes a non-negative integer in [0, 9999] to a 16-bit BCD register.
|
||||
/// E.g. 1234 → 0x1234.
|
||||
/// </summary>
|
||||
/// <exception cref="ArgumentOutOfRangeException">value < 0 or value > 9999.</exception>
|
||||
public static ushort Encode16(int value)
|
||||
{
|
||||
if ((uint)value > Max16)
|
||||
throw new ArgumentOutOfRangeException(nameof(value),
|
||||
value, $"BCD-16 value must be in [0, {Max16}]; got {value}.");
|
||||
|
||||
// Pack four decimal digits into four BCD nibbles.
|
||||
int d3 = value / 1000;
|
||||
int d2 = (value / 100) % 10;
|
||||
int d1 = (value / 10) % 10;
|
||||
int d0 = value % 10;
|
||||
return (ushort)((d3 << 12) | (d2 << 8) | (d1 << 4) | d0);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Encodes a non-negative integer in [0, 99_999_999] to a CDAB BCD register pair.
|
||||
/// Returns (low, high) where low holds the 4 least-significant BCD digits and
|
||||
/// high holds the 4 most-significant BCD digits.
|
||||
/// E.g. 12_345_678 → (low: 0x5678, high: 0x1234).
|
||||
/// </summary>
|
||||
/// <exception cref="ArgumentOutOfRangeException">value < 0 or value > 99_999_999.</exception>
|
||||
public static (ushort low, ushort high) Encode32(int value)
|
||||
{
|
||||
if ((uint)value > Max32)
|
||||
throw new ArgumentOutOfRangeException(nameof(value),
|
||||
value, $"BCD-32 value must be in [0, {Max32}]; got {value}.");
|
||||
|
||||
int lo = value % 10_000; // low 4 decimal digits
|
||||
int hi = value / 10_000; // high 4 decimal digits
|
||||
return (Encode16(lo), Encode16(hi));
|
||||
}
|
||||
|
||||
// ── Decode ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Converts one 16-bit BCD register into the integer it spells out, one decimal digit per nibble.
/// E.g. 0x1234 → 1234.
/// </summary>
/// <exception cref="FormatException">At least one nibble is 0xA or above, so the word is not BCD.</exception>
public static int Decode16(ushort raw)
{
    // Reject the whole word up front so the error message can show the untouched raw value.
    if (HasBadNibble(raw))
    {
        throw new FormatException(
            $"Register value 0x{raw:X4} is not valid BCD: one or more nibbles are >= 0xA.");
    }

    // Walk from the most-significant nibble down, accumulating base-10 digits.
    int result = 0;
    for (int shift = 12; shift >= 0; shift -= 4)
    {
        result = (result * 10) + ((raw >> shift) & 0xF);
    }

    return result;
}
|
||||
|
||||
/// <summary>
/// Reassembles the integer stored in a CDAB BCD register pair.
/// <paramref name="low"/> supplies the four least-significant digits and
/// <paramref name="high"/> the four most-significant ones.
/// E.g. (low: 0x5678, high: 0x1234) → 12_345_678.
/// </summary>
/// <exception cref="FormatException">Either word contains a nibble that is not a BCD digit.</exception>
public static int Decode32(ushort low, ushort high)
{
    // Operands are evaluated left to right, so the high word is validated first and a
    // bad high word is the one reported when both words are invalid.
    return (Decode16(high) * 10_000) + Decode16(low);
}
|
||||
|
||||
// ── Private helpers ─────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="raw"/> contains any 4-bit group above 9,
/// i.e. whether it fails to be a valid packed-BCD word.
/// </summary>
private static bool HasBadNibble(ushort raw)
{
    for (int shift = 0; shift < 16; shift += 4)
    {
        if (((raw >> shift) & 0xF) > 9)
        {
            return true;
        }
    }

    return false;
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
namespace Mbproxy.Bcd;
|
||||
|
||||
/// <summary>
/// Immutable description of a single BCD-encoded V-memory tag as seen on the Modbus wire.
/// Width is 16 (one register) or 32 (two registers, CDAB low-word-first).
/// </summary>
/// <remarks>
/// The positional constructor performs no validation; use <see cref="Create"/> when the
/// width comes from configuration.
/// </remarks>
public sealed record BcdTag(ushort Address, byte Width)
{
    /// <summary>
    /// Creates a <see cref="BcdTag"/> and validates that Width is 16 or 32.
    /// </summary>
    /// <param name="address">Modbus register address of the tag's low (or only) word.</param>
    /// <param name="width">Tag width in bits; must be 16 or 32.</param>
    /// <returns>The validated tag.</returns>
    /// <exception cref="ArgumentException">Width is not 16 or 32.</exception>
    public static BcdTag Create(ushort address, byte width)
    {
        if (width != 16 && width != 32)
        {
            throw new ArgumentException(
                $"BCD tag Width must be 16 or 32; got {width} at address {address}.",
                nameof(width));
        }

        return new BcdTag(address, width);
    }

    /// <summary>True when this tag occupies two registers (32-bit BCD).</summary>
    public bool IsThirtyTwoBit => Width == 32;

    /// <summary>
    /// The address of the high-word register for a 32-bit tag (Address + 1).
    /// Only valid when <see cref="IsThirtyTwoBit"/> is true.
    /// </summary>
    /// <exception cref="InvalidOperationException">Tag is 16-bit.</exception>
    public ushort HighRegister =>
        IsThirtyTwoBit
            ? (ushort)(Address + 1)
            : throw new InvalidOperationException(
                $"HighRegister is only defined for 32-bit BCD tags (Address {Address} is {Width}-bit).");

    /// <summary>
    /// Formats the tag using only its positional members.
    /// </summary>
    /// <remarks>
    /// The compiler-synthesized record <c>ToString()</c> prints every public readable
    /// property, which would evaluate <see cref="HighRegister"/> and therefore throw for
    /// every 16-bit tag (for example when a tag is logged or interpolated into a message).
    /// </remarks>
    public override string ToString() => $"BcdTag {{ Address = {Address}, Width = {Width} }}";
}
|
||||
@@ -0,0 +1,112 @@
|
||||
using System.Collections.Frozen;
|
||||
|
||||
namespace Mbproxy.Bcd;
|
||||
|
||||
/// <summary>
|
||||
/// A hit returned by <see cref="BcdTagMap.TryGetForRange"/>.
|
||||
/// <see cref="OffsetWords"/> is the word offset of the tag's low register relative to the
|
||||
/// start of the requested read range [startAddress, startAddress+qty).
/// It is computed as tag address minus start address, so it is zero or positive when the
/// low register lies inside the range and -1 for a 32-bit tag whose low register sits
/// immediately before the range while its high register is the first word in it.
|
||||
/// </summary>
|
||||
public readonly record struct RangeHit(int OffsetWords, BcdTag Tag);
|
||||
|
||||
/// <summary>
/// Read-only lookup from Modbus register address to <see cref="BcdTag"/> for one PLC.
/// Queries that find nothing hand back a shared empty list instead of allocating one.
/// </summary>
public sealed class BcdTagMap
{
    /// <summary>Shared instance holding no tags, used when nothing is configured.</summary>
    public static BcdTagMap Empty { get; } = new(FrozenDictionary<ushort, BcdTag>.Empty);

    // Zero-length result shared by every range query that matches nothing.
    private static readonly IReadOnlyList<RangeHit> s_emptyHits = Array.Empty<RangeHit>();

    // Keyed by the tag's low-register address.
    private readonly FrozenDictionary<ushort, BcdTag> _map;

    internal BcdTagMap(FrozenDictionary<ushort, BcdTag> map)
    {
        _map = map;
    }

    /// <summary>How many tags the map holds.</summary>
    public int Count => _map.Count;

    /// <summary>Every tag in the map, e.g. for the status page.</summary>
    public IEnumerable<BcdTag> All => _map.Values;

    /// <summary>
    /// Looks up the tag whose low register is exactly <paramref name="address"/>.
    /// </summary>
    /// <returns><c>true</c> when a tag is registered at that address.</returns>
    public bool TryGet(ushort address, out BcdTag tag)
    {
        return _map.TryGetValue(address, out tag!);
    }

    /// <summary>
    /// Collects every tag whose registers overlap the half-open range
    /// [<paramref name="startAddress"/>, <paramref name="startAddress"/> + <paramref name="qty"/>).
    ///
    /// A 16-bit tag covers one register and a 32-bit tag covers two, so a 32-bit tag is
    /// also reported when only its high register falls inside the range; in that case
    /// <see cref="RangeHit.OffsetWords"/> is -1.
    ///
    /// Results are ordered by ascending <see cref="RangeHit.OffsetWords"/>. When nothing
    /// overlaps, <paramref name="hits"/> is a shared empty list and no list is allocated.
    /// </summary>
    /// <returns><c>true</c> when at least one tag overlaps the range.</returns>
    public bool TryGetForRange(ushort startAddress, ushort qty,
        out IReadOnlyList<RangeHit> hits)
    {
        hits = s_emptyHits;
        if (qty == 0 || _map.Count == 0)
        {
            return false;
        }

        // Work in int so that start + qty cannot wrap around the 16-bit address space.
        int first = startAddress;
        int endExclusive = first + qty;
        List<RangeHit>? found = null;

        foreach (BcdTag candidate in _map.Values)
        {
            int low = candidate.Address;
            int footprint = candidate.IsThirtyTwoBit ? 2 : 1;

            // Half-open interval test: skip tags that end before the range or start after it.
            if (low >= endExclusive || low + footprint <= first)
            {
                continue;
            }

            found ??= new List<RangeHit>(4);
            found.Add(new RangeHit(low - first, candidate));
        }

        if (found is null)
        {
            return false;
        }

        // Callers walk the hits in wire order.
        found.Sort(static (x, y) => x.OffsetWords.CompareTo(y.OffsetWords));
        hits = found;
        return true;
    }
}
|
||||
@@ -0,0 +1,117 @@
|
||||
using System.Collections.Frozen;
|
||||
using Mbproxy.Options;
|
||||
|
||||
namespace Mbproxy.Bcd;
|
||||
|
||||
/// <summary>
/// Builds an immutable <see cref="BcdTagMap"/> from global options and optional per-PLC overrides.
///
/// Resolution algorithm:
///   1. Start with the global tag list.
///   2. Remove any address present in perPlc.Remove.
///   3. Merge in perPlc.Add entries — if an address exists in the working set the Add entry wins
///      (this is how a per-PLC width override is expressed).
///
/// Validation:
///   - Width not 16 or 32 → BcdError(InvalidWidth).
///   - An address listed more than once in the global list (and neither removed nor overridden),
///     or more than once in perPlc.Add → BcdError(DuplicateAddress).
///   - 32-bit high register (Address+1) collides with any other entry → BcdError(OverlappingHighRegister).
///   - Remove address not found in global → BcdWarning (not an error).
/// </summary>
public static class BcdTagMapBuilder
{
    /// <summary>
    /// Resolves the effective BCD tag list for one PLC and validates it.
    /// </summary>
    /// <param name="global">The global BCD tag list.</param>
    /// <param name="perPlc">Optional per-PLC overrides (Add + Remove). May be null.</param>
    /// <returns>
    /// A <see cref="ValidationResult"/> whose map excludes entries with an invalid width or
    /// an ambiguous (duplicated) address. Entries involved in a high-register overlap are
    /// kept in the map and reported through <see cref="ValidationResult.Errors"/>.
    /// Callers should treat non-empty errors as a fatal configuration problem.
    /// </returns>
    public static ValidationResult Build(BcdTagListOptions global, PlcBcdOverrides? perPlc)
    {
        var errors = new List<BcdError>();
        var warnings = new List<BcdWarning>();

        // Addresses whose definition is ambiguous because one list names them twice.
        // The working dictionary cannot hold two entries per key, so duplicates have to be
        // noticed while the lists are being read; checking the dictionary afterwards can
        // never find one.
        var duplicated = new HashSet<ushort>();

        // ── Step 1: collect the global list keyed by address ─────────────────
        var working = new Dictionary<ushort, BcdTagOptions>(global.Global.Count);
        foreach (var tag in global.Global)
        {
            if (!working.TryAdd(tag.Address, tag))
                duplicated.Add(tag.Address);
        }

        // ── Step 2: apply Remove ─────────────────────────────────────────────
        if (perPlc?.Remove is { } removeList)
        {
            foreach (var addr in removeList)
            {
                // Removing an address also removes any ambiguity about it.
                duplicated.Remove(addr);

                if (!working.Remove(addr))
                {
                    warnings.Add(new BcdWarning(
                        $"Remove entry for address {addr} does not match any global tag; " +
                        "the entry is probably stale.", addr));
                }
            }
        }

        // ── Step 3: apply Add (override wins over global) ────────────────────
        if (perPlc?.Add is { } addList)
        {
            var addedHere = new HashSet<ushort>();
            foreach (var tag in addList)
            {
                if (addedHere.Add(tag.Address))
                {
                    // First Add entry for this address replaces whatever global said,
                    // which also settles a duplicate that existed only in the global list.
                    working[tag.Address] = tag;
                    duplicated.Remove(tag.Address);
                }
                else
                {
                    // Two Add entries for one address: no way to tell which one is meant.
                    duplicated.Add(tag.Address);
                }
            }
        }

        // ── Step 4: validate the resolved list ───────────────────────────────
        var validated = new Dictionary<ushort, BcdTag>(working.Count);

        foreach (var (addr, opt) in working)
        {
            // Width check first (defensive — options validation may already have caught this).
            if (opt.Width != 16 && opt.Width != 32)
            {
                errors.Add(new BcdError(BcdValidationError.InvalidWidth,
                    $"Address {addr}: Width {opt.Width} is not 16 or 32.", addr));
                continue;
            }

            if (duplicated.Contains(addr))
            {
                errors.Add(new BcdError(BcdValidationError.DuplicateAddress,
                    $"Address {addr} appears more than once in the resolved tag list.", addr));
                continue;
            }

            validated[addr] = BcdTag.Create(addr, opt.Width);
        }

        // High-register collision check (only meaningful for 32-bit entries).
        // NOTE(review): for a 32-bit tag at address 65535, HighRegister is computed as
        // (ushort)(Address + 1); confirm whether that edge case needs its own validation.
        foreach (var tag in validated.Values)
        {
            if (!tag.IsThirtyTwoBit)
                continue;

            ushort highReg = tag.HighRegister;
            if (validated.TryGetValue(highReg, out var collision))
            {
                errors.Add(new BcdError(BcdValidationError.OverlappingHighRegister,
                    $"32-bit BCD tag at address {tag.Address} has its high register " +
                    $"({highReg}) colliding with the entry at address {collision.Address}.",
                    tag.Address));
            }
        }

        // ── Step 5: freeze ───────────────────────────────────────────────────
        // Entries implicated in an OverlappingHighRegister error stay in the map so the
        // caller can see all context; the error list is what marks the config as invalid.
        var frozen = validated.ToFrozenDictionary();
        var map = frozen.Count > 0 ? new BcdTagMap(frozen) : BcdTagMap.Empty;

        return new ValidationResult(map, errors, warnings);
    }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
namespace Mbproxy.Bcd;
|
||||
|
||||
/// <summary>Discriminates the class of validation failure in a resolved BCD tag list.</summary>
/// <remarks>Carried as <c>Kind</c> on each <see cref="BcdError"/> produced by <see cref="BcdTagMapBuilder.Build"/>.</remarks>
|
||||
public enum BcdValidationError
|
||||
{
|
||||
/// <summary>Two or more entries share the same Modbus register address.</summary>
|
||||
DuplicateAddress,
|
||||
|
||||
/// <summary>
|
||||
/// A 32-bit entry's high register (Address+1) collides with another entry's address.
/// The error is attributed to the 32-bit entry, not to the entry it collides with.
|
||||
/// </summary>
|
||||
OverlappingHighRegister,
|
||||
|
||||
/// <summary>An entry has a Width that is not 16 or 32.</summary>
|
||||
InvalidWidth,
|
||||
}
|
||||
|
||||
/// <summary>A hard validation failure that prevents the map from being used.</summary>
/// <param name="Kind">Which validation rule was violated.</param>
/// <param name="Message">Human-readable description of the failure.</param>
/// <param name="Address">Register address of the offending entry, when one applies.</param>
|
||||
public sealed record BcdError(BcdValidationError Kind, string Message, ushort? Address);
|
||||
|
||||
/// <summary>A non-fatal advisory that rides along with the map.</summary>
/// <param name="Message">Human-readable description of the advisory.</param>
/// <param name="Address">Register address the advisory refers to, when one applies.</param>
|
||||
public sealed record BcdWarning(string Message, ushort? Address);
|
||||
|
||||
/// <summary>
|
||||
/// Result of a <see cref="BcdTagMapBuilder.Build"/> call.
|
||||
/// When <see cref="Errors"/> is non-empty the map must not be trusted: entries rejected
/// for an invalid width are left out, while entries involved in a high-register overlap
/// are still present in it.
|
||||
/// Callers should treat any error as a fatal configuration problem at startup.
|
||||
/// </summary>
/// <param name="Map">The tag map built from the resolved entries.</param>
/// <param name="Errors">Hard validation failures; empty when the configuration is valid.</param>
/// <param name="Warnings">Non-fatal advisories, such as Remove entries that matched nothing.</param>
|
||||
public sealed record ValidationResult(
|
||||
BcdTagMap Map,
|
||||
IReadOnlyList<BcdError> Errors,
|
||||
IReadOnlyList<BcdWarning> Warnings);
|
||||
@@ -0,0 +1,463 @@
|
||||
using System.Threading.Channels;
|
||||
using Mbproxy.Bcd;
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy;
|
||||
using Mbproxy.Proxy.Multiplexing;
|
||||
using Mbproxy.Proxy.Supervision;
|
||||
using Microsoft.Extensions.Options;
|
||||
using PolicyFactory = Mbproxy.Proxy.Supervision.PolicyFactory;
|
||||
|
||||
namespace Mbproxy.Configuration;
|
||||
|
||||
/// <summary>
|
||||
/// Subscribes to <see cref="IOptionsMonitor{TOptions}.OnChange"/> and reconciles the
|
||||
/// running set of <see cref="PlcListenerSupervisor"/> instances against the new
|
||||
/// <see cref="MbproxyOptions"/> snapshot.
|
||||
///
|
||||
/// <para><b>Threading model</b>:
|
||||
/// <list type="bullet">
|
||||
/// <item>The <c>OnChange</c> callback is not allowed to block. It enqueues a
|
||||
/// sentinel to a <see cref="Channel{T}"/> and returns immediately.</item>
|
||||
/// <item>A dedicated background loop drains the channel, debounces rapid saves
|
||||
/// (250 ms quiescent window), and then calls <see cref="ApplyAsync"/>.</item>
|
||||
/// <item><see cref="ApplyAsync"/> is guarded by a <see cref="SemaphoreSlim"/> (initial and maximum count 1)
|
||||
/// so concurrent reloads are serialised — the second change waits until the
|
||||
/// first apply finishes. The last change wins.</item>
|
||||
/// </list>
|
||||
/// </para>
|
||||
///
|
||||
/// <para><b>Debounce rationale</b>: text editors on Windows commonly write via a
|
||||
/// rename-and-replace pattern, which triggers 2–3 <c>FileSystemWatcher</c> events for
|
||||
/// a single save. Without debouncing, the reconciler would run 2–3 times per save and
|
||||
/// see intermediate half-written files. 250 ms covers every editor pattern observed in
|
||||
/// practice while adding imperceptible latency for operators.</para>
|
||||
///
|
||||
/// <para><b>Partial-apply on error</b>: if one step of the apply sequence throws, the
|
||||
/// exception is logged at Error and execution continues with the remaining steps. The
|
||||
/// validator should have caught most preconditions; a runtime exception here is a true
|
||||
/// bug worth surfacing. The host stays up regardless.</para>
|
||||
/// </summary>
|
||||
internal sealed partial class ConfigReconciler : IDisposable
|
||||
{
|
||||
// Dependencies
|
||||
private readonly IOptionsMonitor<MbproxyOptions> _monitor;
|
||||
private readonly ILoggerFactory _loggerFactory;
|
||||
private readonly ILogger<ConfigReconciler> _logger;
|
||||
private readonly ServiceCounters _serviceCounters;
|
||||
|
||||
// The supervisor dictionary is set by ProxyWorker after initial startup.
|
||||
// All mutations happen inside ApplyAsync which is serialised by the semaphore.
|
||||
private Dictionary<string, PlcListenerSupervisor>? _supervisors;
|
||||
private MbproxyOptions? _currentOptions;
|
||||
|
||||
// ── Debounce + serialisation machinery ───────────────────────────────────────────────
|
||||
|
||||
// Channel carries a bool sentinel (its value is never inspected) to signal "something changed — please check".
|
||||
// The background loop drains it with a 250 ms quiescent window.
|
||||
private readonly Channel<bool> _changeSignal =
|
||||
Channel.CreateBounded<bool>(new BoundedChannelOptions(1)
|
||||
{
|
||||
FullMode = BoundedChannelFullMode.DropOldest,
|
||||
});
|
||||
|
||||
// Serialises concurrent ApplyAsync invocations.
|
||||
// A slow apply will queue the next one, and the last enqueued state wins.
|
||||
private readonly SemaphoreSlim _applySemaphore = new(1, 1);
|
||||
|
||||
private readonly CancellationTokenSource _disposalCts = new();
|
||||
private readonly IDisposable? _changeRegistration;
|
||||
private readonly Task _debounceLoop;
|
||||
|
||||
// Debounce window: how long to wait for additional OnChange events before applying.
|
||||
private static readonly TimeSpan DebounceWindow = TimeSpan.FromMilliseconds(250);
|
||||
|
||||
// ── Construction ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
public ConfigReconciler(
    IOptionsMonitor<MbproxyOptions> monitor,
    ILoggerFactory loggerFactory,
    ServiceCounters serviceCounters)
{
    _monitor = monitor;
    _loggerFactory = loggerFactory;
    _logger = loggerFactory.CreateLogger<ConfigReconciler>();
    _serviceCounters = serviceCounters;

    // The change callback only drops a token into the channel and returns; all real work
    // happens on the background loop started below.
    _changeRegistration = _monitor.OnChange(SignalChange);

    // Kick off the loop that debounces signals and applies the resulting configuration.
    _debounceLoop = Task.Run(() => DebounceLoopAsync(_disposalCts.Token));

    // TryWrite never blocks: the channel holds one item and replaces it when full, and the
    // loop reads the monitor's current value itself, so a replaced signal loses nothing.
    void SignalChange(MbproxyOptions _, string? __) => _changeSignal.Writer.TryWrite(true);
}
|
||||
|
||||
// ── Wire-up called by ProxyWorker after initial startup ──────────────────────────────
|
||||
|
||||
/// <summary>
/// Hands the reconciler the live supervisor dictionary together with the options snapshot
/// those supervisors were built from. Until this has been called, reload requests are
/// skipped (see the not-attached check at the top of the apply logic).
/// </summary>
/// <param name="supervisors">The supervisor set that later reloads will mutate in place.</param>
/// <param name="initialOptions">The options that later reloads are diffed against.</param>
public void Attach(
    Dictionary<string, PlcListenerSupervisor> supervisors,
    MbproxyOptions initialOptions)
{
    (_supervisors, _currentOptions) = (supervisors, initialOptions);
}
|
||||
|
||||
// ── ApplyAsync (exposed for tests) ───────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Runs one reload for <paramref name="next"/> while holding <c>_applySemaphore</c>, so
/// overlapping calls execute strictly one after another.
///
/// <para>The result is <c>true</c> when the reload was accepted and applied (possibly only
/// in part) and <c>false</c> when it was skipped or rejected by validation.</para>
/// </summary>
/// <param name="next">The options snapshot to reconcile the running supervisors against.</param>
/// <param name="ct">Cancels both the wait for the semaphore and the apply itself.</param>
public async Task<bool> ApplyAsync(MbproxyOptions next, CancellationToken ct)
{
    await _applySemaphore.WaitAsync(ct).ConfigureAwait(false);

    bool applied;
    try
    {
        applied = await ApplyUnderLockAsync(next, ct).ConfigureAwait(false);
    }
    finally
    {
        // Always hand the semaphore back, even when the apply throws.
        _applySemaphore.Release();
    }

    return applied;
}
|
||||
|
||||
// ── Debounce loop ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
private async Task DebounceLoopAsync(CancellationToken ct)
{
    var reader = _changeSignal.Reader;

    // Waits up to DebounceWindow for another change signal.
    // Yields false when the window elapses quietly; an OperationCanceledException caused by
    // ct itself is not handled here and unwinds to the outer catch.
    async Task<bool> AnotherSignalWithinWindowAsync()
    {
        using var windowCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        windowCts.CancelAfter(DebounceWindow);

        try
        {
            return await reader.WaitToReadAsync(windowCts.Token).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (!ct.IsCancellationRequested)
        {
            // Only the window timer fired: the burst is over.
            return false;
        }
    }

    try
    {
        while (!ct.IsCancellationRequested)
        {
            // Sleep until something changes.
            await reader.WaitToReadAsync(ct).ConfigureAwait(false);

            // Swallow the pending signal, then keep doing so for as long as new ones keep
            // arriving inside the quiet window. A burst of events collapses into one apply.
            do
            {
                reader.TryRead(out _);
            }
            while (await AnotherSignalWithinWindowAsync().ConfigureAwait(false));

            if (ct.IsCancellationRequested)
            {
                return;
            }

            // The monitor always exposes the most recent options, whatever the signal count.
            var latest = _monitor.CurrentValue;
            try
            {
                await ApplyAsync(latest, ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                return;
            }
            catch (Exception ex)
            {
                // Keep the loop alive; a failed apply must not stop future reloads.
                _logger.LogError(ex, "Unexpected exception in ConfigReconciler debounce loop: {Message}", ex.Message);
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Expected when disposal cancels the token.
    }
}
|
||||
|
||||
// ── Core apply logic (runs under _applySemaphore) ─────────────────────────────────────
|
||||
|
||||
// Serialises writes to the supervisor dictionary made by the parallel remove / restart /
// add tasks. Their continuations resume on pool threads (ConfigureAwait(false)), and
// Dictionary<TKey,TValue> does not support concurrent writers.
private readonly object _supervisorsGate = new();

/// <summary>
/// Core of a reload; must only run while <c>_applySemaphore</c> is held.
/// Validates <paramref name="next"/>, diffs it against the current options and then
/// applies the plan in a fixed order: remove, restart, reseat, add.
/// </summary>
/// <param name="next">The options snapshot to apply.</param>
/// <param name="ct">Cancellation for the whole apply; per-supervisor stops add their own timeout.</param>
/// <returns>
/// <c>false</c> when <see cref="Attach"/> has not been called yet or validation rejects
/// <paramref name="next"/> (nothing is mutated in either case); otherwise <c>true</c>,
/// even if individual steps failed and were logged.
/// </returns>
private async Task<bool> ApplyUnderLockAsync(MbproxyOptions next, CancellationToken ct)
{
    var supervisors = _supervisors;
    var current = _currentOptions;

    // If Attach() hasn't been called yet, skip (initial startup is still in progress).
    if (supervisors is null || current is null)
    {
        _logger.LogDebug("ConfigReconciler.ApplyAsync called before Attach() — skipping.");
        return false;
    }

    // ── 1. Validate atomically ────────────────────────────────────────────
    if (!ReloadValidator.Validate(next, out var errors))
    {
        string joined = string.Join("; ", errors);
        LogReloadRejected(_logger, joined);
        _serviceCounters.RecordReloadRejected();
        return false;
    }

    // ── 2. Compute the plan ───────────────────────────────────────────────
    var plan = ReloadPlan.Compute(current, next);

    // Computed before anything is mutated so a failure here leaves the running set untouched.
    int globalTagDelta = ComputeGlobalTagDelta(current.BcdTags, next.BcdTags);

    // ── 3. Apply: Remove ─────────────────────────────────────────────────
    if (plan.ToRemove.Count > 0)
    {
        var removeTasks = plan.ToRemove
            .Select(name => RemoveSupervisorAsync(supervisors, name, ct))
            .ToArray();

        await Task.WhenAll(removeTasks).ConfigureAwait(false);
    }

    // ── 4. Apply: Restart (stop + rebuild + start) ───────────────────────
    if (plan.ToRestart.Count > 0)
    {
        // One backend-connect pipeline is shared by every supervisor rebuilt in this step.
        var createSupervisor = CreateSupervisorFactory(next);

        var restartTasks = plan.ToRestart
            .Select(entry => RestartSupervisorAsync(supervisors, entry.Name, entry.New, createSupervisor, ct))
            .ToArray();

        await Task.WhenAll(restartTasks).ConfigureAwait(false);
    }

    // ── 5. Apply: Reseat (swap tag map, keep listener socket) ────────────
    // Runs sequentially after the parallel steps have completed, so no locking is needed.
    foreach (var (name, newMap) in plan.ToReseat)
    {
        if (!supervisors.TryGetValue(name, out var supervisor))
            continue;

        try
        {
            var newCtx = new PerPlcContext
            {
                PlcName = name,
                TagMap = newMap,
                // Preserve existing counters so operators see real history.
                Counters = supervisor.CurrentCounters,
                Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{name}"),
            };

            using var reseatCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            reseatCts.CancelAfter(TimeSpan.FromSeconds(5));
            await supervisor.ReplaceContextAsync(newCtx, reseatCts.Token).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error reseating context for PLC '{Plc}': {Message}",
                name, ex.Message);
        }
    }

    // ── 6. Apply: Add new PLCs ────────────────────────────────────────────
    if (plan.ToAdd.Count > 0)
    {
        var createSupervisor = CreateSupervisorFactory(next);

        var addTasks = plan.ToAdd
            .Select(plcNew => AddSupervisorAsync(supervisors, plcNew, createSupervisor, ct))
            .ToArray();

        await Task.WhenAll(addTasks).ConfigureAwait(false);
    }

    // ── 7. Record success ─────────────────────────────────────────────────
    _currentOptions = next;
    _serviceCounters.RecordReloadApplied(DateTimeOffset.UtcNow);

    LogReloadApplied(
        _logger,
        plan.ToAdd.Count,
        plan.ToRemove.Count,
        plan.ToRestart.Count,
        plan.ToReseat.Count,
        globalTagDelta);

    return true;
}

/// <summary>
/// Unregisters, stops and disposes the supervisor for a removed PLC.
/// Names with no registered supervisor are ignored; failures are logged, never thrown.
/// </summary>
private async Task RemoveSupervisorAsync(
    Dictionary<string, PlcListenerSupervisor> supervisors,
    string name,
    CancellationToken ct)
{
    try
    {
        PlcListenerSupervisor? removed;
        lock (_supervisorsGate)
        {
            supervisors.Remove(name, out removed);
        }

        if (removed is not null)
            await StopAndDisposeAsync(removed, ct).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error stopping supervisor for removed PLC '{Plc}': {Message}",
            name, ex.Message);
    }
}

/// <summary>
/// Replaces the supervisor registered under <paramref name="name"/> with a freshly built
/// one: the old one is unregistered, stopped and disposed, then the new one is registered
/// and started. Failures are logged, never thrown.
/// </summary>
private async Task RestartSupervisorAsync(
    Dictionary<string, PlcListenerSupervisor> supervisors,
    string name,
    PlcOptions plcNew,
    Func<PlcOptions, PlcListenerSupervisor> createSupervisor,
    CancellationToken ct)
{
    try
    {
        PlcListenerSupervisor? old;
        lock (_supervisorsGate)
        {
            supervisors.Remove(name, out old);
        }

        if (old is not null)
            await StopAndDisposeAsync(old, ct).ConfigureAwait(false);

        var replacement = createSupervisor(plcNew);

        // This line runs after an await, i.e. potentially in parallel with sibling restarts.
        lock (_supervisorsGate)
        {
            supervisors[name] = replacement;
        }

        await replacement.StartAsync(ct).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error restarting supervisor for PLC '{Plc}': {Message}",
            name, ex.Message);
    }
}

/// <summary>
/// Builds, registers and starts the supervisor for a newly configured PLC.
/// Failures are logged, never thrown.
/// </summary>
private async Task AddSupervisorAsync(
    Dictionary<string, PlcListenerSupervisor> supervisors,
    PlcOptions plcNew,
    Func<PlcOptions, PlcListenerSupervisor> createSupervisor,
    CancellationToken ct)
{
    try
    {
        var added = createSupervisor(plcNew);

        lock (_supervisorsGate)
        {
            supervisors[plcNew.Name] = added;
        }

        await added.StartAsync(ct).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error adding supervisor for PLC '{Plc}': {Message}",
            plcNew.Name, ex.Message);
    }
}

/// <summary>
/// Stops a supervisor with a 10-second timeout and disposes it. Disposal is attempted
/// even when the stop throws or times out, so a failed stop does not leak the supervisor;
/// the stop failure still propagates to the caller.
/// </summary>
private static async Task StopAndDisposeAsync(PlcListenerSupervisor supervisor, CancellationToken ct)
{
    try
    {
        using var stopCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        stopCts.CancelAfter(TimeSpan.FromSeconds(10));
        await supervisor.StopAsync(stopCts.Token).ConfigureAwait(false);
    }
    finally
    {
        await supervisor.DisposeAsync().ConfigureAwait(false);
    }
}

/// <summary>
/// Builds the backend-connect pipeline for <paramref name="next"/> once and returns a
/// factory that creates a not-yet-started supervisor (with a fresh tag map, context and
/// counters) for a given PLC, sharing that pipeline.
/// </summary>
private Func<PlcOptions, PlcListenerSupervisor> CreateSupervisorFactory(MbproxyOptions next)
{
    var resilienceOpts = next.Resilience;
    var backendPipeline = PolicyFactory.BuildBackendConnect(
        resilienceOpts.BackendConnect,
        _loggerFactory.CreateLogger("Mbproxy.Proxy.BackendConnect"));

    return plcNew =>
    {
        var result = BcdTagMapBuilder.Build(next.BcdTags, plcNew.BcdTags);
        var newCtx = new PerPlcContext
        {
            PlcName = plcNew.Name,
            TagMap = result.Map,
            Counters = new Proxy.ProxyCounters(),
            Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{plcNew.Name}"),
        };

        var recoveryPipeline = PolicyFactory.BuildListenerRecovery(
            resilienceOpts.ListenerRecovery,
            _loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plcNew.Name}"));

        return new PlcListenerSupervisor(
            plcNew,
            next.Connection,
            new Proxy.BcdPduPipeline(),
            _loggerFactory.CreateLogger<Proxy.PlcListener>(),
            _loggerFactory.CreateLogger<PlcMultiplexer>(),
            _loggerFactory.CreateLogger($"Mbproxy.Proxy.UpstreamPipe.{plcNew.Name}"),
            newCtx,
            recoveryPipeline,
            _loggerFactory.CreateLogger<PlcListenerSupervisor>(),
            backendPipeline);
    };
}
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Counts how many global tag addresses differ between two snapshots: addresses present
/// on only one side, plus addresses present on both sides with a different width.
/// </summary>
/// <param name="before">The global tag list currently in effect.</param>
/// <param name="after">The global tag list being applied.</param>
/// <returns>The number of differing addresses; 0 when the lists are equivalent.</returns>
private static int ComputeGlobalTagDelta(BcdTagListOptions before, BcdTagListOptions after)
{
    // Index with last-write-wins, the same rule the tag-map builder applies to the global
    // list, so a repeated address cannot make this purely informational count throw.
    static Dictionary<ushort, BcdTagOptions> IndexByAddress(BcdTagListOptions list)
    {
        var byAddress = new Dictionary<ushort, BcdTagOptions>(list.Global.Count);
        foreach (var tag in list.Global)
            byAddress[tag.Address] = tag;
        return byAddress;
    }

    var beforeDict = IndexByAddress(before);
    var afterDict = IndexByAddress(after);

    int delta = 0;

    // Removed entries and width changes.
    foreach (var (addr, bTag) in beforeDict)
    {
        if (!afterDict.TryGetValue(addr, out var aTag) || aTag.Width != bTag.Width)
            delta++;
    }

    // Added entries.
    foreach (var addr in afterDict.Keys)
    {
        if (!beforeDict.ContainsKey(addr))
            delta++;
    }

    return delta;
}
|
||||
|
||||
// ── IDisposable ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
// 0 while live, 1 once Dispose has run; makes repeated Dispose calls harmless.
private int _disposed;

/// <summary>
/// Unsubscribes from option changes, cancels the debounce loop, waits up to two seconds
/// for it to finish and then releases the cancellation source and the apply semaphore.
/// Safe to call more than once; only the first call does any work.
/// </summary>
public void Dispose()
{
    // A second call would otherwise touch the already-disposed CancellationTokenSource and throw.
    if (Interlocked.Exchange(ref _disposed, 1) != 0)
    {
        return;
    }

    _changeRegistration?.Dispose();
    _disposalCts.Cancel();

    try
    {
        _debounceLoop.Wait(TimeSpan.FromSeconds(2));
    }
    catch
    {
        // Best effort: Dispose must not throw because the loop ended badly.
    }

    _disposalCts.Dispose();
    _applySemaphore.Dispose();
}
|
||||
|
||||
// ── Logging ───────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Source-generated logger for a reload that was applied: emits event 60 at Information
/// level with the number of PLCs added, removed, restarted and reseated, plus the number
/// of global tag addresses that changed.
/// </summary>
[LoggerMessage(EventId = 60, EventName = "mbproxy.config.reload.applied",
|
||||
Level = LogLevel.Information,
|
||||
Message = "Config reload applied — PlcsAdded={PlcsAdded} PlcsRemoved={PlcsRemoved} " +
|
||||
"PlcsRestarted={PlcsRestarted} PlcsReseated={PlcsReseated} GlobalTagDelta={GlobalTagDelta}")]
|
||||
private static partial void LogReloadApplied(
|
||||
ILogger logger, int plcsAdded, int plcsRemoved, int plcsRestarted, int plcsReseated, int globalTagDelta);
|
||||
|
||||
/// <summary>
/// Source-generated log call (event 61, Error) emitted when a configuration reload is
/// rejected; <paramref name="errors"/> carries the rejection reasons as a single string.
/// </summary>
[LoggerMessage(
    EventId = 61,
    EventName = "mbproxy.config.reload.rejected",
    Level = LogLevel.Error,
    Message = "Config reload rejected — Errors={Errors}")]
private static partial void LogReloadRejected(
    ILogger logger,
    string errors);
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
using Mbproxy.Bcd;
|
||||
using Mbproxy.Options;
|
||||
|
||||
namespace Mbproxy.Configuration;
|
||||
|
||||
/// <summary>
|
||||
/// Immutable record describing what needs to change between two <see cref="MbproxyOptions"/>
|
||||
/// snapshots. Computed by <see cref="Compute"/> — a pure function with no side effects.
|
||||
///
|
||||
/// <para><b>PLC identity is keyed on <c>Name</c>, not <c>ListenPort</c>.</b>
|
||||
/// A PLC whose <c>ListenPort</c> changes is still the same PLC (treated as a restart).
|
||||
/// A PLC whose <c>Name</c> changes is treated as remove-the-old + add-the-new.</para>
|
||||
///
|
||||
/// <para><b>Reseat vs. Restart</b>:
|
||||
/// <list type="bullet">
|
||||
/// <item><see cref="ToRestart"/> — PLC host, ListenPort, or backend Port changed.
|
||||
/// The supervisor must stop and start (new TCP socket needed).</item>
|
||||
/// <item><see cref="ToReseat"/> — Only the resolved <see cref="BcdTagMap"/> changed
|
||||
/// (via global tag list or per-PLC overrides). The supervisor can keep its
|
||||
/// listener socket; only the context needs a map swap.</item>
|
||||
/// </list>
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public sealed record ReloadPlan(
    IReadOnlyList<PlcOptions> ToAdd,
    IReadOnlyList<string> ToRemove,                          // PLC names
    IReadOnlyList<(string Name, PlcOptions New)> ToRestart,  // network identity changed
    IReadOnlyList<(string Name, BcdTagMap NewMap)> ToReseat, // only tag map changed
    ConnectionOptions Connection)
{
    /// <summary>
    /// Builds the plan that turns <paramref name="current"/> into <paramref name="next"/>.
    /// Intended to run after <see cref="ReloadValidator.Validate"/> has accepted
    /// <paramref name="next"/>.
    /// </summary>
    /// <param name="current">The configuration snapshot currently in effect.</param>
    /// <param name="next">The incoming configuration snapshot.</param>
    /// <returns>
    /// A plan listing PLCs to add, remove, restart and reseat (matched by ordinal
    /// <c>Name</c>), carrying <paramref name="next"/>'s connection options.
    /// </returns>
    public static ReloadPlan Compute(MbproxyOptions current, MbproxyOptions next)
    {
        var oldPlcs = current.Plcs.ToDictionary(p => p.Name, StringComparer.Ordinal);
        var newPlcs = next.Plcs.ToDictionary(p => p.Name, StringComparer.Ordinal);

        // Names that only exist in the new snapshot are additions.
        var added = newPlcs
            .Where(entry => !oldPlcs.ContainsKey(entry.Key))
            .Select(entry => entry.Value)
            .ToList();

        var removed = new List<string>();
        var restarted = new List<(string, PlcOptions)>();
        var reseated = new List<(string, BcdTagMap)>();

        foreach (var (plcName, before) in oldPlcs)
        {
            // Gone from the new snapshot: remove.
            if (!newPlcs.TryGetValue(plcName, out var after))
            {
                removed.Add(plcName);
                continue;
            }

            // Any change to host, listen port or backend port requires new sockets: restart.
            bool sameEndpoints = before.Host == after.Host
                              && before.ListenPort == after.ListenPort
                              && before.Port == after.Port;
            if (!sameEndpoints)
            {
                restarted.Add((plcName, after));
                continue;
            }

            // Endpoints unchanged: resolve both tag maps and compare them structurally.
            // Validation has already run, so Build is not expected to report errors here.
            var mapBefore = BcdTagMapBuilder.Build(current.BcdTags, before.BcdTags).Map;
            var mapAfter = BcdTagMapBuilder.Build(next.BcdTags, after.BcdTags).Map;

            if (!TagMapsEqual(mapBefore, mapAfter))
            {
                reseated.Add((plcName, mapAfter));
            }

            // Identical maps: nothing to do for this PLC.
        }

        return new ReloadPlan(added, removed, restarted, reseated, next.Connection);
    }

    // ── Helpers ───────────────────────────────────────────────────────────────────────────

    /// <summary>
    /// Set-style comparison of two <see cref="BcdTagMap"/> instances: they are equal when
    /// they hold the same number of tags and every tag of <paramref name="a"/> is found in
    /// <paramref name="b"/> at the same address with the same width.
    /// </summary>
    private static bool TagMapsEqual(BcdTagMap a, BcdTagMap b)
    {
        if (a.Count != b.Count)
        {
            return false;
        }

        foreach (var candidate in a.All)
        {
            bool matches = b.TryGet(candidate.Address, out var counterpart)
                        && candidate.Width == counterpart.Width;
            if (!matches)
            {
                return false;
            }
        }

        return true;
    }
}
|
||||
@@ -0,0 +1,88 @@
|
||||
using Mbproxy.Bcd;
|
||||
using Mbproxy.Options;
|
||||
|
||||
namespace Mbproxy.Configuration;
|
||||
|
||||
/// <summary>
|
||||
/// Validates an incoming <see cref="MbproxyOptions"/> snapshot before any state mutation
|
||||
/// is attempted. All cross-PLC checks (uniqueness, port collisions) live here.
|
||||
/// Per-PLC tag-list well-formedness is delegated to <see cref="BcdTagMapBuilder.Build"/>.
|
||||
///
|
||||
/// <para>Usage:</para>
|
||||
/// <code>
|
||||
/// if (!ReloadValidator.Validate(next, out var errors))
|
||||
/// // log errors and abort reload
|
||||
/// </code>
|
||||
/// </summary>
|
||||
internal static class ReloadValidator
{
    /// <summary>
    /// Validates <paramref name="next"/> and reports every problem found.
    ///
    /// <para>Checks run in this order, and errors are reported in the same order:</para>
    /// <list type="number">
    ///   <item>PLC names are non-blank and unique (ordinal comparison).</item>
    ///   <item><c>ListenPort</c> values are within [1, 65535] and unique.</item>
    ///   <item><c>AdminPort</c> is within [1, 65535] and differs from every accepted <c>ListenPort</c>.</item>
    ///   <item><see cref="BcdTagMapBuilder.Build"/> reports no errors for any PLC.</item>
    /// </list>
    /// </summary>
    /// <param name="next">The incoming configuration snapshot.</param>
    /// <param name="errors">All validation messages; empty when the snapshot is valid.</param>
    /// <returns><c>true</c> when no errors were found.</returns>
    public static bool Validate(MbproxyOptions next, out IReadOnlyList<string> errors)
    {
        var problems = new List<string>();

        CheckNames(next.Plcs, problems);
        var portOwners = CheckListenPorts(next.Plcs, problems);
        CheckAdminPort(next.AdminPort, portOwners, problems);
        CheckTagMaps(next, problems);

        errors = problems;
        return problems.Count == 0;
    }

    /// <summary>True when <paramref name="port"/> lies in the TCP port range [1, 65535].</summary>
    private static bool IsValidPort(int port) => port is >= 1 and <= 65535;

    /// <summary>Check 1: every PLC name is non-blank and appears only once.</summary>
    private static void CheckNames(IReadOnlyList<PlcOptions> plcs, List<string> problems)
    {
        var taken = new HashSet<string>(StringComparer.Ordinal);

        for (int i = 0; i < plcs.Count; i++)
        {
            string plcName = plcs[i].Name;

            if (string.IsNullOrWhiteSpace(plcName))
            {
                problems.Add($"Plcs[{i}]: Name must be non-empty.");
                continue;
            }

            if (!taken.Add(plcName))
            {
                problems.Add($"Plcs[{i}]: Duplicate PLC name '{plcName}'.");
            }
        }
    }

    /// <summary>
    /// Check 2: every listen port is in range and unique. Returns the map of accepted
    /// port → owning PLC name so the admin-port check can detect collisions.
    /// </summary>
    private static Dictionary<int, string> CheckListenPorts(IReadOnlyList<PlcOptions> plcs, List<string> problems)
    {
        var owners = new Dictionary<int, string>(plcs.Count);

        foreach (var plc in plcs)
        {
            if (!IsValidPort(plc.ListenPort))
            {
                problems.Add($"Plc '{plc.Name}': ListenPort {plc.ListenPort} is out of range [1, 65535].");
            }
            else if (owners.TryGetValue(plc.ListenPort, out var firstOwner))
            {
                problems.Add($"Plc '{plc.Name}': Duplicate ListenPort {plc.ListenPort} " +
                             $"(already used by '{firstOwner}').");
            }
            else
            {
                owners.Add(plc.ListenPort, plc.Name);
            }
        }

        return owners;
    }

    /// <summary>Check 3: the admin port is in range and not claimed by any PLC listener.</summary>
    private static void CheckAdminPort(int adminPort, Dictionary<int, string> portOwners, List<string> problems)
    {
        if (!IsValidPort(adminPort))
        {
            problems.Add($"AdminPort {adminPort} is out of range [1, 65535].");
            return;
        }

        if (portOwners.TryGetValue(adminPort, out string? clashPlc))
        {
            problems.Add($"AdminPort {adminPort} collides with ListenPort of PLC '{clashPlc}'.");
        }
    }

    /// <summary>
    /// Check 4: the resolved tag map of every PLC builds without errors.
    /// <see cref="BcdTagMapBuilder.Build"/> stays the single source of truth for
    /// tag-list well-formedness; its rules are deliberately not duplicated here.
    /// </summary>
    private static void CheckTagMaps(MbproxyOptions next, List<string> problems)
    {
        foreach (var plc in next.Plcs)
        {
            var built = BcdTagMapBuilder.Build(next.BcdTags, plc.BcdTags);

            foreach (var err in built.Errors)
            {
                problems.Add($"Plc '{plc.Name}': BCD tag map error ({err.Kind}): {err.Message}");
            }
        }
    }
}
|
||||
@@ -0,0 +1,81 @@
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.Versioning;
|
||||
using Serilog.Core;
|
||||
using Serilog.Events;
|
||||
|
||||
namespace Mbproxy.Diagnostics;
|
||||
|
||||
/// <summary>
|
||||
/// Serilog sink that writes events at level Error and above to the Windows Event Log
|
||||
/// under source <c>mbproxy</c>.
|
||||
///
|
||||
/// <para>This sink is only active when the service is running as a Windows Service
|
||||
/// (<see cref="Microsoft.Extensions.Hosting.WindowsServices.WindowsServiceHelpers.IsWindowsService"/>
|
||||
/// returns <c>true</c>). Under <c>dotnet run</c> / test / interactive launch, the sink is
|
||||
/// a no-op so that the Event Log source registration (which requires admin rights) is not
|
||||
/// required in development.</para>
|
||||
///
|
||||
/// <para>The Event Log source <c>mbproxy</c> must be created by <c>install.ps1</c> before
|
||||
/// the service starts. The bridge does NOT attempt to create the source at runtime — the
|
||||
/// service account may not hold the required admin rights.</para>
|
||||
///
|
||||
/// <para>Messages are capped at 32 KB (the Windows Event Log single-entry limit).</para>
|
||||
/// </summary>
|
||||
[SupportedOSPlatform("windows")]
internal sealed class EventLogBridge : ILogEventSink
{
    private const string Source = "mbproxy";

    // Not referenced by this class; presumably the log the source is registered under
    // by install.ps1 — TODO confirm.
    private const string LogName = "Application";

    private const int MaxMessageBytes = 32 * 1024; // 32 KB Event Log limit

    private readonly bool _enabled;

    /// <summary>Creates the sink.</summary>
    /// <param name="enabled">When <c>false</c>, <see cref="Emit"/> ignores every event.</param>
    public EventLogBridge(bool enabled)
    {
        _enabled = enabled;
    }

    /// <summary>
    /// Writes <paramref name="logEvent"/> to the Windows Event Log when the sink is enabled,
    /// the event is at level Error or above, and the <c>mbproxy</c> source exists.
    /// Never throws for Event Log failures: they are reported to Serilog's
    /// <c>SelfLog</c> and otherwise ignored.
    /// </summary>
    public void Emit(LogEvent logEvent)
    {
        if (!_enabled || logEvent.Level < LogEventLevel.Error)
        {
            return;
        }

        try
        {
            // SourceExists itself can throw (e.g. SecurityException when the account cannot
            // search every event log), so it must sit inside the guarded region as well.
            if (!EventLog.SourceExists(Source))
            {
                return;
            }

            EventLog.WriteEntry(Source, BuildMessage(logEvent), ToEntryType(logEvent.Level));
        }
        catch (Exception ex)
        {
            // Must not crash the application or recurse into the logging pipeline.
            // SelfLog is a no-op unless an operator has enabled it.
            Serilog.Debugging.SelfLog.WriteLine(
                "EventLogBridge failed to write to the Windows Event Log: {0}", ex);
        }
    }

    /// <summary>
    /// Renders the event's message, appends the exception text when present, and caps the
    /// result so that its UTF-16 size does not exceed <see cref="MaxMessageBytes"/>.
    /// </summary>
    private static string BuildMessage(LogEvent logEvent)
    {
        string message = logEvent.RenderMessage();

        if (logEvent.Exception is not null)
        {
            message += Environment.NewLine + logEvent.Exception;
        }

        int maxChars = MaxMessageBytes / 2; // two bytes per UTF-16 code unit
        if (message.Length <= maxChars)
        {
            return message;
        }

        int keep = maxChars - 3; // leave room for the "..." marker

        // Do not cut between the two halves of a surrogate pair.
        if (char.IsHighSurrogate(message[keep - 1]))
        {
            keep--;
        }

        return message[..keep] + "...";
    }

    /// <summary>Maps a Serilog level to the corresponding Event Log entry type.</summary>
    private static EventLogEntryType ToEntryType(LogEventLevel level) => level switch
    {
        LogEventLevel.Fatal => EventLogEntryType.Error,
        LogEventLevel.Error => EventLogEntryType.Error,
        LogEventLevel.Warning => EventLogEntryType.Warning,
        _ => EventLogEntryType.Information,
    };
}
|
||||
@@ -0,0 +1,212 @@
|
||||
using System.Diagnostics;
|
||||
using Mbproxy.Admin;
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy;
|
||||
using Mbproxy.Proxy.Multiplexing;
|
||||
using Mbproxy.Proxy.Supervision;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace Mbproxy.Diagnostics;
|
||||
|
||||
// ── Testability interfaces ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
|
||||
/// Abstraction over a supervisor's stop operation and its multiplexer's in-flight count.
|
||||
/// Introduced so <see cref="ShutdownCoordinator"/> unit tests can inject fakes
|
||||
/// without needing a real <see cref="PlcListenerSupervisor"/>.
|
||||
///
|
||||
/// <para><b>Phase 9:</b> in-flight tracking is now per-multiplexer (the
|
||||
/// <see cref="CorrelationMap"/>) rather than per-pair. <see cref="InFlightCount"/>
|
||||
/// replaces <c>ActivePairs.IsProcessing</c> from the 1:1 model.</para>
|
||||
/// </summary>
|
||||
internal interface ISupervisorHandle
{
    /// <summary>
    /// Number of Modbus requests currently in flight on this PLC's multiplexed backend;
    /// zero when the multiplexer is idle.
    /// </summary>
    int InFlightCount { get; }

    /// <summary>Stops the underlying supervisor.</summary>
    /// <param name="ct">Token that bounds the stop operation.</param>
    Task StopAsync(CancellationToken ct);
}
|
||||
|
||||
/// <summary>
|
||||
/// Abstraction over the admin endpoint stop operation.
|
||||
/// </summary>
|
||||
internal interface IAdminEndpointHandle
{
    /// <summary>Stops the admin endpoint.</summary>
    /// <param name="ct">Token that bounds the stop operation.</param>
    Task StopAsync(CancellationToken ct);
}
|
||||
|
||||
/// <summary>
|
||||
/// Adapts a concrete <see cref="PlcListenerSupervisor"/> to <see cref="ISupervisorHandle"/>.
|
||||
/// </summary>
|
||||
internal sealed class PlcSupervisorHandle : ISupervisorHandle
{
    private readonly PlcListenerSupervisor _supervisor;

    /// <summary>Wraps <paramref name="supervisor"/>.</summary>
    public PlcSupervisorHandle(PlcListenerSupervisor supervisor)
    {
        _supervisor = supervisor;
    }

    /// <summary>
    /// Point-in-time in-flight request count, read from a fresh snapshot of the
    /// supervisor's current counters on every access.
    /// </summary>
    public int InFlightCount => (int)_supervisor.CurrentCounters.Snapshot().InFlightCount;

    /// <summary>Forwards to the wrapped supervisor's <c>StopAsync</c>.</summary>
    public Task StopAsync(CancellationToken ct)
    {
        return _supervisor.StopAsync(ct);
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Adapts <see cref="AdminEndpointHost"/> to <see cref="IAdminEndpointHandle"/>.
|
||||
/// </summary>
|
||||
internal sealed class AdminEndpointHandle : IAdminEndpointHandle
{
    private readonly AdminEndpointHost _host;

    /// <summary>Wraps <paramref name="host"/>.</summary>
    public AdminEndpointHandle(AdminEndpointHost host)
    {
        _host = host;
    }

    /// <summary>Forwards to the wrapped host's <c>StopAsync</c>.</summary>
    public Task StopAsync(CancellationToken ct)
    {
        return _host.StopAsync(ct);
    }
}
|
||||
|
||||
// ── ShutdownCoordinator ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
|
||||
/// Orchestrates graceful shutdown of the proxy service.
|
||||
///
|
||||
/// <para>Shutdown sequence:</para>
|
||||
/// <list type="number">
|
||||
/// <item>Stop accepting new upstream connections on all supervisors.</item>
|
||||
/// <item>Wait for in-flight Modbus requests to drain (polls
|
||||
/// <see cref="ISupervisorHandle.InFlightCount"/> across all supervisors) until
|
||||
/// <see cref="ConnectionOptions.GracefulShutdownTimeoutMs"/> expires.</item>
|
||||
/// <item>Stop the admin endpoint.</item>
|
||||
/// <item>Log <c>mbproxy.shutdown.complete</c> with <c>InFlightAtCancel</c> and <c>ElapsedMs</c>.</item>
|
||||
/// </list>
|
||||
///
|
||||
/// <para>This type is internal. It is registered in DI as a singleton and wired to
|
||||
/// <see cref="IHostApplicationLifetime.ApplicationStopping"/> in <c>Program.cs</c>.</para>
|
||||
/// </summary>
|
||||
internal sealed partial class ShutdownCoordinator
{
    // Resolved when ShutdownAsync runs, not at construction time, so supervisors that were
    // registered after this coordinator was created are still stopped and drained.
    private readonly Func<IReadOnlyList<ISupervisorHandle>> _supervisorSource;
    private readonly IAdminEndpointHandle _adminEndpoint;
    private readonly IOptions<MbproxyOptions> _options;
    private readonly ILogger<ShutdownCoordinator> _logger;

    /// <summary>
    /// Production constructor — wraps concrete types in their adapter handles.
    /// <paramref name="supervisors"/> is enumerated lazily, each time
    /// <see cref="ShutdownAsync"/> runs, rather than once here.
    /// </summary>
    public ShutdownCoordinator(
        IEnumerable<PlcListenerSupervisor> supervisors,
        AdminEndpointHost adminEndpoint,
        IOptions<MbproxyOptions> options,
        ILogger<ShutdownCoordinator> logger)
        : this(
            () => supervisors.Select(s => (ISupervisorHandle)new PlcSupervisorHandle(s)).ToList(),
            new AdminEndpointHandle(adminEndpoint),
            options,
            logger)
    {
    }

    /// <summary>
    /// Testability constructor — accepts abstractions so unit tests can inject fakes.
    /// </summary>
    internal ShutdownCoordinator(
        IReadOnlyList<ISupervisorHandle> supervisors,
        IAdminEndpointHandle adminEndpoint,
        IOptions<MbproxyOptions> options,
        ILogger<ShutdownCoordinator> logger)
        : this(() => supervisors, adminEndpoint, options, logger)
    {
    }

    /// <summary>Common constructor that both public entry points funnel into.</summary>
    private ShutdownCoordinator(
        Func<IReadOnlyList<ISupervisorHandle>> supervisorSource,
        IAdminEndpointHandle adminEndpoint,
        IOptions<MbproxyOptions> options,
        ILogger<ShutdownCoordinator> logger)
    {
        _supervisorSource = supervisorSource;
        _adminEndpoint = adminEndpoint;
        _options = options;
        _logger = logger;
    }

    /// <summary>
    /// Runs the graceful shutdown sequence: stop all supervisors, wait for in-flight
    /// requests to drain (bounded by the deadline), stop the admin endpoint, then log
    /// <c>mbproxy.shutdown.complete</c>.
    /// </summary>
    /// <param name="timeoutMs">
    /// Drain deadline in milliseconds. Any negative value (the default, -1) means
    /// "read <c>Connection.GracefulShutdownTimeoutMs</c> from options", which is the
    /// normal runtime path. Tests pass an explicit value.
    /// </param>
    /// <param name="hostCt">
    /// The host lifetime cancellation token. Not used to gate the drain loop — the
    /// coordinator manages its own deadline so it can log completion regardless.
    /// </param>
    public async Task ShutdownAsync(int timeoutMs = -1, CancellationToken hostCt = default)
    {
        int deadline = timeoutMs >= 0
            ? timeoutMs
            : _options.Value.Connection.GracefulShutdownTimeoutMs;

        var sw = Stopwatch.StartNew();

        // One snapshot for the whole sequence so stop and drain see the same supervisors.
        IReadOnlyList<ISupervisorHandle> supervisors = _supervisorSource();

        // ── Step 1: stop accepting new connections ────────────────────────────────────
        // Each stop is individually guarded, so one failing supervisor (even one that
        // throws synchronously) cannot prevent the others from being stopped.
        using var stopCts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
        await Task.WhenAll(supervisors.Select(s => StopQuietlyAsync(s, stopCts.Token)))
            .ConfigureAwait(false);

        // ── Step 2: wait for in-flight PDUs to drain ──────────────────────────────────
        using var drainCts = new CancellationTokenSource(TimeSpan.FromMilliseconds(deadline));

        int remaining = CountInFlight(supervisors);
        while (remaining > 0 && !drainCts.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(10, drainCts.Token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // Deadline expired mid-delay; the recount below captures what is left.
            }

            remaining = CountInFlight(supervisors);
        }

        // Zero when everything drained; otherwise the count still outstanding when the
        // deadline hit. This also covers a zero deadline and expiry between polls.
        int inFlightAtCancel = remaining;

        // ── Step 3: stop the admin endpoint ──────────────────────────────────────────
        // Admin is stopped AFTER listeners to preserve the ordering guarantee:
        // supervisors stop → drain → admin stops.
        try
        {
            using var adminCts = new CancellationTokenSource(TimeSpan.FromSeconds(2));
            await _adminEndpoint.StopAsync(adminCts.Token).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort.
        }

        // ── Step 4: log completion ────────────────────────────────────────────────────
        LogShutdownComplete(_logger, inFlightAtCancel, sw.ElapsedMilliseconds);
    }

    /// <summary>
    /// Stops one supervisor and swallows any failure, synchronous or asynchronous.
    /// Best-effort: individual supervisor failures must not abort shutdown.
    /// </summary>
    private static async Task StopQuietlyAsync(ISupervisorHandle supervisor, CancellationToken ct)
    {
        try
        {
            await supervisor.StopAsync(ct).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort.
        }
    }

    /// <summary>Sums <see cref="ISupervisorHandle.InFlightCount"/> over all supervisors.</summary>
    private static int CountInFlight(IReadOnlyList<ISupervisorHandle> supervisors)
    {
        int count = 0;
        foreach (var supervisor in supervisors)
        {
            count += supervisor.InFlightCount;
        }
        return count;
    }

    [LoggerMessage(EventId = 80, EventName = "mbproxy.shutdown.complete",
        Level = LogLevel.Information,
        Message = "Graceful shutdown complete: InFlightAtCancel={InFlightAtCancel} ElapsedMs={ElapsedMs}")]
    private static partial void LogShutdownComplete(ILogger logger, int inFlightAtCancel, long elapsedMs);
}
|
||||
@@ -0,0 +1,92 @@
|
||||
using Mbproxy.Admin;
|
||||
using Mbproxy.Configuration;
|
||||
using Mbproxy.Diagnostics;
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy;
|
||||
using Serilog;
|
||||
|
||||
namespace Mbproxy;
|
||||
|
||||
internal static class HostingExtensions
{
    /// <summary>
    /// Binds the <c>"Mbproxy"</c> configuration section to <see cref="MbproxyOptions"/>
    /// with validation on start, and registers the schema-level
    /// <see cref="MbproxyOptionsValidator"/>.
    ///
    /// Phase 06: also registers <see cref="ServiceCounters"/> and
    /// <see cref="ConfigReconciler"/> as singletons so they can be injected into
    /// <see cref="Proxy.ProxyWorker"/>.
    /// </summary>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static IHostApplicationBuilder AddMbproxyOptions(this IHostApplicationBuilder builder)
    {
        var services = builder.Services;

        services.AddOptions<MbproxyOptions>()
                .BindConfiguration("Mbproxy")
                .ValidateOnStart();

        services.AddSingleton<
            Microsoft.Extensions.Options.IValidateOptions<MbproxyOptions>,
            MbproxyOptionsValidator>();

        // Phase 06: service-wide counters (read by the Phase 07 status page).
        services.AddSingleton<ServiceCounters>();

        // Phase 06: hot-reload reconciler (singleton; subscribes to IOptionsMonitor.OnChange).
        services.AddSingleton<ConfigReconciler>();

        return builder;
    }

    /// <summary>
    /// Registers the Phase 07 admin endpoint services:
    /// <list type="bullet">
    ///   <item><see cref="AssemblyVersionAccessor"/> (singleton).</item>
    ///   <item><see cref="StatusSnapshotBuilder"/> (singleton).</item>
    ///   <item><see cref="AdminEndpointHost"/> (singleton, also exposed as a hosted service).</item>
    /// </list>
    /// Must be called after <see cref="AddMbproxyOptions"/> and after
    /// <c>AddHostedService&lt;ProxyWorker&gt;</c> (so ProxyWorker is available via DI).
    /// </summary>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static IHostApplicationBuilder AddMbproxyAdmin(this IHostApplicationBuilder builder)
    {
        var services = builder.Services;

        services.AddSingleton<AssemblyVersionAccessor>();
        services.AddSingleton<StatusSnapshotBuilder>();

        // The host is a plain singleton first so ShutdownCoordinator can take it as a direct
        // dependency; the hosted-service registration then reuses that same instance.
        services.AddSingleton<AdminEndpointHost>();
        services.AddHostedService(provider => provider.GetRequiredService<AdminEndpointHost>());

        return builder;
    }

    /// <summary>
    /// Builds the global Serilog logger from application configuration and plugs Serilog
    /// into the host's logging.
    ///
    /// <para>Phase 08: when <paramref name="addEventLogBridge"/> is <c>true</c> and the
    /// process runs on Windows, a <see cref="Diagnostics.EventLogBridge"/> sink is added
    /// for events at <see cref="Serilog.Events.LogEventLevel.Error"/> and above. The flag
    /// should only be set when running as a Windows Service.</para>
    /// </summary>
    /// <returns>The same <paramref name="builder"/>, for chaining.</returns>
    public static IHostApplicationBuilder AddMbproxySerilog(
        this IHostApplicationBuilder builder,
        bool addEventLogBridge = false)
    {
        var loggerConfig = new LoggerConfiguration()
            .ReadFrom.Configuration(builder.Configuration);

        if (addEventLogBridge && OperatingSystem.IsWindows())
        {
            var bridge = new EventLogBridge(enabled: true);
            loggerConfig = loggerConfig.WriteTo.Sink(bridge, Serilog.Events.LogEventLevel.Error);
        }

        Log.Logger = loggerConfig.CreateLogger();

        builder.Services.AddSerilog(dispose: true);

        return builder;
    }
}
|
||||
@@ -0,0 +1,57 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk.Worker">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<OutputType>Exe</OutputType>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<RootNamespace>Mbproxy</RootNamespace>
|
||||
<AssemblyName>Mbproxy</AssemblyName>
|
||||
<!-- Phase 08: Assembly version. CI can override via /p:InformationalVersion=... -->
|
||||
<InformationalVersion>1.0.0</InformationalVersion>
|
||||
</PropertyGroup>
|
||||
|
||||
<!-- Phase 08: single-file self-contained publish (Release only; Debug stays normal for fast iteration).
|
||||
NOTE: the resulting Mbproxy.exe is ~100 MB because the self-contained publish bundles the full
|
||||
.NET 10 + ASP.NET Core runtime. This exceeds the original 50 MB target in the phase spec;
|
||||
the runtime size is a fixed cost of self-contained deployment on .NET 10 with ASP.NET Core.
|
||||
Operators who need a smaller footprint can use a framework-dependent publish
|
||||
(dotnet publish -c Release -r win-x64 - -self-contained false /p:PublishSingleFile=true)
|
||||
if the target machine has .NET 10 installed. -->
|
||||
<PropertyGroup Condition="'$(Configuration)' == 'Release'">
|
||||
<PublishSingleFile>true</PublishSingleFile>
|
||||
<SelfContained>true</SelfContained>
|
||||
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
|
||||
<IncludeNativeLibrariesForSelfExtract>true</IncludeNativeLibrariesForSelfExtract>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- ASP.NET Core for the Phase 07 Kestrel-hosted admin endpoint. -->
|
||||
<FrameworkReference Include="Microsoft.AspNetCore.App" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- Microsoft.Extensions.Hosting is already included transitively via
|
||||
Microsoft.AspNetCore.App — do not re-add it explicitly. -->
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting.WindowsServices" Version="10.0.8" />
|
||||
<PackageReference Include="Serilog.Extensions.Hosting" Version="10.0.0" />
|
||||
<PackageReference Include="Serilog.Settings.Configuration" Version="10.0.0" />
|
||||
<PackageReference Include="Serilog.Sinks.Console" Version="6.1.1" />
|
||||
<PackageReference Include="Serilog.Sinks.File" Version="7.0.0" />
|
||||
<!-- Referenced now so phase 04/05 don't need to touch this csproj; usage is deferred -->
|
||||
<PackageReference Include="Polly" Version="8.6.6" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- Allow test project to access internal types (HeartbeatWorker, HostingExtensions, etc.) -->
|
||||
<InternalsVisibleTo Include="Mbproxy.Tests" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Content Update="appsettings.json">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</Content>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace Mbproxy.Options;
|
||||
|
||||
/// <summary>Configuration holder for the global list of BCD tags.</summary>
public sealed class BcdTagListOptions
{
    /// <summary>Global BCD tag entries. Empty by default.</summary>
    public IReadOnlyList<BcdTagOptions> Global { get; init; } = [];
}
|
||||
|
||||
/// <summary>Per-PLC adjustments to the BCD tag list.</summary>
public sealed class PlcBcdOverrides
{
    /// <summary>Tag entries listed under <c>Add</c>. Empty by default.</summary>
    public IReadOnlyList<BcdTagOptions> Add { get; init; } = [];

    /// <summary>Addresses listed under <c>Remove</c>. Empty by default.</summary>
    public IReadOnlyList<ushort> Remove { get; init; } = [];
}
|
||||
@@ -0,0 +1,7 @@
|
||||
namespace Mbproxy.Options;
|
||||
|
||||
/// <summary>A single BCD tag entry: an address and the tag's width.</summary>
public sealed class BcdTagOptions
{
    /// <summary>Address of the tag.</summary>
    public ushort Address { get; init; }

    /// <summary>Width of the tag; expected to be 16 or 32.</summary>
    public byte Width { get; init; }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace Mbproxy.Options;
|
||||
|
||||
/// <summary>Timeout settings for backend connections and graceful shutdown.</summary>
public sealed class ConnectionOptions
{
    /// <summary>Backend connect timeout in milliseconds. Default: 3000.</summary>
    public int BackendConnectTimeoutMs { get; init; } = 3_000;

    /// <summary>Backend request timeout in milliseconds. Default: 3000.</summary>
    public int BackendRequestTimeoutMs { get; init; } = 3_000;

    /// <summary>
    /// Upper bound, in milliseconds, on how long graceful shutdown waits for in-flight
    /// PDUs to complete before cancelling them. Default: 10000 (10 s).
    /// </summary>
    public int GracefulShutdownTimeoutMs { get; init; } = 10_000;
}
|
||||
@@ -0,0 +1,47 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace Mbproxy.Options;
|
||||
|
||||
/// <summary>Root options object for the proxy service.</summary>
public sealed class MbproxyOptions
{
    /// <summary>Global BCD tag list. Defaults to an empty list holder.</summary>
    public BcdTagListOptions BcdTags { get; init; } = new BcdTagListOptions();

    /// <summary>Configured PLCs. Empty by default.</summary>
    public IReadOnlyList<PlcOptions> Plcs { get; init; } = [];

    /// <summary>Port of the admin endpoint. Default: 8080.</summary>
    public int AdminPort { get; init; } = 8080;

    /// <summary>Connection and shutdown timeouts.</summary>
    public ConnectionOptions Connection { get; init; } = new ConnectionOptions();

    /// <summary>Retry and recovery profiles.</summary>
    public ResilienceOptions Resilience { get; init; } = new ResilienceOptions();
}
|
||||
|
||||
/// <summary>
|
||||
/// Schema-level validation for <see cref="MbproxyOptions"/>.
|
||||
/// Business-rule validation (duplicate addresses, port conflicts) is deferred to phase 06.
|
||||
/// </summary>
|
||||
public sealed class MbproxyOptionsValidator : IValidateOptions<MbproxyOptions>
{
    /// <summary>
    /// Verifies that every global tag and every per-PLC <c>BcdTags.Add</c> tag has a
    /// width of 16 or 32.
    /// </summary>
    /// <param name="name">Options instance name (not used).</param>
    /// <param name="options">The options instance to validate.</param>
    /// <returns>
    /// <see cref="ValidateOptionsResult.Success"/> when every width is valid; otherwise a
    /// failure carrying one message per offending tag.
    /// </returns>
    public ValidateOptionsResult Validate(string? name, MbproxyOptions options)
    {
        static bool IsSupportedWidth(byte width) => width is 16 or 32;

        var failures = new List<string>();

        // Global tag list.
        foreach (var tag in options.BcdTags.Global)
        {
            if (!IsSupportedWidth(tag.Width))
            {
                failures.Add($"BcdTags.Global: Address {tag.Address} has invalid Width {tag.Width}; must be 16 or 32.");
            }
        }

        // Per-PLC additions; PLCs without overrides are skipped.
        for (int i = 0; i < options.Plcs.Count; i++)
        {
            var plc = options.Plcs[i];
            if (plc.BcdTags is null)
            {
                continue;
            }

            foreach (var tag in plc.BcdTags.Add)
            {
                if (!IsSupportedWidth(tag.Width))
                {
                    failures.Add($"Plcs[{i}] ({plc.Name}): BcdTags.Add Address {tag.Address} has invalid Width {tag.Width}; must be 16 or 32.");
                }
            }
        }

        if (failures.Count == 0)
        {
            return ValidateOptionsResult.Success;
        }

        return ValidateOptionsResult.Fail(failures);
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
namespace Mbproxy.Options;
|
||||
|
||||
/// <summary>Configuration for one PLC.</summary>
public sealed class PlcOptions
{
    /// <summary>Name of the PLC. Empty by default.</summary>
    public string Name { get; init; } = "";

    /// <summary>Listen port configured for this PLC.</summary>
    public int ListenPort { get; init; }

    /// <summary>Host of the PLC. Empty by default.</summary>
    public string Host { get; init; } = "";

    /// <summary>
    /// Backend Modbus TCP port on the PLC. Defaults to 502 (standard Modbus TCP port).
    /// </summary>
    public int Port { get; init; } = 502;

    /// <summary>Optional per-PLC BCD tag overrides; <c>null</c> when none are configured.</summary>
    public PlcBcdOverrides? BcdTags { get; init; }
}
|
||||
@@ -0,0 +1,23 @@
|
||||
namespace Mbproxy.Options;
|
||||
|
||||
/// <summary>Retry and recovery settings.</summary>
public sealed class ResilienceOptions
{
    /// <summary>
    /// Retry profile for backend connects. Default: 3 attempts with backoffs of
    /// 100, 500 and 2000 ms.
    /// </summary>
    public RetryProfile BackendConnect { get; init; } = new RetryProfile
    {
        MaxAttempts = 3,
        BackoffMs = [100, 500, 2000],
    };

    /// <summary>
    /// Recovery profile for listeners. Default: initial backoffs of 1, 2, 5, 15 and 30 s,
    /// then a steady-state value of 30 s.
    /// </summary>
    public RecoveryProfile ListenerRecovery { get; init; } = new RecoveryProfile
    {
        InitialBackoffMs = [1000, 2000, 5000, 15000, 30000],
        SteadyStateMs = 30000,
    };
}
|
||||
|
||||
/// <summary>A bounded retry policy: an attempt limit plus a list of backoff delays.</summary>
public sealed class RetryProfile
{
    /// <summary>Maximum number of attempts.</summary>
    public int MaxAttempts { get; init; }

    /// <summary>Backoff delays in milliseconds. Empty by default.</summary>
    public IReadOnlyList<int> BackoffMs { get; init; } = [];
}
|
||||
|
||||
/// <summary>A recovery policy: a list of initial backoff delays followed by a steady-state value.</summary>
public sealed class RecoveryProfile
{
    /// <summary>Initial backoff delays in milliseconds. Empty by default.</summary>
    public IReadOnlyList<int> InitialBackoffMs { get; init; } = [];

    /// <summary>Steady-state value in milliseconds.</summary>
    public int SteadyStateMs { get; init; }
}
|
||||
@@ -0,0 +1,68 @@
|
||||
using Mbproxy;
|
||||
using Mbproxy.Admin;
|
||||
using Mbproxy.Diagnostics;
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy;
|
||||
using Microsoft.Extensions.Hosting.WindowsServices;
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
var builder = Host.CreateApplicationBuilder(args);

// Windows Service support; no-op when running under dotnet run / console.
builder.Services.AddWindowsService();

// Structured config, Serilog, and typed options.
// Phase 08: the EventLogBridge is wired only when actually running as a Windows Service.
builder.AddMbproxySerilog(addEventLogBridge: WindowsServiceHelpers.IsWindowsService());
builder.AddMbproxyOptions();

// PDU pipeline: BcdPduPipeline is stateless (Phase 9: per-call correlation flows through
// PerPlcContext.CurrentRequest set by the multiplexer), so one shared instance is enough.
builder.Services.AddSingleton<IPduPipeline, BcdPduPipeline>();

// Proxy worker — owns all PlcListeners and logs mbproxy.startup.ready.
// Registered as a singleton first so StatusSnapshotBuilder can inject ProxyWorker directly
// and reach its Supervisors dictionary; the hosted-service registration reuses that instance.
builder.Services.AddSingleton<ProxyWorker>();
builder.Services.AddHostedService(provider => provider.GetRequiredService<ProxyWorker>());

// Phase 07: admin endpoint (Kestrel read-only status page).
builder.AddMbproxyAdmin();

// Phase 08: graceful-shutdown coordinator, built from ProxyWorker.Supervisors.
// Registered as a singleton so Program can resolve it after the host is built.
// NOTE(review): Supervisors.Values is read when this factory runs, and Supervisors is
// populated by ProxyWorker.StartAsync — confirm the coordinator still observes every
// supervisor if it is resolved before the hosted services have started.
builder.Services.AddSingleton<ShutdownCoordinator>(provider => new ShutdownCoordinator(
    provider.GetRequiredService<ProxyWorker>().Supervisors.Values,
    provider.GetRequiredService<AdminEndpointHost>(),
    provider.GetRequiredService<IOptions<MbproxyOptions>>(),
    provider.GetRequiredService<ILogger<ShutdownCoordinator>>()));

var host = builder.Build();
|
||||
|
||||
// Wire ApplicationStopping → ShutdownCoordinator BEFORE hosted services start.
|
||||
// The callback fires when the host signals stop; it drains in-flight PDUs and stops
|
||||
// the admin endpoint before the host tears down individual services.
|
||||
var lifetime = host.Services.GetRequiredService<IHostApplicationLifetime>();
|
||||
lifetime.ApplicationStopping.Register(() =>
|
||||
{
|
||||
// IHostApplicationLifetime callbacks do not support async — block briefly.
|
||||
// The coordinator manages its own drain deadline so the host is not held indefinitely.
|
||||
var coordinator = host.Services.GetRequiredService<ShutdownCoordinator>();
|
||||
coordinator.ShutdownAsync().GetAwaiter().GetResult();
|
||||
});
|
||||
|
||||
await host.RunAsync();
|
||||
@@ -0,0 +1,460 @@
|
||||
using Mbproxy.Bcd;
|
||||
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
|
||||
/// BCD-rewriting PDU pipeline. Registered as the singleton <see cref="IPduPipeline"/>
|
||||
/// in production (replaces <see cref="NoopPduPipeline"/> from Phase 03).
|
||||
///
|
||||
/// FC scope (per design.md):
|
||||
/// FC03 / FC04 response — decode covered BCD slots from raw nibbles → binary integer.
|
||||
/// FC06 request — encode binary integer → BCD nibbles.
|
||||
/// FC16 request — per-register over the configured slots.
|
||||
/// All other FCs — pass through byte-for-byte.
|
||||
///
|
||||
/// MBAP transparency contract: the MBAP length field is NEVER modified. Re-encoded slots
|
||||
/// are the same byte width as the originals (ushort → ushort), so the PDU length is stable.
|
||||
///
|
||||
/// <para><b>Phase 9 — request correlation:</b> FC03/FC04 responses do not carry the
|
||||
/// original start address. The multiplexer builds an <see cref="Multiplexing.InFlightRequest"/>
|
||||
/// on the request path, stores it in its <see cref="Multiplexing.CorrelationMap"/>, and
|
||||
/// attaches it to the per-call <see cref="PerPlcContext.CurrentRequest"/> on the response
|
||||
/// path. The rewriter consumes <c>CurrentRequest</c> instead of a per-pair last-request
|
||||
/// slot, so concurrent responses from different upstream clients each decode against
|
||||
/// their own request range without cross-talk.</para>
|
||||
///
|
||||
/// <para>This class is stateless. All per-call state arrives via <see cref="PduContext"/>
|
||||
/// (specifically <see cref="PerPlcContext.CurrentRequest"/> on response). It is safe to
|
||||
/// call concurrently from multiple upstream-read tasks and the single backend reader task.</para>
|
||||
/// </summary>
|
||||
internal sealed class BcdPduPipeline : IPduPipeline
|
||||
{
|
||||
// ── IPduPipeline.Process ─────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Entry point for one PDU. Counts the PDU, then routes it to the request or response
/// handler according to <paramref name="direction"/>. <paramref name="pdu"/> may be
/// rewritten in place; <paramref name="mbapHeader"/> is not read here.
/// </summary>
public void Process(
    MbapDirection direction,
    ReadOnlySpan<byte> mbapHeader,
    Span<byte> pdu,
    PduContext context)
{
    // The tag map, counters, and logger live on PerPlcContext. Any other context type
    // (for example a plain PduContext from a unit test), or a PDU without even a
    // function-code byte, is left untouched.
    if (context is not PerPlcContext plc || pdu.IsEmpty)
    {
        return;
    }

    byte functionCode = pdu[0];

    // Both counters are bumped before dispatch, so every PDU reaching this point is tallied.
    plc.Counters.IncrementPdusForwarded();
    plc.Counters.IncrementFcCount(functionCode);

    switch (direction)
    {
        case MbapDirection.RequestToBackend:
            ProcessRequest(functionCode, pdu, plc);
            break;

        default:
            ProcessResponse(functionCode, pdu, plc);
            break;
    }
}
|
||||
|
||||
// ── Request processing (FC06 / FC16) ────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Routes a request PDU by function code: FC06 (0x06) and FC16 (0x10) go to their
/// rewriters; every other function code is left exactly as received.
/// </summary>
private static void ProcessRequest(byte fc, Span<byte> pdu, PerPlcContext ctx)
{
    if (fc == 0x06)
    {
        ProcessFc06Request(pdu, ctx);
    }
    else if (fc == 0x10)
    {
        ProcessFc16Request(pdu, ctx);
    }
}
|
||||
|
||||
/// <summary>
/// FC06 Write Single Register request: [fc=06][addrHi][addrLo][valHi][valLo].
/// When the address is a configured 16-bit BCD tag, the value bytes are replaced in place
/// with the BCD encoding of the client's integer. Writes that touch only one half of a
/// 32-bit tag are logged and counted as partial BCD and forwarded unchanged. Values the
/// codec rejects are logged and counted as invalid BCD and forwarded unchanged.
/// </summary>
private static void ProcessFc06Request(Span<byte> pdu, PerPlcContext ctx)
{
    // fc(1) + address(2) + value(2)
    const int RequestLength = 5;
    if (pdu.Length < RequestLength)
    {
        return;
    }

    ushort address = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(1, 2));
    ushort value = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(3, 2));

    if (ctx.TagMap.TryGet(address, out var tag))
    {
        if (tag.IsThirtyTwoBit)
        {
            // FC06 carries exactly one register, so this hits only the low half of the pair.
            ReportPartialWrite();
            return;
        }

        ushort bcd;
        try
        {
            bcd = BcdCodec.Encode16(value);
        }
        catch (ArgumentOutOfRangeException)
        {
            // The codec refused the value (not representable as 4-digit BCD): forward raw.
            RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, address, value, "Write");
            ctx.Counters.IncrementInvalidBcd();
            return;
        }

        System.Buffers.Binary.BinaryPrimitives.WriteUInt16BigEndian(pdu.Slice(3, 2), bcd);
        ctx.Counters.AddRewrittenSlots(1);
        return;
    }

    // No tag is keyed at this exact address. It may still be the HIGH register of a
    // 32-bit pair keyed one address lower; a one-register range query exposes that as a
    // hit with a negative word offset.
    if (!ctx.TagMap.TryGetForRange(address, 1, out var overlaps) || overlaps.Count <= 0)
    {
        return;
    }

    foreach (var overlap in overlaps)
    {
        if (overlap.Tag.IsThirtyTwoBit && overlap.OffsetWords < 0)
        {
            ReportPartialWrite();
            return;
        }
    }

    // Shared by both partial-overlap paths: warn, count, and let the PDU pass through.
    void ReportPartialWrite()
    {
        RewriterLogEvents.PartialBcd(ctx.Logger, ctx.PlcName, address, address, 1);
        ctx.Counters.IncrementPartialBcd();
    }
}
|
||||
|
||||
/// <summary>
/// FC16 Write Multiple Registers request:
///   [fc=10][startHi][startLo][qtyHi][qtyLo][byteCount][reg0Hi][reg0Lo]...
/// For every configured BCD tag inside the written range, the client's register words are
/// replaced in place with their BCD encoding. 32-bit tags only partly covered by the range
/// are logged and counted as partial BCD and left raw. Values the codec rejects are logged
/// and counted as invalid BCD and left raw. Register slots that would fall beyond the end
/// of the PDU are skipped.
/// </summary>
private static void ProcessFc16Request(Span<byte> pdu, PerPlcContext ctx)
{
    // fc(1) + start(2) + qty(2) + byteCount(1); register data begins right after.
    const int DataStart = 6;
    if (pdu.Length < DataStart)
    {
        return;
    }

    ushort startAddress = ReadWord(pdu, 1);
    ushort qty = ReadWord(pdu, 3);
    // pdu[5] (byte count) is not consulted; each slot is bounds-checked against the PDU instead.

    if (!ctx.TagMap.TryGetForRange(startAddress, qty, out var hits))
    {
        return; // nothing configured inside this range
    }

    foreach (var hit in hits)
    {
        var tag = hit.Tag;
        int firstWord = hit.OffsetWords;

        if (!tag.IsThirtyTwoBit)
        {
            // 16-bit tag. The range test is defensive; the map should only report hits inside.
            if (firstWord < 0 || firstWord >= qty)
            {
                continue;
            }

            int slotAt = DataStart + firstWord * 2;
            if (slotAt + 2 > pdu.Length)
            {
                continue;
            }

            ushort clientWord = ReadWord(pdu, slotAt);

            ushort bcdWord;
            try
            {
                bcdWord = BcdCodec.Encode16(clientWord);
            }
            catch (ArgumentOutOfRangeException)
            {
                RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address,
                    clientWord, "Write");
                ctx.Counters.IncrementInvalidBcd();
                continue;
            }

            WriteWord(pdu, slotAt, bcdWord);
            ctx.Counters.AddRewrittenSlots(1);
            continue;
        }

        // 32-bit tag: the low register (firstWord) and the high register (firstWord + 1)
        // must both lie inside [0, qty).
        int secondWord = firstWord + 1;
        bool pairInside = firstWord >= 0 && firstWord < qty
                       && secondWord >= 0 && secondWord < qty;
        if (!pairInside)
        {
            // Partial overlap: one register of the pair is outside the written range.
            RewriterLogEvents.PartialBcd(ctx.Logger, ctx.PlcName,
                tag.Address, startAddress, qty);
            ctx.Counters.IncrementPartialBcd();
            continue;
        }

        int lowAt = DataStart + firstWord * 2;
        int highAt = DataStart + secondWord * 2;
        if (lowAt + 2 > pdu.Length || highAt + 2 > pdu.Length)
        {
            continue; // the PDU is shorter than its qty implies — skip this slot
        }

        // CDAB layout: the register at Address holds the low four decimal digits and the
        // register at Address+1 the high four. The two client words are combined as
        // high * 10_000 + low before encoding.
        // NOTE(review): each word is not range-checked on its own, so a low word above
        // 9999 carries into the high digits — confirm that is intended.
        ushort clientLow = ReadWord(pdu, lowAt);
        ushort clientHigh = ReadWord(pdu, highAt);
        int combined = clientHigh * 10_000 + clientLow;

        ushort bcdLow, bcdHigh;
        try
        {
            (bcdLow, bcdHigh) = BcdCodec.Encode32(combined);
        }
        catch (ArgumentOutOfRangeException)
        {
            // Only the low client word is reported in the log event.
            RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address,
                clientLow, "Write");
            ctx.Counters.IncrementInvalidBcd();
            continue;
        }

        WriteWord(pdu, lowAt, bcdLow);
        WriteWord(pdu, highAt, bcdHigh);
        ctx.Counters.AddRewrittenSlots(2);
    }

    // Big-endian 16-bit accessors over the PDU bytes.
    static ushort ReadWord(ReadOnlySpan<byte> bytes, int at)
        => System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(bytes.Slice(at, 2));

    static void WriteWord(Span<byte> bytes, int at, ushort word)
        => System.Buffers.Binary.BinaryPrimitives.WriteUInt16BigEndian(bytes.Slice(at, 2), word);
}
|
||||
|
||||
// ── Response processing (FC03 / FC04) ───────────────────────────────────
|
||||
|
||||
/// <summary>
/// Handles a response PDU.
/// <list type="bullet">
///   <item><description>Exception responses (high bit of FC set) are logged, counted, and
///   passed through.</description></item>
///   <item><description>FC06 echoes are delegated to <c>ProcessFc06Response</c>.</description></item>
///   <item><description>FC03 / FC04 responses have their covered BCD slots decoded in
///   place, using the start address and quantity from <c>ctx.CurrentRequest</c>.</description></item>
///   <item><description>Everything else, FC16 included, passes through unchanged.</description></item>
/// </list>
/// </summary>
private static void ProcessResponse(byte fc, Span<byte> pdu, PerPlcContext ctx)
{
    // Exception response: [fc|0x80][exceptionCode].
    if ((fc & 0x80) != 0)
    {
        byte exceptionCode = 0;
        if (pdu.Length >= 2)
        {
            exceptionCode = pdu[1];
        }

        RewriterLogEvents.ExceptionPassthrough(ctx.Logger, ctx.PlcName, (byte)(fc & 0x7F), exceptionCode);
        ctx.Counters.IncrementBackendException(exceptionCode);
        return; // forwarded raw
    }

    if (fc == 0x06)
    {
        // The FC06 echo carries the value as written to the PLC; the helper decodes it
        // back so the client sees the integer it sent.
        ProcessFc06Response(pdu, ctx);
        return;
    }

    if (fc != 0x03 && fc != 0x04)
    {
        // FC16 echoes only start/qty (no register data); other FCs are not rewritten.
        return;
    }

    // FC03/04 response: [fc][byteCount][reg0Hi][reg0Lo]...
    // The start address is not part of the response. It comes from the correlated request
    // on ctx.CurrentRequest. Without one (for example the pipeline invoked directly with
    // no correlation), or when that request is not itself FC03/04, decoding is not safe
    // and the bytes pass through.
    if (ctx.CurrentRequest is not { Fc: 0x03 or 0x04 } request)
    {
        return;
    }

    ushort startAddress = request.StartAddress;
    ushort qty = request.Qty;

    if (pdu.Length < 2)
    {
        return;
    }

    // Use the smaller of the requested quantity and the words the response claims to
    // carry, so a mismatch cannot push slot offsets past the data.
    int wordsInResponse = pdu[1] / 2;
    ushort effectiveQty = (ushort)Math.Min(qty, wordsInResponse);

    if (!ctx.TagMap.TryGetForRange(startAddress, effectiveQty, out var hits))
    {
        return;
    }

    const int DataStart = 2; // register data follows [fc][byteCount]

    foreach (var hit in hits)
    {
        var tag = hit.Tag;
        int firstWord = hit.OffsetWords;

        if (!tag.IsThirtyTwoBit)
        {
            // 16-bit tag.
            if (firstWord < 0 || firstWord >= effectiveQty)
            {
                continue;
            }

            int slotAt = DataStart + firstWord * 2;
            if (slotAt + 2 > pdu.Length)
            {
                continue;
            }

            ushort raw = ReadWord(pdu, slotAt);

            int value;
            try
            {
                value = BcdCodec.Decode16(raw);
            }
            catch (FormatException)
            {
                RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address, raw, "Read");
                ctx.Counters.IncrementInvalidBcd();
                continue;
            }

            WriteWord(pdu, slotAt, (ushort)value);
            ctx.Counters.AddRewrittenSlots(1);
            continue;
        }

        // 32-bit tag: both registers of the pair must lie inside the effective range.
        int secondWord = firstWord + 1;
        bool pairInside = firstWord >= 0 && firstWord < effectiveQty
                       && secondWord >= 0 && secondWord < effectiveQty;
        if (!pairInside)
        {
            // The log event reports the requested quantity, not the effective one.
            RewriterLogEvents.PartialBcd(ctx.Logger, ctx.PlcName,
                tag.Address, startAddress, qty);
            ctx.Counters.IncrementPartialBcd();
            continue;
        }

        int lowAt = DataStart + firstWord * 2;
        int highAt = DataStart + secondWord * 2;
        if (lowAt + 2 > pdu.Length || highAt + 2 > pdu.Length)
        {
            continue;
        }

        // CDAB: Address = low register (low 4 BCD digits), Address+1 = high register.
        ushort rawLow = ReadWord(pdu, lowAt);
        ushort rawHigh = ReadWord(pdu, highAt);

        int combined;
        try
        {
            combined = BcdCodec.Decode32(rawLow, rawHigh);
        }
        catch (FormatException)
        {
            // Report the low register when it holds a bad nibble, otherwise the high one.
            if (HasBadNibble(rawLow))
            {
                RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.Address, rawLow, "Read");
            }
            else
            {
                RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, tag.HighRegister, rawHigh, "Read");
            }

            ctx.Counters.IncrementInvalidBcd();
            continue;
        }

        // Hand the value back split the same way: low four decimal digits at Address,
        // high four at Address+1.
        WriteWord(pdu, lowAt, (ushort)(combined % 10_000));
        WriteWord(pdu, highAt, (ushort)(combined / 10_000));
        ctx.Counters.AddRewrittenSlots(2);
    }

    // Big-endian 16-bit accessors over the PDU bytes.
    static ushort ReadWord(ReadOnlySpan<byte> bytes, int at)
        => System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(bytes.Slice(at, 2));

    static void WriteWord(Span<byte> bytes, int at, ushort word)
        => System.Buffers.Binary.BinaryPrimitives.WriteUInt16BigEndian(bytes.Slice(at, 2), word);
}
|
||||
|
||||
/// <summary>
/// FC06 response: [fc=06][addrHi][addrLo][valHi][valLo] — an echo of the register address
/// and the value as the PLC received it. For a configured 16-bit BCD tag, the echoed BCD
/// nibbles are decoded in place back to an integer, so the client sees the value it sent
/// and library validation (e.g. NModbus echo-check) passes. 32-bit tags and unconfigured
/// addresses pass through unchanged.
/// NOTE(review): the decode is applied to every 16-bit-tag echo, including the echo of a
/// request that was forwarded raw because the codec rejected its value. In that case the
/// client may receive a value that differs from what it sent — confirm whether that is
/// acceptable.
/// </summary>
private static void ProcessFc06Response(Span<byte> pdu, PerPlcContext ctx)
{
    // fc(1) + address(2) + value(2)
    const int ResponseLength = 5;
    if (pdu.Length < ResponseLength)
    {
        return;
    }

    ushort address = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(1, 2));
    ushort echoed = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(pdu.Slice(3, 2));

    // Unconfigured address, or the low half of a 32-bit pair (the request path already
    // warned about that partial write): nothing to decode.
    if (!ctx.TagMap.TryGet(address, out var tag) || tag.IsThirtyTwoBit)
    {
        return;
    }

    int value;
    try
    {
        value = BcdCodec.Decode16(echoed);
    }
    catch (FormatException)
    {
        RewriterLogEvents.InvalidBcd(ctx.Logger, ctx.PlcName, address, echoed, "Read");
        ctx.Counters.IncrementInvalidBcd();
        return;
    }

    System.Buffers.Binary.BinaryPrimitives.WriteUInt16BigEndian(pdu.Slice(3, 2), (ushort)value);

    // RewrittenSlots is deliberately left alone: the request path already counted this
    // slot, and counting the echo too would double-count it.
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Returns true when at least one of the four nibbles of <paramref name="raw"/> is
/// 0xA or greater.
/// </summary>
private static bool HasBadNibble(ushort raw)
{
    // Walk the four nibbles from least to most significant.
    for (int shift = 0; shift < 16; shift += 4)
    {
        if (((raw >> shift) & 0xF) >= 0xA)
        {
            return true;
        }
    }

    return false;
}
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
/// Which way a Modbus PDU is travelling when the pipeline sees it.
/// </summary>
public enum MbapDirection
{
    /// <summary>Request frame: upstream client → backend PLC.</summary>
    RequestToBackend = 0,

    /// <summary>Response frame: backend PLC → upstream client.</summary>
    ResponseToClient = 1,
}
|
||||
|
||||
/// <summary>
/// Base context handed to every PDU pipeline call. On its own it carries only
/// <see cref="PlcName"/> (Phase 03). <see cref="PerPlcContext"/> builds on it with the
/// BcdTagMap, counters, and logger (Phase 04), and with the per-call
/// <c>CurrentRequest</c> slot used for multiplexer-aware response correlation (Phase 09).
/// </summary>
public class PduContext
{
    /// <summary>
    /// The configured PLC name (from <c>MbproxyOptions.Plcs[i].Name</c>).
    /// Defaults to the empty string.
    /// </summary>
    public string PlcName { get; init; } = string.Empty;
}
|
||||
|
||||
/// <summary>
/// Contract for a hook that inspects and may rewrite Modbus PDU bytes inline. It is
/// invoked once per frame in each direction (request and response).
///
/// Implementations must tolerate concurrent calls from multiple connection pairs.
/// <see cref="NoopPduPipeline"/> (pass-through) was the sole implementation in Phase 03;
/// Phase 04 swapped in a BCD rewriter registered through DI.
/// </summary>
public interface IPduPipeline
{
    /// <summary>
    /// Processes one Modbus PDU. Implementations are allowed to mutate
    /// <paramref name="pdu"/> in place.
    /// </summary>
    /// <param name="direction">Whether the frame is a request or a response.</param>
    /// <param name="mbapHeader">
    /// The 7-byte MBAP header, read-only. It contains the TxId and UnitId; the function
    /// code is not here but in <c>pdu[0]</c>.
    /// </param>
    /// <param name="pdu">PDU bytes beginning at the function code. May be mutated in place.</param>
    /// <param name="context">Call context (PLC name; extended in Phase 04).</param>
    void Process(
        MbapDirection direction,
        ReadOnlySpan<byte> mbapHeader,
        Span<byte> pdu,
        PduContext context);
}
|
||||
@@ -0,0 +1,60 @@
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
/// Stateless helpers for reading Modbus Application Protocol (MBAP) headers.
///
/// MBAP frame layout (7-byte header followed by the PDU):
///   [0..1]  TxId        (big-endian uint16)
///   [2..3]  ProtocolId  (big-endian uint16; always 0 for standard Modbus)
///   [4..5]  Length      (big-endian uint16; counts UnitId + PDU bytes)
///   [6]     UnitId
///   [7..]   PDU         (function code + data); (lengthField - 1) bytes long
///
/// Total frame bytes = 6 (the bytes the length field does not count) + lengthField
///                   = 7 (header) + (lengthField - 1) (PDU without UnitId).
/// </summary>
internal static class MbapFrame
{
    /// <summary>Size of the MBAP header in bytes (TxId + ProtocolId + Length + UnitId).</summary>
    public const int HeaderSize = 7;

    /// <summary>Largest MBAP PDU body in bytes (Modbus spec max: 253 bytes).</summary>
    public const int MaxPduBodySize = 253;

    /// <summary>Buffer size for one whole frame: header (7) + max PDU body (253) = 260 bytes.</summary>
    public const int BufferSize = HeaderSize + MaxPduBodySize;

    /// <summary>
    /// Reads every field of a 7-byte MBAP header from the start of <paramref name="buffer"/>.
    /// When the buffer holds fewer than 7 bytes, all outputs are zeroed and the result is
    /// <c>false</c>. Neither <paramref name="protocolId"/> nor <paramref name="length"/> is
    /// validated here — that is left to the caller (and ultimately the PLC).
    /// </summary>
    public static bool TryParseHeader(
        ReadOnlySpan<byte> buffer,
        out ushort txId,
        out ushort protocolId,
        out ushort length,
        out byte unitId)
    {
        if (buffer.Length < HeaderSize)
        {
            txId = 0;
            protocolId = 0;
            length = 0;
            unitId = 0;
            return false;
        }

        // Three consecutive big-endian words, then the single UnitId byte.
        txId = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(buffer);
        protocolId = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(buffer.Slice(2));
        length = System.Buffers.Binary.BinaryPrimitives.ReadUInt16BigEndian(buffer.Slice(4));
        unitId = buffer[HeaderSize - 1];
        return true;
    }

    /// <summary>
    /// Total frame length in bytes for a given MBAP length field.
    /// Formula: 6 (TxId + ProtocolId + LengthField bytes) + lengthField
    ///        = 7 (full header) + (lengthField - 1) (PDU body without UnitId).
    /// </summary>
    public static int TotalFrameLength(ushort lengthField)
    {
        // HeaderSize - 1 = the six header bytes that precede UnitId.
        return (HeaderSize - 1) + lengthField;
    }
}
|
||||
@@ -0,0 +1,82 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
/// Lookup from a proxy-assigned MBAP TxId to its <see cref="InFlightRequest"/>. Entries
/// are added on the multiplexer's per-upstream <c>OnFrame</c> path and removed by the
/// backend reader task when the matching response arrives.
///
/// <para>Storage is a <see cref="ConcurrentDictionary{TKey, TValue}"/>. Phase 9's
/// single-writer / single-remover pattern does not strictly need one, but the
/// cascade-on-disconnect walk runs on a separate task and Phase 10 adds upstream-side
/// cancellation paths, so the safer primitive is worth its negligible cost.</para>
/// </summary>
internal sealed class CorrelationMap
{
    private readonly ConcurrentDictionary<ushort, InFlightRequest> _entries = new();

    /// <summary>
    /// Stores <paramref name="req"/> under <paramref name="proxyTxId"/>. A <c>false</c>
    /// result means the key was already occupied, which is a programming error: the
    /// allocator must never hand out a key that is still in flight. Callers should treat
    /// <c>false</c> as a fatal contract violation and drop the upstream connection.
    /// </summary>
    public bool TryAdd(ushort proxyTxId, InFlightRequest req)
    {
        return _entries.TryAdd(proxyTxId, req);
    }

    /// <summary>
    /// Removes and returns the entry stored under <paramref name="proxyTxId"/>. A
    /// <c>false</c> result means no entry existed, which is normal for cascade cleanup
    /// and stale-response paths; <paramref name="req"/> holds no usable value then.
    /// </summary>
    public bool TryRemove(ushort proxyTxId, out InFlightRequest req)
    {
        return _entries.TryRemove(proxyTxId, out req!);
    }

    /// <summary>Number of requests currently in flight.</summary>
    public int Count
    {
        get { return _entries.Count; }
    }

    /// <summary>
    /// Copies every in-flight request into a new array. The copy is detached from the
    /// live dictionary, so callers get a stable view. Allocates; intended for diagnostics
    /// (cascade walk on backend disconnect; future drain-on-shutdown).
    /// </summary>
    public IReadOnlyCollection<InFlightRequest> Snapshot() => _entries.Values.ToArray();

    /// <summary>
    /// Removes every entry and returns the removed pairs. The multiplexer's cascade path
    /// uses this when the backend socket dies, because it must close every interested
    /// upstream pipe and free every allocated proxy TxId.
    /// </summary>
    public IReadOnlyList<KeyValuePair<ushort, InFlightRequest>> DrainAll()
    {
        var removedPairs = new List<KeyValuePair<ushort, InFlightRequest>>(_entries.Count);

        // Entries are removed one at a time while enumerating. An entry already taken by
        // a concurrent remover is simply left out of the result.
        foreach (var entry in _entries)
        {
            if (!_entries.TryRemove(entry.Key, out var removed))
            {
                continue;
            }

            removedPairs.Add(KeyValuePair.Create(entry.Key, removed));
        }

        return removedPairs;
    }

    /// <summary>
    /// Lists the (proxyTxId, InFlightRequest) pairs whose <see cref="InFlightRequest.SentAtUtc"/>
    /// is at or before <paramref name="threshold"/>. Allocates a list; intended for the
    /// periodic per-request timeout watchdog only. Nothing is removed by this call — the
    /// caller decides which entries to time out.
    /// </summary>
    public IReadOnlyList<KeyValuePair<ushort, InFlightRequest>> SnapshotOlderThan(DateTimeOffset threshold)
    {
        return _entries
            .Where(entry => entry.Value.SentAtUtc <= threshold)
            .ToList();
    }
}
|
||||
@@ -0,0 +1,41 @@
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
/// A single upstream party waiting on one backend round-trip. It pairs the upstream pipe
/// that receives the response with the MBAP TxId that party originally sent. Before
/// handing the frame to the pipe, the multiplexer must rewrite the response's MBAP TxId
/// back to <see cref="OriginalTxId"/>, so the proxy stays transparent to each upstream.
///
/// <para><b>Phase 9 invariant:</b> each <see cref="InFlightRequest"/> has exactly one
/// <see cref="InterestedParty"/>. <b>Phase 10 (read coalescing)</b> reuses this exact
/// shape to fan one backend response out to several upstream parties. Do not collapse
/// this into a single field on <see cref="InFlightRequest"/>.</para>
/// </summary>
/// <param name="Pipe">Upstream pipe the response is delivered to.</param>
/// <param name="OriginalTxId">MBAP TxId the party sent, restored on the response.</param>
internal sealed record InterestedParty(
    UpstreamPipe Pipe,
    ushort OriginalTxId);
|
||||
|
||||
/// <summary>
/// Correlation record for one backend request. It is stored in <see cref="CorrelationMap"/>
/// under the proxy-assigned TxId, and the backend reader task looks it up in order to:
/// <list type="bullet">
///   <item><description>Put each interested party's original MBAP TxId back before the
///   response is forwarded upstream (transparent multiplexing contract).</description></item>
///   <item><description>Give the BCD rewriter the originating request's
///   <c>StartAddress</c> / <c>Qty</c> for FC03/FC04 response decoding, since the response
///   PDU does not carry the start address.</description></item>
///   <item><description>Measure the backend round-trip time from <see cref="SentAtUtc"/>
///   (this replaces the per-pair stopwatch slot of the 1:1 model).</description></item>
/// </list>
///
/// <para><b>Phase 9:</b> <see cref="InterestedParties"/> always holds exactly one element.
/// The list shape is the load-bearing seam that <b>Phase 10 — read coalescing</b> hooks
/// into, to fan a single PLC response out to multiple upstream clients without reworking
/// the multiplexer's data model. Reviewer note: do <i>not</i> simplify this back to a
/// single <c>UpstreamPipe</c> field.</para>
/// </summary>
/// <param name="UnitId">Unit identifier of the request.</param>
/// <param name="Fc">Function code of the request.</param>
/// <param name="StartAddress">Start address of the request; consumed for FC03/FC04 decoding.</param>
/// <param name="Qty">Quantity of the request; consumed for FC03/FC04 decoding.</param>
/// <param name="InterestedParties">Upstream parties that receive the response.</param>
/// <param name="SentAtUtc">Timestamp used for round-trip timing and the timeout watchdog.</param>
internal sealed record InFlightRequest(
    byte UnitId,
    byte Fc,
    ushort StartAddress,
    ushort Qty,
    IReadOnlyList<InterestedParty> InterestedParties,
    DateTimeOffset SentAtUtc);
|
||||
@@ -0,0 +1,121 @@
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
/// <see cref="LoggerMessage"/> source-generator declarations for the TxId-multiplexing
/// connection layer. Event names are a stable contract: renaming one requires updating
/// the "Logging" event-name table in docs/design.md.
/// </summary>
internal static partial class MultiplexerLogEvents
{
    /// <summary>
    /// Logged once per accepted upstream client. Reuses the event name and property shape
    /// of the 1:1 model's <c>mbproxy.client.connected</c> event so existing operator log
    /// queries keep matching.
    /// </summary>
    /// <param name="logger">Logger that receives the event.</param>
    /// <param name="plc">Value of the <c>Plc</c> message property.</param>
    /// <param name="remoteEp">Value of the <c>RemoteEp</c> message property.</param>
    [LoggerMessage(
        EventId = 110,
        Level = LogLevel.Information,
        EventName = "mbproxy.client.connected",
        Message = "Client connected: Plc={Plc} RemoteEp={RemoteEp}")]
    public static partial void ClientConnected(ILogger logger, string plc, string remoteEp);

    /// <summary>
    /// Logged when an upstream pipe closes, whether through a clean disconnect, a fault,
    /// or a cascade.
    /// </summary>
    /// <param name="logger">Logger that receives the event.</param>
    /// <param name="plc">Value of the <c>Plc</c> message property.</param>
    /// <param name="remoteEp">Value of the <c>RemoteEp</c> message property.</param>
    /// <param name="reason">Value of the <c>Reason</c> message property.</param>
    [LoggerMessage(
        EventId = 111,
        Level = LogLevel.Information,
        EventName = "mbproxy.client.disconnected",
        Message = "Client disconnected: Plc={Plc} RemoteEp={RemoteEp} Reason={Reason}")]
    public static partial void ClientDisconnected(ILogger logger, string plc, string remoteEp, string reason);

    /// <summary>
    /// Logged when the multiplexer has opened its single backend connection to a PLC.
    /// </summary>
    /// <param name="logger">Logger that receives the event.</param>
    /// <param name="plc">Value of the <c>Plc</c> message property.</param>
    /// <param name="host">Value of the <c>Host</c> message property.</param>
    /// <param name="port">Value of the <c>Port</c> message property.</param>
    [LoggerMessage(
        EventId = 112,
        Level = LogLevel.Information,
        EventName = "mbproxy.multiplex.backend.connected",
        Message = "Backend multiplex connection up: Plc={Plc} Host={Host} Port={Port}")]
    public static partial void BackendConnected(ILogger logger, string plc, string host, int port);

    /// <summary>
    /// Logged when a backend disconnect is cascaded to the attached upstream clients.
    /// </summary>
    /// <param name="logger">Logger that receives the event.</param>
    /// <param name="plc">Value of the <c>Plc</c> message property.</param>
    /// <param name="upstreamCount">Number of upstream pipes that were closed.</param>
    /// <param name="inFlightCount">Number of in-flight requests that were dropped.</param>
    /// <param name="reason">Value of the <c>Reason</c> message property.</param>
    [LoggerMessage(
        EventId = 113,
        Level = LogLevel.Warning,
        EventName = "mbproxy.multiplex.backend.disconnected",
        Message = "Backend multiplex connection down: Plc={Plc} UpstreamCount={UpstreamCount} InFlightCount={InFlightCount} Reason={Reason}")]
    public static partial void BackendDisconnected(
        ILogger logger, string plc, int upstreamCount, int inFlightCount, string reason);

    /// <summary>
    /// Logged when the TxId allocator cannot hand out an id because the whole 16-bit space
    /// is in flight. The multiplexer answers the upstream with Modbus exception 04
    /// (Slave Device Failure). Intended as a stress-only path: the ECOM serializes at
    /// roughly 2-10 ms per request, so normal load should not reach it.
    /// </summary>
    /// <param name="logger">Logger that receives the event.</param>
    /// <param name="plc">Value of the <c>Plc</c> message property.</param>
    /// <param name="remoteEp">Value of the <c>RemoteEp</c> message property.</param>
    [LoggerMessage(
        EventId = 114,
        Level = LogLevel.Error,
        EventName = "mbproxy.multiplex.saturated",
        Message = "Multiplexer TxId space saturated — returning exception 04 to upstream: Plc={Plc} RemoteEp={RemoteEp}")]
    public static partial void Saturated(ILogger logger, string plc, string remoteEp);

    /// <summary>
    /// Logged when the backend connect attempt (including its Polly pipeline) fails. Uses
    /// the same <c>mbproxy.backend.failed</c> event name as the 1:1 model so operator
    /// alerts keep working after Phase 9.
    /// </summary>
    /// <param name="logger">Logger that receives the event.</param>
    /// <param name="plc">Value of the <c>Plc</c> message property.</param>
    /// <param name="reason">Value of the <c>Reason</c> message property.</param>
    [LoggerMessage(
        EventId = 115,
        Level = LogLevel.Warning,
        EventName = "mbproxy.backend.failed",
        Message = "Backend connect failed: Plc={Plc} Reason={Reason}")]
    public static partial void BackendFailed(ILogger logger, string plc, string reason);

    /// <summary>
    /// Logged when the per-request watchdog expires an in-flight request whose response
    /// did not arrive within <c>BackendRequestTimeoutMs</c>. The upstream party receives
    /// Modbus exception 0x0B (Gateway Target Device Failed To Respond) and the proxy TxId
    /// is freed. Typical causes: the PLC dropped the response, network packet loss, or a
    /// backend echoing the wrong MBAP TxId (e.g. pymodbus 3.13.0's
    /// concurrent-multiplexed-request bug).
    /// </summary>
    /// <param name="logger">Logger that receives the event.</param>
    /// <param name="plc">Value of the <c>Plc</c> message property.</param>
    /// <param name="proxyTxId">Proxy-assigned TxId of the expired request.</param>
    /// <param name="originalTxId">TxId the upstream client originally sent.</param>
    /// <param name="fc">Function code of the expired request.</param>
    /// <param name="elapsedMs">Milliseconds elapsed since the request was sent.</param>
    [LoggerMessage(
        EventId = 116,
        Level = LogLevel.Warning,
        EventName = "mbproxy.multiplex.request.timeout",
        Message = "In-flight request timed out: Plc={Plc} ProxyTxId={ProxyTxId} OriginalTxId={OriginalTxId} Fc={Fc} ElapsedMs={ElapsedMs}")]
    public static partial void RequestTimeout(
        ILogger logger, string plc, ushort proxyTxId, ushort originalTxId, byte fc, long elapsedMs);
}
|
||||
@@ -0,0 +1,664 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Diagnostics;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading.Channels;
|
||||
using Mbproxy.Options;
|
||||
using Polly;
|
||||
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
|
||||
/// Owner of the single backend TCP connection to one PLC. Multiplexes many
|
||||
/// <see cref="UpstreamPipe"/> instances onto that one socket by rewriting MBAP transaction
|
||||
/// IDs so concurrent in-flight requests from different upstream clients remain
|
||||
/// distinguishable on the shared wire. The multiplexer:
|
||||
///
|
||||
/// <list type="bullet">
|
||||
/// <item><description>Opens and re-opens the backend socket through a Polly retry pipeline
|
||||
/// that matches the <see cref="ResilienceOptions.BackendConnect"/> profile.</description></item>
|
||||
/// <item><description>Runs one backend writer task that drains <see cref="_outboundChannel"/>
|
||||
/// into the backend socket (single writer; no socket-level synchronisation needed).</description></item>
|
||||
/// <item><description>Runs one backend reader task that decodes MBAP frames from the backend,
|
||||
/// looks each frame up in the <see cref="CorrelationMap"/>, restores each interested
|
||||
/// party's original TxId, and hands the frame to that party's
|
||||
/// <see cref="UpstreamPipe._responseChannel"/>.</description></item>
|
||||
/// <item><description>Cascades a backend disconnect by closing every attached pipe and
|
||||
/// freeing every allocated proxy TxId, then waits for the next upstream request to
|
||||
/// arrive (which triggers a fresh backend connect via Polly).</description></item>
|
||||
/// </list>
|
||||
///
|
||||
/// <para><b>Threading invariants:</b> a single backend writer touches the backend socket
|
||||
/// for sends; a single backend reader touches the same socket for receives. Per-upstream
|
||||
/// read tasks call <see cref="OnUpstreamFrameAsync"/>, which allocates a proxy TxId, queues
|
||||
/// the request frame into <see cref="_outboundChannel"/>, and returns. Upstream-side writes
|
||||
/// flow through each pipe's response channel — never directly through this class.</para>
|
||||
///
|
||||
/// <para><b>Lifecycle:</b> the multiplexer is created with the backend offline. The first
|
||||
/// <see cref="OnUpstreamFrameAsync"/> call triggers backend connect through the Polly pipeline
|
||||
/// (<see cref="Attach"/> alone only registers the pipe; it does not connect). Subsequent in-flight
|
||||
/// requests reuse the same socket. <see cref="DisposeAsync"/> tears down the backend
|
||||
/// socket, the writer/reader tasks, and every attached pipe.</para>
|
||||
/// </summary>
|
||||
internal sealed class PlcMultiplexer : IAsyncDisposable, IMultiplexCountersProvider
|
||||
{
|
||||
// Maximum number of request frames that may wait for the backend writer. The channel is
// created with FullMode.Wait, so producers wait (rather than drop) once this many are queued.
private const int OutboundChannelCapacity = 256;

// Collaborators supplied through the constructor.
private readonly PlcOptions _plc;
private readonly ConnectionOptions _connectionOptions;
private readonly IPduPipeline _pipeline;
private readonly PerPlcContext _ctx;
private readonly ILogger<PlcMultiplexer> _logger;
private readonly ResiliencePipeline? _backendConnectPipeline;

// Proxy TxId bookkeeping: which ids are allocated, and which request each id belongs to.
private readonly TxIdAllocator _allocator = new();
private readonly CorrelationMap _correlation = new();

// Request frames bound for the backend: many upstream producers, one consumer (the writer task).
private readonly Channel<byte[]> _outboundChannel = Channel.CreateBounded<byte[]>(
    new BoundedChannelOptions(OutboundChannelCapacity)
    {
        SingleWriter = false,
        SingleReader = true,
        FullMode = BoundedChannelFullMode.Wait,
    });

// Currently attached pipes, keyed on UpstreamPipe.Id for O(1) detach. Phase 9 exposes the
// set to the status page; Phase 10 will use it for coalescing (fan-out).
private readonly ConcurrentDictionary<Guid, UpstreamPipe> _pipes = new();

// State of the current backend connection. The writer and reader tasks share one CTS;
// a cascading disconnect cancels it to stop both. EnsureBackendConnectedAsync then
// builds a fresh CTS and a fresh socket. _backendLock guards swapping these four fields.
private readonly object _backendLock = new();
private Socket? _backendSocket;
private CancellationTokenSource? _backendCts;
private Task? _backendWriterTask;
private Task? _backendReaderTask;

// Multiplexer-wide shutdown state; the watchdog task runs until _disposeCts is cancelled.
private readonly CancellationTokenSource _disposeCts = new();
private bool _disposed;
private Task? _watchdogTask;
|
||||
|
||||
/// <summary>
/// Creates a multiplexer for one PLC. No backend connection is opened here; the
/// constructor stores its collaborators, registers itself as the counters' multiplex
/// provider, and starts the per-request timeout watchdog.
/// </summary>
/// <param name="plc">PLC settings; the visible code reads its name, host and port.</param>
/// <param name="connectionOptions">Connect and request timeout settings.</param>
/// <param name="pipeline">PDU pipeline applied to request and response frames.</param>
/// <param name="perPlcContext">Per-PLC context carrying the counters.</param>
/// <param name="logger">Logger for multiplexer events.</param>
/// <param name="backendConnectPipeline">
/// Optional Polly pipeline wrapped around the backend connect; when <c>null</c> a single
/// connect attempt is made.
/// </param>
public PlcMultiplexer(
    PlcOptions plc,
    ConnectionOptions connectionOptions,
    IPduPipeline pipeline,
    PerPlcContext perPlcContext,
    ILogger<PlcMultiplexer> logger,
    ResiliencePipeline? backendConnectPipeline = null)
{
    (_plc, _connectionOptions, _pipeline) = (plc, connectionOptions, pipeline);
    (_ctx, _logger, _backendConnectPipeline) = (perPlcContext, logger, backendConnectPipeline);

    // From now on the PLC's counters pull live multiplexer telemetry from this instance.
    _ctx.Counters.SetMultiplexProvider(this);

    // Start the watchdog that expires in-flight requests older than BackendRequestTimeoutMs.
    // It covers lost responses, dead-PLC paths, and backends that mis-echo TxIds
    // (e.g. pymodbus 3.13.0's concurrent-multiplexed-request bug — see test files).
    _watchdogTask = Task.Run(
        () => RunRequestTimeoutWatchdogAsync(_disposeCts.Token),
        CancellationToken.None);
}
|
||||
|
||||
// ── IMultiplexCountersProvider ────────────────────────────────────────────
|
||||
|
||||
/// <summary>In-flight count as currently reported by the TxId allocator.</summary>
public long InFlightCount
{
    get { return _allocator.InFlightCount; }
}

/// <summary>Wrap count as currently reported by the TxId allocator.</summary>
public long TxIdWraps
{
    get { return _allocator.WrapCount; }
}

/// <summary>Number of request frames queued in the outbound channel and not yet read by the writer.</summary>
public long BackendQueueDepth
{
    get { return _outboundChannel.Reader.Count; }
}
|
||||
|
||||
// ── Public surface ────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Point-in-time array copy of the upstream pipes attached right now. Consumed by the
/// status page; later attach/detach operations do not affect a returned snapshot.
/// </summary>
public IReadOnlyCollection<UpstreamPipe> AttachedPipes
{
    get
    {
        UpstreamPipe[] snapshot = _pipes.Values.ToArray();
        return snapshot;
    }
}
|
||||
|
||||
/// <summary>
/// Registers <paramref name="pipe"/> with this multiplexer under its <c>Id</c>. This only
/// records the pipe; running its read and write loops is the caller's job (normally done
/// through <see cref="StartPipeAsync"/>, which routes the pipe's frames into
/// <see cref="OnUpstreamFrameAsync"/>).
/// </summary>
/// <param name="pipe">Pipe to register; an existing entry with the same id is replaced.</param>
/// <exception cref="ObjectDisposedException">The multiplexer has been disposed.</exception>
public void Attach(UpstreamPipe pipe)
{
    if (!_disposed)
    {
        _pipes[pipe.Id] = pipe;
        return;
    }

    throw new ObjectDisposedException(nameof(PlcMultiplexer));
}
|
||||
|
||||
/// <summary>
/// Attaches <paramref name="pipe"/>, launches its write loop in the background, and
/// starts its read loop with every upstream frame routed into
/// <see cref="OnUpstreamFrameAsync"/>. The pipe is detached once the read loop finishes.
/// </summary>
/// <param name="pipe">Upstream pipe to run.</param>
/// <param name="ct">Token handed to both the read and the write loop.</param>
/// <returns>The read-loop task; it completes when the pipe's read loop ends.</returns>
public Task StartPipeAsync(UpstreamPipe pipe, CancellationToken ct)
{
    Attach(pipe);

    // Fire-and-forget: the write loop ends when the pipe is disposed or its channel
    // completes, and is joined inside the pipe's own DisposeAsync rather than here.
    _ = Task.Run(() => pipe.RunWriteLoopAsync(ct), CancellationToken.None);

    var upstreamReads = pipe.RunReadLoopAsync(
        (request, requestCt) => OnUpstreamFrameAsync(pipe, request, requestCt),
        ct);

    // Detach only. Disposal stays with the listener (or the cascade walker).
    _ = upstreamReads.ContinueWith(finished =>
    {
        _pipes.TryRemove(pipe.Id, out _);
    }, TaskScheduler.Default);

    return upstreamReads;
}
|
||||
|
||||
/// <summary>
/// Shuts the multiplexer down: unregisters it from the counters, cancels the
/// multiplexer-wide token, waits briefly for the watchdog, tears down the backend
/// connection (cascading to every attached pipe), completes the outbound channel, and
/// disposes any pipes still attached. Calls after the first return immediately.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    // Unhook telemetry first so a status snapshot taken during teardown does not read
    // internal state that is about to be emptied.
    _ctx.Counters.SetMultiplexProvider(null);

    await _disposeCts.CancelAsync().ConfigureAwait(false);

    // Give the watchdog up to two seconds to finish any log/dispatch in progress, so
    // tests asserting on counters see settled state. Failures are deliberately ignored.
    if (_watchdogTask is { } watchdog)
    {
        try
        {
            await watchdog.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false);
        }
        catch
        {
            /* swallow */
        }
    }

    await TearDownBackendAsync("disposing", cascadeUpstreams: true).ConfigureAwait(false);
    _outboundChannel.Writer.TryComplete();

    // Dispose whatever is still registered after the cascade.
    foreach (var remaining in _pipes.Values)
    {
        try
        {
            await remaining.DisposeAsync().ConfigureAwait(false);
        }
        catch
        {
            /* best effort */
        }
    }

    _pipes.Clear();
    _disposeCts.Dispose();
}
|
||||
|
||||
// ── Backend connect / teardown ────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Makes sure a live backend connection (socket plus writer and reader tasks) exists,
/// opening one when needed. Concurrent callers are serialised through
/// <see cref="_connectGate"/> so that only one of them connects.
/// </summary>
/// <param name="ct">Cancels waiting for the gate and the connect itself.</param>
/// <returns>
/// <c>true</c> when a backend connection is available; <c>false</c> when the multiplexer
/// is disposed or the connect failed (a failure is logged and counted).
/// </returns>
private async Task<bool> EnsureBackendConnectedAsync(CancellationToken ct)
{
    if (_disposed) return false;

    // Fast path: already connected.
    if (IsBackendUp) return true;

    // Serialise concurrent connect attempts from many upstream pipes.
    await _connectGate.WaitAsync(ct).ConfigureAwait(false);
    try
    {
        // Re-check after acquiring the gate: another caller may have connected, or the
        // multiplexer may have been disposed, while this caller was waiting.
        if (_disposed) return false;
        if (IsBackendUp) return true;

        Socket? connected = null;
        try
        {
            if (_backendConnectPipeline is not null)
            {
                // Every retry attempt gets its own socket; see ConnectOnceAsync.
                await _backendConnectPipeline.ExecuteAsync(async attemptToken =>
                {
                    connected = await ConnectOnceAsync(attemptToken).ConfigureAwait(false);
                }, ct).ConfigureAwait(false);
            }
            else
            {
                connected = await ConnectOnceAsync(ct).ConfigureAwait(false);
            }
        }
        catch (Exception ex)
        {
            string reason = ex is OperationCanceledException
                ? $"Backend connect timed out or cancelled after {_connectionOptions.BackendConnectTimeoutMs} ms"
                : ex.Message;
            MultiplexerLogEvents.BackendFailed(_logger, _plc.Name, reason);
            _ctx.Counters.IncrementConnectFailed();
            return false;
        }

        if (connected is null)
        {
            // The pipeline returned without running the connect callback to completion.
            MultiplexerLogEvents.BackendFailed(_logger, _plc.Name, "Backend connect pipeline produced no connection");
            _ctx.Counters.IncrementConnectFailed();
            return false;
        }

        Socket backend = connected;

        // Disposal may have happened while the connect was in flight. Do not start
        // backend tasks for a multiplexer that is already going away.
        CancellationTokenSource generationCts;
        try
        {
            if (_disposed)
            {
                backend.Dispose();
                return false;
            }

            generationCts = CancellationTokenSource.CreateLinkedTokenSource(_disposeCts.Token);
        }
        catch (ObjectDisposedException)
        {
            // _disposeCts was disposed between the check above and reading its token.
            backend.Dispose();
            return false;
        }

        // Successful connect. Publish the new generation and start its tasks.
        lock (_backendLock)
        {
            _backendSocket = backend;
            _backendCts = generationCts;
            _backendWriterTask = Task.Run(() => RunBackendWriterAsync(backend, generationCts.Token), CancellationToken.None);
            _backendReaderTask = Task.Run(() => RunBackendReaderAsync(backend, generationCts.Token), CancellationToken.None);
        }

        _ctx.Counters.IncrementConnectSuccess();
        MultiplexerLogEvents.BackendConnected(_logger, _plc.Name, _plc.Host, _plc.Port);
        return true;
    }
    finally
    {
        _connectGate.Release();
    }
}

/// <summary>
/// True when a backend socket is published, reports itself connected, and its
/// generation token has not been cancelled.
/// </summary>
private bool IsBackendUp =>
    _backendSocket is { Connected: true } && _backendCts is { IsCancellationRequested: false };

/// <summary>
/// Performs one connect attempt on a brand-new socket, bounded by
/// <c>BackendConnectTimeoutMs</c>. A new socket is created per attempt because a socket
/// whose connect failed or was cancelled is not reliably reusable for another attempt.
/// </summary>
/// <param name="ct">Token for this attempt; linked with the connect timeout.</param>
/// <returns>The connected socket. On failure the socket is disposed and the exception rethrown.</returns>
private async Task<Socket> ConnectOnceAsync(CancellationToken ct)
{
    var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
    { NoDelay = true };

    try
    {
        using var timeoutCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
        timeoutCts.CancelAfter(_connectionOptions.BackendConnectTimeoutMs);
        await socket.ConnectAsync(_plc.Host, _plc.Port, timeoutCts.Token).ConfigureAwait(false);
        return socket;
    }
    catch
    {
        socket.Dispose();
        throw;
    }
}

// Allows only one caller at a time to run the connect sequence above.
private readonly SemaphoreSlim _connectGate = new(1, 1);
|
||||
|
||||
/// <summary>
/// Tears down the currently published backend connection: cancels its tasks, closes the
/// socket, drops every in-flight correlation entry (releasing its proxy TxId), optionally
/// disposes all attached upstream pipes, and logs the disconnect. Returns immediately
/// when no backend connection is published.
/// </summary>
/// <param name="reason">Text recorded as the <c>Reason</c> of the disconnect log event.</param>
/// <param name="cascadeUpstreams">
/// When <c>true</c>, every pipe attached at this moment is disposed and detached, and the
/// number of pipes is added to the disconnect-cascade counter.
/// </param>
private async Task TearDownBackendAsync(string reason, bool cascadeUpstreams)
{
    Socket? oldSocket;
    CancellationTokenSource? oldCts;
    Task? writer, reader;

    // Take ownership of the current generation under the lock so that two concurrent
    // teardown calls cannot both act on the same socket and tasks.
    lock (_backendLock)
    {
        oldSocket = _backendSocket;
        oldCts = _backendCts;
        writer = _backendWriterTask;
        reader = _backendReaderTask;

        _backendSocket = null;
        _backendCts = null;
        _backendWriterTask = null;
        _backendReaderTask = null;
    }

    if (oldSocket is null && oldCts is null) return;

    // Cancel before closing the socket, so the backend tasks observe a cancelled token
    // when their pending socket operations fail.
    try { oldCts?.Cancel(); } catch { /* best effort */ }

    try { oldSocket?.Shutdown(SocketShutdown.Both); } catch { /* already closed */ }
    try { oldSocket?.Dispose(); } catch { /* best effort */ }

    // Drop every in-flight request and hand its proxy TxId back to the allocator.
    var dropped = _correlation.DrainAll();
    foreach (var kvp in dropped)
    {
        _allocator.Release(kvp.Key);
    }

    int upstreamCount = 0;
    if (cascadeUpstreams)
    {
        // Per the design doc, ALL attached upstreams cascade on backend disconnect,
        // not just those with a request in flight. Work from a snapshot so the count
        // that is logged matches the pipes that are actually closed.
        var pipeList = _pipes.Values.ToArray();
        upstreamCount = pipeList.Length;

        foreach (var pipe in pipeList)
        {
            try { await pipe.DisposeAsync().ConfigureAwait(false); }
            catch { /* best effort */ }

            // Remove only the pipes from the snapshot. A pipe attached while the
            // cascade is running stays registered instead of being silently dropped.
            _pipes.TryRemove(pipe.Id, out _);
        }

        _ctx.Counters.AddDisconnectCascades(upstreamCount);
    }

    // Best-effort join of the backend tasks, two seconds each.
    try { if (writer is not null) await writer.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }
    try { if (reader is not null) await reader.WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(false); } catch { /* swallow */ }

    oldCts?.Dispose();

    if (upstreamCount > 0 || dropped.Count > 0)
        MultiplexerLogEvents.BackendDisconnected(_logger, _plc.Name, upstreamCount, dropped.Count, reason);
}
|
||||
|
||||
// ── Backend writer / reader tasks ─────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Backend writer loop for one connection: reads request frames from
/// <see cref="_outboundChannel"/> and sends each one completely to
/// <paramref name="backend"/>. A send failure triggers a cascading teardown.
/// </summary>
/// <param name="backend">Socket of the connection this loop belongs to.</param>
/// <param name="ct">Token of that connection; cancelled when the connection is torn down.</param>
private async Task RunBackendWriterAsync(Socket backend, CancellationToken ct)
{
    try
    {
        await foreach (var frame in _outboundChannel.Reader.ReadAllAsync(ct).ConfigureAwait(false))
        {
            // SendAsync may accept fewer bytes than offered; loop until the frame is out.
            int sent = 0;
            while (sent < frame.Length)
            {
                int n = await backend.SendAsync(
                    frame.AsMemory(sent, frame.Length - sent),
                    SocketFlags.None,
                    ct).ConfigureAwait(false);
                if (n == 0) throw new SocketException((int)SocketError.ConnectionReset);
                sent += n;
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception) when (ct.IsCancellationRequested)
    {
        // This connection was already torn down (teardown cancels the token before it
        // closes the socket), so the failure is a side effect of that teardown.
        // Cascading again here could tear down a newer connection opened in the meantime.
    }
    catch (Exception ex)
    {
        // Backend failure — cascade.
        _ = TearDownBackendAsync($"writer fault: {ex.Message}", cascadeUpstreams: true);
    }
}
|
||||
|
||||
/// <summary>
/// Backend reader loop for one connection: reads MBAP frames from
/// <paramref name="backend"/>, matches each to its in-flight request by proxy TxId,
/// frees the TxId, records the round-trip time, runs the response through the PDU
/// pipeline, and forwards it to every interested upstream party with that party's
/// original TxId restored. When the loop ends for any reason other than cancellation of
/// <paramref name="ct"/>, a cascading teardown is triggered.
/// </summary>
/// <param name="backend">Socket of the connection this loop belongs to.</param>
/// <param name="ct">Token of that connection; cancelled when the connection is torn down.</param>
private async Task RunBackendReaderAsync(Socket backend, CancellationToken ct)
{
    byte[] headerBuf = new byte[MbapFrame.HeaderSize];

    // Reason reported if the loop ends; refined when a protocol error ends it.
    string exitReason = "backend reader EOF";
    try
    {
        while (!ct.IsCancellationRequested)
        {
            if (!await FillAsync(backend, headerBuf, 0, MbapFrame.HeaderSize, ct).ConfigureAwait(false))
                break;

            if (!MbapFrame.TryParseHeader(headerBuf.AsSpan(),
                    out ushort proxyTxId, out _, out ushort length, out _))
            {
                exitReason = "malformed MBAP header from backend";
                break;
            }

            if (length < 1)
            {
                // Degenerate frame — drop.
                continue;
            }

            // The MBAP length counts the unit id, which is already part of the header buffer.
            int pduBodyLen = length - 1;
            if (pduBodyLen > MbapFrame.MaxPduBodySize)
            {
                // Frame too large — backend is misbehaving; force teardown.
                _logger.LogWarning(
                    "Oversized backend frame: Plc={Plc} PduBody={Body} > Max={Max}",
                    _plc.Name, pduBodyLen, MbapFrame.MaxPduBodySize);
                exitReason = "oversized backend frame";
                break;
            }

            byte[] frame = new byte[MbapFrame.HeaderSize + pduBodyLen];
            Buffer.BlockCopy(headerBuf, 0, frame, 0, MbapFrame.HeaderSize);

            if (!await FillAsync(backend, frame, MbapFrame.HeaderSize, pduBodyLen, ct).ConfigureAwait(false))
                break;

            if (!_correlation.TryRemove(proxyTxId, out var inFlight))
            {
                // No correlation entry — a stale response after a cascade or timeout,
                // or something unsolicited from the PLC. Drop the frame.
                continue;
            }

            // Free the allocator slot immediately so it can be reused.
            _allocator.Release(proxyTxId);

            // UpdateRoundTripEwma expects Stopwatch ticks, but SentAtUtc is wall-clock,
            // so convert the elapsed time using Stopwatch.Frequency (ticks per second).
            TimeSpan roundTrip = DateTimeOffset.UtcNow - inFlight.SentAtUtc;
            long ticks = (long)(roundTrip.TotalSeconds * Stopwatch.Frequency);
            if (ticks > 0)
                _ctx.Counters.UpdateRoundTripEwma(ticks);

            // Run the response through the pipeline with a per-call context carrying the
            // originating request, so the rewriter can decode FC03/04 slots.
            var responseCtx = _ctx.WithCurrentRequest(inFlight);
            _pipeline.Process(
                MbapDirection.ResponseToClient,
                frame.AsSpan(0, MbapFrame.HeaderSize),
                frame.AsSpan(MbapFrame.HeaderSize, pduBodyLen),
                responseCtx);

            // Fan out to each interested party with their original TxId restored.
            // Phase 9: always exactly one party. Phase 10: N parties (read coalescing).
            foreach (var party in inFlight.InterestedParties)
            {
                if (!party.Pipe.IsAlive)
                    continue;

                // With a single party the frame is patched in place; with several, each
                // party gets its own copy so the TxId patches do not overwrite each other.
                byte[] outFrame = inFlight.InterestedParties.Count == 1
                    ? frame
                    : (byte[])frame.Clone();

                outFrame[0] = (byte)(party.OriginalTxId >> 8);
                outFrame[1] = (byte)(party.OriginalTxId & 0xFF);

                try
                {
                    await party.Pipe.SendResponseAsync(outFrame, ct).ConfigureAwait(false);
                }
                catch (Exception ex) when (!ct.IsCancellationRequested)
                {
                    // Best-effort delivery, matching the watchdog: one upstream pipe that
                    // cannot take its response (presumably closing concurrently) must not
                    // fault the shared reader and disconnect every other client.
                    _logger.LogDebug(
                        ex,
                        "Dropping response for unavailable upstream: Plc={Plc} OriginalTxId={OriginalTxId}",
                        _plc.Name, party.OriginalTxId);
                }
            }
        }

        // Loop ended without cancellation: remote closed the connection or sent a frame
        // we cannot process. Cascade. If the token is cancelled, teardown is already
        // under way and must not be repeated against a possibly newer connection.
        if (!ct.IsCancellationRequested)
            _ = TearDownBackendAsync(exitReason, cascadeUpstreams: true);
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception) when (ct.IsCancellationRequested)
    {
        // The socket was closed by a teardown that already cancelled this connection's
        // token; nothing left to cascade.
    }
    catch (Exception ex)
    {
        _ = TearDownBackendAsync($"reader fault: {ex.Message}", cascadeUpstreams: true);
    }
}
|
||||
|
||||
// ── Upstream → multiplexer entry point ────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Entry point for one request frame read from an upstream pipe. Ensures the backend is
/// connected, allocates a proxy TxId, records the request in the correlation map, runs
/// the request through the PDU pipeline, rewrites the MBAP TxId, and queues the frame
/// for the backend writer.
/// </summary>
/// <param name="pipe">Pipe the frame arrived on; it receives the eventual response.</param>
/// <param name="frame">Complete MBAP frame (header + PDU). It is modified in place.</param>
/// <param name="ct">Cancels the backend connect, the exception reply, and the enqueue.</param>
/// <remarks>
/// Frames without at least a function-code byte after the MBAP header are dropped. If
/// the backend cannot be connected the pipe is disposed. If no proxy TxId is free the
/// upstream receives Modbus exception 04.
/// </remarks>
private async ValueTask OnUpstreamFrameAsync(UpstreamPipe pipe, byte[] frame, CancellationToken ct)
{
    if (_disposed) return;

    // Ensure backend is connected. Failure here means we cannot service the request;
    // close the upstream pipe (consistent with the 1:1 model's behaviour on connect
    // failure).
    if (!await EnsureBackendConnectedAsync(ct).ConfigureAwait(false))
    {
        try { await pipe.DisposeAsync().ConfigureAwait(false); } catch { /* best effort */ }
        return;
    }

    // A request needs the whole MBAP header plus at least the function-code byte.
    // Checking this before allocating prevents an out-of-range read on a header-only
    // frame, which would also leave the freshly allocated TxId without an owner.
    if (frame.Length <= MbapFrame.HeaderSize)
        return;

    if (!MbapFrame.TryParseHeader(frame.AsSpan(0, MbapFrame.HeaderSize),
            out ushort originalTxId, out _, out _, out byte unitId))
        return;

    int pduOffset = MbapFrame.HeaderSize;
    byte fcByte = frame[pduOffset];

    if (!_allocator.TryAllocate(out ushort proxyTxId))
    {
        MultiplexerLogEvents.Saturated(_logger, _plc.Name, pipe.RemoteEp?.ToString() ?? "?");
        // Synthesize Modbus exception 04 (Slave Device Failure).
        byte[] excFrame = BuildExceptionFrame(originalTxId, unitId, fcByte, exceptionCode: 4);
        await pipe.SendResponseAsync(excFrame, ct).ConfigureAwait(false);
        return;
    }

    // For FC03/04, remember start address and quantity (big-endian in the PDU) so the
    // response decoder has the correlation it needs.
    ushort startAddr = 0;
    ushort qty = 0;
    if (fcByte is 0x03 or 0x04 && frame.Length >= pduOffset + 5)
    {
        startAddr = (ushort)((frame[pduOffset + 1] << 8) | frame[pduOffset + 2]);
        qty = (ushort)((frame[pduOffset + 3] << 8) | frame[pduOffset + 4]);
    }

    var inFlight = new InFlightRequest(
        UnitId: unitId,
        Fc: fcByte,
        StartAddress: startAddr,
        Qty: qty,
        InterestedParties: [new InterestedParty(pipe, originalTxId)],
        SentAtUtc: DateTimeOffset.UtcNow);

    if (!_correlation.TryAdd(proxyTxId, inFlight))
    {
        // Should be impossible: the allocator just guaranteed proxyTxId is free.
        _allocator.Release(proxyTxId);
        _logger.LogError("CorrelationMap.TryAdd failed for already-free proxyTxId {ProxyTxId}", proxyTxId);
        return;
    }

    // Peak in-flight tracking.
    _ctx.Counters.ObserveInFlight(_allocator.InFlightCount);

    // Run the request through the pipeline with a per-call context carrying the request
    // (the rewriter doesn't currently need it on request, but Phase 10 may).
    var requestCtx = _ctx.WithCurrentRequest(inFlight);
    _pipeline.Process(
        MbapDirection.RequestToBackend,
        frame.AsSpan(0, MbapFrame.HeaderSize),
        frame.AsSpan(MbapFrame.HeaderSize, frame.Length - MbapFrame.HeaderSize),
        requestCtx);

    // Overwrite the MBAP TxId with the proxy TxId.
    frame[0] = (byte)(proxyTxId >> 8);
    frame[1] = (byte)(proxyTxId & 0xFF);

    // Enqueue for the backend writer task.
    try
    {
        await _outboundChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
    }
    catch (ChannelClosedException)
    {
        // Channel completed during shutdown — release the proxy TxId.
        if (_correlation.TryRemove(proxyTxId, out _))
            _allocator.Release(proxyTxId);
    }
    catch (OperationCanceledException)
    {
        // The frame was never queued, so no response can arrive for it. Release the
        // entry now rather than leaving it for the timeout watchdog, then let the
        // cancellation propagate as before.
        if (_correlation.TryRemove(proxyTxId, out _))
            _allocator.Release(proxyTxId);
        throw;
    }
}
|
||||
|
||||
// ── Per-request timeout watchdog ──────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Background loop that expires in-flight requests whose response has not arrived within
/// <see cref="ConnectionOptions.BackendRequestTimeoutMs"/>. Each expired entry is removed
/// from the correlation map, its proxy TxId is released, and every interested party
/// that is still alive receives Modbus exception 0x0B (Gateway Target Device Failed To
/// Respond) carrying its original TxId.
///
/// <para><b>Why this exists.</b> In the 1:1 connection model a lost response faulted the
/// dedicated backend socket and the upstream pair closed. With a shared socket, one
/// missing or mis-routed response would otherwise leak a correlation entry forever and
/// leave the upstream pipe hanging. Real-world causes: the PLC drops a response, network
/// packet loss, or a backend that mis-echoes MBAP TxIds.</para>
/// </summary>
/// <param name="ct">Stops the loop; cancellation is treated as normal teardown.</param>
private async Task RunRequestTimeoutWatchdogAsync(CancellationToken ct)
{
    // Scan about four times per timeout period for responsive cleanup, but never more
    // often than every 100 ms so very small timeouts do not cause busy waking.
    int scanIntervalMs = Math.Max(100, _connectionOptions.BackendRequestTimeoutMs / 4);

    try
    {
        while (!ct.IsCancellationRequested)
        {
            await Task.Delay(scanIntervalMs, ct).ConfigureAwait(false);

            var cutoff = DateTimeOffset.UtcNow.AddMilliseconds(-_connectionOptions.BackendRequestTimeoutMs);
            var expired = _correlation.SnapshotOlderThan(cutoff);
            if (expired.Count == 0)
            {
                continue;
            }

            foreach (var entry in expired)
            {
                ushort proxyTxId = entry.Key;

                // Claim the entry. If a response or a cascade removed it since the
                // snapshot was taken, there is nothing left to do for it.
                if (!_correlation.TryRemove(proxyTxId, out var req))
                {
                    continue;
                }

                _allocator.Release(proxyTxId);

                long elapsedMs = (long)(DateTimeOffset.UtcNow - req.SentAtUtc).TotalMilliseconds;

                foreach (var party in req.InterestedParties)
                {
                    MultiplexerLogEvents.RequestTimeout(
                        _logger, _plc.Name, proxyTxId, party.OriginalTxId, req.Fc, elapsedMs);

                    if (party.Pipe.IsAlive)
                    {
                        // Answer with exception 0x0B so the client's library can raise a
                        // clean ModbusException instead of hanging on its own timeout.
                        byte[] timeoutReply = BuildExceptionFrame(
                            party.OriginalTxId, req.UnitId, req.Fc, exceptionCode: 0x0B);
                        try
                        {
                            await party.Pipe.SendResponseAsync(timeoutReply, ct).ConfigureAwait(false);
                        }
                        catch
                        {
                            // Best-effort delivery; if the pipe is going down, the client
                            // discovers the failure through its own socket close path.
                        }
                    }
                }
            }
        }
    }
    catch (OperationCanceledException)
    {
        // Normal teardown.
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Request-timeout watchdog faulted: Plc={Plc}", _plc.Name);
    }
}
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Receives from <paramref name="socket"/> until exactly <paramref name="count"/> bytes
/// have been written into <paramref name="buf"/> starting at <paramref name="offset"/>.
/// </summary>
/// <param name="socket">Socket to receive from.</param>
/// <param name="buf">Destination buffer.</param>
/// <param name="offset">Index in <paramref name="buf"/> of the first byte to fill.</param>
/// <param name="count">Number of bytes required; zero or less returns <c>true</c> without receiving.</param>
/// <param name="ct">Token passed to each receive.</param>
/// <returns>
/// <c>true</c> once all bytes have arrived; <c>false</c> if a receive returned zero
/// bytes before the requested count was reached.
/// </returns>
private static async Task<bool> FillAsync(
    Socket socket, byte[] buf, int offset, int count, CancellationToken ct)
{
    for (int filled = 0; filled < count;)
    {
        int received = await socket.ReceiveAsync(
            buf.AsMemory(offset + filled, count - filled),
            SocketFlags.None, ct).ConfigureAwait(false);

        if (received == 0)
        {
            return false;
        }

        filled += received;
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Builds a complete Modbus exception response frame: an MBAP header followed by the
/// two-byte exception PDU <c>[fc | 0x80][exceptionCode]</c>.
/// </summary>
/// <param name="originalTxId">Transaction id written big-endian into bytes 0-1.</param>
/// <param name="unitId">Unit id written to byte 6.</param>
/// <param name="fc">Function code of the failed request; sent with its high bit set.</param>
/// <param name="exceptionCode">Modbus exception code written as the last byte.</param>
/// <returns>A new array of <c>MbapFrame.HeaderSize + 2</c> bytes.</returns>
private static byte[] BuildExceptionFrame(ushort originalTxId, byte unitId, byte fc, byte exceptionCode)
{
    byte[] reply = new byte[MbapFrame.HeaderSize + 2];

    // Transaction id, big-endian.
    reply[0] = (byte)(originalTxId >> 8);
    reply[1] = (byte)(originalTxId & 0xFF);

    // Protocol id: two zero bytes.
    reply[2] = 0;
    reply[3] = 0;

    // MBAP length, big-endian: UnitId(1) + ExFc(1) + ExCode(1) = 3.
    reply[4] = 0;
    reply[5] = 3;

    reply[6] = unitId;

    // Exception PDU: request function code with the high bit set, then the code.
    reply[7] = (byte)(fc | 0x80);
    reply[8] = exceptionCode;

    return reply;
}
|
||||
}
|
||||
@@ -0,0 +1,142 @@
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
/// Allocates 16-bit MBAP transaction IDs (proxy TxIds) used to multiplex many upstream
/// clients onto a single shared backend connection per PLC. The allocator records which
/// IDs are in flight and scans forward from a rolling cursor for the next free slot, so a
/// released ID is not handed out again until the cursor comes back around to it.
///
/// <para><b>Thread-safety:</b> all state is guarded by one private lock. A lock was chosen
/// over a lock-free design for readability and worst-case determinism; the allocator is
/// per-PLC, so contention is expected to be low.</para>
///
/// <para><b>Memory:</b> one <c>bool[65536]</c> (~64 KB) per allocator instance.</para>
///
/// <para><b>Wrap counter:</b> <see cref="WrapCount"/> increases by one for every successful
/// allocation during which the rolling cursor crosses 0xFFFF → 0x0000 — whether because
/// slot 0xFFFF itself was handed out or because the scan had to skip busy slots across the
/// boundary. It is a telemetry signal, not an error.</para>
/// </summary>
internal sealed class TxIdAllocator
{
    // 65,536 slots total — the full uint16 space.
    private const int SlotCount = 65536;

    private readonly object _lock = new();
    private readonly bool[] _inUse = new bool[SlotCount];
    private ushort _next;          // rolling cursor; 0 on construction
    private int _inFlightCount;    // 0..65536
    private long _wrapCount;       // monotonic; never resets

    /// <summary>
    /// Number of proxy TxIds currently allocated and not yet released. Read under the same
    /// lock that mutates it so the value is consistent with the slot table.
    /// </summary>
    public int InFlightCount
    {
        get
        {
            lock (_lock)
            {
                return _inFlightCount;
            }
        }
    }

    /// <summary>
    /// Number of successful allocations during which the rolling cursor crossed
    /// 0xFFFF → 0x0000. Read without taking the lock via <see cref="Interlocked.Read"/>.
    /// </summary>
    public long WrapCount => Interlocked.Read(ref _wrapCount);

    /// <summary>
    /// Attempts to allocate the next free proxy TxId.
    /// </summary>
    /// <param name="id">The allocated ID on success; <c>0</c> on failure.</param>
    /// <returns>
    /// <c>true</c> when an ID was allocated; <c>false</c> when every slot in the 16-bit
    /// space is in use. On <c>false</c> the caller is responsible for emitting
    /// <c>mbproxy.multiplex.saturated</c> and returning a Modbus exception
    /// (code 04 / Slave Device Failure) to the upstream.
    /// </returns>
    public bool TryAllocate(out ushort id)
    {
        lock (_lock)
        {
            if (_inFlightCount >= SlotCount)
            {
                id = 0;
                return false;
            }

            // At least one slot is free, so probing every slot once starting at the cursor
            // is guaranteed to find it.
            ushort cursor = _next;
            bool crossedBoundary = false;

            for (int probes = 0; probes < SlotCount; probes++)
            {
                if (!_inUse[cursor])
                {
                    _inUse[cursor] = true;
                    _inFlightCount++;

                    ushort nextCursor = unchecked((ushort)(cursor + 1));
                    if (nextCursor == 0)
                    {
                        crossedBoundary = true;
                    }

                    // One allocation probes each slot at most once, so the cursor can
                    // cross the boundary at most once here: never double-counted.
                    if (crossedBoundary)
                    {
                        Interlocked.Increment(ref _wrapCount);
                    }

                    _next = nextCursor;
                    id = cursor;
                    return true;
                }

                cursor = unchecked((ushort)(cursor + 1));
                if (cursor == 0)
                {
                    // Skipped over busy slot 0xFFFF: the cursor has rolled over even
                    // though 0xFFFF was not the slot handed out.
                    crossedBoundary = true;
                }
            }

            // Defensive: unreachable given the in-flight check above.
            id = 0;
            return false;
        }
    }

    /// <summary>
    /// Releases a previously allocated proxy TxId. Releasing an ID that is not currently
    /// allocated is a no-op, so overlapping cleanup paths may both call this safely.
    /// </summary>
    /// <param name="id">The proxy TxId to free.</param>
    public void Release(ushort id)
    {
        lock (_lock)
        {
            if (_inUse[id])
            {
                _inUse[id] = false;
                _inFlightCount--;
            }
        }
    }

    /// <summary>
    /// Test-only: reports whether <paramref name="id"/> is currently marked in use.
    /// Internal so it remains usable from unit tests via InternalsVisibleTo.
    /// </summary>
    internal bool IsAllocated(ushort id)
    {
        lock (_lock)
        {
            return _inUse[id];
        }
    }
}
|
||||
@@ -0,0 +1,281 @@
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading.Channels;
|
||||
|
||||
namespace Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
/// <summary>
|
||||
/// One accepted upstream client socket, exposed as an asynchronous frame pipe to the
|
||||
/// owning <see cref="PlcMultiplexer"/>. The pipe reads complete MBAP frames from the
|
||||
/// upstream socket and hands each frame to a multiplexer-supplied <c>onFrame</c> callback;
|
||||
/// it also exposes a write channel that the multiplexer drains to send response frames
|
||||
/// back to the upstream client.
|
||||
///
|
||||
/// <para><b>Lifecycle:</b> constructed by <see cref="PlcListener"/> on accept; attached
|
||||
/// to the multiplexer; runs its read loop until the upstream socket closes, the pipe is
|
||||
/// disposed, or the multiplexer cascades a backend disconnect.</para>
|
||||
///
|
||||
/// <para><b>Concurrency model:</b> each pipe runs exactly two tasks — a read task and a
|
||||
/// write task. The read task drives the multiplexer (one frame at a time, which preserves
|
||||
/// the per-upstream-client one-in-flight invariant); the write task drains
|
||||
/// <see cref="_responseChannel"/> and writes each frame to the socket. No third task ever
|
||||
/// touches the socket.</para>
|
||||
///
|
||||
/// <para><b>One-in-flight-per-upstream:</b> the read loop processes frames sequentially.
|
||||
/// A multi-PDU-pipelined client would still get correct service because the multiplexer
|
||||
/// can have multiple distinct <c>OnFrame</c> calls outstanding from <i>different</i>
|
||||
/// upstream pipes; a single upstream cannot multi-PDU-pipeline itself.</para>
|
||||
/// </summary>
|
||||
internal sealed partial class UpstreamPipe : IAsyncDisposable
|
||||
{
|
||||
// Capacity 16: enough to buffer responses while the upstream's TCP send buffer drains,
// small enough that backpressure kicks in on a wedged consumer. Drop-on-fault behaviour
// applies — once the pipe has been disposed, IsAlive is false and SendResponseAsync
// discards frames before they ever enter the channel.
|
||||
private const int ResponseChannelCapacity = 16;
|
||||
|
||||
private readonly Socket _upstream;
|
||||
private readonly ILogger _logger;
|
||||
private readonly string _plcName;
|
||||
|
||||
private readonly Channel<byte[]> _responseChannel = Channel.CreateBounded<byte[]>(
|
||||
new BoundedChannelOptions(ResponseChannelCapacity)
|
||||
{
|
||||
FullMode = BoundedChannelFullMode.Wait, // backpressure, not drop
|
||||
SingleReader = true,
|
||||
SingleWriter = false, // multiplexer adds; potential future paths too
|
||||
});
|
||||
|
||||
// Internal CTS lets the multiplexer signal "drop this pipe now" without waiting for
|
||||
// the upstream socket to close cleanly.
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
private bool _disposed;
|
||||
|
||||
// Phase 9: per-pipe forwarded-PDU counter (replaces the per-pair counter from the
|
||||
// 1:1 model). Read by the status page.
|
||||
private long _pdusForwardedCount;
|
||||
|
||||
/// <summary>Stable identity for status-page reporting and cascade cleanup.</summary>
|
||||
public Guid Id { get; } = Guid.NewGuid();
|
||||
|
||||
/// <summary>The upstream client's remote endpoint, captured at construction.</summary>
|
||||
public IPEndPoint? RemoteEp { get; }
|
||||
|
||||
/// <summary>UTC time at which the upstream socket was accepted.</summary>
|
||||
public DateTimeOffset ConnectedAtUtc { get; } = DateTimeOffset.UtcNow;
|
||||
|
||||
/// <summary>
|
||||
/// Number of request PDUs read from this upstream and forwarded into the multiplexer.
|
||||
/// Incremented by <see cref="RunReadLoopAsync"/> after each successful frame parse.
|
||||
/// </summary>
|
||||
public long PdusForwardedCount => Interlocked.Read(ref _pdusForwardedCount);
|
||||
|
||||
/// <summary>
|
||||
/// <c>true</c> while the pipe's read+write tasks are running. Flips to <c>false</c>
|
||||
/// on disposal or any fault on either direction.
|
||||
/// </summary>
|
||||
public bool IsAlive => !_disposed && !_cts.IsCancellationRequested;
|
||||
|
||||
/// <summary>
/// Wraps an accepted upstream socket: disables Nagle on it, captures its remote endpoint,
/// and emits the client-connected log event.
/// </summary>
/// <param name="upstream">The accepted client socket.</param>
/// <param name="plcName">PLC name used in log events.</param>
/// <param name="logger">Logger used for this pipe's events.</param>
public UpstreamPipe(Socket upstream, string plcName, ILogger logger)
{
    _plcName = plcName;
    _logger = logger;

    _upstream = upstream;
    upstream.NoDelay = true;
    RemoteEp = upstream.RemoteEndPoint as IPEndPoint;

    MultiplexerLogEvents.ClientConnected(_logger, _plcName, RemoteEp?.ToString() ?? "?");
}
|
||||
|
||||
/// <summary>
/// Runs the read side of the pipe. Reads complete MBAP frames from the upstream socket
/// and invokes <paramref name="onFrame"/> once per frame, sequentially. Returns when:
/// <list type="bullet">
/// <item><description>The upstream closes (a receive returns zero bytes, at any point in a frame).</description></item>
/// <item><description>The header fails <c>MbapFrame.TryParseHeader</c>, or the declared PDU body exceeds <c>MbapFrame.MaxPduBodySize</c>.</description></item>
/// <item><description>Cancellation is requested via <paramref name="ct"/> or the pipe's own token, or the pipe was already disposed.</description></item>
/// <item><description>A <see cref="SocketException"/> or <see cref="ObjectDisposedException"/> is raised while reading or inside <paramref name="onFrame"/>.</description></item>
/// </list>
/// Any other exception thrown by <paramref name="onFrame"/> propagates to the caller.
///
/// <para>Each call to <paramref name="onFrame"/> receives a freshly allocated
/// <see cref="byte"/>[] that this loop never touches again, so the callee may retain it.</para>
///
/// <para>The forwarded-PDU counter is incremented once per parsed frame, immediately
/// before <paramref name="onFrame"/> is invoked, on every path.</para>
/// </summary>
/// <param name="onFrame">Callback receiving each complete frame and the loop's cancellation token.</param>
/// <param name="ct">External cancellation token; linked with the pipe's internal token.</param>
public async Task RunReadLoopAsync(
    Func<byte[], CancellationToken, ValueTask> onFrame,
    CancellationToken ct)
{
    // Scratch buffer for the MBAP header; reused across iterations and copied per frame.
    byte[] headerBuf = new byte[MbapFrame.HeaderSize];

    try
    {
        // Created inside the try on purpose: reading _cts.Token throws
        // ObjectDisposedException if the pipe was disposed before this loop started, and
        // that case is handled below as a normal shutdown rather than surfaced as a fault.
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct, _cts.Token);
        CancellationToken token = linked.Token;

        while (!token.IsCancellationRequested)
        {
            // Read the MBAP header.
            if (!await FillAsync(_upstream, headerBuf, 0, MbapFrame.HeaderSize, token).ConfigureAwait(false))
            {
                return; // EOF — upstream went away.
            }

            if (!MbapFrame.TryParseHeader(headerBuf.AsSpan(),
                    out _, out _, out ushort length, out _))
            {
                return;
            }

            if (length < 1)
            {
                // Length field claims no body — forward the header alone via a fresh buffer.
                byte[] headerOnly = new byte[MbapFrame.HeaderSize];
                Buffer.BlockCopy(headerBuf, 0, headerOnly, 0, MbapFrame.HeaderSize);

                // Count before dispatch, matching the normal path below.
                Interlocked.Increment(ref _pdusForwardedCount);
                await onFrame(headerOnly, token).ConfigureAwait(false);
                continue;
            }

            int pduBodyLen = length - 1;
            if (pduBodyLen > MbapFrame.MaxPduBodySize)
            {
                // Declared body exceeds the allowed maximum — stop reading from this upstream.
                _logger.LogWarning(
                    "Oversized upstream frame: Plc={Plc} PduBody={Body} > Max={Max}",
                    _plcName, pduBodyLen, MbapFrame.MaxPduBodySize);
                return;
            }

            // Allocate a fresh frame buffer per PDU; the callee may retain it.
            byte[] frame = new byte[MbapFrame.HeaderSize + pduBodyLen];
            Buffer.BlockCopy(headerBuf, 0, frame, 0, MbapFrame.HeaderSize);

            if (!await FillAsync(_upstream, frame, MbapFrame.HeaderSize, pduBodyLen, token)
                    .ConfigureAwait(false))
            {
                return;
            }

            Interlocked.Increment(ref _pdusForwardedCount);
            await onFrame(frame, token).ConfigureAwait(false);
        }
    }
    catch (OperationCanceledException)
    {
        // Normal shutdown.
    }
    catch (SocketException)
    {
        // Upstream socket closed by remote end — normal.
    }
    catch (ObjectDisposedException)
    {
        // Socket or token source disposed by DisposeAsync — normal.
    }
}
|
||||
|
||||
/// <summary>
/// Runs the write side of the pipe. Drains the response channel and writes each frame, in
/// order, to the upstream socket. Returns when the channel completes, cancellation is
/// requested via <paramref name="ct"/> or the pipe's own token, the pipe was already
/// disposed, or the socket raises <see cref="SocketException"/> /
/// <see cref="ObjectDisposedException"/>.
/// </summary>
/// <param name="ct">External cancellation token; linked with the pipe's internal token.</param>
public async Task RunWriteLoopAsync(CancellationToken ct)
{
    try
    {
        // Created inside the try on purpose: reading _cts.Token throws
        // ObjectDisposedException if the pipe was disposed before this loop started, and
        // that case is handled below as a normal shutdown rather than surfaced as a fault.
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct, _cts.Token);
        CancellationToken token = linked.Token;

        await foreach (byte[] frame in _responseChannel.Reader.ReadAllAsync(token).ConfigureAwait(false))
        {
            await SendAllAsync(_upstream, frame.AsMemory(), token).ConfigureAwait(false);
        }
    }
    catch (OperationCanceledException)
    {
        // Normal shutdown.
    }
    catch (SocketException)
    {
        // Upstream remote closed — normal.
    }
    catch (ObjectDisposedException)
    {
        // Socket or token source disposed elsewhere — normal.
    }
}
|
||||
|
||||
/// <summary>
/// Queues <paramref name="frame"/> on the response channel for delivery to the upstream
/// socket. The frame is silently dropped when the pipe is no longer alive, when the
/// channel has been completed, or when <paramref name="ct"/> is cancelled while waiting
/// for channel capacity; this method does not surface any of those conditions.
/// </summary>
/// <param name="frame">Complete response frame to deliver.</param>
/// <param name="ct">Cancels the wait for channel capacity.</param>
public async ValueTask SendResponseAsync(byte[] frame, CancellationToken ct)
{
    if (IsAlive)
    {
        try
        {
            await _responseChannel.Writer.WriteAsync(frame, ct).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is ChannelClosedException or OperationCanceledException)
        {
            // ChannelClosedException: pipe disposed mid-write.
            // OperationCanceledException: caller cancelled.
            // Either way the frame is dropped silently.
        }
    }
}
|
||||
|
||||
/// <summary>
/// Tears the pipe down: completes the response channel, cancels the internal token,
/// shuts down and disposes the socket, disposes the token source, then emits the
/// client-disconnected log event. Calls after the first return immediately.
/// </summary>
public async ValueTask DisposeAsync()
{
    // NOTE(review): this guard is a plain bool check-then-set, so two callers racing on
    // different threads could both pass it — confirm callers serialise disposal.
    if (!_disposed)
    {
        _disposed = true;

        // Completing the writer makes pending and future channel writes fail fast.
        try
        {
            _responseChannel.Writer.TryComplete();
        }
        catch
        {
            /* already complete */
        }

        await _cts.CancelAsync().ConfigureAwait(false);

        try
        {
            _upstream.Shutdown(SocketShutdown.Both);
        }
        catch
        {
            /* already closed */
        }

        _upstream.Dispose();
        _cts.Dispose();

        MultiplexerLogEvents.ClientDisconnected(
            _logger, _plcName, RemoteEp?.ToString() ?? "?", "Pipe disposed");
    }
}
|
||||
|
||||
// ── Low-level I/O helpers ─────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Receives from <paramref name="socket"/> until exactly <paramref name="count"/> bytes have
/// been written into <paramref name="buf"/> starting at <paramref name="offset"/>.
/// </summary>
/// <returns>
/// <c>true</c> once the requested range is full (immediately when <paramref name="count"/> is
/// not positive); <c>false</c> if a receive returned zero bytes first. A zero-byte receive
/// yields <c>false</c> whether or not part of the range had already arrived — callers
/// cannot tell EOF at a frame boundary from a truncated frame through this method.
/// </returns>
private static async Task<bool> FillAsync(
    Socket socket, byte[] buf, int offset, int count, CancellationToken ct)
{
    int remaining = count;

    while (remaining > 0)
    {
        int received = await socket.ReceiveAsync(
            buf.AsMemory(offset + (count - remaining), remaining),
            SocketFlags.None,
            ct).ConfigureAwait(false);

        if (received == 0)
        {
            return false;
        }

        remaining -= received;
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Sends all of <paramref name="memory"/> on <paramref name="socket"/>, issuing further
/// sends for whatever a partial send left behind.
/// </summary>
/// <exception cref="SocketException">
/// Raised with <see cref="SocketError.ConnectionReset"/> when a send reports zero bytes written.
/// </exception>
private static async Task SendAllAsync(Socket socket, Memory<byte> memory, CancellationToken ct)
{
    int offset = 0;

    while (offset < memory.Length)
    {
        int written = await socket.SendAsync(memory.Slice(offset), SocketFlags.None, ct)
            .ConfigureAwait(false);

        if (written == 0)
        {
            throw new SocketException((int)SocketError.ConnectionReset);
        }

        offset += written;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
/// Pass-through <see cref="IPduPipeline"/>: <see cref="Process"/> leaves the PDU bytes
/// exactly as they arrived. Introduced as the Phase 03 pipeline registration; Phase 04
/// swaps that registration for BcdPduPipeline.
/// </summary>
internal sealed class NoopPduPipeline : IPduPipeline
{
    /// <summary>
    /// Does nothing. None of the arguments are read and <paramref name="pdu"/> is not
    /// modified, so the frame continues on byte-for-byte.
    /// </summary>
    public void Process(
        MbapDirection direction,
        ReadOnlySpan<byte> mbapHeader,
        Span<byte> pdu,
        PduContext context)
    {
        // Deliberately empty — forwarding without rewriting is this type's whole purpose.
    }
}
|
||||
@@ -0,0 +1,60 @@
|
||||
using Mbproxy.Bcd;
|
||||
using Mbproxy.Proxy.Multiplexing;
|
||||
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
/// Per-PLC pipeline context: the resolved BCD tag map, the live counters, and a logger.
/// Derives from <see cref="PduContext"/> so it can be passed wherever a
/// <see cref="PduContext"/> is expected.
///
/// <para>One instance per configured PLC is constructed at <see cref="ProxyWorker"/>
/// startup and shared by every upstream pipe served by the same
/// <see cref="Multiplexing.PlcMultiplexer"/>. Every property here is init-only; counter
/// updates go through <see cref="ProxyCounters"/>.</para>
///
/// <para><b>Phase 9 — request correlation:</b> <see cref="CurrentRequest"/> carries the
/// correlation entry for one pipeline call. Because a single context is shared between
/// the per-upstream read tasks and the backend reader task, the per-call value is never
/// written onto the shared instance; <see cref="WithCurrentRequest"/> produces a
/// lightweight per-call copy instead, so concurrent multiplexed requests cannot see each
/// other's entry.</para>
/// </summary>
internal class PerPlcContext : PduContext
{
    /// <summary>Resolved BCD tag map for this PLC; defaults to <see cref="BcdTagMap.Empty"/>.</summary>
    public BcdTagMap TagMap { get; init; } = BcdTagMap.Empty;

    /// <summary>Live counters for this PLC; defaults to a fresh <see cref="ProxyCounters"/>.</summary>
    public ProxyCounters Counters { get; init; } = new();

    /// <summary>Logger for pipeline diagnostics; defaults to the null logger.</summary>
    public ILogger Logger { get; init; } = Microsoft.Extensions.Logging.Abstractions.NullLogger.Instance;

    /// <summary>
    /// Correlation entry for the PDU call this context instance was created for, or
    /// <c>null</c> when none was supplied. On response calls the BCD rewriter reads it to
    /// learn the originating FC03/FC04 start address and quantity, which are not present
    /// in the response PDU.
    /// </summary>
    internal InFlightRequest? CurrentRequest { get; init; }

    /// <summary>
    /// Creates a shallow copy of this context that shares <c>PlcName</c>,
    /// <see cref="TagMap"/>, <see cref="Counters"/>, and <see cref="Logger"/> with the
    /// original and has <see cref="CurrentRequest"/> set to <paramref name="req"/>.
    /// Costs one allocation per call.
    /// </summary>
    /// <param name="req">The correlation entry for the upcoming pipeline call; may be <c>null</c>.</param>
    internal PerPlcContext WithCurrentRequest(InFlightRequest? req)
    {
        return new PerPlcContext
        {
            CurrentRequest = req,
            PlcName = PlcName,
            TagMap = TagMap,
            Counters = Counters,
            Logger = Logger,
        };
    }
}
|
||||
@@ -0,0 +1,188 @@
|
||||
using System.Collections.Concurrent;
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy.Multiplexing;
|
||||
using Polly;
|
||||
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
|
||||
/// Owns one <see cref="TcpListener"/> bound to a PLC's configured listen port and one
|
||||
/// <see cref="PlcMultiplexer"/> that owns the single backend connection to the PLC.
|
||||
///
|
||||
/// <para><b>Phase 9 — TxId multiplexing:</b> the listener no longer pairs each upstream
|
||||
/// socket with a dedicated backend socket. Instead, every accepted upstream is wrapped
|
||||
/// in an <see cref="UpstreamPipe"/> and handed to the multiplexer. The multiplexer holds
|
||||
/// at most one TCP connection to the PLC, eliminating the H2-ECOM100's 4-concurrent-client
|
||||
/// cap from the upstream side.</para>
|
||||
///
|
||||
/// <para>The listener's accept loop is otherwise unchanged. <see cref="StartAsync"/>
|
||||
/// binds the socket; <see cref="RunAsync"/> runs until cancelled or the listener faults;
|
||||
/// <see cref="DisposeAsync"/> tears down both the listener and the multiplexer.</para>
|
||||
/// </summary>
|
||||
internal sealed partial class PlcListener : IAsyncDisposable
|
||||
{
|
||||
private readonly PlcOptions _plc;
|
||||
private readonly ConnectionOptions _connectionOptions;
|
||||
private readonly IPduPipeline _pipeline;
|
||||
private readonly ILogger<PlcListener> _listenerLogger;
|
||||
private readonly ILogger<PlcMultiplexer> _multiplexerLogger;
|
||||
private readonly ILogger _pipeLogger;
|
||||
private readonly PerPlcContext? _perPlcContext;
|
||||
private readonly ResiliencePipeline? _backendConnectPipeline;
|
||||
|
||||
private TcpListener? _listener;
|
||||
private PlcMultiplexer? _multiplexer;
|
||||
private bool _disposed;
|
||||
|
||||
// Track active pipe-handling tasks so DisposeAsync can wait for them.
|
||||
private readonly ConcurrentDictionary<Guid, Task> _pipeTasks = new();
|
||||
|
||||
/// <summary>
|
||||
/// Live collection of active <see cref="UpstreamPipe"/> instances for this listener.
|
||||
/// Consumed by the status page to report per-client telemetry. Empty when the
|
||||
/// multiplexer has not yet been constructed (e.g., between StopAsync and a fresh start).
|
||||
/// </summary>
|
||||
public IReadOnlyCollection<UpstreamPipe> ActiveUpstreams
|
||||
=> _multiplexer?.AttachedPipes ?? Array.Empty<UpstreamPipe>();
|
||||
|
||||
/// <summary>
/// Stores the listener's collaborators. Nothing is bound or connected here; the socket
/// and the multiplexer are created by <see cref="StartAsync"/>.
/// </summary>
/// <param name="plc">Configuration of the PLC this listener fronts.</param>
/// <param name="connectionOptions">Connection settings handed to the multiplexer.</param>
/// <param name="pipeline">PDU pipeline handed to the multiplexer.</param>
/// <param name="listenerLogger">Logger for bind / fault events of this listener.</param>
/// <param name="multiplexerLogger">Logger handed to the multiplexer.</param>
/// <param name="pipeLogger">Logger handed to each upstream pipe and to the fallback context.</param>
/// <param name="perPlcContext">Optional shared context; a minimal one is built at start when omitted.</param>
/// <param name="backendConnectPipeline">Optional resilience pipeline handed to the multiplexer.</param>
public PlcListener(
    PlcOptions plc,
    ConnectionOptions connectionOptions,
    IPduPipeline pipeline,
    ILogger<PlcListener> listenerLogger,
    ILogger<PlcMultiplexer> multiplexerLogger,
    ILogger pipeLogger,
    PerPlcContext? perPlcContext = null,
    ResiliencePipeline? backendConnectPipeline = null)
{
    // Configuration.
    _plc = plc;
    _connectionOptions = connectionOptions;

    // Loggers.
    _listenerLogger = listenerLogger;
    _multiplexerLogger = multiplexerLogger;
    _pipeLogger = pipeLogger;

    // Pipeline and optional collaborators.
    _pipeline = pipeline;
    _perPlcContext = perPlcContext;
    _backendConnectPipeline = backendConnectPipeline;
}
|
||||
|
||||
/// <summary>
/// Binds the listen socket on all interfaces at the PLC's configured port, logs the bind,
/// and constructs the multiplexer. Synchronous despite the name. Throws
/// <see cref="SocketException"/> on bind failure; the caller
/// (<see cref="Supervision.PlcListenerSupervisor"/>) catches and logs
/// <c>mbproxy.startup.bind.failed</c>.
/// </summary>
public void StartAsync()
{
    _listener = new TcpListener(new IPEndPoint(IPAddress.Any, _plc.ListenPort));
    _listener.Start();
    LogBound(_listenerLogger, _plc.Name, _plc.ListenPort);

    // The multiplexer requires a context. When the caller did not supply one, fall back
    // to a minimal context carrying only the PLC name and the pipe logger; tag map and
    // counters keep their defaults.
    PerPlcContext context;
    if (_perPlcContext is not null)
    {
        context = _perPlcContext;
    }
    else
    {
        context = new PerPlcContext
        {
            PlcName = _plc.Name,
            Logger = _pipeLogger,
        };
    }

    _multiplexer = new PlcMultiplexer(
        _plc,
        _connectionOptions,
        _pipeline,
        context,
        _multiplexerLogger,
        _backendConnectPipeline);
}
|
||||
|
||||
/// <summary>
/// Runs the accept loop until <paramref name="ct"/> is cancelled or the listener faults.
/// Each accepted socket is wrapped in an <see cref="UpstreamPipe"/> and handed to the
/// multiplexer on a background task; the pipe is disposed when that task ends. Running
/// tasks are tracked in <c>_pipeTasks</c> so <see cref="DisposeAsync"/> can wait for them.
///
/// <para>A client whose socket fails during pipe construction is dropped and the loop
/// keeps accepting; only a failure of the listener itself ends the loop.</para>
/// </summary>
/// <param name="ct">Stops the accept loop and is passed to each pipe's multiplexer session.</param>
/// <exception cref="InvalidOperationException">
/// <see cref="StartAsync"/> has not been called, so there is no listener or multiplexer.
/// </exception>
public async Task RunAsync(CancellationToken ct)
{
    // Capture both into locals: DisposeAsync nulls the _multiplexer field, and a pipe task
    // that re-read the field after that would fail with a NullReferenceException.
    TcpListener listener = _listener
        ?? throw new InvalidOperationException("StartAsync must be called before RunAsync.");
    PlcMultiplexer multiplexer = _multiplexer
        ?? throw new InvalidOperationException("StartAsync must construct the multiplexer before RunAsync.");

    try
    {
        while (!ct.IsCancellationRequested)
        {
            Socket upstream = await listener.AcceptSocketAsync(ct).ConfigureAwait(false);

            UpstreamPipe pipe;
            try
            {
                pipe = new UpstreamPipe(upstream, _plc.Name, _pipeLogger);
            }
            catch (Exception ex) when (ex is SocketException or ObjectDisposedException)
            {
                // The pipe constructor touches the socket (NoDelay, RemoteEndPoint), which
                // can fail if the client reset right after accept. That is one bad client,
                // not a listener fault: release the socket and keep accepting.
                _listenerLogger.LogWarning(
                    ex, "Dropping upstream that failed during setup: Plc={Plc}", _plc.Name);
                upstream.Dispose();
                continue;
            }

            Task pipeTask = Task.Run(async () =>
            {
                try
                {
                    await multiplexer.StartPipeAsync(pipe, ct).ConfigureAwait(false);
                }
                finally
                {
                    await pipe.DisposeAsync().ConfigureAwait(false);
                }
            }, CancellationToken.None);

            // Register first, then attach the removal continuation, so the removal can
            // never run ahead of the registration.
            _pipeTasks[pipe.Id] = pipeTask;
            _ = pipeTask.ContinueWith(prev => _pipeTasks.TryRemove(pipe.Id, out _), TaskScheduler.Default);
        }
    }
    catch (OperationCanceledException)
    {
        // Normal shutdown.
    }
    catch (Exception ex)
    {
        // Listener faulted — log and return. The supervisor will restart.
        LogListenerFaulted(_listenerLogger, _plc.Name, _plc.ListenPort, ex.Message);
    }
}
|
||||
|
||||
// ── IAsyncDisposable ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Stops the listener, disposes the multiplexer, then waits up to five seconds for the
/// tracked pipe tasks to finish. Failures and timeouts while waiting are ignored. Calls
/// after the first return immediately.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    _listener?.Stop();

    if (_multiplexer is { } multiplexer)
    {
        await multiplexer.DisposeAsync().ConfigureAwait(false);
        _multiplexer = null;
    }

    Task[] pending = _pipeTasks.Values.ToArray();
    if (pending.Length == 0)
    {
        return;
    }

    using var deadline = new CancellationTokenSource(TimeSpan.FromSeconds(5));
    try
    {
        Task allPipes = Task.WhenAll(pending);
        await allPipes.WaitAsync(deadline.Token).ConfigureAwait(false);
    }
    catch
    {
        // Best effort.
    }
}
|
||||
|
||||
// ── Logging ───────────────────────────────────────────────────────────────────────────
|
||||
|
||||
[LoggerMessage(EventId = 20, EventName = "mbproxy.startup.bind",
|
||||
Level = LogLevel.Information, Message = "Listener bound: Plc={Plc} Port={Port}")]
|
||||
private static partial void LogBound(ILogger logger, string plc, int port);
|
||||
|
||||
[LoggerMessage(EventId = 22, EventName = "mbproxy.listener.faulted",
|
||||
Level = LogLevel.Error, Message = "Listener faulted: Plc={Plc} Port={Port} Reason={Reason}")]
|
||||
private static partial void LogListenerFaulted(ILogger logger, string plc, int port, string reason);
|
||||
}
|
||||
@@ -0,0 +1,336 @@
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
/// Immutable snapshot of per-PLC counters. Consumed by Phase 07's status page.
/// All fields are point-in-time reads; no ordering guarantees across fields.
///
/// <para><b>Backwards-compat policy (see docs/kpi.md):</b> fields are <i>added</i>, never
/// renamed or removed. Phase 9 appended <c>InFlightCount</c>, <c>MaxInFlight</c>,
/// <c>TxIdWraps</c>, <c>BackendDisconnectCascades</c>, and <c>BackendQueueDepth</c> for
/// the TxId-multiplexer telemetry surface (Tier 1.6 in docs/kpi.md).</para>
/// </summary>
/// <param name="PdusForwarded">Value of the PDUs-forwarded counter.</param>
/// <param name="Fc03">Value of the FC03 counter.</param>
/// <param name="Fc04">Value of the FC04 counter.</param>
/// <param name="Fc06">Value of the FC06 counter.</param>
/// <param name="Fc16">Value of the FC16 counter.</param>
/// <param name="FcOther">Value of the other-function-code counter.</param>
/// <param name="RewrittenSlots">Value of the rewritten-slots counter.</param>
/// <param name="PartialBcdWarnings">Value of the partial-BCD warning counter.</param>
/// <param name="InvalidBcdWarnings">Value of the invalid-BCD warning counter.</param>
/// <param name="BackendException01">Value of the backend exception 01 counter.</param>
/// <param name="BackendException02">Value of the backend exception 02 counter.</param>
/// <param name="BackendException03">Value of the backend exception 03 counter.</param>
/// <param name="BackendException04">Value of the backend exception 04 counter.</param>
/// <param name="BackendExceptionOther">Value of the other-backend-exception counter.</param>
/// <param name="BytesUpstreamIn">Value of the upstream bytes-in counter.</param>
/// <param name="BytesUpstreamOut">Value of the upstream bytes-out counter.</param>
/// <param name="RecoveryAttempts">
/// Total number of failed listener bind attempts over the lifetime of the supervisor.
/// Accumulates; never resets. See <see cref="SupervisorSnapshot.RecoveryAttempts"/> doc.
/// </param>
/// <param name="LastBindError">
/// Most recent bind failure message (up to 256 chars); <c>null</c> if the listener
/// has never failed to bind.
/// </param>
/// <param name="LastRoundTripMs">
/// EWMA of recent backend round-trip times in milliseconds (α = 0.2).
/// Zero when no successful round-trips have been observed yet.
/// Stored internally as fixed-point microseconds (long * 1000) for Interlocked
/// compatibility; converted to double ms on snapshot.
/// </param>
/// <param name="ConnectsSuccess">
/// Number of backend connections successfully established (Polly final success).
/// </param>
/// <param name="ConnectsFailed">
/// Number of backend connections that failed on all Polly attempts.
/// </param>
/// <param name="InFlightCount">
/// Number of Modbus requests currently in flight on this PLC's multiplexed backend
/// connection (point-in-time snapshot of the correlation map size). Phase 9.
/// </param>
/// <param name="MaxInFlight">
/// Peak <paramref name="InFlightCount"/> observed since the multiplexer was constructed.
/// Updated via <see cref="System.Threading.Interlocked"/> CAS so concurrent in-flight
/// increments do not lose the high-water mark. Phase 9.
/// </param>
/// <param name="TxIdWraps">
/// Number of times the per-PLC TxId allocator's rolling cursor has wrapped
/// 0xFFFF → 0x0000. A non-zero value is benign; a sudden burst suggests extreme
/// in-flight churn. Phase 9.
/// </param>
/// <param name="BackendDisconnectCascades">
/// Cumulative count of upstream pipes closed as a side effect of a backend disconnect.
/// Each backend reconnect cycle adds the number of attached upstream clients at the
/// time of the disconnect. Phase 9.
/// </param>
/// <param name="BackendQueueDepth">
/// Current depth of the per-PLC outbound channel feeding the backend writer task
/// (frames queued, not yet on the wire). A sustained non-zero value indicates the
/// backend is slower than upstream demand. Phase 9.
/// </param>
public sealed record CounterSnapshot(
    long PdusForwarded,
    long Fc03,
    long Fc04,
    long Fc06,
    long Fc16,
    long FcOther,
    long RewrittenSlots,
    long PartialBcdWarnings,
    long InvalidBcdWarnings,
    long BackendException01,
    long BackendException02,
    long BackendException03,
    long BackendException04,
    long BackendExceptionOther,
    long BytesUpstreamIn,
    long BytesUpstreamOut,
    long RecoveryAttempts,
    string? LastBindError,
    double LastRoundTripMs,
    long ConnectsSuccess,
    long ConnectsFailed,
    long InFlightCount,
    long MaxInFlight,
    long TxIdWraps,
    long BackendDisconnectCascades,
    long BackendQueueDepth);
|
||||
|
||||
/// <summary>
/// Lock-free per-PLC telemetry. Every numeric counter is a <see cref="long"/> field that is
/// only touched through <see cref="System.Threading.Interlocked"/>, so the increment
/// methods need no lock and perform no allocation.
/// <see cref="Snapshot"/> constructs a record and is intended for the status page only.
/// </summary>
internal sealed class ProxyCounters
{
    // ── Traffic counters (Interlocked longs) ─────────────────────────────────

    private long _pdusForwarded;
    private long _fc03;
    private long _fc04;
    private long _fc06;
    private long _fc16;
    private long _fcOther;
    private long _bytesUpstreamIn;
    private long _bytesUpstreamOut;

    // ── BCD rewrite counters ─────────────────────────────────────────────────

    private long _rewrittenSlots;
    private long _partialBcdWarnings;
    private long _invalidBcdWarnings;

    // ── Backend Modbus exception counters (codes 1–4, everything else) ───────

    private long _backendException01;
    private long _backendException02;
    private long _backendException03;
    private long _backendException04;
    private long _backendExceptionOther;

    // ── Listener / backend connection counters ───────────────────────────────

    private long _recoveryAttempts;
    private long _connectsSuccess;
    private long _connectsFailed;

    // ── Phase 9 multiplexer telemetry ────────────────────────────────────────

    private long _maxInFlight;
    private long _backendDisconnectCascades;

    // Source of the live values (in-flight count, TxId wraps, queue depth) that are
    // pulled at snapshot time rather than counted here. Volatile so Snapshot can read
    // the reference without a lock.
    private volatile IMultiplexCountersProvider? _multiplexProvider;

    // Text of the most recent bind failure (already truncated to 256 chars), or null.
    // Written by IncrementRecoveryAttempt / ClearLastBindError, read by Snapshot.
    private volatile string? _lastBindError;

    // Exponentially weighted moving average of the backend round-trip time, held as
    // whole microseconds in a long so it can be updated with Interlocked.CompareExchange.
    // Snapshot divides by 1000 to report milliseconds. 0 means "no sample recorded yet".
    private long _lastRoundTripUsEwma;

    // ── Increment methods ────────────────────────────────────────────────────

    /// <summary>Counts one forwarded PDU.</summary>
    public void IncrementPdusForwarded()
    {
        Interlocked.Increment(ref _pdusForwarded);
    }

    /// <summary>
    /// Counts one request by Modbus function code. 0x03, 0x04, 0x06 and 0x10 have
    /// dedicated counters; every other code lands in the "other" bucket.
    /// </summary>
    public void IncrementFcCount(byte fc)
    {
        if (fc == 0x03)
        {
            Interlocked.Increment(ref _fc03);
        }
        else if (fc == 0x04)
        {
            Interlocked.Increment(ref _fc04);
        }
        else if (fc == 0x06)
        {
            Interlocked.Increment(ref _fc06);
        }
        else if (fc == 0x10)
        {
            Interlocked.Increment(ref _fc16);
        }
        else
        {
            Interlocked.Increment(ref _fcOther);
        }
    }

    /// <summary>Adds <paramref name="n"/> to the rewritten-slot total.</summary>
    public void AddRewrittenSlots(int n)
    {
        Interlocked.Add(ref _rewrittenSlots, n);
    }

    /// <summary>Counts one partial-BCD warning.</summary>
    public void IncrementPartialBcd()
    {
        Interlocked.Increment(ref _partialBcdWarnings);
    }

    /// <summary>Counts one invalid-BCD warning.</summary>
    public void IncrementInvalidBcd()
    {
        Interlocked.Increment(ref _invalidBcdWarnings);
    }

    /// <summary>
    /// Counts one backend exception response. Codes 1 through 4 each have their own
    /// counter; any other code is added to the "other" bucket.
    /// </summary>
    public void IncrementBackendException(byte code)
    {
        // Pick the bucket first, then do a single increment on it.
        ref long bucket = ref _backendExceptionOther;
        switch (code)
        {
            case 1: bucket = ref _backendException01; break;
            case 2: bucket = ref _backendException02; break;
            case 3: bucket = ref _backendException03; break;
            case 4: bucket = ref _backendException04; break;
        }

        Interlocked.Increment(ref bucket);
    }

    /// <summary>
    /// Adds <paramref name="up"/> to the upstream-in byte total and
    /// <paramref name="down"/> to the upstream-out byte total. Each addition is atomic
    /// on its own; the pair is not applied as a single atomic unit.
    /// </summary>
    public void AddBytes(long up, long down)
    {
        Interlocked.Add(ref _bytesUpstreamIn, up);
        Interlocked.Add(ref _bytesUpstreamOut, down);
    }

    /// <summary>Counts one successful backend connect.</summary>
    public void IncrementConnectSuccess()
    {
        Interlocked.Increment(ref _connectsSuccess);
    }

    /// <summary>Counts one failed backend connect.</summary>
    public void IncrementConnectFailed()
    {
        Interlocked.Increment(ref _connectsFailed);
    }

    /// <summary>
    /// Adds <paramref name="n"/> to the cumulative count of upstream pipes closed by a
    /// backend disconnect cascade. Phase 9.
    /// </summary>
    public void AddDisconnectCascades(int n)
    {
        Interlocked.Add(ref _backendDisconnectCascades, n);
    }

    /// <summary>
    /// Raises the peak in-flight high-water mark to <paramref name="currentInFlight"/>
    /// when that value exceeds the stored peak. Uses a compare-and-swap retry loop so a
    /// concurrent, larger observation is never overwritten by a smaller one. Phase 9.
    /// </summary>
    public void ObserveInFlight(int currentInFlight)
    {
        long candidate = currentInFlight;

        while (true)
        {
            long seen = Interlocked.Read(ref _maxInFlight);
            if (candidate <= seen)
            {
                return; // Stored peak is already at least as high.
            }

            if (Interlocked.CompareExchange(ref _maxInFlight, candidate, seen) == seen)
            {
                return; // Our value was published.
            }

            // Another writer changed the peak in between; re-read and re-evaluate.
        }
    }

    /// <summary>
    /// Registers (or clears, when <paramref name="provider"/> is null) the source of
    /// live multiplexer values that <see cref="Snapshot"/> reads. Phase 9.
    /// </summary>
    internal void SetMultiplexProvider(IMultiplexCountersProvider? provider)
    {
        _multiplexProvider = provider;
    }

    /// <summary>
    /// Counts one recovery attempt and remembers <paramref name="errorMessage"/> as the
    /// last bind error, keeping at most its first 256 characters.
    /// </summary>
    public void IncrementRecoveryAttempt(string errorMessage)
    {
        Interlocked.Increment(ref _recoveryAttempts);

        string stored = errorMessage;
        if (stored.Length > 256)
        {
            stored = stored[..256];
        }

        _lastBindError = stored;
    }

    /// <summary>Forgets the last recorded bind error.</summary>
    public void ClearLastBindError()
    {
        _lastBindError = null;
    }

    /// <summary>
    /// Folds a new round-trip sample into the moving average:
    /// <c>new = 0.2 * sample + 0.8 * old</c>; the very first sample is stored as-is.
    /// <paramref name="elapsedTicks"/> is a duration in
    /// <see cref="System.Diagnostics.Stopwatch"/> ticks (it is scaled by
    /// <see cref="System.Diagnostics.Stopwatch.Frequency"/>).
    /// Safe for concurrent callers: the update is a compare-and-swap retry loop.
    /// </summary>
    public void UpdateRoundTripEwma(long elapsedTicks)
    {
        // Stopwatch ticks → milliseconds.
        double sampleMs = (double)elapsedTicks / System.Diagnostics.Stopwatch.Frequency * 1000.0;

        // Milliseconds → whole microseconds, the unit the field is stored in.
        long sampleUs = (long)(sampleMs * 1000.0);

        while (true)
        {
            long previous = Interlocked.Read(ref _lastRoundTripUsEwma);

            long blended;
            if (previous == 0)
            {
                // 0 is the "no samples yet" sentinel: seed with the sample itself.
                blended = sampleUs;
            }
            else
            {
                blended = (long)(0.2 * sampleUs + 0.8 * previous);
            }

            if (Interlocked.CompareExchange(ref _lastRoundTripUsEwma, blended, previous) == previous)
            {
                return;
            }
        }
    }

    // ── Snapshot (off hot-path, may allocate) ────────────────────────────────

    /// <summary>
    /// Builds a <see cref="CounterSnapshot"/> from the current values. Each counter is
    /// read individually with <see cref="Interlocked.Read"/>, so every field is a valid
    /// value but the set as a whole is not captured at one single instant. Live
    /// multiplexer values come from the registered provider and are reported as 0 when
    /// none is registered.
    /// </summary>
    public CounterSnapshot Snapshot()
    {
        // Read the provider reference once so all three live values come from the same object.
        IMultiplexCountersProvider? source = _multiplexProvider;

        long inFlight = 0;
        long wraps = 0;
        long queued = 0;
        if (source is not null)
        {
            inFlight = source.InFlightCount;
            wraps = source.TxIdWraps;
            queued = source.BackendQueueDepth;
        }

        return new CounterSnapshot(
            PdusForwarded: Interlocked.Read(ref _pdusForwarded),
            Fc03: Interlocked.Read(ref _fc03),
            Fc04: Interlocked.Read(ref _fc04),
            Fc06: Interlocked.Read(ref _fc06),
            Fc16: Interlocked.Read(ref _fc16),
            FcOther: Interlocked.Read(ref _fcOther),
            RewrittenSlots: Interlocked.Read(ref _rewrittenSlots),
            PartialBcdWarnings: Interlocked.Read(ref _partialBcdWarnings),
            InvalidBcdWarnings: Interlocked.Read(ref _invalidBcdWarnings),
            BackendException01: Interlocked.Read(ref _backendException01),
            BackendException02: Interlocked.Read(ref _backendException02),
            BackendException03: Interlocked.Read(ref _backendException03),
            BackendException04: Interlocked.Read(ref _backendException04),
            BackendExceptionOther: Interlocked.Read(ref _backendExceptionOther),
            BytesUpstreamIn: Interlocked.Read(ref _bytesUpstreamIn),
            BytesUpstreamOut: Interlocked.Read(ref _bytesUpstreamOut),
            RecoveryAttempts: Interlocked.Read(ref _recoveryAttempts),
            LastBindError: _lastBindError,
            // Stored in microseconds; reported in milliseconds.
            LastRoundTripMs: Interlocked.Read(ref _lastRoundTripUsEwma) / 1000.0,
            ConnectsSuccess: Interlocked.Read(ref _connectsSuccess),
            ConnectsFailed: Interlocked.Read(ref _connectsFailed),
            InFlightCount: inFlight,
            MaxInFlight: Interlocked.Read(ref _maxInFlight),
            TxIdWraps: wraps,
            BackendDisconnectCascades: Interlocked.Read(ref _backendDisconnectCascades),
            BackendQueueDepth: queued);
    }
}
|
||||
|
||||
/// <summary>
/// Supplies the multiplexer values that are read live at snapshot time instead of being
/// counted: in-flight requests, TxId wrap events and outbound queue depth.
/// An implementation is handed to <see cref="ProxyCounters.SetMultiplexProvider"/>, and
/// <see cref="ProxyCounters.Snapshot"/> queries it through this interface only, so the
/// counters do not need a reference to the concrete multiplexer type
/// (<see cref="Mbproxy.Proxy.Multiplexing.PlcMultiplexer"/>). Phase 9.
/// </summary>
internal interface IMultiplexCountersProvider
{
    /// <summary>Requests currently in flight on the backend socket.</summary>
    long InFlightCount { get; }

    /// <summary>Running total of TxId allocator wraps (0xFFFF → 0x0000).</summary>
    long TxIdWraps { get; }

    /// <summary>Frames currently queued in the outbound channel for the backend writer.</summary>
    long BackendQueueDepth { get; }
}
|
||||
@@ -0,0 +1,218 @@
|
||||
using Mbproxy.Bcd;
|
||||
using Mbproxy.Configuration;
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy.Multiplexing;
|
||||
using Mbproxy.Proxy.Supervision;
|
||||
using Microsoft.Extensions.Options;
|
||||
using Polly;
|
||||
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
/// <see cref="BackgroundService"/> that owns all <see cref="PlcListenerSupervisor"/> instances.
///
/// Startup posture (matches design doc "eager, continue on per-port failure"):
/// <list type="number">
///   <item>Enumerate <see cref="MbproxyOptions.Plcs"/> and build one supervisor per PLC.
///         A PLC whose BCD tag map has errors is logged and skipped.</item>
///   <item>Hand the supervisor dictionary and the initial options snapshot to
///         <see cref="ConfigReconciler.Attach"/> (Phase 06). This happens after the
///         supervisors are built and before any of them is started.</item>
///   <item>Start all supervisors in parallel. Each supervisor attempts to bind immediately
///         and enters the Polly recovery loop if the bind fails.</item>
///   <item>Wait (at most 30 s) for every supervisor to complete its first bind attempt
///         (reach <see cref="SupervisorState.Bound"/> or <see cref="SupervisorState.Recovering"/>),
///         then log <c>mbproxy.startup.ready</c> with bound/configured counts.</item>
/// </list>
///
/// Stop: stops all supervisors in parallel with a 5-second hard deadline, then disposes
/// each one. Failures while stopping or disposing are logged and do not abort shutdown.
/// </summary>
internal sealed partial class ProxyWorker : BackgroundService
{
    private readonly IOptionsMonitor<MbproxyOptions> _options;
    private readonly IPduPipeline _pipeline;
    private readonly ILogger<ProxyWorker> _logger;
    private readonly ILoggerFactory _loggerFactory;
    private readonly ConfigReconciler _reconciler;

    // Phase 06: supervisors are managed jointly by ProxyWorker (initial bootstrap) and
    // ConfigReconciler (subsequent hot-reload changes). The same dictionary instance is
    // handed to ConfigReconciler.Attach() in ExecuteAsync, before the supervisors start.
    private readonly Dictionary<string, PlcListenerSupervisor> _supervisors = new(StringComparer.Ordinal);

    /// <summary>
    /// Read-only view of the live supervisor dictionary. Consumed by Phase 07's
    /// <see cref="Admin.StatusSnapshotBuilder"/> to enumerate per-PLC state.
    /// The caller should read this on the status-page path only (not the hot path).
    /// </summary>
    internal IReadOnlyDictionary<string, PlcListenerSupervisor> Supervisors => _supervisors;

    /// <summary>Stores the injected dependencies; no work is started here.</summary>
    public ProxyWorker(
        IOptionsMonitor<MbproxyOptions> options,
        IPduPipeline pipeline,
        ILogger<ProxyWorker> logger,
        ILoggerFactory loggerFactory,
        ConfigReconciler reconciler)
    {
        _options = options;
        _pipeline = pipeline;
        _logger = logger;
        _loggerFactory = loggerFactory;
        _reconciler = reconciler;
    }

    /// <summary>
    /// Builds and starts the per-PLC supervisors, logs the startup-ready event, then
    /// idles until <paramref name="stoppingToken"/> is cancelled.
    /// </summary>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        var opts = _options.CurrentValue;
        int plcsConfigured = opts.Plcs.Count;

        // ── 1. Build per-PLC BCD tag maps ────────────────────────────────────────────
        var plcContexts = new Dictionary<string, PerPlcContext>(opts.Plcs.Count, StringComparer.Ordinal);

        foreach (var plc in opts.Plcs)
        {
            var result = BcdTagMapBuilder.Build(opts.BcdTags, plc.BcdTags);

            foreach (var warn in result.Warnings)
                _logger.LogWarning("[{Plc}] BCD tag map warning: {Message}", plc.Name, warn.Message);

            if (result.Errors.Count > 0)
            {
                foreach (var err in result.Errors)
                    _logger.LogError("[{Plc}] BCD tag map error ({Kind}): {Message}",
                        plc.Name, err.Kind, err.Message);

                _logger.LogError("Skipping listener for PLC '{Plc}' due to BCD tag map errors.", plc.Name);
                continue;
            }

            plcContexts[plc.Name] = new PerPlcContext
            {
                PlcName = plc.Name,
                TagMap = result.Map,
                Counters = new ProxyCounters(),
                Logger = _loggerFactory.CreateLogger($"Mbproxy.Proxy.BcdRewriter.{plc.Name}"),
            };
        }

        // ── 2. Build the shared backend-connect pipeline once ────────────────────────
        // The backend-connect pipeline is built from ResilienceOptions and reused by
        // every supervisor. The listener-recovery pipeline is built per PLC in step 3.
        var resilienceOpts = opts.Resilience;
        var backendPipeline = PolicyFactory.BuildBackendConnect(
            resilienceOpts.BackendConnect,
            _loggerFactory.CreateLogger("Mbproxy.Proxy.BackendConnect"));

        // ── 3. Build supervisors ──────────────────────────────────────────────────────
        foreach (var plc in opts.Plcs)
        {
            if (!plcContexts.TryGetValue(plc.Name, out var perPlcContext))
                continue; // BCD map failed — skip this PLC.

            // Each supervisor gets its own recovery pipeline (with its own logger category).
            var recoveryPipeline = PolicyFactory.BuildListenerRecovery(
                resilienceOpts.ListenerRecovery,
                _loggerFactory.CreateLogger($"Mbproxy.Proxy.ListenerRecovery.{plc.Name}"));

            var supervisor = new PlcListenerSupervisor(
                plc,
                opts.Connection,
                _pipeline,
                _loggerFactory.CreateLogger<PlcListener>(),
                _loggerFactory.CreateLogger<PlcMultiplexer>(),
                _loggerFactory.CreateLogger($"Mbproxy.Proxy.UpstreamPipe.{plc.Name}"),
                perPlcContext,
                recoveryPipeline,
                _loggerFactory.CreateLogger<PlcListenerSupervisor>(),
                backendPipeline);

            _supervisors[plc.Name] = supervisor;
        }

        // ── Phase 06: wire reconciler BEFORE starting supervisors ─────────────────
        // Attach hands the reconciler the authoritative supervisor dictionary and the
        // initial options snapshot. The reconciler won't process OnChange events until
        // after this call — the brief window between Attach and first supervisor start
        // is safe because the channel signal only enqueues; apply runs asynchronously.
        _reconciler.Attach(_supervisors, opts);

        if (_supervisors.Count == 0)
        {
            // Nothing to start: report readiness with zero listeners and idle until stop.
            LogStartupReady(_logger, 0, plcsConfigured);
            await Task.Delay(Timeout.Infinite, stoppingToken).ConfigureAwait(false);
            return;
        }

        // ── 4. Start all supervisors in parallel ──────────────────────────────────────
        var startTasks = _supervisors.Values
            .Select(s => s.StartAsync(stoppingToken))
            .ToArray();
        await Task.WhenAll(startTasks).ConfigureAwait(false);

        // ── 5. Wait for every supervisor to complete its first bind attempt ───────────
        // "Ready" = every supervisor has transitioned out of Stopped (i.e. reached
        // Bound or Recovering from its first attempt).
        using var readyCts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
        using var readyLinked = CancellationTokenSource.CreateLinkedTokenSource(
            readyCts.Token, stoppingToken);

        var waitTasks = _supervisors.Values
            .Select(s => s.WaitForInitialBindAttemptAsync(readyLinked.Token))
            .ToArray();

        try
        {
            await Task.WhenAll(waitTasks).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // Either the 30 s deadline fired or the service is stopping. Fall through
            // and report whatever is bound right now.
        }

        int boundCount = _supervisors.Values.Count(s => s.Snapshot().State == SupervisorState.Bound);
        LogStartupReady(_logger, boundCount, plcsConfigured);

        // ── 6. Keep the worker alive until the host signals stop ─────────────────────
        // Supervisors run their own background loops; ExecuteAsync just waits.
        await Task.Delay(Timeout.Infinite, stoppingToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Stops <see cref="ExecuteAsync"/>, then stops every supervisor in parallel under a
    /// 5-second deadline (also bounded by <paramref name="cancellationToken"/>), and
    /// finally disposes each supervisor and clears the dictionary.
    /// Shutdown is best effort: a failure while stopping or disposing one supervisor is
    /// logged at Warning and never prevents the remaining supervisors from being disposed.
    /// </summary>
    public override async Task StopAsync(CancellationToken cancellationToken)
    {
        // Cancel ExecuteAsync first.
        await base.StopAsync(cancellationToken).ConfigureAwait(false);

        // Snapshot once so the stop pass and the dispose pass operate on the same set.
        var supervisors = _supervisors.Values.ToArray();

        // Stop all supervisors in parallel with a 5-second hard deadline.
        using var stopCts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(
            stopCts.Token, cancellationToken);

        try
        {
            var stopTasks = supervisors
                .Select(s => s.StopAsync(linked.Token))
                .ToArray();

            await Task.WhenAll(stopTasks).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // Best effort — don't let individual supervisor failures block shutdown,
            // but leave a trace so the failure is diagnosable.
            _logger.LogWarning(ex, "One or more PLC supervisors did not stop cleanly; continuing shutdown.");
        }

        foreach (var supervisor in supervisors)
        {
            try
            {
                await supervisor.DisposeAsync().ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                // One failing dispose must not leave the remaining supervisors undisposed.
                _logger.LogWarning(ex, "[{Plc}] Supervisor dispose failed during shutdown.", supervisor.PlcName);
            }
        }

        _supervisors.Clear();
    }

    // ── Logging ───────────────────────────────────────────────────────────────────────────

    [LoggerMessage(EventId = 1, EventName = "mbproxy.startup.ready",
        Level = LogLevel.Information,
        Message = "mbproxy service ready — ListenersBound={ListenersBound} PlcsConfigured={PlcsConfigured}")]
    private static partial void LogStartupReady(ILogger logger, int listenersBound, int plcsConfigured);

    [LoggerMessage(EventId = 21, EventName = "mbproxy.startup.bind.failed",
        Level = LogLevel.Error,
        Message = "Failed to bind listener: Plc={Plc} Port={Port} Reason={Reason}")]
    private static partial void LogBindFailed(ILogger logger, string plc, int port, string reason);
}
|
||||
@@ -0,0 +1,56 @@
|
||||
namespace Mbproxy.Proxy;
|
||||
|
||||
/// <summary>
/// <see cref="LoggerMessage"/> source-generated log methods used by the BCD rewriter
/// pipeline. Event ids and event names are a stable contract — renaming one requires a
/// matching update to docs/design.md.
/// </summary>
internal static partial class RewriterLogEvents
{
    /// <summary>
    /// Warning 30: a 32-bit BCD pair is only partly inside the client's read/write range.
    /// The data is passed through raw (not rewritten), so the peer sees the original nibbles.
    /// </summary>
    [LoggerMessage(
        Level = LogLevel.Warning,
        EventId = 30,
        EventName = "mbproxy.rewrite.partial_bcd",
        Message = "Partial BCD overlap — passing through raw: Plc={PlcName} Address={Address} ClientStart={ClientStart} ClientQty={ClientQty}")]
    public static partial void PartialBcd(
        ILogger logger,
        string plcName,
        ushort address,
        ushort clientStart,
        ushort clientQty);

    /// <summary>
    /// Warning 31: the register at a configured BCD address holds a nibble of 0xA or
    /// above, which is not a BCD digit. The value is passed through raw.
    /// <paramref name="direction"/> is "Read" (response from PLC) or "Write" (request
    /// from client).
    /// </summary>
    [LoggerMessage(
        Level = LogLevel.Warning,
        EventId = 31,
        EventName = "mbproxy.rewrite.invalid_bcd",
        Message = "Invalid BCD nibble — passing through raw: Plc={PlcName} Address={Address} RawValue=0x{RawValue:X4} Direction={Direction}")]
    public static partial void InvalidBcd(
        ILogger logger,
        string plcName,
        ushort address,
        ushort rawValue,
        string direction);

    /// <summary>
    /// Information 32: the PLC answered with a Modbus exception response (high bit set
    /// on the function-code byte). The frame is forwarded to the client as received.
    /// </summary>
    [LoggerMessage(
        Level = LogLevel.Information,
        EventId = 32,
        EventName = "mbproxy.exception.passthrough",
        Message = "Modbus exception forwarded: Plc={PlcName} Fc=0x{Fc:X2} ExceptionCode={ExceptionCode}")]
    public static partial void ExceptionPassthrough(
        ILogger logger,
        string plcName,
        byte fc,
        byte exceptionCode);
}
|
||||
@@ -0,0 +1,404 @@
|
||||
using Mbproxy.Options;
|
||||
using Mbproxy.Proxy.Multiplexing;
|
||||
using Polly;
|
||||
|
||||
namespace Mbproxy.Proxy.Supervision;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps one <see cref="PlcListener"/> in a Polly-backed recovery loop.
|
||||
///
|
||||
/// <para><b>State machine</b>:
|
||||
/// <list type="bullet">
|
||||
/// <item><description><b>Bound</b>: listener is accepting connections; <see cref="PlcListener.RunAsync"/> is awaiting.</description></item>
|
||||
/// <item><description><b>Recovering</b>: bind failed or RunAsync faulted; in Polly's delay window before the next attempt.</description></item>
|
||||
/// <item><description><b>Stopped</b>: terminal. <see cref="StopAsync"/> was called; no further retries.</description></item>
|
||||
/// </list>
|
||||
/// </para>
|
||||
///
|
||||
/// <para><b>RecoveryAttempts</b>: the counter accumulates over the lifetime of the
|
||||
/// supervisor. It is never reset after a successful re-bind so operators can see
|
||||
/// "this listener has flapped N times since the service started." See also
|
||||
/// <see cref="SupervisorSnapshot"/> doc comment.</para>
|
||||
///
|
||||
/// <para>The supervisor does NOT swallow exceptions from <see cref="PlcListener.RunAsync"/>
|
||||
/// except <see cref="OperationCanceledException"/>. Every other fault is logged at Warning
|
||||
/// with the exception message so operators can see WHY the listener was restarted.</para>
|
||||
/// </summary>
|
||||
internal sealed partial class PlcListenerSupervisor : IAsyncDisposable
|
||||
{
|
||||
private readonly PlcOptions _plc;
|
||||
private readonly ConnectionOptions _connectionOptions;
|
||||
private readonly IPduPipeline _pipeline;
|
||||
private readonly ILogger<PlcListener> _listenerLogger;
|
||||
private readonly ILogger<PlcMultiplexer> _multiplexerLogger;
|
||||
private readonly ILogger _pipeLogger;
|
||||
private readonly PerPlcContext? _perPlcContext;
|
||||
private readonly ResiliencePipeline _recoveryPipeline;
|
||||
private readonly ILogger<PlcListenerSupervisor> _logger;
|
||||
private readonly ResiliencePipeline? _backendConnectPipeline;
|
||||
|
||||
// ── Mutable state ────────────────────────────────────────────────────────────────────
|
||||
|
||||
// Volatile so Snapshot() reads are coherent without locking.
|
||||
private volatile SupervisorState _state = SupervisorState.Stopped;
|
||||
private volatile string? _lastBindError;
|
||||
private int _recoveryAttempts; // Interlocked
|
||||
|
||||
// Phase 07: current active listener for status-page pair enumeration.
|
||||
private volatile PlcListener? _currentListener;
|
||||
|
||||
// Phase 06: the live context is held in _currentContext (mutable) so ReplaceContextAsync can swap it.
|
||||
// Access from the accept loop (RunAsync) and from ReplaceContextAsync must be
|
||||
// coherent; we use a volatile reference so the accept loop always reads the latest
|
||||
// context without locking. The PlcListener created on each Polly attempt holds
|
||||
// its own copy of the context at construction time; existing in-flight connections
|
||||
// keep their old reference until they complete.
|
||||
private volatile PerPlcContext? _currentContext;
|
||||
|
||||
/// <summary>
|
||||
/// Per-supervisor CTS: cancelling it stops both the Polly delay and the inner
|
||||
/// <see cref="PlcListener.RunAsync"/> loop.
|
||||
/// </summary>
|
||||
private CancellationTokenSource _supervisorCts = new();
|
||||
|
||||
private Task _supervisorTask = Task.CompletedTask;
|
||||
|
||||
private bool _disposed;
|
||||
|
||||
// ── Public surface ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>Name of the PLC this supervisor manages, taken from its <see cref="PlcOptions"/>.</summary>
public string PlcName
{
    get { return _plc.Name; }
}

/// <summary>
/// Stores the collaborators for later use; nothing is started until
/// <see cref="StartAsync"/> is called.
/// <paramref name="perPlcContext"/> is kept both as the construction-time value and as
/// the initial content of the swappable live-context slot.
/// <paramref name="backendConnectPipeline"/> is optional and defaults to null.
/// </summary>
public PlcListenerSupervisor(
    PlcOptions plc,
    ConnectionOptions connectionOptions,
    IPduPipeline pipeline,
    ILogger<PlcListener> listenerLogger,
    ILogger<PlcMultiplexer> multiplexerLogger,
    ILogger pipeLogger,
    PerPlcContext? perPlcContext,
    ResiliencePipeline recoveryPipeline,
    ILogger<PlcListenerSupervisor> logger,
    ResiliencePipeline? backendConnectPipeline = null)
{
    // Configuration and request pipeline.
    _plc = plc;
    _connectionOptions = connectionOptions;
    _pipeline = pipeline;

    // Resilience pipelines.
    _recoveryPipeline = recoveryPipeline;
    _backendConnectPipeline = backendConnectPipeline;

    // Loggers.
    _logger = logger;
    _listenerLogger = listenerLogger;
    _multiplexerLogger = multiplexerLogger;
    _pipeLogger = pipeLogger;

    // Per-PLC context: the original value plus the Phase 06 live slot seeded with it.
    _perPlcContext = perPlcContext;
    _currentContext = perPlcContext;
}
|
||||
|
||||
// Stand-in counters handed out by CurrentCounters while no context is attached.
// Created on first use and then reused, so every caller sees the same instance.
private ProxyCounters? _fallbackCounters;

/// <summary>
/// Returns the <see cref="ProxyCounters"/> of the live per-PLC context.
/// Used by <see cref="Configuration.ConfigReconciler"/> when building a reseat context
/// so that counters are preserved across a tag-map swap.
/// When no context is attached, a single lazily created fallback instance is returned
/// (the same one on every access) rather than a fresh object per call, so values
/// recorded through one access remain visible to the next.
/// </summary>
public ProxyCounters CurrentCounters
    => _currentContext?.Counters
       ?? LazyInitializer.EnsureInitialized(ref _fallbackCounters, static () => new ProxyCounters());
|
||||
|
||||
/// <summary>
/// The <see cref="UpstreamPipe"/> instances reported by the current listener, or an
/// empty collection when there is no current listener (or it reports none).
/// Consumed by Phase 07's status page (renamed from <c>ActivePairs</c> in Phase 9).
/// </summary>
public IReadOnlyCollection<UpstreamPipe> ActiveUpstreams
{
    get
    {
        // Read the volatile field once so the null check and the access use the same listener.
        PlcListener? listener = _currentListener;
        return listener?.ActiveUpstreams ?? Array.Empty<UpstreamPipe>();
    }
}
|
||||
|
||||
/// <summary>
/// Starts the supervisor loop on the thread pool and returns immediately with a
/// completed task; it does not wait for the listener to bind.
/// A new cancellation source linked to <paramref name="ct"/> replaces the current one,
/// so cancelling either <paramref name="ct"/> or the supervisor's own source stops the loop.
///
/// <para>Call <see cref="WaitForInitialBindAttemptAsync"/> afterwards to block until the
/// supervisor has left <see cref="SupervisorState.Stopped"/>.</para>
/// </summary>
public Task StartAsync(CancellationToken ct)
{
    _supervisorCts = CancellationTokenSource.CreateLinkedTokenSource(ct);

    // The token is read from the field when the delegate runs, as in the original design.
    Func<Task> runLoop = () => RunSupervisorAsync(_supervisorCts.Token);

    // CancellationToken.None: the work item itself is always scheduled; cancellation is
    // observed inside the loop through the supervisor token.
    _supervisorTask = Task.Run(runLoop, CancellationToken.None);

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Polls every 10 ms until the supervisor state is no longer
/// <see cref="SupervisorState.Stopped"/> (i.e. it has reached
/// <see cref="SupervisorState.Bound"/> or <see cref="SupervisorState.Recovering"/>).
/// Also returns when <paramref name="ct"/> is already cancelled at a check, or when the
/// supervisor task has completed. Returns without waiting if any of those conditions
/// already holds on entry. A cancellation that arrives during the 10 ms delay surfaces
/// as the exception thrown by <see cref="Task.Delay(int, CancellationToken)"/>.
/// </summary>
public async Task WaitForInitialBindAttemptAsync(CancellationToken ct)
{
    while (true)
    {
        if (_state != SupervisorState.Stopped)
        {
            return; // First bind attempt has produced a state.
        }

        if (ct.IsCancellationRequested)
        {
            return;
        }

        if (_supervisorTask.IsCompleted)
        {
            return; // Loop already ended; the state will not change on its own.
        }

        await Task.Delay(10, ct).ConfigureAwait(false);
    }
}
|
||||
|
||||
/// <summary>
/// Marks the supervisor <see cref="SupervisorState.Stopped"/>, cancels the supervisor
/// token — which, per the supervisor design, interrupts either the Polly delay
/// (<see cref="SupervisorState.Recovering"/>) or the <see cref="PlcListener.RunAsync"/>
/// loop (<see cref="SupervisorState.Bound"/>) — and then waits for the background task.
///
/// <para>The wait is bounded by <paramref name="ct"/>. This method does not throw when
/// <paramref name="ct"/> fires first or when the supervisor task ended in a fault; both
/// outcomes are absorbed here.</para>
/// </summary>
public async Task StopAsync(CancellationToken ct)
{
    _state = SupervisorState.Stopped;

    await _supervisorCts.CancelAsync().ConfigureAwait(false);

    try
    {
        await _supervisorTask.WaitAsync(ct).ConfigureAwait(false);
    }
    catch (Exception)
    {
        // Two cases end up here, both acceptable at shutdown:
        //  - ct fired before the task completed: the supervisor task finishes on its
        //    own later.
        //  - the supervisor task faulted: per the original notes this is already
        //    logged inside RunSupervisorAsync.
    }
}
|
||||
|
||||
/// <summary>
/// Captures the supervisor's current state, last bind error and recovery-attempt count
/// into a <see cref="SupervisorSnapshot"/>. The three values are read one after another
/// without a lock.
/// </summary>
public SupervisorSnapshot Snapshot()
{
    SupervisorState state = _state;
    string? bindError = _lastBindError;

    // CompareExchange(x, 0, 0) never changes the value (it only swaps 0 for 0); it is
    // used here as an atomic read of the counter.
    int attempts = Interlocked.CompareExchange(ref _recoveryAttempts, 0, 0);

    return new SupervisorSnapshot(
        State: state,
        LastBindError: bindError,
        RecoveryAttempts: attempts);
}
|
||||
|
||||
/// <summary>
/// Swaps the live per-PLC context (tag map) without restarting the listener.
///
/// <para>The only work done here is a write to the volatile live-context field, so the
/// method completes synchronously. Code that reads the field after this call observes
/// <paramref name="newCtx"/>; anything that captured the previous context earlier keeps
/// using that one.</para>
///
/// <para>NOTE(review): the original notes state that the running listener and its
/// multiplexer hold the context captured at construction time, and that the new
/// context is picked up by the next <c>PlcListener</c> created in the recovery loop.
/// That code is outside this block — confirm against <c>RunSupervisorAsync</c>.</para>
///
/// <para><paramref name="ct"/> is not used; it exists for signature symmetry with
/// start/stop and for future asynchronous work.</para>
/// </summary>
public Task ReplaceContextAsync(PerPlcContext newCtx, CancellationToken ct)
{
    // Single volatile write; no locking and no listener restart.
    _currentContext = newCtx;

    return Task.CompletedTask;
}
|
||||
|
||||
// ── Supervisor loop ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Supervisor loop. Inside the recovery pipeline it repeatedly constructs a
/// <see cref="PlcListener"/>, binds it and runs its accept loop. Every failure (bind
/// error, runtime fault, or the accept loop ending without cancellation) is recorded and
/// re-thrown so the pipeline can delay and retry. Cancellation of <paramref name="ct"/>
/// leaves the pipeline via <see cref="OperationCanceledException"/>.
/// </summary>
/// <param name="ct">Token that stops the loop; passed to the recovery pipeline.</param>
private async Task RunSupervisorAsync(CancellationToken ct)
{
    // Upper bound on the length of the error text kept for the status page.
    const int MaxErrorLength = 256;

    bool firstBind = true;

    // Clears the status-page reference and disposes the listener so its socket is
    // released before the next bind attempt.
    async Task ReleaseListenerAsync(PlcListener toRelease)
    {
        _currentListener = null;
        await toRelease.DisposeAsync().ConfigureAwait(false);
    }

    // Records one failed attempt: bumps the lifetime counter, stores the (truncated)
    // reason, switches to Recovering and mirrors the event into the per-PLC counters
    // when a context is available. Returns the truncated reason for logging.
    string RecordFault(string reason)
    {
        Interlocked.Increment(ref _recoveryAttempts);
        string truncated = reason.Length > MaxErrorLength ? reason[..MaxErrorLength] : reason;
        _lastBindError = truncated;
        _state = SupervisorState.Recovering;
        _currentContext?.Counters.IncrementRecoveryAttempt(truncated);
        return truncated;
    }

    try
    {
        // The recovery pipeline wraps the whole create-bind-run sequence. Whenever the
        // callback throws a non-cancellation exception the pipeline delays and retries.
        await _recoveryPipeline.ExecuteAsync(async token =>
        {
            // ── Instantiate a fresh listener ─────────────────────────────────
            // A faulted listener's socket must be disposed before re-binding, so a new
            // PlcListener is created on each attempt. _currentContext is read here so
            // that a ReplaceContextAsync call between attempts is picked up; each
            // listener captures the context at construction time.
            var listener = new PlcListener(
                _plc,
                _connectionOptions,
                _pipeline,
                _listenerLogger,
                _multiplexerLogger,
                _pipeLogger,
                _currentContext,
                _backendConnectPipeline);

            // Expose the current listener for status-page pair enumeration.
            _currentListener = listener;

            try
            {
                // ── Bind ─────────────────────────────────────────────────────
                // NOTE(review): StartAsync is invoked without await. If it returns a
                // Task instead of binding synchronously, a bind failure would not
                // surface in this try block — confirm against PlcListener.
                listener.StartAsync();
            }
            catch (Exception bindEx)
            {
                // Release the socket before the recovery delay so the port can be reused.
                await ReleaseListenerAsync(listener).ConfigureAwait(false);

                string reason = RecordFault(bindEx.Message);
                LogBindFailed(_logger, _plc.Name, _plc.ListenPort, reason);

                // Re-throw so the Polly pipeline can delay and retry.
                throw;
            }

            // ── Bind succeeded ───────────────────────────────────────────────
            if (firstBind)
            {
                firstBind = false;
                LogBound(_logger, _plc.Name, _plc.ListenPort);
            }
            else
            {
                // Re-bind after an earlier successful bind — emit the "recovered" event.
                int totalAttempts = Interlocked.CompareExchange(ref _recoveryAttempts, 0, 0);
                LogListenerRecovered(_logger, _plc.Name, _plc.ListenPort, totalAttempts);
            }

            // A successful bind clears the last recorded error.
            _lastBindError = null;
            _currentContext?.Counters.ClearLastBindError();
            _state = SupervisorState.Bound;

            // ── Run the accept loop ──────────────────────────────────────────
            // RunAsync ends when the token is cancelled (normal shutdown) or when the
            // listener faults (OS reclaims the port, transient network reset, ...).
            try
            {
                await listener.RunAsync(token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // Normal shutdown path — do not enter the recovery loop.
                await ReleaseListenerAsync(listener).ConfigureAwait(false);
                throw; // Propagate to exit the Polly pipeline.
            }
            catch (Exception runEx)
            {
                // Runtime fault. Logged at Warning so operators can see WHY the listener
                // was restarted.
                LogListenerFaulted(_logger, _plc.Name, _plc.ListenPort, runEx, runEx.Message);
                await ReleaseListenerAsync(listener).ConfigureAwait(false);

                RecordFault(runEx.Message);

                // Re-throw so Polly can delay and retry.
                throw;
            }

            // RunAsync returned without throwing.
            await ReleaseListenerAsync(listener).ConfigureAwait(false);

            // If cancellation was requested, throw so Polly exits cleanly.
            token.ThrowIfCancellationRequested();

            // Otherwise the listener closed on its own (e.g. an OS event): treat it as a
            // fault and re-enter recovery.
            const string unexpectedEnd = "Listener accept loop ended unexpectedly";
            RecordFault(unexpectedEnd);
            LogListenerEnded(_logger, _plc.Name, _plc.ListenPort);
            throw new InvalidOperationException(unexpectedEnd);

        }, ct).ConfigureAwait(false);
    }
    catch (OperationCanceledException)
    {
        // Normal: StopAsync cancelled the token.
    }
    catch (Exception ex)
    {
        // Polly pipeline exhausted (should not happen for listener recovery since
        // MaxRetryAttempts = int.MaxValue) or an unexpected fault.
        _logger.LogError(ex, "Supervisor for Plc={Plc} exited unexpectedly: {Message}",
            _plc.Name, ex.Message);
    }
    finally
    {
        // Whatever the exit reason, the supervisor is no longer running a listener.
        _state = SupervisorState.Stopped;
        _currentListener = null;
    }
}
|
||||
|
||||
// ── IAsyncDisposable ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Stops the supervisor on a best-effort basis (bounded by a 5-second timeout) and then
/// disposes the supervisor's cancellation source. Calls after the first are no-ops.
/// </summary>
public async ValueTask DisposeAsync()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    // The timeout source only limits how long StopAsync waits for the supervisor task.
    using (var stopTimeout = new CancellationTokenSource(TimeSpan.FromSeconds(5)))
    {
        try
        {
            await StopAsync(stopTimeout.Token).ConfigureAwait(false);
        }
        catch
        {
            // Best-effort cleanup.
        }

        _supervisorCts.Dispose();
    }
}
|
||||
|
||||
// ── Logging ───────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>Event 40: the listener was bound for the first time.</summary>
[LoggerMessage(
    EventId = 40,
    EventName = "mbproxy.startup.bind",
    Level = LogLevel.Information,
    Message = "Listener bound: Plc={Plc} Port={Port}")]
private static partial void LogBound(ILogger logger, string plc, int port);

/// <summary>Event 41: a bind attempt failed; <paramref name="reason"/> is the recorded error text.</summary>
[LoggerMessage(
    EventId = 41,
    EventName = "mbproxy.startup.bind.failed",
    Level = LogLevel.Error,
    Message = "Failed to bind listener: Plc={Plc} Port={Port} Reason={Reason}")]
private static partial void LogBindFailed(ILogger logger, string plc, int port, string reason);

/// <summary>Event 42: the listener was bound again after an earlier successful bind.</summary>
[LoggerMessage(
    EventId = 42,
    EventName = "mbproxy.listener.recovered",
    Level = LogLevel.Information,
    Message = "Listener recovered: Plc={Plc} Port={Port} AttemptCount={AttemptCount}")]
private static partial void LogListenerRecovered(ILogger logger, string plc, int port, int attemptCount);

/// <summary>Event 43: the accept loop threw; the supervisor will try to recover.</summary>
[LoggerMessage(
    EventId = 43,
    EventName = "mbproxy.listener.faulted",
    Level = LogLevel.Warning,
    Message = "Listener faulted (will recover): Plc={Plc} Port={Port} Reason={Reason}")]
private static partial void LogListenerFaulted(ILogger logger, string plc, int port, Exception ex, string reason);

/// <summary>Event 44: the accept loop returned without cancellation; the supervisor will try to recover.</summary>
[LoggerMessage(
    EventId = 44,
    EventName = "mbproxy.listener.ended",
    Level = LogLevel.Warning,
    Message = "Listener accept loop ended unexpectedly (will recover): Plc={Plc} Port={Port}")]
private static partial void LogListenerEnded(ILogger logger, string plc, int port);
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
using System.Net.Sockets;
|
||||
using Mbproxy.Options;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
|
||||
namespace Mbproxy.Proxy.Supervision;
|
||||
|
||||
/// <summary>
/// Builds Polly v8 <see cref="ResiliencePipeline"/> instances from the typed resilience
/// configuration (<see cref="RetryProfile"/> and <see cref="RecoveryProfile"/>).
///
/// <para>Pipelines are meant to be built once at startup and reused across operations.</para>
/// </summary>
internal static class PolicyFactory
{
    // ── Network errors that are safe to retry on backend connect ────────────────────────
    // Only these SocketError values are treated as transient; anything else is assumed to
    // be a programming error or a configuration mistake and is not retried.
    private static readonly HashSet<SocketError> RetryableSocketErrors =
    [
        SocketError.ConnectionRefused,
        SocketError.TimedOut,
        SocketError.HostUnreachable,
        SocketError.NetworkUnreachable,
    ];

    /// <summary>
    /// Builds a retry pipeline for backend (PLC) TCP connect attempts.
    ///
    /// <para>Retries only on <see cref="SocketException"/> with a
    /// <see cref="SocketError"/> in <see cref="RetryableSocketErrors"/>. Does NOT retry
    /// <see cref="ArgumentException"/>, <see cref="OperationCanceledException"/>, or any
    /// non-network exception.</para>
    ///
    /// <para>The delay sequence is taken directly from <see cref="RetryProfile.BackoffMs"/>:
    /// element [i] is the delay before retry i (0-based). If the retry index exceeds the
    /// list, the last element is used; an empty list means no delay.</para>
    ///
    /// <para>After all attempts are exhausted, the pipeline re-throws the last exception
    /// so the caller can log <c>mbproxy.backend.failed</c> and close the upstream socket.</para>
    ///
    /// <para>When the profile allows only a single attempt (<c>MaxAttempts</c> of 1 or
    /// less), no retry strategy is added and <see cref="ResiliencePipeline.Empty"/> is
    /// returned: Polly rejects a retry strategy configured with zero retries.</para>
    /// </summary>
    /// <param name="profile">Retry configuration (attempt budget and backoff list).</param>
    /// <param name="logger">Receives a Debug entry for every retry.</param>
    /// <returns>A pipeline that executes the callback with the configured retries.</returns>
    public static ResiliencePipeline BuildBackendConnect(RetryProfile profile, ILogger logger)
    {
        // MaxAttempts counts the first attempt; Polly's MaxRetryAttempts does not.
        int maxAttempts = Math.Max(1, profile.MaxAttempts);
        int maxRetries = maxAttempts - 1;

        // RetryStrategyOptions.MaxRetryAttempts is validated to be at least 1, so a
        // "no retries" profile must not go through AddRetry at all.
        if (maxRetries < 1)
        {
            return ResiliencePipeline.Empty;
        }

        var backoffMs = profile.BackoffMs;

        return new ResiliencePipelineBuilder()
            .AddRetry(new RetryStrategyOptions
            {
                MaxRetryAttempts = maxRetries,
                ShouldHandle = new PredicateBuilder()
                    .Handle<SocketException>(ex => RetryableSocketErrors.Contains(ex.SocketErrorCode)),
                DelayGenerator = args =>
                {
                    int idx = args.AttemptNumber; // 0 = first retry, i.e. after attempt 0
                    // Clamp to the last element if we exceed the list.
                    int ms = backoffMs.Count > 0
                        ? backoffMs[Math.Min(idx, backoffMs.Count - 1)]
                        : 0;
                    return new ValueTask<TimeSpan?>(TimeSpan.FromMilliseconds(ms));
                },
                OnRetry = args =>
                {
                    logger.LogDebug(
                        "Backend connect retry {Attempt}/{Max}: {Error}",
                        args.AttemptNumber + 1,
                        maxRetries,
                        args.Outcome.Exception?.Message);
                    return ValueTask.CompletedTask;
                },
            })
            .Build();
    }

    /// <summary>
    /// Builds an infinite-retry pipeline for listener bind recovery.
    ///
    /// <para>The delay sequence is:
    /// <list type="bullet">
    ///   <item><description>Retries 0 .. (InitialBackoffMs.Count-1) use the initial backoff list.</description></item>
    ///   <item><description>All subsequent retries use <see cref="RecoveryProfile.SteadyStateMs"/>.</description></item>
    /// </list>
    /// The pipeline never exhausts — it retries until the supervisor's cancellation token
    /// fires (on <see cref="PlcListenerSupervisor.StopAsync"/>).</para>
    ///
    /// <para><see cref="OperationCanceledException"/> is excluded from the handled
    /// exceptions, so cancelling the token passed to the pipeline's <c>ExecuteAsync</c>
    /// lets the supervisor leave the loop cleanly.</para>
    /// </summary>
    /// <param name="profile">Recovery configuration (initial backoff list and steady-state delay).</param>
    /// <param name="logger">Receives a Debug entry for every recovery attempt.</param>
    /// <returns>A pipeline that retries every non-cancellation exception indefinitely.</returns>
    public static ResiliencePipeline BuildListenerRecovery(RecoveryProfile profile, ILogger logger)
    {
        var initialMs = profile.InitialBackoffMs;
        int steadyMs = profile.SteadyStateMs;

        return new ResiliencePipelineBuilder()
            .AddRetry(new RetryStrategyOptions
            {
                // int.MaxValue makes the pipeline retry indefinitely; cancellation is the
                // only exit path.
                MaxRetryAttempts = int.MaxValue,
                ShouldHandle = new PredicateBuilder().Handle<Exception>(
                    ex => ex is not OperationCanceledException),
                DelayGenerator = args =>
                {
                    // args.AttemptNumber is the zero-based index of the retry
                    // (0 = first retry, after the first failed attempt).
                    int idx = args.AttemptNumber;
                    int ms = idx < initialMs.Count
                        ? initialMs[idx]
                        : steadyMs;
                    return new ValueTask<TimeSpan?>(TimeSpan.FromMilliseconds(ms));
                },
                OnRetry = args =>
                {
                    logger.LogDebug(
                        "Listener recovery attempt {Attempt}: {Error}",
                        args.AttemptNumber + 1,
                        args.Outcome.Exception?.Message);
                    return ValueTask.CompletedTask;
                },
            })
            .Build();
    }
}
|
||||
@@ -0,0 +1,50 @@
|
||||
namespace Mbproxy.Proxy.Supervision;
|
||||
|
||||
/// <summary>
/// Lifecycle states reported by <see cref="PlcListenerSupervisor"/>.
/// </summary>
public enum SupervisorState
{
    /// <summary>
    /// The listener is bound and its accept loop is running. Entered whenever
    /// <see cref="PlcListener.StartAsync"/> succeeds, whether on the first attempt or
    /// after a recovery attempt.
    /// </summary>
    Bound,

    /// <summary>
    /// The listener is not bound and the supervisor is waiting out the Polly retry delay
    /// before trying again. Entered after a failed bind, after a runtime fault of the
    /// accept loop, or when the accept loop ends without cancellation.
    /// </summary>
    Recovering,

    /// <summary>
    /// Terminal state: no further retries will happen. Set by
    /// <see cref="PlcListenerSupervisor.StopAsync"/> and again when the supervisor loop exits.
    /// </summary>
    Stopped,
}
|
||||
|
||||
/// <summary>
/// Immutable point-in-time view of a supervisor, produced by
/// <see cref="PlcListenerSupervisor.Snapshot"/> for the status page.
///
/// <para><b>RecoveryAttempts semantics</b>: the counter <em>accumulates over the lifetime
/// of the supervisor</em> and is never reset. Read it as "how many times has this
/// listener faulted or failed to bind since the service started" — useful for spotting
/// port-flapping or repeated OS network resets.</para>
/// </summary>
/// <param name="State">State of the supervisor at the moment the snapshot was taken.</param>
/// <param name="LastBindError">
/// Text of the most recent bind failure or listener fault (up to 256 chars).
/// <c>null</c> when no failure is currently recorded: either none has occurred yet, or
/// the listener has bound successfully since the last one.
/// </param>
/// <param name="RecoveryAttempts">
/// Total number of failed binds and listener faults over the lifetime of this supervisor.
/// Accumulates; never resets to 0.
/// </param>
public sealed record SupervisorSnapshot(
    SupervisorState State,
    string? LastBindError,
    int RecoveryAttempts);
|
||||
@@ -0,0 +1,57 @@
|
||||
namespace Mbproxy;
|
||||
|
||||
/// <summary>
/// Service-wide counters for the mbproxy host: how many configuration reloads were
/// accepted or rejected, and when the last accepted one happened. Lets the status page
/// report these values without depending on the reconciler.
///
/// <para>Intended to be created once and shared as a singleton. Every field is written
/// and read through <see cref="Interlocked"/>, so individual values can be read without
/// locking.</para>
/// </summary>
public sealed class ServiceCounters
{
    // Holds DateTimeOffset.UtcTicks of the last accepted reload. The value 0 is the
    // "never reloaded" sentinel; any real timestamp produced by DateTimeOffset.UtcNow
    // has a positive tick count.
    private long _lastReloadUtcTicks;

    // Both counters are only touched through Interlocked.
    private int _reloadAppliedCount;
    private int _reloadRejectedCount;

    /// <summary>Moment this instance was constructed; used as a stand-in for service start.</summary>
    public DateTimeOffset StartedAtUtc { get; } = DateTimeOffset.UtcNow;

    /// <summary>
    /// UTC time of the most recent accepted hot-reload, or <c>null</c> when none has been
    /// accepted since the service started.
    /// </summary>
    public DateTimeOffset? LastReloadUtc
    {
        get
        {
            long storedTicks = Interlocked.Read(ref _lastReloadUtcTicks);
            if (storedTicks == 0)
            {
                return null;
            }

            return new DateTimeOffset(storedTicks, TimeSpan.Zero);
        }
    }

    /// <summary>Number of configuration reloads accepted since service start.</summary>
    public int ReloadAppliedCount
    {
        // CompareExchange(x, 0, 0) never changes the value; it serves as an atomic read.
        get { return Interlocked.CompareExchange(ref _reloadAppliedCount, 0, 0); }
    }

    /// <summary>Number of configuration reloads rejected since service start.</summary>
    public int ReloadRejectedCount
    {
        get { return Interlocked.CompareExchange(ref _reloadRejectedCount, 0, 0); }
    }

    /// <summary>
    /// Records one accepted reload: increments <see cref="ReloadAppliedCount"/>, then
    /// stores <paramref name="timestamp"/> as the new <see cref="LastReloadUtc"/>.
    /// </summary>
    /// <param name="timestamp">When the reload was applied; stored as its UTC tick count.</param>
    public void RecordReloadApplied(DateTimeOffset timestamp)
    {
        Interlocked.Increment(ref _reloadAppliedCount);

        long utcTicks = timestamp.UtcTicks;
        Interlocked.Exchange(ref _lastReloadUtcTicks, utcTicks);
    }

    /// <summary>Records one rejected reload by incrementing <see cref="ReloadRejectedCount"/>.</summary>
    public void RecordReloadRejected()
    {
        Interlocked.Increment(ref _reloadRejectedCount);
    }
}
|
||||
@@ -0,0 +1,50 @@
|
||||
{
|
||||
"Mbproxy": {
|
||||
"BcdTags": {
|
||||
"Global": []
|
||||
},
|
||||
"Plcs": [],
|
||||
"AdminPort": 8080,
|
||||
"Connection": {
|
||||
"BackendConnectTimeoutMs": 3000,
|
||||
"BackendRequestTimeoutMs": 3000
|
||||
},
|
||||
"Resilience": {
|
||||
"BackendConnect": {
|
||||
"MaxAttempts": 3,
|
||||
"BackoffMs": [ 100, 500, 2000 ]
|
||||
},
|
||||
"ListenerRecovery": {
|
||||
"InitialBackoffMs": [ 1000, 2000, 5000, 15000, 30000 ],
|
||||
"SteadyStateMs": 30000
|
||||
}
|
||||
}
|
||||
},
|
||||
"Serilog": {
|
||||
"Using": [ "Serilog.Sinks.Console", "Serilog.Sinks.File" ],
|
||||
"MinimumLevel": {
|
||||
"Default": "Information",
|
||||
"Override": {
|
||||
"Microsoft": "Warning",
|
||||
"System": "Warning"
|
||||
}
|
||||
},
|
||||
"WriteTo": [
|
||||
{
|
||||
"Name": "Console",
|
||||
"Args": {
|
||||
"outputTemplate": "[{Timestamp:HH:mm:ss} {Level:u3}] {Message:lj} {Properties:j}{NewLine}{Exception}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"Name": "File",
|
||||
"Args": {
|
||||
"path": "C:\\ProgramData\\mbproxy\\logs\\mbproxy-.log",
|
||||
"rollingInterval": "Day",
|
||||
"retainedFileCountLimit": 30,
|
||||
"outputTemplate": "[{Timestamp:yyyy-MM-dd HH:mm:ss.fff zzz} {Level:u3}] {Message:lj} {Properties:j}{NewLine}{Exception}"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user