56eee3c563
Adds the mbproxy service end-to-end. Phases 00-08 implement the production-ready single-listener / 1:1-backend transparent Modbus TCP proxy with bidirectional BCD rewriting for the ~54-PLC DL205/DL260 fleet. Phase 9 replaces the connection layer with a single backend socket per PLC plus MBAP TxId rewriting, lifting the H2-ECOM100's 4-concurrent-client cap as an operational ceiling. Phase 9 additions of note: - PlcMultiplexer + UpstreamPipe + TxIdAllocator + CorrelationMap - InFlightRequest with IReadOnlyList<InterestedParty> (load-bearing for Phase 10 read coalescing — do not collapse to a single field) - Per-request watchdog: surfaces Modbus exception 0x0B to upstream on BackendRequestTimeoutMs, defending against lost responses, dead-PLC paths, and pymodbus 3.13.0's concurrent-multiplexed-request bug (its ServerRequestHandler.last_pdu state race) - Status DTO + HTML gain inFlight / maxInFlight / txIdWraps / disconnectCascades / queueDepth (Tier 1.6 in docs/kpi.md) Tests: 263 unit + 38 E2E. Multiplexer correctness under truly concurrent backend traffic is proved against a stub backend in PlcMultiplexerTests; MultiplexerE2ETests paces requests so pymodbus 3.13's single-PDU framer stays in known-good mode. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
287 lines
11 KiB
C#
287 lines
11 KiB
C#
using System.Collections.Concurrent;
|
|
using System.Diagnostics;
|
|
using System.Net;
|
|
using System.Net.Sockets;
|
|
using System.Reflection;
|
|
using System.Text;
|
|
using Xunit;
|
|
|
|
namespace Mbproxy.Tests.Sim;
|
|
|
|
/// <summary>
/// xUnit v3 async fixture that manages the lifecycle of a pymodbus DL205 simulator
/// process for end-to-end tests.
/// </summary>
/// <remarks>
/// Usage: declare <c>[Collection(nameof(DL205SimulatorCollection))]</c> on any test
/// class that needs a live simulator. The fixture is shared across all tests in the
/// collection (one process per test run).
///
/// <para><b>Skip policy:</b> if the launcher script, <c>pwsh</c>, Python or pymodbus
/// is unavailable, <see cref="SkipReason"/> is populated and tests should call
/// <c>Assert.Skip(fixture.SkipReason)</c> rather than failing.</para>
/// </remarks>
public sealed class DL205SimulatorFixture : IAsyncLifetime
{
    // ── Public surface ────────────────────────────────────────────────────────

    /// <summary>Always <c>"127.0.0.1"</c>.</summary>
    public string Host { get; } = "127.0.0.1";

    /// <summary>The free port picked for this fixture instance.</summary>
    public int Port { get; private set; }

    /// <summary>
    /// Non-null when the simulator could not start (launcher script or pwsh missing,
    /// process exited early, readiness timeout, etc.). Tests should call
    /// <c>Assert.Skip(fixture.SkipReason)</c>.
    /// </summary>
    public string? SkipReason { get; private set; }

    /// <summary>
    /// Last ~50 lines of the simulator's combined stdout/stderr, for diagnosis.
    /// Remains readable after the process has been torn down.
    /// </summary>
    public string LogTail => BuildLogTail();

    // ── Private state ─────────────────────────────────────────────────────────

    /// <summary>Maximum number of captured output lines retained for <see cref="LogTail"/>.</summary>
    private const int LogTailLines = 50;

    /// <summary>How long the <c>pwsh</c> availability probe may run before it is abandoned.</summary>
    private const int PwshProbeTimeoutMs = 3000;

    // Total time to wait for the simulator to accept a TCP connection.
    // On a warm run (venv exists) this is typically < 2 s.
    // On a cold run (first-ever provisioning) pip-installing pymodbus can take 30-90 s
    // depending on network speed, so we allow 120 s to cover both paths.
    // The spec's "up to 10 s" refers to warm-run server startup; cold-run provisioning
    // is additive and cannot be separated without a separate pre-provision step.
    private static readonly TimeSpan ReadinessTimeout = TimeSpan.FromSeconds(120);

    /// <summary>Pause between two failed TCP readiness probes.</summary>
    private static readonly TimeSpan ProbeRetryDelay = TimeSpan.FromMilliseconds(100);

    /// <summary>How long teardown waits for the killed process to actually exit.</summary>
    private static readonly TimeSpan ExitWaitTimeout = TimeSpan.FromSeconds(5);

    /// <summary>
    /// Bounded buffer of captured stdout and stderr lines (trimmed to
    /// <see cref="LogTailLines"/> entries by <see cref="AppendLine"/>).
    /// </summary>
    private readonly ConcurrentQueue<string> _outputLines = new();

    /// <summary>The running simulator launcher, or <c>null</c> when none is owned.</summary>
    private Process? _process;

    // ── IAsyncLifetime ────────────────────────────────────────────────────────

    /// <summary>
    /// Picks a free port, spawns <c>pwsh run-dl205-sim.ps1</c>, and polls for TCP
    /// readiness for up to <see cref="ReadinessTimeout"/>.
    /// </summary>
    /// <remarks>
    /// Never throws for environmental problems: every failure path sets
    /// <see cref="SkipReason"/> and returns, leaving no child process behind.
    /// </remarks>
    public async ValueTask InitializeAsync()
    {
        // ── 1. Pick a free local port ─────────────────────────────────────────
        // TOCTOU note: we bind on :0, capture the OS-assigned port, then release
        // the listener. Between the release and pymodbus binding there is a window
        // where another process could grab the port. This race is rare in practice
        // and is an acceptable trade-off for the simplicity of a plain TcpListener
        // approach. There is no retry with a fresh port: if the port is stolen the
        // simulator will presumably fail to bind and exit, which surfaces below as
        // a SkipReason rather than a test failure.
        Port = PickFreePort();

        // ── 2. Locate the launcher script ─────────────────────────────────────
        var scriptPath = ResolveScriptPath();
        if (scriptPath is null)
        {
            SkipReason = "Could not locate tests/sim/run-dl205-sim.ps1 next to the test assembly.";
            return;
        }

        // ── 3. Verify pwsh (PowerShell 7+) is on PATH ─────────────────────────
        if (!PwshIsAvailable())
        {
            SkipReason = "pwsh (PowerShell 7+) is not available on PATH; cannot launch the simulator.";
            return;
        }

        // ── 4. Spawn the simulator ────────────────────────────────────────────
        var psi = new ProcessStartInfo
        {
            FileName = "pwsh",
            Arguments = $"-NoProfile -File \"{scriptPath}\" -Port {Port}",
            UseShellExecute = false,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true,
        };

        Process process;
        try
        {
            process = Process.Start(psi)
                ?? throw new InvalidOperationException("Process.Start returned null.");
        }
        catch (Exception ex)
        {
            // Any spawn failure is an environment problem, not a test failure.
            SkipReason = $"Failed to spawn pwsh: {ex.Message}";
            return;
        }

        _process = process;

        // Drain stdout and stderr asynchronously into the bounded buffer so the
        // child process is never blocked on a full pipe buffer.
        process.OutputDataReceived += (_, e) => AppendLine(e.Data);
        process.ErrorDataReceived += (_, e) => AppendLine(e.Data);
        process.BeginOutputReadLine();
        process.BeginErrorReadLine();

        // ── 5. Poll for TCP readiness (up to ReadinessTimeout) ───────────────
        if (await WaitForTcpReadinessAsync(process).ConfigureAwait(false))
        {
            return;
        }

        // Snapshot *why* we failed before tearing the process down: once it has
        // been killed it always reports HasExited == true, which would make a
        // readiness timeout indistinguishable from a premature exit.
        bool exitedEarly = process.HasExited;
        int exitCode = exitedEarly ? process.ExitCode : 0;
        string tail = BuildLogTail();

        await DisposeProcessAsync().ConfigureAwait(false);

        SkipReason = exitedEarly
            ? $"Simulator process exited prematurely (exit code {exitCode}). " +
              $"Likely cause: Python not found or pymodbus not installed. Log tail:\n{tail}"
            : $"Simulator did not accept a TCP connection on port {Port} within {ReadinessTimeout.TotalSeconds} s. " +
              $"Log tail:\n{tail}";
    }

    /// <summary>
    /// Kills the simulator process tree and waits up to 5 s for it to exit.
    /// Safe to call when no process was started or it was already torn down.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        await DisposeProcessAsync().ConfigureAwait(false);
    }

    // ── Private helpers ───────────────────────────────────────────────────────

    /// <summary>
    /// Repeatedly attempts a TCP connection to <see cref="Host"/>:<see cref="Port"/>
    /// until one succeeds, <paramref name="process"/> exits, or
    /// <see cref="ReadinessTimeout"/> elapses.
    /// </summary>
    /// <param name="process">The launcher process whose early exit aborts the wait.</param>
    /// <returns><c>true</c> if a connection was accepted; otherwise <c>false</c>.</returns>
    private async Task<bool> WaitForTcpReadinessAsync(Process process)
    {
        using var deadline = new CancellationTokenSource(ReadinessTimeout);
        CancellationToken token = deadline.Token;

        // If the process exited early there is no point waiting further.
        while (!token.IsCancellationRequested && !process.HasExited)
        {
            try
            {
                using var probe = new TcpClient();
                await probe.ConnectAsync(Host, Port, token).ConfigureAwait(false);
                return true;
            }
            catch (OperationCanceledException)
            {
                return false;
            }
            catch (Exception)
            {
                // Not ready yet (typically connection refused) — fall through,
                // wait briefly and retry. Deliberately broad: a probe failure
                // must never fault fixture initialization.
            }

            try
            {
                await Task.Delay(ProbeRetryDelay, token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                return false;
            }
        }

        return false;
    }

    /// <summary>
    /// Asks the OS for a currently-free loopback port by binding to port 0,
    /// reading the assigned port, and releasing the listener again.
    /// </summary>
    /// <returns>The port number the OS assigned.</returns>
    private static int PickFreePort()
    {
        // See TOCTOU note in InitializeAsync.
        var listener = new TcpListener(IPAddress.Loopback, 0);
        listener.Start();
        try
        {
            return ((IPEndPoint)listener.LocalEndpoint).Port;
        }
        finally
        {
            // Always release the socket, even if reading the endpoint throws.
            listener.Stop();
        }
    }

    /// <summary>
    /// Walks upward from the test assembly's directory looking for
    /// <c>tests/sim/run-dl205-sim.ps1</c> (or <c>run-dl205-sim.ps1</c> directly in
    /// an ancestor directory).
    /// </summary>
    /// <returns>The full path of the first match, or <c>null</c> if none is found.</returns>
    private static string? ResolveScriptPath()
    {
        // The assembly is typically at tests/Mbproxy.Tests/bin/<config>/net10.0/
        var startDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
        if (string.IsNullOrEmpty(startDir))
        {
            // Assembly.Location can be empty (e.g. assemblies loaded from memory);
            // DirectoryInfo rejects an empty path, so fall back to the app base.
            startDir = AppContext.BaseDirectory;
        }

        for (var dir = new DirectoryInfo(startDir); dir is not null; dir = dir.Parent)
        {
            var candidate = Path.Combine(dir.FullName, "tests", "sim", "run-dl205-sim.ps1");
            if (File.Exists(candidate))
            {
                return candidate;
            }

            // Also accept the script sitting directly in this directory.
            var direct = Path.Combine(dir.FullName, "run-dl205-sim.ps1");
            if (File.Exists(direct))
            {
                return direct;
            }
        }

        return null;
    }

    /// <summary>
    /// Checks whether <c>pwsh</c> can be launched by running
    /// <c>pwsh -NoProfile -Command exit 0</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> if the probe exits with code 0 within
    /// <see cref="PwshProbeTimeoutMs"/> ms; otherwise <c>false</c>.
    /// </returns>
    private static bool PwshIsAvailable()
    {
        try
        {
            using var probe = Process.Start(new ProcessStartInfo
            {
                FileName = "pwsh",
                Arguments = "-NoProfile -Command exit 0",
                UseShellExecute = false,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                CreateNoWindow = true,
            });

            if (probe is null)
            {
                return false;
            }

            if (!probe.WaitForExit(PwshProbeTimeoutMs))
            {
                // The probe hung: do not leave a stray pwsh behind, and do not
                // read ExitCode (it throws while the process is still running).
                probe.Kill(entireProcessTree: true);
                return false;
            }

            return probe.ExitCode == 0;
        }
        catch (Exception)
        {
            // Best-effort probe: any failure (pwsh not on PATH, kill failure, ...)
            // simply means "not available".
            return false;
        }
    }

    /// <summary>
    /// Records one captured output line and trims the buffer to roughly the last
    /// <see cref="LogTailLines"/> entries.
    /// </summary>
    /// <param name="line">
    /// The captured line; <c>null</c> values passed by the output events are ignored.
    /// </param>
    private void AppendLine(string? line)
    {
        if (line is null)
        {
            return;
        }

        _outputLines.Enqueue(line);

        // Trim to the last LogTailLines entries.
        while (_outputLines.Count > LogTailLines)
        {
            _outputLines.TryDequeue(out _);
        }
    }

    /// <summary>Joins the buffered output lines, one per line, oldest first.</summary>
    private string BuildLogTail()
    {
        var sb = new StringBuilder();
        foreach (var line in _outputLines)
        {
            sb.AppendLine(line);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Kills the owned process tree (if still running), waits up to
    /// <see cref="ExitWaitTimeout"/> for it to exit, and releases the
    /// <see cref="Process"/> handle. Idempotent and never throws for kill failures.
    /// </summary>
    private async Task DisposeProcessAsync()
    {
        var process = _process;
        if (process is null)
        {
            return;
        }

        // Detach first so a second call (e.g. DisposeAsync after a failed
        // InitializeAsync) is a no-op and never touches a disposed Process.
        _process = null;

        try
        {
            if (process.HasExited)
            {
                return;
            }

            try
            {
                // Windows lacks a portable "send SIGTERM" from .NET without P/Invoke.
                // Pymodbus handles graceful shutdown via Ctrl-C (SIGINT), but raising
                // Ctrl-C to a child process on Windows requires attaching to its console
                // group, which is fragile. Process.Kill(entireProcessTree: true) is the
                // pragmatic choice: it terminates pymodbus and any child processes it may
                // have spawned (e.g. the pwsh → python chain).
                //
                // Trade-off: pymodbus does not get to flush its log or call atexit
                // handlers, so the last few log lines may be missing. This is acceptable
                // for test cleanup.
                process.Kill(entireProcessTree: true);
            }
            catch (Exception ex) when (ex is InvalidOperationException
                                          or NotSupportedException
                                          or AggregateException
                                          or System.ComponentModel.Win32Exception)
            {
                // Best-effort teardown: the process exited between the HasExited
                // check and Kill(), or (part of) the tree could not be terminated.
                // Faulting fixture disposal over this would only obscure test results.
            }

            // Wait up to ExitWaitTimeout for the process to actually exit.
            using var cts = new CancellationTokenSource(ExitWaitTimeout);
            try
            {
                await process.WaitForExitAsync(cts.Token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // Timed out — give up rather than hang test teardown.
            }
        }
        finally
        {
            // Release the OS process handle and the redirected stream readers.
            process.Dispose();
        }
    }
}
|