Files
wwtools/mbproxy/tests/Mbproxy.Tests/Sim/DL205SimulatorFixture.cs
T
Joseph Doherty b222362ce0 mbproxy: remediate the 2026-05-16 code-review findings
Fixes every finding from the codereviews/2026-05-16 multi-agent review
(2 Critical, 20 Major, 38 Minor) and adds that review to the repo.

Highlights: dashboard XSS escape; response cache invalidated on the
write request (not just the response); ReloadValidator now runs at
startup so port collisions / duplicate names / malformed Resilience
profiles fail fast; AdminPort 0 genuinely disables the admin endpoint;
PlcListener accept-loop faults propagate to the supervisor's faulted
path; reconciler Restart builds before removing; Resilience pipelines
are restart-only from a frozen snapshot; multiplexer connect-race leak,
watchdog party-list snapshot, backend-response and FC16 framing
validation; frontend reconnect retry and util.js load guard; plus the
log-event/doc drift sweep and test-port hygiene.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-16 18:08:06 -04:00

290 lines
12 KiB
C#

using System.Collections.Concurrent;
using System.Diagnostics;
using System.Net;
using System.Net.Sockets;
using System.Reflection;
using System.Text;
using Xunit;
namespace Mbproxy.Tests.Sim;
/// <summary>
/// xUnit v3 async fixture that manages the lifecycle of a pymodbus DL205 simulator
/// process for end-to-end tests.
/// </summary>
/// <remarks>
/// Usage: declare <c>[Collection(nameof(DL205SimulatorCollection))]</c> on any test
/// class that needs a live simulator. The fixture is shared across all tests in the
/// collection (one process per test run).
///
/// <para><b>Skip policy:</b> if the launcher script, <c>pwsh</c>, Python or pymodbus is
/// unavailable, <see cref="SkipReason"/> is populated and tests should call
/// <c>Assert.Skip(fixture.SkipReason)</c> rather than failing.</para>
/// </remarks>
public sealed class DL205SimulatorFixture : IAsyncLifetime
{
    // ── Public surface ────────────────────────────────────────────────────────

    /// <summary>Always <c>"127.0.0.1"</c>.</summary>
    public string Host { get; } = "127.0.0.1";

    /// <summary>The free port picked for this fixture instance.</summary>
    public int Port { get; private set; }

    /// <summary>
    /// Non-null when the simulator could not start (launcher script or pwsh missing,
    /// process exited early, readiness timeout, etc.). Tests should call
    /// <c>Assert.Skip(fixture.SkipReason)</c>.
    /// </summary>
    public string? SkipReason { get; private set; }

    /// <summary>
    /// Last ~50 lines of the simulator's combined stdout/stderr, for diagnosis.
    /// </summary>
    public string LogTail => BuildLogTail();

    // ── Private state ─────────────────────────────────────────────────────────

    /// <summary>The spawned <c>pwsh</c> process; null before start and after teardown.</summary>
    private Process? _process;

    /// <summary>
    /// Bounded buffer of captured output lines (stdout and stderr interleaved),
    /// trimmed to <see cref="LogTailLines"/> entries by <see cref="AppendLine"/>.
    /// </summary>
    private readonly ConcurrentQueue<string> _outputLines = new();

    private const int LogTailLines = 50;

    // ── IAsyncLifetime ────────────────────────────────────────────────────────

    // Total time to wait for the simulator to accept a TCP connection.
    // On a warm run (venv exists) this is typically < 2 s.
    // On a cold run (first-ever provisioning) pip-installing pymodbus can take 30-90 s
    // depending on network speed, so we allow 120 s to cover both paths.
    // The spec's "up to 10 s" refers to warm-run server startup; cold-run provisioning
    // is additive and cannot be separated without a separate pre-provision step.
    private static readonly TimeSpan ReadinessTimeout = TimeSpan.FromSeconds(120);

    /// <summary>
    /// Picks a free port, spawns <c>pwsh run-dl205-sim.ps1</c>, and polls for TCP
    /// readiness for up to <see cref="ReadinessTimeout"/>.
    /// </summary>
    /// <remarks>
    /// Never throws for an unavailable environment: every failure path sets
    /// <see cref="SkipReason"/> and returns so dependent tests can skip.
    /// </remarks>
    public async ValueTask InitializeAsync()
    {
        // ── 1. Pick a free local port ─────────────────────────────────────────
        // TOCTOU note: we bind on :0, capture the OS-assigned port, then release
        // the listener. Between the release and pymodbus binding there is a window
        // where another process could grab the port. This race is rare in practice
        // and is an acceptable trade-off for the simplicity of a plain TcpListener
        // approach. There is no port retry: if the port is stolen the simulator is
        // expected to fail to bind and exit, which the readiness poll in step 5
        // notices via HasExited and reports through SkipReason.
        Port = PickFreePort();

        // ── 2. Locate the launcher script ─────────────────────────────────────
        var scriptPath = ResolveScriptPath();
        if (scriptPath is null)
        {
            SkipReason = "Could not locate tests/sim/run-dl205-sim.ps1 next to the test assembly.";
            return;
        }

        // ── 3. Verify pwsh (PowerShell 7+) is on PATH ─────────────────────────
        if (!PwshIsAvailable())
        {
            SkipReason = "pwsh (PowerShell 7+) is not available on PATH; cannot launch the simulator.";
            return;
        }

        // ── 4. Spawn the simulator ────────────────────────────────────────────
        var psi = new ProcessStartInfo
        {
            FileName = "pwsh",
            Arguments = $"-NoProfile -File \"{scriptPath}\" -Port {Port}",
            UseShellExecute = false,
            RedirectStandardOutput = true,
            RedirectStandardError = true,
            CreateNoWindow = true,
        };

        Process process;
        try
        {
            process = Process.Start(psi)
                ?? throw new InvalidOperationException("Process.Start returned null.");
        }
        catch (Exception ex)
        {
            SkipReason = $"Failed to spawn pwsh: {ex.Message}";
            return;
        }

        _process = process;

        // Drain stdout and stderr asynchronously into the bounded buffer so the
        // child process is never blocked on a full pipe buffer.
        process.OutputDataReceived += (_, e) => AppendLine(e.Data);
        process.ErrorDataReceived += (_, e) => AppendLine(e.Data);
        process.BeginOutputReadLine();
        process.BeginErrorReadLine();

        // ── 5. Poll for TCP readiness (up to ReadinessTimeout) ───────────────
        // Link the readiness deadline against the test-runner's cancellation token so a
        // CI job timeout / keyboard interrupt aborts the poll promptly instead of running
        // the full 120 s and leaving the spawned Python process orphaned (review M3).
        var runnerToken = TestContext.Current.CancellationToken;
        using var deadline = new CancellationTokenSource(ReadinessTimeout);
        using var linked = CancellationTokenSource.CreateLinkedTokenSource(
            deadline.Token, runnerToken);

        bool ready = false;
        while (!linked.Token.IsCancellationRequested)
        {
            // If the process exited early, no point waiting further.
            if (process.HasExited)
                break;

            try
            {
                using var probe = new TcpClient();
                await probe.ConnectAsync(Host, Port, linked.Token).ConfigureAwait(false);
                ready = true;
                break;
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch
            {
                // Not ready yet — wait 100 ms and retry.
                try { await Task.Delay(100, linked.Token).ConfigureAwait(false); }
                catch (OperationCanceledException) { break; }
            }
        }

        if (ready)
            return;

        // Snapshot the failure cause BEFORE tearing the process down: once we kill it,
        // HasExited is true and ExitCode reflects our kill, which would make every
        // timeout look like a premature exit.
        bool exitedEarly = process.HasExited;
        int exitCode = exitedEarly ? process.ExitCode : 0;
        bool abortedByRunner = runnerToken.IsCancellationRequested
            && !deadline.IsCancellationRequested;
        string tail = BuildLogTail();

        await DisposeProcessAsync().ConfigureAwait(false);

        if (exitedEarly)
        {
            SkipReason =
                $"Simulator process exited prematurely (exit code {exitCode}). " +
                $"Likely cause: Python not found or pymodbus not installed. Log tail:\n{tail}";
        }
        else if (abortedByRunner)
        {
            SkipReason =
                $"Simulator startup on port {Port} was cancelled by the test runner before it became ready. " +
                $"Log tail:\n{tail}";
        }
        else
        {
            SkipReason =
                $"Simulator did not accept a TCP connection on port {Port} within {ReadinessTimeout.TotalSeconds} s. " +
                $"Log tail:\n{tail}";
        }
    }

    /// <summary>
    /// Kills the simulator process tree and waits up to 5 s for it to exit.
    /// Safe to call when the simulator never started or was already torn down.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        await DisposeProcessAsync().ConfigureAwait(false);
    }

    // ── Private helpers ───────────────────────────────────────────────────────

    /// <summary>
    /// Asks the OS for a free loopback port by binding on port 0, then releases it.
    /// </summary>
    /// <returns>The port number the OS assigned to the temporary listener.</returns>
    private static int PickFreePort()
    {
        // See TOCTOU note in InitializeAsync.
        var listener = new TcpListener(IPAddress.Loopback, 0);
        listener.Start();
        try
        {
            return ((IPEndPoint)listener.LocalEndpoint).Port;
        }
        finally
        {
            // Always release the socket, even if reading the endpoint throws.
            listener.Stop();
        }
    }

    /// <summary>
    /// Walks upward from the test assembly's directory looking for
    /// <c>tests/sim/run-dl205-sim.ps1</c> (or the script directly in an ancestor).
    /// </summary>
    /// <returns>The full script path, or <c>null</c> when no ancestor contains it.</returns>
    private static string? ResolveScriptPath()
    {
        // The assembly is typically at tests/Mbproxy.Tests/bin/<config>/net10.0/
        var startDir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);

        // Assembly.Location can be empty; new DirectoryInfo("") would then throw
        // instead of letting us report a clean skip, so fall back to the app base
        // directory.
        if (string.IsNullOrEmpty(startDir))
            startDir = AppContext.BaseDirectory;

        for (var dir = new DirectoryInfo(startDir); dir is not null; dir = dir.Parent)
        {
            var candidate = Path.Combine(dir.FullName, "tests", "sim", "run-dl205-sim.ps1");
            if (File.Exists(candidate))
                return candidate;

            // Also check if we're already inside a tests/sim sibling.
            var direct = Path.Combine(dir.FullName, "run-dl205-sim.ps1");
            if (File.Exists(direct))
                return direct;
        }

        return null;
    }

    /// <summary>
    /// Probes for PowerShell 7+ by running <c>pwsh -NoProfile -Command exit 0</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> only when the probe starts, exits within 3 s, and returns exit code 0.
    /// </returns>
    private static bool PwshIsAvailable()
    {
        try
        {
            using var probe = Process.Start(new ProcessStartInfo
            {
                FileName = "pwsh",
                Arguments = "-NoProfile -Command exit 0",
                UseShellExecute = false,
                RedirectStandardOutput = true,
                RedirectStandardError = true,
                CreateNoWindow = true,
            });

            if (probe is null)
                return false;

            if (!probe.WaitForExit(3000))
            {
                // Hung probe: do not leave it running behind us. Any failure to kill
                // falls through to the catch below and still reports "unavailable".
                probe.Kill(entireProcessTree: true);
                return false;
            }

            return probe.ExitCode == 0;
        }
        catch
        {
            // Deliberately broad: any failure to start or query the probe (pwsh not on
            // PATH, access denied, ...) simply means "not available".
            return false;
        }
    }

    /// <summary>
    /// Appends one captured output line and trims the buffer to the newest
    /// <see cref="LogTailLines"/> entries. Null lines (end-of-stream) are ignored.
    /// </summary>
    private void AppendLine(string? line)
    {
        if (line is null) return;

        _outputLines.Enqueue(line);

        // Trim to the last LogTailLines entries.
        while (_outputLines.Count > LogTailLines)
            _outputLines.TryDequeue(out _);
    }

    /// <summary>Joins the buffered output lines, one per line, oldest first.</summary>
    private string BuildLogTail()
    {
        var sb = new StringBuilder();
        foreach (var line in _outputLines)
            sb.AppendLine(line);
        return sb.ToString();
    }

    /// <summary>
    /// Terminates the simulator process tree (if still running), waits up to 5 s for
    /// exit, and releases the <see cref="Process"/> handle. Idempotent.
    /// </summary>
    private async Task DisposeProcessAsync()
    {
        var process = _process;
        if (process is null)
            return;

        // Detach first so a second call (e.g. DisposeAsync after a failed
        // InitializeAsync) is a no-op and never touches a disposed Process.
        _process = null;

        try
        {
            if (process.HasExited)
                return;

            try
            {
                // Windows lacks a portable "send SIGTERM" from .NET without P/Invoke.
                // Pymodbus handles graceful shutdown via Ctrl-C (SIGINT), but raising
                // Ctrl-C to a child process on Windows requires attaching to its console
                // group, which is fragile. Process.Kill(entireProcessTree: true) is the
                // pragmatic choice: it terminates pymodbus and any child processes it may
                // have spawned (e.g. the pwsh → python chain).
                //
                // Trade-off: pymodbus does not get to flush its log or call atexit
                // handlers, so the last few log lines may be missing. This is acceptable
                // for test cleanup.
                process.Kill(entireProcessTree: true);
            }
            catch (Exception ex) when (ex is InvalidOperationException
                                          or System.ComponentModel.Win32Exception
                                          or AggregateException)
            {
                // Process already exited between the HasExited check and Kill(), is
                // mid-termination, or part of the tree could not be killed. Teardown is
                // best-effort and must not throw out of DisposeAsync.
            }

            // Wait up to 5 s for the process to actually exit.
            using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
            try
            {
                await process.WaitForExitAsync(cts.Token).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                // 5 s elapsed — give up; the OS will clean up the orphaned process.
            }
        }
        finally
        {
            // Release the OS handle and redirected pipe streams.
            process.Dispose();
        }
    }
}