From c1188c6957422a921e1cca06e04e638cc3278b62 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 26 Apr 2026 16:38:32 -0400 Subject: [PATCH] Issue #10: implement worker process launcher --- docs/WorkerProcessLauncher.md | 62 ++++ docs/gateway-process-design.md | 9 + gateway.md | 2 + src/MxGateway.Server/GatewayApplication.cs | 2 + .../Workers/IWorkerProcess.cs | 14 + .../Workers/IWorkerProcessFactory.cs | 8 + .../Workers/IWorkerProcessLauncher.cs | 8 + .../Workers/IWorkerStartupProbe.cs | 9 + .../Workers/SystemWorkerProcess.cs | 27 ++ .../Workers/SystemWorkerProcessFactory.cs | 22 ++ .../Workers/WorkerExecutableValidator.cs | 80 +++++ .../Workers/WorkerProcessCommandLine.cs | 30 ++ .../Workers/WorkerProcessHandle.cs | 28 ++ .../Workers/WorkerProcessLaunchErrorCode.cs | 13 + .../Workers/WorkerProcessLaunchException.cs | 23 ++ .../Workers/WorkerProcessLaunchRequest.cs | 8 + .../Workers/WorkerProcessLauncher.cs | 262 +++++++++++++++ .../Workers/WorkerProcessStartedProbe.cs | 19 ++ .../WorkerServiceCollectionExtensions.cs | 13 + .../Workers/WorkerProcessLauncherTests.cs | 307 ++++++++++++++++++ 20 files changed, 946 insertions(+) create mode 100644 docs/WorkerProcessLauncher.md create mode 100644 src/MxGateway.Server/Workers/IWorkerProcess.cs create mode 100644 src/MxGateway.Server/Workers/IWorkerProcessFactory.cs create mode 100644 src/MxGateway.Server/Workers/IWorkerProcessLauncher.cs create mode 100644 src/MxGateway.Server/Workers/IWorkerStartupProbe.cs create mode 100644 src/MxGateway.Server/Workers/SystemWorkerProcess.cs create mode 100644 src/MxGateway.Server/Workers/SystemWorkerProcessFactory.cs create mode 100644 src/MxGateway.Server/Workers/WorkerExecutableValidator.cs create mode 100644 src/MxGateway.Server/Workers/WorkerProcessCommandLine.cs create mode 100644 src/MxGateway.Server/Workers/WorkerProcessHandle.cs create mode 100644 src/MxGateway.Server/Workers/WorkerProcessLaunchErrorCode.cs create mode 100644 src/MxGateway.Server/Workers/WorkerProcessLaunchException.cs create mode 100644 src/MxGateway.Server/Workers/WorkerProcessLaunchRequest.cs create mode 100644 src/MxGateway.Server/Workers/WorkerProcessLauncher.cs create mode 100644 src/MxGateway.Server/Workers/WorkerProcessStartedProbe.cs create mode 100644 src/MxGateway.Server/Workers/WorkerServiceCollectionExtensions.cs create mode 100644 src/MxGateway.Tests/Gateway/Workers/WorkerProcessLauncherTests.cs diff --git a/docs/WorkerProcessLauncher.md b/docs/WorkerProcessLauncher.md new file mode 100644 index 0000000..35dbdc9 --- /dev/null +++ b/docs/WorkerProcessLauncher.md @@ -0,0 +1,62 @@ +# Worker Process Launcher + +The gateway uses `WorkerProcessLauncher` to validate and start one worker +process for a gateway session. The launcher owns process start semantics only; +pipe handshaking and `WorkerReady` validation remain part of the worker client +startup path. + +## Launch Inputs + +`WorkerProcessLaunchRequest` carries the per-session bootstrap values: + +- `SessionId`, +- `PipeName`, +- `ProtocolVersion`, +- `Nonce`, +- optional `PipeReservation` cleanup handle. + +The launcher passes `SessionId`, `PipeName`, and `ProtocolVersion` as command +line arguments: + +```text +--session-id --pipe-name --protocol-version +``` + +The launcher sets the nonce through the `MXGATEWAY_WORKER_NONCE` environment +variable. The nonce is not included in `WorkerProcessCommandLine` so logs and +diagnostics can report the launch command without exposing the secret. + +## Validation And Cleanup + +Before starting the process, the launcher validates that the configured worker +path exists, has a `.exe` extension, contains a valid Windows Portable +Executable header, and matches the configured `RequiredArchitecture`. + +After the process starts, `IWorkerStartupProbe` waits for startup readiness. +The default probe only verifies that the worker did not exit immediately. The +worker client replaces this probe when pipe connection, hello, and +`WorkerReady` handling are implemented. + +If startup fails or exceeds `WorkerOptions.StartupTimeoutSeconds`, the launcher +kills the worker process tree, disposes the process handle, disposes the +optional pipe reservation, records a worker kill metric, and reports a +`WorkerProcessLaunchException`. + +## Verification + +Run the focused launcher tests after changing process launch behavior: + +```bash +dotnet test src/MxGateway.Tests/MxGateway.Tests.csproj --filter WorkerProcessLauncherTests +``` + +Run the gateway build because the launcher is part of `MxGateway.Server`: + +```bash +dotnet build src/MxGateway.Server/MxGateway.Server.csproj +``` + +## Related Documentation + +- [Gateway Process Detailed Design](./gateway-process-design.md) +- [Worker Frame Protocol](./WorkerFrameProtocol.md) diff --git a/docs/gateway-process-design.md b/docs/gateway-process-design.md index 88ade9e..50dc5b6 100644 --- a/docs/gateway-process-design.md +++ b/docs/gateway-process-design.md @@ -360,6 +360,15 @@ Before launch, validate: - worker file version or product version is acceptable, - worker is expected to be x86. +`WorkerProcessLauncher` implements the first validation layer now: it resolves +the worker executable path, requires a `.exe`, validates the Windows Portable +Executable header, and verifies the configured processor architecture. It passes +only `--session-id`, `--pipe-name`, and `--protocol-version` on the command +line. The per-session nonce is set through `MXGATEWAY_WORKER_NONCE` so the +command line remains safe to log. Startup failures and startup timeouts kill and +dispose the worker process and the pre-created pipe reservation before the +session manager observes the failure. + ## Worker IPC The gateway creates the pipe server before launching the worker. diff --git a/gateway.md b/gateway.md index ab18d8c..5d378f2 100644 --- a/gateway.md +++ b/gateway.md @@ -47,6 +47,8 @@ Detailed follow-up docs: security, observability, and test strategy. - `docs/WorkerFrameProtocol.md` covers the gateway-side named-pipe frame reader/writer and `WorkerEnvelope` validation rules. +- `docs/WorkerProcessLauncher.md` covers worker executable validation, process + launch arguments, nonce handling, and startup cleanup behavior. - `docs/mxaccess-worker-instance-design.md` covers each .NET Framework 4.8 x86 MXAccess worker instance, including STA ownership, message pumping, COM lifetime, command dispatch, event sinks, conversion, and shutdown. diff --git a/src/MxGateway.Server/GatewayApplication.cs b/src/MxGateway.Server/GatewayApplication.cs index 1afcd85..4167248 100644 --- a/src/MxGateway.Server/GatewayApplication.cs +++ b/src/MxGateway.Server/GatewayApplication.cs @@ -3,6 +3,7 @@ using MxGateway.Server.Configuration; using MxGateway.Server.Diagnostics; using MxGateway.Server.Metrics; using MxGateway.Server.Security.Authentication; +using MxGateway.Server.Workers; namespace MxGateway.Server; @@ -27,6 +28,7 @@ public static class GatewayApplication builder.Services.AddSqliteAuthStore(); builder.Services.AddHealthChecks(); builder.Services.AddSingleton(); + builder.Services.AddWorkerProcessLauncher(); return builder; } diff --git a/src/MxGateway.Server/Workers/IWorkerProcess.cs b/src/MxGateway.Server/Workers/IWorkerProcess.cs new file mode 100644 index 0000000..0231c49 --- /dev/null +++ b/src/MxGateway.Server/Workers/IWorkerProcess.cs @@ -0,0 +1,14 @@ +namespace MxGateway.Server.Workers; + +public interface IWorkerProcess : IDisposable +{ + int Id { get; } + + bool HasExited { get; } + + int? ExitCode { get; } + + ValueTask WaitForExitAsync(CancellationToken cancellationToken); + + void Kill(bool entireProcessTree); +} diff --git a/src/MxGateway.Server/Workers/IWorkerProcessFactory.cs b/src/MxGateway.Server/Workers/IWorkerProcessFactory.cs new file mode 100644 index 0000000..5741df1 --- /dev/null +++ b/src/MxGateway.Server/Workers/IWorkerProcessFactory.cs @@ -0,0 +1,8 @@ +using System.Diagnostics; + +namespace MxGateway.Server.Workers; + +public interface IWorkerProcessFactory +{ + IWorkerProcess Start(ProcessStartInfo startInfo); +} diff --git a/src/MxGateway.Server/Workers/IWorkerProcessLauncher.cs b/src/MxGateway.Server/Workers/IWorkerProcessLauncher.cs new file mode 100644 index 0000000..ccc1036 --- /dev/null +++ b/src/MxGateway.Server/Workers/IWorkerProcessLauncher.cs @@ -0,0 +1,8 @@ +namespace MxGateway.Server.Workers; + +public interface IWorkerProcessLauncher +{ + Task LaunchAsync( + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken = default); +} diff --git a/src/MxGateway.Server/Workers/IWorkerStartupProbe.cs b/src/MxGateway.Server/Workers/IWorkerStartupProbe.cs new file mode 100644 index 0000000..81245db --- /dev/null +++ b/src/MxGateway.Server/Workers/IWorkerStartupProbe.cs @@ -0,0 +1,9 @@ +namespace MxGateway.Server.Workers; + +public interface IWorkerStartupProbe +{ + Task WaitUntilReadyAsync( + IWorkerProcess process, + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken); +} diff --git a/src/MxGateway.Server/Workers/SystemWorkerProcess.cs b/src/MxGateway.Server/Workers/SystemWorkerProcess.cs new file mode 100644 index 0000000..72adddf --- /dev/null +++ b/src/MxGateway.Server/Workers/SystemWorkerProcess.cs @@ -0,0 +1,27 @@ +using System.Diagnostics; + +namespace MxGateway.Server.Workers; + +internal sealed class SystemWorkerProcess(Process process) : IWorkerProcess +{ + public int Id => process.Id; + + public bool HasExited => process.HasExited; + + public int? ExitCode => process.HasExited ? process.ExitCode : null; + + public async ValueTask WaitForExitAsync(CancellationToken cancellationToken) + { + await process.WaitForExitAsync(cancellationToken).ConfigureAwait(false); + } + + public void Kill(bool entireProcessTree) + { + process.Kill(entireProcessTree); + } + + public void Dispose() + { + process.Dispose(); + } +} diff --git a/src/MxGateway.Server/Workers/SystemWorkerProcessFactory.cs b/src/MxGateway.Server/Workers/SystemWorkerProcessFactory.cs new file mode 100644 index 0000000..bdf7567 --- /dev/null +++ b/src/MxGateway.Server/Workers/SystemWorkerProcessFactory.cs @@ -0,0 +1,22 @@ +using System.Diagnostics; + +namespace MxGateway.Server.Workers; + +public sealed class SystemWorkerProcessFactory : IWorkerProcessFactory +{ + public IWorkerProcess Start(ProcessStartInfo startInfo) + { + Process process = new() + { + StartInfo = startInfo, + }; + + if (!process.Start()) + { + process.Dispose(); + throw new InvalidOperationException("Worker process failed to start."); + } + + return new SystemWorkerProcess(process); + } +} diff --git a/src/MxGateway.Server/Workers/WorkerExecutableValidator.cs b/src/MxGateway.Server/Workers/WorkerExecutableValidator.cs new file mode 100644 index 0000000..db22325 --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerExecutableValidator.cs @@ -0,0 +1,80 @@ +using System.Buffers.Binary; +using MxGateway.Server.Configuration; + +namespace MxGateway.Server.Workers; + +internal static class WorkerExecutableValidator +{ + private const ushort ImageFileMachineI386 = 0x014c; + private const ushort ImageFileMachineAmd64 = 0x8664; + private const int DosHeaderSignatureOffset = 0; + private const int PeHeaderOffsetPointer = 0x3c; + private const int PeSignatureSize = 4; + private const int MachineOffsetFromPeHeader = PeSignatureSize; + private const int MinimumHeaderSize = 0x40; + + public static void Validate( + string executablePath, + WorkerArchitecture requiredArchitecture) + { + ushort machine = ReadMachineType(executablePath); + ushort expectedMachine = requiredArchitecture switch + { + WorkerArchitecture.X86 => ImageFileMachineI386, + WorkerArchitecture.X64 => ImageFileMachineAmd64, + _ => throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidExecutable, + "Worker executable required architecture is unsupported."), + }; + + if (machine != expectedMachine) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidExecutable, + $"Worker executable architecture does not match required {requiredArchitecture} architecture."); + } + } + + private static ushort ReadMachineType(string executablePath) + { + byte[] header = new byte[MinimumHeaderSize]; + using FileStream stream = File.OpenRead(executablePath); + if (stream.Read(header) < header.Length) + { + throw InvalidExecutable("Worker executable is too small to contain a valid PE header."); + } + + if (header[DosHeaderSignatureOffset] != 'M' || header[DosHeaderSignatureOffset + 1] != 'Z') + { + throw InvalidExecutable("Worker executable does not contain an MZ header."); + } + + int peHeaderOffset = BinaryPrimitives.ReadInt32LittleEndian(header.AsSpan(PeHeaderOffsetPointer, sizeof(int))); + if (peHeaderOffset < MinimumHeaderSize) + { + throw InvalidExecutable("Worker executable PE header offset is invalid."); + } + + byte[] peHeaderBytes = new byte[PeSignatureSize + sizeof(ushort)]; + stream.Position = peHeaderOffset; + if (stream.Read(peHeaderBytes) < peHeaderBytes.Length) + { + throw InvalidExecutable("Worker executable PE header is missing."); + } + + if (peHeaderBytes[0] != 'P' || peHeaderBytes[1] != 'E' || peHeaderBytes[2] != 0 || peHeaderBytes[3] != 0) + { + throw InvalidExecutable("Worker executable does not contain a PE header."); + } + + return BinaryPrimitives.ReadUInt16LittleEndian( + peHeaderBytes.AsSpan(MachineOffsetFromPeHeader, sizeof(ushort))); + } + + private static WorkerProcessLaunchException InvalidExecutable(string message) + { + return new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidExecutable, + message); + } +} diff --git a/src/MxGateway.Server/Workers/WorkerProcessCommandLine.cs b/src/MxGateway.Server/Workers/WorkerProcessCommandLine.cs new file mode 100644 index 0000000..b1a670c --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerProcessCommandLine.cs @@ -0,0 +1,30 @@ +namespace MxGateway.Server.Workers; + +public sealed class WorkerProcessCommandLine +{ + public WorkerProcessCommandLine( + string executablePath, + IReadOnlyList arguments) + { + ExecutablePath = executablePath; + Arguments = arguments; + } + + public string ExecutablePath { get; } + + public IReadOnlyList Arguments { get; } + + public override string ToString() + { + return string.Join( + " ", + new[] { Quote(ExecutablePath) }.Concat(Arguments.Select(Quote))); + } + + private static string Quote(string value) + { + return value.Contains(' ', StringComparison.Ordinal) + ? $"\"{value}\"" + : value; + } +} diff --git a/src/MxGateway.Server/Workers/WorkerProcessHandle.cs b/src/MxGateway.Server/Workers/WorkerProcessHandle.cs new file mode 100644 index 0000000..ca94b5b --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerProcessHandle.cs @@ -0,0 +1,28 @@ +namespace MxGateway.Server.Workers; + +public sealed class WorkerProcessHandle : IDisposable +{ + public WorkerProcessHandle( + IWorkerProcess process, + WorkerProcessCommandLine commandLine, + DateTimeOffset launchedAt) + { + Process = process; + ProcessId = process.Id; + CommandLine = commandLine; + LaunchedAt = launchedAt; + } + + public IWorkerProcess Process { get; } + + public int ProcessId { get; } + + public WorkerProcessCommandLine CommandLine { get; } + + public DateTimeOffset LaunchedAt { get; } + + public void Dispose() + { + Process.Dispose(); + } +} diff --git a/src/MxGateway.Server/Workers/WorkerProcessLaunchErrorCode.cs b/src/MxGateway.Server/Workers/WorkerProcessLaunchErrorCode.cs new file mode 100644 index 0000000..9638f3e --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerProcessLaunchErrorCode.cs @@ -0,0 +1,13 @@ +namespace MxGateway.Server.Workers; + +public enum WorkerProcessLaunchErrorCode +{ + Unknown = 0, + InvalidRequest = 1, + ExecutableNotFound = 2, + InvalidExecutable = 3, + InvalidWorkingDirectory = 4, + StartFailed = 5, + StartupTimeout = 6, + StartupFailed = 7, +} diff --git a/src/MxGateway.Server/Workers/WorkerProcessLaunchException.cs b/src/MxGateway.Server/Workers/WorkerProcessLaunchException.cs new file mode 100644 index 0000000..2ae3b5b --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerProcessLaunchException.cs @@ -0,0 +1,23 @@ +namespace MxGateway.Server.Workers; + +public sealed class WorkerProcessLaunchException : Exception +{ + public WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode errorCode, + string message) + : base(message) + { + ErrorCode = errorCode; + } + + public WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode errorCode, + string message, + Exception innerException) + : base(message, innerException) + { + ErrorCode = errorCode; + } + + public WorkerProcessLaunchErrorCode ErrorCode { get; } +} diff --git a/src/MxGateway.Server/Workers/WorkerProcessLaunchRequest.cs b/src/MxGateway.Server/Workers/WorkerProcessLaunchRequest.cs new file mode 100644 index 0000000..d477d3c --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerProcessLaunchRequest.cs @@ -0,0 +1,8 @@ +namespace MxGateway.Server.Workers; + +public sealed record WorkerProcessLaunchRequest( + string SessionId, + string PipeName, + uint ProtocolVersion, + string Nonce, + IDisposable? PipeReservation = null); diff --git a/src/MxGateway.Server/Workers/WorkerProcessLauncher.cs b/src/MxGateway.Server/Workers/WorkerProcessLauncher.cs new file mode 100644 index 0000000..581b8cb --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerProcessLauncher.cs @@ -0,0 +1,262 @@ +using System.Diagnostics; +using Microsoft.Extensions.Options; +using MxGateway.Server.Configuration; +using MxGateway.Server.Metrics; + +namespace MxGateway.Server.Workers; + +public sealed class WorkerProcessLauncher : IWorkerProcessLauncher +{ + public const string WorkerNonceEnvironmentVariableName = "MXGATEWAY_WORKER_NONCE"; + + private readonly IWorkerProcessFactory _processFactory; + private readonly IWorkerStartupProbe _startupProbe; + private readonly GatewayMetrics _metrics; + private readonly TimeProvider _timeProvider; + private readonly WorkerOptions _workerOptions; + + public WorkerProcessLauncher( + IOptions gatewayOptions, + IWorkerProcessFactory processFactory, + IWorkerStartupProbe startupProbe, + GatewayMetrics metrics, + TimeProvider? timeProvider = null) + { + ArgumentNullException.ThrowIfNull(gatewayOptions); + ArgumentNullException.ThrowIfNull(processFactory); + ArgumentNullException.ThrowIfNull(startupProbe); + ArgumentNullException.ThrowIfNull(metrics); + + _workerOptions = gatewayOptions.Value.Worker; + _processFactory = processFactory; + _startupProbe = startupProbe; + _metrics = metrics; + _timeProvider = timeProvider ?? TimeProvider.System; + } + + public async Task LaunchAsync( + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken = default) + { + try + { + return await LaunchCoreAsync(request, cancellationToken).ConfigureAwait(false); + } + catch + { + request.PipeReservation?.Dispose(); + throw; + } + } + + private async Task LaunchCoreAsync( + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken) + { + ValidateRequest(request); + + DateTimeOffset startedAt = _timeProvider.GetUtcNow(); + ProcessStartInfo startInfo = CreateStartInfo(request, out WorkerProcessCommandLine commandLine); + + IWorkerProcess process; + try + { + process = _processFactory.Start(startInfo); + } + catch (Exception exception) when (exception is not WorkerProcessLaunchException) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.StartFailed, + "Worker process failed to start.", + exception); + } + + try + { + using CancellationTokenSource startupTimeout = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + startupTimeout.CancelAfter(TimeSpan.FromSeconds(_workerOptions.StartupTimeoutSeconds)); + + await _startupProbe + .WaitUntilReadyAsync(process, request, startupTimeout.Token) + .ConfigureAwait(false); + + _metrics.WorkerStarted(_timeProvider.GetUtcNow() - startedAt); + + return new WorkerProcessHandle(process, commandLine, startedAt); + } + catch (OperationCanceledException exception) when (!cancellationToken.IsCancellationRequested) + { + KillAndDispose(process, "StartupTimeout"); + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.StartupTimeout, + "Worker process did not complete startup before the configured timeout.", + exception); + } + catch (OperationCanceledException) + { + KillAndDispose(process, "LaunchCanceled"); + throw; + } + catch (Exception exception) when (exception is not WorkerProcessLaunchException) + { + KillAndDispose(process, "StartupFailed"); + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.StartupFailed, + "Worker process failed during startup.", + exception); + } + catch (WorkerProcessLaunchException) + { + KillAndDispose(process, "StartupFailed"); + throw; + } + } + + private ProcessStartInfo CreateStartInfo( + WorkerProcessLaunchRequest request, + out WorkerProcessCommandLine commandLine) + { + string executablePath = ResolveExecutablePath(); + string workingDirectory = ResolveWorkingDirectory(executablePath); + string[] arguments = + [ + "--session-id", + request.SessionId, + "--pipe-name", + request.PipeName, + "--protocol-version", + request.ProtocolVersion.ToString(System.Globalization.CultureInfo.InvariantCulture), + ]; + + ProcessStartInfo startInfo = new() + { + FileName = executablePath, + WorkingDirectory = workingDirectory, + UseShellExecute = false, + CreateNoWindow = true, + ErrorDialog = false, + }; + + foreach (string argument in arguments) + { + startInfo.ArgumentList.Add(argument); + } + + startInfo.Environment[WorkerNonceEnvironmentVariableName] = request.Nonce; + + commandLine = new WorkerProcessCommandLine(executablePath, arguments); + + return startInfo; + } + + private string ResolveExecutablePath() + { + string executablePath; + try + { + executablePath = Path.GetFullPath(_workerOptions.ExecutablePath); + } + catch (Exception exception) when (exception is ArgumentException or NotSupportedException or PathTooLongException) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidExecutable, + "Worker executable path is not a valid filesystem path.", + exception); + } + + if (!string.Equals(Path.GetExtension(executablePath), ".exe", StringComparison.OrdinalIgnoreCase)) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidExecutable, + "Worker executable path must point to a .exe file."); + } + + if (!File.Exists(executablePath)) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.ExecutableNotFound, + "Worker executable does not exist."); + } + + WorkerExecutableValidator.Validate(executablePath, _workerOptions.RequiredArchitecture); + + return executablePath; + } + + private string ResolveWorkingDirectory(string executablePath) + { + if (string.IsNullOrWhiteSpace(_workerOptions.WorkingDirectory)) + { + return Path.GetDirectoryName(executablePath) ?? Environment.CurrentDirectory; + } + + string workingDirectory; + try + { + workingDirectory = Path.GetFullPath(_workerOptions.WorkingDirectory); + } + catch (Exception exception) when (exception is ArgumentException or NotSupportedException or PathTooLongException) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidWorkingDirectory, + "Worker working directory is not a valid filesystem path.", + exception); + } + + if (!Directory.Exists(workingDirectory)) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidWorkingDirectory, + "Worker working directory does not exist."); + } + + return workingDirectory; + } + + private void KillAndDispose(IWorkerProcess process, string reason) + { + try + { + if (!process.HasExited) + { + process.Kill(entireProcessTree: true); + _metrics.WorkerKilled(reason); + } + } + finally + { + process.Dispose(); + } + } + + private static void ValidateRequest(WorkerProcessLaunchRequest request) + { + if (string.IsNullOrWhiteSpace(request.SessionId)) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidRequest, + "Worker launch requires a session id."); + } + + if (string.IsNullOrWhiteSpace(request.PipeName)) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidRequest, + "Worker launch requires a pipe name."); + } + + if (request.ProtocolVersion == 0) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidRequest, + "Worker launch requires a non-zero protocol version."); + } + + if (string.IsNullOrWhiteSpace(request.Nonce)) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.InvalidRequest, + "Worker launch requires a nonce."); + } + } +} diff --git a/src/MxGateway.Server/Workers/WorkerProcessStartedProbe.cs b/src/MxGateway.Server/Workers/WorkerProcessStartedProbe.cs new file mode 100644 index 0000000..947306c --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerProcessStartedProbe.cs @@ -0,0 +1,19 @@ +namespace MxGateway.Server.Workers; + +public sealed class WorkerProcessStartedProbe : IWorkerStartupProbe +{ + public Task WaitUntilReadyAsync( + IWorkerProcess process, + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken) + { + if (process.HasExited) + { + throw new WorkerProcessLaunchException( + WorkerProcessLaunchErrorCode.StartupFailed, + $"Worker process exited before startup completed with exit code {process.ExitCode}."); + } + + return Task.CompletedTask; + } +} diff --git a/src/MxGateway.Server/Workers/WorkerServiceCollectionExtensions.cs b/src/MxGateway.Server/Workers/WorkerServiceCollectionExtensions.cs new file mode 100644 index 0000000..7a0b961 --- /dev/null +++ b/src/MxGateway.Server/Workers/WorkerServiceCollectionExtensions.cs @@ -0,0 +1,13 @@ +namespace MxGateway.Server.Workers; + +public static class WorkerServiceCollectionExtensions +{ + public static IServiceCollection AddWorkerProcessLauncher(this IServiceCollection services) + { + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + + return services; + } +} diff --git a/src/MxGateway.Tests/Gateway/Workers/WorkerProcessLauncherTests.cs b/src/MxGateway.Tests/Gateway/Workers/WorkerProcessLauncherTests.cs new file mode 100644 index 0000000..977d3b7 --- /dev/null +++ b/src/MxGateway.Tests/Gateway/Workers/WorkerProcessLauncherTests.cs @@ -0,0 +1,307 @@ +using System.Diagnostics; +using Microsoft.Extensions.Options; +using MxGateway.Contracts; +using MxGateway.Server.Configuration; +using MxGateway.Server.Metrics; +using MxGateway.Server.Workers; + +namespace MxGateway.Tests.Gateway.Workers; + +public sealed class WorkerProcessLauncherTests +{ + private const string SessionId = "session-1"; + private const string PipeName = "mxaccess-gateway-123-session-1"; + private const string Nonce = "super-secret-nonce"; + + [Fact] + public async Task LaunchAsync_WithValidWorker_StartsProcessWithBootstrapArgumentsAndNonceEnvironment() + { + using TestDirectory directory = TestDirectory.Create(); + string executablePath = directory.CreateWorkerExecutable(machine: 0x014c); + FakeWorkerProcess process = new(processId: 1234); + FakePipeReservation pipeReservation = new(); + FakeWorkerProcessFactory processFactory = new(process); + GatewayMetrics metrics = new(); + WorkerProcessLauncher launcher = CreateLauncher(executablePath, processFactory, new SucceedingStartupProbe(), metrics); + + using WorkerProcessHandle handle = await launcher.LaunchAsync(CreateRequest(pipeReservation)); + + Assert.Equal(1234, handle.ProcessId); + Assert.Same(process, handle.Process); + Assert.NotNull(processFactory.LastStartInfo); + Assert.Equal(Path.GetFullPath(executablePath), processFactory.LastStartInfo.FileName); + Assert.False(processFactory.LastStartInfo.UseShellExecute); + Assert.True(processFactory.LastStartInfo.CreateNoWindow); + Assert.Equal( + ["--session-id", SessionId, "--pipe-name", PipeName, "--protocol-version", "1"], + processFactory.LastStartInfo.ArgumentList); + Assert.Equal(Nonce, processFactory.LastStartInfo.Environment[WorkerProcessLauncher.WorkerNonceEnvironmentVariableName]); + Assert.DoesNotContain(Nonce, handle.CommandLine.ToString(), StringComparison.Ordinal); + Assert.DoesNotContain(Nonce, string.Join(" ", handle.CommandLine.Arguments), StringComparison.Ordinal); + Assert.False(pipeReservation.DisposeCalled); + Assert.Equal(1, metrics.GetSnapshot().WorkersRunning); + } + + [Fact] + public async Task LaunchAsync_WhenStartupProbeFails_KillsAndDisposesWorker() + { + using TestDirectory directory = TestDirectory.Create(); + string executablePath = directory.CreateWorkerExecutable(machine: 0x014c); + FakeWorkerProcess process = new(processId: 1234); + FakePipeReservation pipeReservation = new(); + GatewayMetrics metrics = new(); + WorkerProcessLauncher launcher = CreateLauncher( + executablePath, + new FakeWorkerProcessFactory(process), + new FailingStartupProbe(), + metrics); + + WorkerProcessLaunchException exception = + await Assert.ThrowsAsync( + async () => await launcher.LaunchAsync(CreateRequest(pipeReservation))); + + Assert.Equal(WorkerProcessLaunchErrorCode.StartupFailed, exception.ErrorCode); + Assert.True(process.KillCalled); + Assert.True(process.DisposeCalled); + Assert.True(pipeReservation.DisposeCalled); + Assert.Equal(1, metrics.GetSnapshot().WorkerKills); + } + + [Fact] + public async Task LaunchAsync_WhenStartupTimesOut_KillsAndDisposesWorker() + { + using TestDirectory directory = TestDirectory.Create(); + string executablePath = directory.CreateWorkerExecutable(machine: 0x014c); + FakeWorkerProcess process = new(processId: 1234); + GatewayMetrics metrics = new(); + WorkerProcessLauncher launcher = CreateLauncher( + executablePath, + new FakeWorkerProcessFactory(process), + new WaitingStartupProbe(), + metrics, + startupTimeoutSeconds: 1); + + WorkerProcessLaunchException exception = + await Assert.ThrowsAsync( + async () => await launcher.LaunchAsync(CreateRequest())); + + Assert.Equal(WorkerProcessLaunchErrorCode.StartupTimeout, exception.ErrorCode); + Assert.True(process.KillCalled); + Assert.True(process.DisposeCalled); + Assert.Equal(1, metrics.GetSnapshot().WorkerKills); + } + + [Fact] + public async Task LaunchAsync_WhenExecutableDoesNotExist_FailsBeforeStartingProcess() + { + using TestDirectory directory = TestDirectory.Create(); + string executablePath = Path.Combine(directory.Path, "missing-worker.exe"); + FakeWorkerProcessFactory processFactory = new(new FakeWorkerProcess(processId: 1234)); + WorkerProcessLauncher launcher = CreateLauncher(executablePath, processFactory, new SucceedingStartupProbe()); + + WorkerProcessLaunchException exception = + await Assert.ThrowsAsync( + async () => await launcher.LaunchAsync(CreateRequest())); + + Assert.Equal(WorkerProcessLaunchErrorCode.ExecutableNotFound, exception.ErrorCode); + Assert.Null(processFactory.LastStartInfo); + } + + [Fact] + public async Task LaunchAsync_WhenExecutableArchitectureDoesNotMatch_FailsBeforeStartingProcess() + { + using TestDirectory directory = TestDirectory.Create(); + string executablePath = directory.CreateWorkerExecutable(machine: 0x8664); + FakeWorkerProcessFactory processFactory = new(new FakeWorkerProcess(processId: 1234)); + WorkerProcessLauncher launcher = CreateLauncher(executablePath, processFactory, new SucceedingStartupProbe()); + + WorkerProcessLaunchException exception = + await Assert.ThrowsAsync( + async () => await launcher.LaunchAsync(CreateRequest())); + + Assert.Equal(WorkerProcessLaunchErrorCode.InvalidExecutable, exception.ErrorCode); + Assert.Null(processFactory.LastStartInfo); + } + + [Fact] + public async Task LaunchAsync_WhenWorkerAlreadyExited_FailsAndDisposesWorkerWithoutKill() + { + using TestDirectory directory = TestDirectory.Create(); + string executablePath = directory.CreateWorkerExecutable(machine: 0x014c); + FakeWorkerProcess process = new(processId: 1234) + { + HasExited = true, + ExitCode = 42, + }; + WorkerProcessLauncher launcher = CreateLauncher( + executablePath, + new FakeWorkerProcessFactory(process), + new WorkerProcessStartedProbe()); + + WorkerProcessLaunchException exception = + await Assert.ThrowsAsync( + async () => await launcher.LaunchAsync(CreateRequest())); + + Assert.Equal(WorkerProcessLaunchErrorCode.StartupFailed, exception.ErrorCode); + Assert.False(process.KillCalled); + Assert.True(process.DisposeCalled); + } + + private static WorkerProcessLauncher CreateLauncher( + string executablePath, + IWorkerProcessFactory processFactory, + IWorkerStartupProbe startupProbe, + GatewayMetrics? metrics = null, + int startupTimeoutSeconds = 30) + { + GatewayOptions options = new() + { + Worker = new WorkerOptions + { + ExecutablePath = executablePath, + RequiredArchitecture = WorkerArchitecture.X86, + StartupTimeoutSeconds = startupTimeoutSeconds, + }, + }; + + return new WorkerProcessLauncher( + Options.Create(options), + processFactory, + startupProbe, + metrics ?? new GatewayMetrics()); + } + + private static WorkerProcessLaunchRequest CreateRequest(IDisposable? pipeReservation = null) + { + return new WorkerProcessLaunchRequest( + SessionId, + PipeName, + GatewayContractInfo.WorkerProtocolVersion, + Nonce, + pipeReservation); + } + + private sealed class FakeWorkerProcessFactory(IWorkerProcess process) : IWorkerProcessFactory + { + public ProcessStartInfo? LastStartInfo { get; private set; } + + public IWorkerProcess Start(ProcessStartInfo startInfo) + { + LastStartInfo = startInfo; + return process; + } + } + + private sealed class FakeWorkerProcess(int processId) : IWorkerProcess + { + public int Id { get; } = processId; + + public bool HasExited { get; set; } + + public int? ExitCode { get; set; } + + public bool DisposeCalled { get; private set; } + + public bool KillCalled { get; private set; } + + public ValueTask WaitForExitAsync(CancellationToken cancellationToken) + { + return ValueTask.CompletedTask; + } + + public void Kill(bool entireProcessTree) + { + Assert.True(entireProcessTree); + KillCalled = true; + HasExited = true; + } + + public void Dispose() + { + DisposeCalled = true; + } + } + + private sealed class SucceedingStartupProbe : IWorkerStartupProbe + { + public Task WaitUntilReadyAsync( + IWorkerProcess process, + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken) + { + return Task.CompletedTask; + } + } + + private sealed class FailingStartupProbe : IWorkerStartupProbe + { + public Task WaitUntilReadyAsync( + IWorkerProcess process, + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken) + { + throw new InvalidOperationException("Fake worker startup failed."); + } + } + + private sealed class WaitingStartupProbe : IWorkerStartupProbe + { + public async Task WaitUntilReadyAsync( + IWorkerProcess process, + WorkerProcessLaunchRequest request, + CancellationToken cancellationToken) + { + await Task.Delay(Timeout.InfiniteTimeSpan, cancellationToken); + } + } + + private sealed class FakePipeReservation : IDisposable + { + public bool DisposeCalled { get; private set; } + + public void Dispose() + { + DisposeCalled = true; + } + } + + private sealed class TestDirectory : IDisposable + { + private TestDirectory(string path) + { + Path = path; + } + + public string Path { get; } + + public static TestDirectory Create() + { + string path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), $"mxgateway-tests-{Guid.NewGuid():N}"); + Directory.CreateDirectory(path); + + return new TestDirectory(path); + } + + public string CreateWorkerExecutable(ushort machine) + { + string path = System.IO.Path.Combine(Path, "MxGateway.Worker.exe"); + byte[] bytes = new byte[0x100]; + bytes[0] = (byte)'M'; + bytes[1] = (byte)'Z'; + BitConverter.GetBytes(0x80).CopyTo(bytes, 0x3c); + bytes[0x80] = (byte)'P'; + bytes[0x81] = (byte)'E'; + bytes[0x82] = 0; + bytes[0x83] = 0; + BitConverter.GetBytes(machine).CopyTo(bytes, 0x84); + File.WriteAllBytes(path, bytes); + + return path; + } + + public void Dispose() + { + Directory.Delete(Path, recursive: true); + } + } +} -- 2.52.0