Add Polly resilience policies

This commit is contained in:
Joseph Doherty
2026-04-27 15:37:56 -04:00
parent d431ff9660
commit bd4a09a35e
22 changed files with 611 additions and 21 deletions
@@ -20,6 +20,9 @@ public sealed class GatewayOptionsTests
Assert.Equal(@"src\MxGateway.Worker\bin\x86\Release\MxGateway.Worker.exe", options.Worker.ExecutablePath);
Assert.Equal(WorkerArchitecture.X86, options.Worker.RequiredArchitecture);
Assert.Equal(30, options.Worker.StartupTimeoutSeconds);
Assert.Equal(3, options.Worker.StartupProbeRetryAttempts);
Assert.Equal(250, options.Worker.StartupProbeRetryDelayMilliseconds);
Assert.Equal(2000, options.Worker.PipeConnectAttemptTimeoutMilliseconds);
Assert.Equal(10, options.Worker.ShutdownTimeoutSeconds);
Assert.Equal(5, options.Worker.HeartbeatIntervalSeconds);
Assert.Equal(15, options.Worker.HeartbeatGraceSeconds);
@@ -66,6 +69,8 @@ public sealed class GatewayOptionsTests
[Theory]
[InlineData("MxGateway:Worker:ExecutablePath", "worker.dll", "MxGateway:Worker:ExecutablePath must point to a .exe file.")]
[InlineData("MxGateway:Worker:StartupProbeRetryAttempts", "0", "MxGateway:Worker:StartupProbeRetryAttempts must be greater than zero.")]
[InlineData("MxGateway:Worker:PipeConnectAttemptTimeoutMilliseconds", "0", "MxGateway:Worker:PipeConnectAttemptTimeoutMilliseconds must be greater than zero.")]
[InlineData("MxGateway:Events:QueueCapacity", "0", "MxGateway:Events:QueueCapacity must be greater than zero.")]
[InlineData("MxGateway:Authentication:PepperSecretName", "", "MxGateway:Authentication:PepperSecretName is required")]
[InlineData("MxGateway:Dashboard:PathBase", "dashboard", "MxGateway:Dashboard:PathBase must start with '/'.")]
@@ -36,6 +36,10 @@ public sealed class WorkerProcessLauncherTests
["--session-id", SessionId, "--pipe-name", PipeName, "--protocol-version", "1"],
processFactory.LastStartInfo.ArgumentList);
Assert.Equal(Nonce, processFactory.LastStartInfo.Environment[WorkerProcessLauncher.WorkerNonceEnvironmentVariableName]);
Assert.Equal(
"2000",
processFactory.LastStartInfo.Environment[
WorkerProcessLauncher.WorkerPipeConnectAttemptTimeoutEnvironmentVariableName]);
Assert.DoesNotContain(Nonce, handle.CommandLine.ToString(), StringComparison.Ordinal);
Assert.DoesNotContain(Nonce, string.Join(" ", handle.CommandLine.Arguments), StringComparison.Ordinal);
Assert.False(pipeReservation.DisposeCalled);
@@ -67,6 +71,32 @@ public sealed class WorkerProcessLauncherTests
Assert.Equal(1, metrics.GetSnapshot().WorkerKills);
}
[Fact]
public async Task LaunchAsync_WhenStartupProbeFailsTransiently_RetriesWithoutRespawningWorker()
{
using TestDirectory directory = TestDirectory.Create();
string executablePath = directory.CreateWorkerExecutable(machine: 0x014c);
FakeWorkerProcess process = new(processId: 1234);
FakeWorkerProcessFactory processFactory = new(process);
GatewayMetrics metrics = new();
WorkerProcessLauncher launcher = CreateLauncher(
executablePath,
processFactory,
new TransientStartupProbe(failuresBeforeSuccess: 1),
metrics,
startupProbeRetryAttempts: 2,
startupProbeRetryDelayMilliseconds: 1);
using WorkerProcessHandle handle = await launcher.LaunchAsync(CreateRequest());
Assert.Same(process, handle.Process);
Assert.Equal(1, processFactory.StartCount);
Assert.False(process.KillCalled);
GatewayMetricsSnapshot snapshot = metrics.GetSnapshot();
Assert.Equal(1, snapshot.RetryAttempts);
Assert.Equal(1, snapshot.RetryAttemptsByArea["worker_startup"]);
}
[Fact]
public async Task LaunchAsync_WhenStartupTimesOut_KillsAndDisposesWorker()
{
@@ -152,7 +182,9 @@ public sealed class WorkerProcessLauncherTests
IWorkerProcessFactory processFactory,
IWorkerStartupProbe startupProbe,
GatewayMetrics? metrics = null,
int startupTimeoutSeconds = 30)
int startupTimeoutSeconds = 30,
int startupProbeRetryAttempts = 3,
int startupProbeRetryDelayMilliseconds = 250)
{
GatewayOptions options = new()
{
@@ -161,6 +193,8 @@ public sealed class WorkerProcessLauncherTests
ExecutablePath = executablePath,
RequiredArchitecture = WorkerArchitecture.X86,
StartupTimeoutSeconds = startupTimeoutSeconds,
StartupProbeRetryAttempts = startupProbeRetryAttempts,
StartupProbeRetryDelayMilliseconds = startupProbeRetryDelayMilliseconds,
},
};
@@ -185,8 +219,11 @@ public sealed class WorkerProcessLauncherTests
{
public ProcessStartInfo? LastStartInfo { get; private set; }
public int StartCount { get; private set; }
public IWorkerProcess Start(ProcessStartInfo startInfo)
{
StartCount++;
LastStartInfo = startInfo;
return process;
}
@@ -255,6 +292,24 @@ public sealed class WorkerProcessLauncherTests
}
}
private sealed class TransientStartupProbe(int failuresBeforeSuccess) : IWorkerStartupProbe
{
private int _attempts;
public Task WaitUntilReadyAsync(
IWorkerProcess process,
WorkerProcessLaunchRequest request,
CancellationToken cancellationToken)
{
if (Interlocked.Increment(ref _attempts) <= failuresBeforeSuccess)
{
throw new IOException("The worker pipe was not ready yet.");
}
return Task.CompletedTask;
}
}
private sealed class FakePipeReservation : IDisposable
{
public bool DisposeCalled { get; private set; }