Add Polly resilience policies

This commit is contained in:
Joseph Doherty
2026-04-27 15:37:56 -04:00
parent d431ff9660
commit bd4a09a35e
22 changed files with 611 additions and 21 deletions
@@ -36,6 +36,10 @@ public sealed class WorkerProcessLauncherTests
["--session-id", SessionId, "--pipe-name", PipeName, "--protocol-version", "1"],
processFactory.LastStartInfo.ArgumentList);
Assert.Equal(Nonce, processFactory.LastStartInfo.Environment[WorkerProcessLauncher.WorkerNonceEnvironmentVariableName]);
Assert.Equal(
"2000",
processFactory.LastStartInfo.Environment[
WorkerProcessLauncher.WorkerPipeConnectAttemptTimeoutEnvironmentVariableName]);
Assert.DoesNotContain(Nonce, handle.CommandLine.ToString(), StringComparison.Ordinal);
Assert.DoesNotContain(Nonce, string.Join(" ", handle.CommandLine.Arguments), StringComparison.Ordinal);
Assert.False(pipeReservation.DisposeCalled);
@@ -67,6 +71,32 @@ public sealed class WorkerProcessLauncherTests
Assert.Equal(1, metrics.GetSnapshot().WorkerKills);
}
/// <summary>
/// Verifies that when the startup probe fails once and then succeeds, the launcher
/// retries the probe against the same worker process: the factory is asked to start
/// a process only once, the process is not killed, and exactly one retry is recorded
/// in the metrics under the <c>worker_startup</c> area.
/// </summary>
[Fact]
public async Task LaunchAsync_WhenStartupProbeFailsTransiently_RetriesWithoutRespawningWorker()
{
    // Arrange: a fake worker executable plus a probe that throws on its first call only.
    using TestDirectory workspace = TestDirectory.Create();
    // 0x014c is the PE machine value for 32-bit x86 (IMAGE_FILE_MACHINE_I386).
    string workerPath = workspace.CreateWorkerExecutable(machine: 0x014c);
    GatewayMetrics gatewayMetrics = new();
    FakeWorkerProcess fakeProcess = new(processId: 1234);
    FakeWorkerProcessFactory factory = new(fakeProcess);
    TransientStartupProbe flakyProbe = new(failuresBeforeSuccess: 1);

    // A 1 ms retry delay keeps the test fast while still exercising the retry path.
    WorkerProcessLauncher launcher = CreateLauncher(
        workerPath,
        factory,
        flakyProbe,
        gatewayMetrics,
        startupProbeRetryAttempts: 2,
        startupProbeRetryDelayMilliseconds: 1);

    // Act
    using WorkerProcessHandle launched = await launcher.LaunchAsync(CreateRequest());

    // Assert: same process instance, started exactly once, never killed.
    Assert.Same(fakeProcess, launched.Process);
    Assert.Equal(1, factory.StartCount);
    Assert.False(fakeProcess.KillCalled);

    // Assert: the single probe failure shows up as one retry, attributed to worker startup.
    GatewayMetricsSnapshot retryMetrics = gatewayMetrics.GetSnapshot();
    Assert.Equal(1, retryMetrics.RetryAttempts);
    Assert.Equal(1, retryMetrics.RetryAttemptsByArea["worker_startup"]);
}
[Fact]
public async Task LaunchAsync_WhenStartupTimesOut_KillsAndDisposesWorker()
{
@@ -152,7 +182,9 @@ public sealed class WorkerProcessLauncherTests
IWorkerProcessFactory processFactory,
IWorkerStartupProbe startupProbe,
GatewayMetrics? metrics = null,
int startupTimeoutSeconds = 30)
int startupTimeoutSeconds = 30,
int startupProbeRetryAttempts = 3,
int startupProbeRetryDelayMilliseconds = 250)
{
GatewayOptions options = new()
{
@@ -161,6 +193,8 @@ public sealed class WorkerProcessLauncherTests
ExecutablePath = executablePath,
RequiredArchitecture = WorkerArchitecture.X86,
StartupTimeoutSeconds = startupTimeoutSeconds,
StartupProbeRetryAttempts = startupProbeRetryAttempts,
StartupProbeRetryDelayMilliseconds = startupProbeRetryDelayMilliseconds,
},
};
@@ -185,8 +219,11 @@ public sealed class WorkerProcessLauncherTests
{
public ProcessStartInfo? LastStartInfo { get; private set; }
public int StartCount { get; private set; }
/// <summary>
/// Test double for process start-up: remembers the most recent start info, counts
/// the call, and hands back the <c>process</c> value held by the enclosing class.
/// </summary>
/// <param name="startInfo">The start info the caller wants to launch with; stored in <see cref="LastStartInfo"/>.</param>
/// <returns>The <c>process</c> value held by the enclosing class.</returns>
public IWorkerProcess Start(ProcessStartInfo startInfo)
{
    LastStartInfo = startInfo;
    StartCount += 1;

    return process;
}
@@ -255,6 +292,24 @@ public sealed class WorkerProcessLauncherTests
}
}
/// <summary>
/// Startup probe test double that throws an <see cref="IOException"/> for the first
/// <c>failuresBeforeSuccess</c> calls and returns a completed task on every call after that.
/// </summary>
private sealed class TransientStartupProbe(int failuresBeforeSuccess) : IWorkerStartupProbe
{
    // Number of times WaitUntilReadyAsync has been invoked so far.
    private int _callCount;

    /// <summary>
    /// Counts the call and either fails (while the failure budget is not yet used up)
    /// or reports readiness. None of the arguments are inspected.
    /// </summary>
    /// <exception cref="IOException">
    /// Thrown synchronously while the call count is at or below <c>failuresBeforeSuccess</c>.
    /// </exception>
    public Task WaitUntilReadyAsync(
        IWorkerProcess process,
        WorkerProcessLaunchRequest request,
        CancellationToken cancellationToken)
    {
        // Atomic increment so each call observes its own distinct ordinal.
        int callNumber = Interlocked.Increment(ref _callCount);

        if (callNumber > failuresBeforeSuccess)
        {
            return Task.CompletedTask;
        }

        throw new IOException("The worker pipe was not ready yet.");
    }
}
private sealed class FakePipeReservation : IDisposable
{
public bool DisposeCalled { get; private set; }