Add Polly resilience policies

This commit is contained in:
Joseph Doherty
2026-04-27 15:37:56 -04:00
parent d431ff9660
commit bd4a09a35e
22 changed files with 611 additions and 21 deletions
@@ -36,11 +36,22 @@ internal sealed class FakeGatewayTransport(MxGatewayClientOptions options) : IMx
ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
};
public Queue<Exception> OpenSessionExceptions { get; } = new();
public Queue<Exception> CloseSessionExceptions { get; } = new();
public Queue<Exception> InvokeExceptions { get; } = new();
/// <summary>
/// Records the open-session call, then either throws the next queued exception
/// or completes with the configured reply.
/// </summary>
/// <param name="request">Request recorded in <c>OpenSessionCalls</c>.</param>
/// <param name="callOptions">Call options recorded alongside the request.</param>
/// <returns>A completed task carrying <c>OpenSessionReply</c> when no exception is queued.</returns>
public Task<OpenSessionReply> OpenSessionAsync(
    OpenSessionRequest request,
    CallOptions callOptions)
{
    // The call is recorded before any failure is raised, so failed attempts are still counted.
    OpenSessionCalls.Add((request, callOptions));

    if (!OpenSessionExceptions.TryDequeue(out Exception? queuedFailure))
    {
        return Task.FromResult(OpenSessionReply);
    }

    throw queuedFailure;
}
@@ -49,6 +60,11 @@ internal sealed class FakeGatewayTransport(MxGatewayClientOptions options) : IMx
CallOptions callOptions)
{
CloseSessionCalls.Add((request, callOptions));
if (CloseSessionExceptions.TryDequeue(out Exception? exception))
{
throw exception;
}
return Task.FromResult(CloseSessionReply);
}
@@ -57,6 +73,11 @@ internal sealed class FakeGatewayTransport(MxGatewayClientOptions options) : IMx
CallOptions callOptions)
{
InvokeCalls.Add((request, callOptions));
if (InvokeExceptions.TryDequeue(out Exception? exception))
{
throw exception;
}
return Task.FromResult(_invokeReplies.Dequeue());
}
@@ -25,4 +25,17 @@ public sealed class MxGatewayClientOptionsTests
Assert.Throws<ArgumentException>(options.Validate);
}
// Verifies that client option validation rejects a retry configuration with zero attempts.
[Fact]
public void Validate_WithInvalidRetryOptions_Throws()
{
    MxGatewayClientRetryOptions zeroAttemptRetry = new() { MaxAttempts = 0 };
    MxGatewayClientOptions options = new()
    {
        Endpoint = new Uri("http://localhost:5000"),
        ApiKey = "test-api-key",
        Retry = zeroAttemptRetry,
    };

    Assert.Throws<ArgumentOutOfRangeException>(() => options.Validate());
}
}
@@ -1,4 +1,5 @@
using MxGateway.Contracts.Proto;
using Grpc.Core;
namespace MxGateway.Client.Tests;
@@ -220,6 +221,55 @@ public sealed class MxGatewayClientSessionTests
Assert.Equal("session-fixture", call.Request.SessionId);
}
// A Ping whose first transport call fails with a transient gRPC error is expected
// to reach the transport a second time.
[Fact]
public async Task InvokeAsync_RetriesSafeDiagnosticCommandOnTransientGrpcFailure()
{
    var pingReply = new MxCommandReply
    {
        SessionId = "session-fixture",
        Kind = MxCommandKind.Ping,
        ProtocolStatus = new ProtocolStatus { Code = ProtocolStatusCode.Ok },
    };

    FakeGatewayTransport transport = CreateTransport();
    // First invoke attempt throws; the queued reply serves the following attempt.
    transport.InvokeExceptions.Enqueue(CreateTransientRpcException());
    transport.AddInvokeReply(pingReply);

    await using MxGatewayClient client = CreateClient(transport);
    MxGatewaySession session = await client.OpenSessionAsync();

    var pingRequest = new MxCommandRequest
    {
        SessionId = session.SessionId,
        Command = new MxCommand { Kind = MxCommandKind.Ping, Ping = new PingCommand() },
    };
    await session.InvokeAsync(pingRequest);

    Assert.Equal(2, transport.InvokeCalls.Count);
}
// Opening a session must surface a transient gRPC failure after exactly one transport call.
[Fact]
public async Task OpenSessionAsync_DoesNotRetryTransientGrpcFailure()
{
    FakeGatewayTransport transport = CreateTransport();
    transport.OpenSessionExceptions.Enqueue(CreateTransientRpcException());

    await using MxGatewayClient client = CreateClient(transport);

    async Task OpenSessionOnceAsync() => await client.OpenSessionAsync();
    await Assert.ThrowsAsync<RpcException>(OpenSessionOnceAsync);

    Assert.Single(transport.OpenSessionCalls);
}
// A raw write that hits a transient gRPC failure must fail after exactly one transport call.
[Fact]
public async Task InvokeAsync_DoesNotRetryWriteCommand()
{
    FakeGatewayTransport transport = CreateTransport();
    transport.InvokeExceptions.Enqueue(CreateTransientRpcException());

    await using MxGatewayClient client = CreateClient(transport);
    MxGatewaySession session = await client.OpenSessionAsync();

    async Task WriteOnceAsync() =>
        await session.WriteRawAsync(1, 2, 3.ToMxValue(), userId: 0);
    await Assert.ThrowsAsync<RpcException>(WriteOnceAsync);

    Assert.Single(transport.InvokeCalls);
}
[Fact]
public async Task InvokeHelpers_PassCancellationTokenToTransport()
{
@@ -252,4 +302,9 @@ public sealed class MxGatewayClientSessionTests
ApiKey = "test-api-key",
});
}
/// <summary>
/// Creates an <see cref="RpcException"/> with status <see cref="StatusCode.Unavailable"/>
/// for tests that simulate a transient gateway failure.
/// </summary>
private static RpcException CreateTransientRpcException()
{
    var unavailable = new Status(StatusCode.Unavailable, "gateway unavailable");
    return new RpcException(unavailable);
}
}
@@ -7,6 +7,7 @@
<ItemGroup>
<PackageReference Include="Grpc.Net.Client" Version="2.76.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="10.0.7" />
<PackageReference Include="Polly.Core" Version="8.6.6" />
</ItemGroup>
<PropertyGroup>
@@ -1,6 +1,8 @@
using Grpc.Core;
using Grpc.Net.Client;
using Microsoft.Extensions.Logging;
using MxGateway.Contracts.Proto;
using Polly;
namespace MxGateway.Client;
@@ -11,6 +13,7 @@ public sealed class MxGatewayClient : IAsyncDisposable
{
private readonly GrpcChannel _channel;
private readonly IMxGatewayClientTransport _transport;
private readonly ResiliencePipeline _safeUnaryRetryPipeline;
private bool _disposed;
internal MxGatewayClient(
@@ -22,6 +25,9 @@ public sealed class MxGatewayClient : IAsyncDisposable
Options = options;
_transport = transport ?? throw new ArgumentNullException(nameof(transport));
_safeUnaryRetryPipeline = MxGatewayClientRetryPolicy.Create(
options.Retry,
options.LoggerFactory?.CreateLogger<MxGatewayClient>());
_channel = null!;
}
@@ -32,6 +38,9 @@ public sealed class MxGatewayClient : IAsyncDisposable
_channel = channel;
_transport = transport;
Options = transport.Options;
_safeUnaryRetryPipeline = MxGatewayClientRetryPolicy.Create(
Options.Retry,
Options.LoggerFactory?.CreateLogger<MxGatewayClient>());
}
public MxGatewayClientOptions Options { get; }
@@ -88,7 +97,9 @@ public sealed class MxGatewayClient : IAsyncDisposable
ArgumentNullException.ThrowIfNull(request);
ThrowIfDisposed();
return _transport.CloseSessionAsync(request, CreateCallOptions(cancellationToken));
return ExecuteSafeUnaryAsync(
token => _transport.CloseSessionAsync(request, CreateCallOptions(token)),
cancellationToken);
}
public Task<MxCommandReply> InvokeAsync(
@@ -98,6 +109,13 @@ public sealed class MxGatewayClient : IAsyncDisposable
ArgumentNullException.ThrowIfNull(request);
ThrowIfDisposed();
if (MxGatewayClientRetryPolicy.IsRetryableCommand(request.Command?.Kind ?? MxCommandKind.Unspecified))
{
return ExecuteSafeUnaryAsync(
token => _transport.InvokeAsync(request, CreateCallOptions(token)),
cancellationToken);
}
return _transport.InvokeAsync(request, CreateCallOptions(cancellationToken));
}
@@ -136,6 +154,16 @@ public sealed class MxGatewayClient : IAsyncDisposable
cancellationToken);
}
/// <summary>
/// Runs a unary transport call through the safe-unary retry pipeline.
/// </summary>
/// <typeparam name="T">Reply type produced by the call.</typeparam>
/// <param name="call">Delegate that performs the call with the token handed to it by the pipeline.</param>
/// <param name="cancellationToken">Token passed to the pipeline execution.</param>
/// <returns>A task that completes with the reply produced by the pipeline execution.</returns>
private Task<T> ExecuteSafeUnaryAsync<T>(
    Func<CancellationToken, Task<T>> call,
    CancellationToken cancellationToken)
{
    // Adapt the Task-returning call to the ValueTask-based delegate handed to the pipeline.
    Func<CancellationToken, ValueTask<T>> attempt =
        async attemptToken => await call(attemptToken).ConfigureAwait(false);

    ValueTask<T> pending = _safeUnaryRetryPipeline.ExecuteAsync(attempt, cancellationToken);
    return pending.AsTask();
}
private void ThrowIfDisposed()
{
ObjectDisposedException.ThrowIf(_disposed, this);
@@ -21,6 +21,8 @@ public sealed class MxGatewayClientOptions
public TimeSpan DefaultCallTimeout { get; init; } = TimeSpan.FromSeconds(30);
public MxGatewayClientRetryOptions Retry { get; init; } = new();
public ILoggerFactory? LoggerFactory { get; init; }
public void Validate()
@@ -54,5 +56,7 @@ public sealed class MxGatewayClientOptions
nameof(DefaultCallTimeout),
"The default call timeout must be greater than zero.");
}
Retry.Validate();
}
}
@@ -0,0 +1,43 @@
namespace MxGateway.Client;
/// <summary>
/// Retry settings for the gateway client.
/// </summary>
public sealed class MxGatewayClientRetryOptions
{
    /// <summary>Maximum number of attempts; must be greater than zero. Defaults to 2.</summary>
    public int MaxAttempts { get; init; } = 2;

    /// <summary>Retry delay; must be greater than zero. Defaults to 200 milliseconds.</summary>
    public TimeSpan Delay { get; init; } = TimeSpan.FromMilliseconds(200);

    /// <summary>
    /// Maximum retry delay; must be greater than zero and not less than <see cref="Delay"/>.
    /// Defaults to 2 seconds.
    /// </summary>
    public TimeSpan MaxDelay { get; init; } = TimeSpan.FromSeconds(2);

    /// <summary>Jitter flag for retry delays. Defaults to <see langword="true"/>.</summary>
    public bool UseJitter { get; init; } = true;

    /// <summary>
    /// Checks the configured values and throws on the first rule that is violated.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <see cref="MaxAttempts"/>, <see cref="Delay"/> or <see cref="MaxDelay"/> is not greater
    /// than zero, or <see cref="MaxDelay"/> is less than <see cref="Delay"/>.
    /// </exception>
    public void Validate()
    {
        // Rules are evaluated in this order; the first failing rule determines the exception.
        Require(
            MaxAttempts > 0,
            nameof(MaxAttempts),
            "The retry max attempts value must be greater than zero.");
        Require(
            Delay > TimeSpan.Zero,
            nameof(Delay),
            "The retry delay must be greater than zero.");
        Require(
            MaxDelay > TimeSpan.Zero,
            nameof(MaxDelay),
            "The retry max delay must be greater than zero.");
        Require(
            MaxDelay >= Delay,
            nameof(MaxDelay),
            "The retry max delay must be greater than or equal to the retry delay.");

        static void Require(bool isValid, string parameterName, string message)
        {
            if (!isValid)
            {
                throw new ArgumentOutOfRangeException(parameterName, message);
            }
        }
    }
}
@@ -0,0 +1,62 @@
using Grpc.Core;
using Microsoft.Extensions.Logging;
using MxGateway.Contracts.Proto;
using Polly;
using Polly.Retry;
namespace MxGateway.Client;
/// <summary>
/// Builds the retry pipeline for gateway client calls and decides which commands
/// and gRPC failures are eligible for retry.
/// </summary>
internal static class MxGatewayClientRetryPolicy
{
    /// <summary>
    /// Creates the resilience pipeline described by <paramref name="options"/>.
    /// </summary>
    /// <param name="options">Retry settings; validated before the pipeline is built.</param>
    /// <param name="logger">Optional logger that receives a debug entry for each retry.</param>
    /// <returns>
    /// A pipeline with an exponential-backoff retry strategy, or
    /// <see cref="ResiliencePipeline.Empty"/> when <c>MaxAttempts</c> allows no retries.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The options fail their own validation.</exception>
    public static ResiliencePipeline Create(
        MxGatewayClientRetryOptions options,
        ILogger? logger)
    {
        ArgumentNullException.ThrowIfNull(options);
        options.Validate();

        // MaxAttempts includes the initial call, so the retry budget is one less.
        int maxRetryAttempts = options.MaxAttempts - 1;
        if (maxRetryAttempts < 1)
        {
            // Polly's retry options validation requires MaxRetryAttempts >= 1, so a
            // single-attempt configuration (MaxAttempts = 1, which Validate accepts)
            // is expressed as a pass-through pipeline instead of a zero-retry strategy.
            return ResiliencePipeline.Empty;
        }

        return new ResiliencePipelineBuilder()
            .AddRetry(new RetryStrategyOptions
            {
                MaxRetryAttempts = maxRetryAttempts,
                BackoffType = DelayBackoffType.Exponential,
                UseJitter = options.UseJitter,
                Delay = options.Delay,
                MaxDelay = options.MaxDelay,
                ShouldHandle = new PredicateBuilder().Handle<Exception>(IsTransientGrpcFailure),
                OnRetry = args =>
                {
                    logger?.LogDebug(
                        args.Outcome.Exception,
                        "Retrying MXAccess Gateway client call after transient gRPC failure. Attempt {Attempt}.",
                        args.AttemptNumber + 1);
                    return default;
                },
            })
            .Build();
    }

    /// <summary>
    /// Returns whether a command of the given kind may be sent through the retry pipeline.
    /// </summary>
    /// <param name="kind">Command kind to classify.</param>
    /// <returns>
    /// <see langword="true"/> for <c>Ping</c>, <c>GetSessionState</c> and <c>GetWorkerInfo</c>;
    /// otherwise <see langword="false"/>.
    /// </returns>
    public static bool IsRetryableCommand(MxCommandKind kind)
    {
        return kind is MxCommandKind.Ping
            or MxCommandKind.GetSessionState
            or MxCommandKind.GetWorkerInfo;
    }

    /// <summary>
    /// Returns whether the exception is an <see cref="RpcException"/> with a transient status,
    /// either directly or as the inner exception of an <c>MxGatewayException</c>.
    /// </summary>
    private static bool IsTransientGrpcFailure(Exception exception)
    {
        return exception switch
        {
            RpcException rpcException => IsTransientStatus(rpcException.StatusCode),
            MxGatewayException { InnerException: RpcException rpcException } => IsTransientStatus(rpcException.StatusCode),
            _ => false,
        };
    }

    /// <summary>
    /// Returns whether the status code is one this policy retries:
    /// <c>Unavailable</c>, <c>DeadlineExceeded</c> or <c>ResourceExhausted</c>.
    /// </summary>
    private static bool IsTransientStatus(StatusCode statusCode)
    {
        return statusCode is StatusCode.Unavailable
            or StatusCode.DeadlineExceeded
            or StatusCode.ResourceExhausted;
    }
}