Files
mxaccessgw/clients/dotnet/MxGateway.Client/MxGatewayClientRetryPolicy.cs
Joseph Doherty 89043cb2b6 Resolve Client.Dotnet-004..008 code-review findings
Client.Dotnet-004: documented DefaultCallTimeout as both the per-attempt
deadline and the shared retry budget, and removed DeadlineExceeded from the
transient-retry set (a client-imposed deadline cannot be helped by retrying).

Client.Dotnet-005: RegisterAsync/AddItemAsync/AddItem2Async silently returned
0 when a successful reply lacked the typed payload. They now throw a
descriptive MxGatewayException.

Client.Dotnet-006: added XML docs to the previously undocumented public
members MaxGrpcMessageBytes, GatewayProtocolVersion, WorkerProtocolVersion.

Client.Dotnet-007: corrected the AcknowledgeAlarmAsync XML comment — the RPC
requires the admin scope, not a non-existent invoke:alarm-ack sub-scope.

Client.Dotnet-008: the CLI redactor missed env-var-sourced keys because the
caller passed only the --api-key option. Redaction now uses the same
resolver, stripping env-var keys too.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 22:42:27 -04:00

74 lines
3.0 KiB
C#

using Grpc.Core;
using Microsoft.Extensions.Logging;
using MxGateway.Contracts.Proto;
using Polly;
using Polly.Retry;
namespace MxGateway.Client;
/// <summary>Factory and helpers for exponential-backoff retry policies on transient gRPC failures.</summary>
internal static class MxGatewayClientRetryPolicy
{
/// <summary>Creates a Polly ResiliencePipeline that retries transient gRPC failures with exponential backoff.</summary>
/// <param name="options">Retry configuration (max attempts, delay bounds, jitter).</param>
/// <param name="logger">Optional logger for retry diagnostics.</param>
public static ResiliencePipeline Create(
MxGatewayClientRetryOptions options,
ILogger? logger)
{
ArgumentNullException.ThrowIfNull(options);
options.Validate();
return new ResiliencePipelineBuilder()
.AddRetry(new RetryStrategyOptions
{
MaxRetryAttempts = Math.Max(0, options.MaxAttempts - 1),
BackoffType = DelayBackoffType.Exponential,
UseJitter = options.UseJitter,
Delay = options.Delay,
MaxDelay = options.MaxDelay,
ShouldHandle = new PredicateBuilder().Handle<Exception>(IsTransientGrpcFailure),
OnRetry = args =>
{
logger?.LogDebug(
args.Outcome.Exception,
"Retrying MXAccess Gateway client call after transient gRPC failure. Attempt {Attempt}.",
args.AttemptNumber + 1);
return default;
},
})
.Build();
}
/// <summary>Returns whether a command kind is eligible for automatic retry on transient failures.</summary>
/// <param name="kind">The command kind to check.</param>
public static bool IsRetryableCommand(MxCommandKind kind)
{
return kind is MxCommandKind.Ping
or MxCommandKind.GetSessionState
or MxCommandKind.GetWorkerInfo;
}
private static bool IsTransientGrpcFailure(Exception exception)
{
return exception switch
{
RpcException rpcException => IsTransientStatus(rpcException.StatusCode),
MxGatewayException { InnerException: RpcException rpcException } => IsTransientStatus(rpcException.StatusCode),
_ => false,
};
}
private static bool IsTransientStatus(StatusCode statusCode)
{
// DeadlineExceeded is intentionally NOT treated as transient. The deadline
// on every unary call is client-imposed (CreateCallOptions stamps the
// DefaultCallTimeout budget), and that same budget is shared across the
// initial attempt plus all retries plus backoff. A DeadlineExceeded means
// the shared budget is exhausted, so an immediate retry would only fail
// again — burning the remaining budget on a call that cannot succeed.
return statusCode is StatusCode.Unavailable
or StatusCode.ResourceExhausted;
}
}