using Grpc.Core;
using Microsoft.Extensions.Logging;
using Polly;
using Polly.Retry;
using ZB.MOM.WW.MxGateway.Contracts.Proto;

namespace ZB.MOM.WW.MxGateway.Client;

/// <summary>
/// Factory and helpers for exponential-backoff retry policies on transient gRPC failures.
/// </summary>
internal static class MxGatewayClientRetryPolicy
{
    /// <summary>
    /// Creates a Polly <see cref="ResiliencePipeline"/> that retries transient gRPC failures
    /// with exponential backoff.
    /// </summary>
    /// <param name="options">Retry configuration (max attempts, delay bounds, jitter).</param>
    /// <param name="logger">Optional logger for retry diagnostics; may be <see langword="null"/>.</param>
    /// <returns>
    /// A pipeline that retries up to <c>options.MaxAttempts - 1</c> times, or
    /// <see cref="ResiliencePipeline.Empty"/> when the configuration leaves no room for retries.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <see langword="null"/>.</exception>
    public static ResiliencePipeline Create(
        MxGatewayClientRetryOptions options,
        ILogger? logger)
    {
        ArgumentNullException.ThrowIfNull(options);

        // NOTE(review): Validate() is declared elsewhere; presumably it throws on invalid settings.
        options.Validate();

        // MaxAttempts counts the initial call, whereas Polly counts only the retries after it.
        var maxRetryAttempts = Math.Max(0, options.MaxAttempts - 1);
        if (maxRetryAttempts == 0)
        {
            // Polly's RetryStrategyOptions validation requires MaxRetryAttempts >= 1, so a
            // "single attempt, no retries" configuration is expressed as a pass-through pipeline
            // instead of a retry strategy that would fail validation in AddRetry.
            return ResiliencePipeline.Empty;
        }

        var retryOptions = new RetryStrategyOptions
        {
            MaxRetryAttempts = maxRetryAttempts,
            BackoffType = DelayBackoffType.Exponential,
            UseJitter = options.UseJitter,
            Delay = options.Delay,
            MaxDelay = options.MaxDelay,

            // The type argument must be explicit: it cannot be inferred from a method group.
            ShouldHandle = new PredicateBuilder().Handle<Exception>(IsTransientGrpcFailure),
            OnRetry = args =>
            {
                // AttemptNumber is zero-based in Polly, hence the +1 for the logged value.
                logger?.LogDebug(
                    args.Outcome.Exception,
                    "Retrying MXAccess Gateway client call after transient gRPC failure. Attempt {Attempt}.",
                    args.AttemptNumber + 1);

                // Nothing asynchronous to await; a default ValueTask is already completed.
                return default;
            },
        };

        return new ResiliencePipelineBuilder()
            .AddRetry(retryOptions)
            .Build();
    }

    /// <summary>
    /// Returns whether a command kind is eligible for automatic retry on transient failures.
    /// </summary>
    /// <param name="kind">The command kind to check.</param>
    /// <returns>
    /// <see langword="true"/> for <c>Ping</c>, <c>GetSessionState</c> and <c>GetWorkerInfo</c>;
    /// otherwise <see langword="false"/>.
    /// </returns>
    public static bool IsRetryableCommand(MxCommandKind kind)
    {
        // NOTE(review): these look like read-only queries that are safe to repeat - confirm
        // before adding further command kinds to this list.
        return kind is MxCommandKind.Ping
            or MxCommandKind.GetSessionState
            or MxCommandKind.GetWorkerInfo;
    }

    /// <summary>
    /// Determines whether an exception represents a transient gRPC failure, either directly as an
    /// <see cref="RpcException"/> or as the inner exception of an <see cref="MxGatewayException"/>.
    /// </summary>
    /// <param name="exception">The exception raised by the client call.</param>
    /// <returns><see langword="true"/> when the underlying gRPC status is transient.</returns>
    private static bool IsTransientGrpcFailure(Exception exception)
    {
        return exception switch
        {
            RpcException rpcException => IsTransientStatus(rpcException.StatusCode),
            MxGatewayException { InnerException: RpcException innerRpcException } =>
                IsTransientStatus(innerRpcException.StatusCode),
            _ => false,
        };
    }

    /// <summary>
    /// Returns whether a gRPC status code is treated as transient:
    /// <see cref="StatusCode.Unavailable"/>, <see cref="StatusCode.DeadlineExceeded"/> or
    /// <see cref="StatusCode.ResourceExhausted"/>.
    /// </summary>
    /// <param name="statusCode">The gRPC status code to classify.</param>
    /// <returns><see langword="true"/> when the status code is one of the transient codes.</returns>
    private static bool IsTransientStatus(StatusCode statusCode)
    {
        return statusCode is StatusCode.Unavailable
            or StatusCode.DeadlineExceeded
            or StatusCode.ResourceExhausted;
    }
}