Add Polly resilience policies

This commit is contained in:
Joseph Doherty
2026-04-27 15:37:56 -04:00
parent d431ff9660
commit bd4a09a35e
22 changed files with 611 additions and 21 deletions
+18 -1
View File
@@ -20,11 +20,13 @@ public sealed class GatewayMetrics : IDisposable
private readonly Counter<long> _workerExitsCounter;
private readonly Counter<long> _heartbeatFailuresCounter;
private readonly Counter<long> _streamDisconnectsCounter;
private readonly Counter<long> _retryAttemptsCounter;
private readonly Histogram<double> _workerStartupLatencyHistogram;
private readonly Histogram<double> _commandLatencyHistogram;
private readonly Histogram<double> _eventStreamSendLatencyHistogram;
private readonly Dictionary<string, long> _commandFailuresByMethod = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, long> _eventsByFamily = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, long> _retryAttemptsByArea = new(StringComparer.OrdinalIgnoreCase);
private int _openSessions;
private int _workersRunning;
@@ -41,6 +43,7 @@ public sealed class GatewayMetrics : IDisposable
private long _workerExits;
private long _heartbeatFailures;
private long _streamDisconnects;
private long _retryAttempts;
private bool _disposed;
public GatewayMetrics()
@@ -58,6 +61,7 @@ public sealed class GatewayMetrics : IDisposable
_workerExitsCounter = _meter.CreateCounter<long>("mxgateway.workers.exited");
_heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
_streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
_retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms");
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms");
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms");
@@ -238,6 +242,17 @@ public sealed class GatewayMetrics : IDisposable
_streamDisconnectsCounter.Add(1, new KeyValuePair<string, object?>("reason", reason));
}
public void RetryAttempted(string area)
{
lock (_syncRoot)
{
_retryAttempts++;
Increment(_retryAttemptsByArea, area);
}
_retryAttemptsCounter.Add(1, new KeyValuePair<string, object?>("area", area));
}
public GatewayMetricsSnapshot GetSnapshot()
{
lock (_syncRoot)
@@ -258,8 +273,10 @@ public sealed class GatewayMetrics : IDisposable
WorkerExits: _workerExits,
HeartbeatFailures: _heartbeatFailures,
StreamDisconnects: _streamDisconnects,
RetryAttempts: _retryAttempts,
CommandFailuresByMethod: new Dictionary<string, long>(_commandFailuresByMethod, StringComparer.OrdinalIgnoreCase),
EventsByFamily: new Dictionary<string, long>(_eventsByFamily, StringComparer.OrdinalIgnoreCase));
EventsByFamily: new Dictionary<string, long>(_eventsByFamily, StringComparer.OrdinalIgnoreCase),
RetryAttemptsByArea: new Dictionary<string, long>(_retryAttemptsByArea, StringComparer.OrdinalIgnoreCase));
}
}
@@ -16,5 +16,7 @@ public sealed record GatewayMetricsSnapshot(
long WorkerExits,
long HeartbeatFailures,
long StreamDisconnects,
long RetryAttempts,
IReadOnlyDictionary<string, long> CommandFailuresByMethod,
IReadOnlyDictionary<string, long> EventsByFamily);
IReadOnlyDictionary<string, long> EventsByFamily,
IReadOnlyDictionary<string, long> RetryAttemptsByArea);