Improve gateway reliability and dashboard docs

This commit is contained in:
Joseph Doherty
2026-04-28 00:13:22 -04:00
parent bd4a09a35e
commit 4fc355b357
61 changed files with 1722 additions and 150 deletions
+46 -7
View File
@@ -26,11 +26,13 @@ public sealed class GatewayMetrics : IDisposable
private readonly Histogram<double> _eventStreamSendLatencyHistogram;
private readonly Dictionary<string, long> _commandFailuresByMethod = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, long> _eventsByFamily = new(StringComparer.OrdinalIgnoreCase);
private readonly Dictionary<string, long> _eventsBySession = new(StringComparer.Ordinal);
private readonly Dictionary<string, long> _retryAttemptsByArea = new(StringComparer.OrdinalIgnoreCase);
private int _openSessions;
private int _workersRunning;
private int _eventQueueDepth;
private int _workerEventQueueDepth;
private int _grpcEventStreamQueueDepth;
private long _sessionsOpened;
private long _sessionsClosed;
private long _commandsStarted;
@@ -68,7 +70,8 @@ public sealed class GatewayMetrics : IDisposable
_meter.CreateObservableGauge("mxgateway.sessions.open", GetOpenSessions);
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
_meter.CreateObservableGauge("mxgateway.events.queue.depth", GetEventQueueDepth);
_meter.CreateObservableGauge("mxgateway.events.worker_queue.depth", GetWorkerEventQueueDepth);
_meter.CreateObservableGauge("mxgateway.events.grpc_stream_queue.depth", GetGrpcEventStreamQueueDepth);
}
public void SessionOpened()
@@ -174,11 +177,11 @@ public sealed class GatewayMetrics : IDisposable
{
_eventsReceived++;
Increment(_eventsByFamily, family);
Increment(_eventsBySession, sessionId);
}
_eventsReceivedCounter.Add(
1,
new KeyValuePair<string, object?>("session_id", sessionId),
new KeyValuePair<string, object?>("family", family));
}
@@ -190,6 +193,11 @@ public sealed class GatewayMetrics : IDisposable
}
/// <summary>
/// Forwards the supplied depth to <see cref="SetWorkerEventQueueDepth(int)"/> without modification.
/// </summary>
/// <remarks>
/// NOTE(review): this looks like a compatibility alias kept for callers of the older
/// name; confirm before removing it.
/// </remarks>
/// <param name="depth">The queue depth handed on to <see cref="SetWorkerEventQueueDepth(int)"/>.</param>
public void SetEventQueueDepth(int depth) => SetWorkerEventQueueDepth(depth);
public void SetWorkerEventQueueDepth(int depth)
{
if (depth < 0)
{
@@ -198,7 +206,28 @@ public sealed class GatewayMetrics : IDisposable
lock (_syncRoot)
{
_eventQueueDepth = depth;
_workerEventQueueDepth = depth;
}
}
/// <summary>
/// Stores the latest gRPC event stream queue depth so it can be read back by
/// <c>GetGrpcEventStreamQueueDepth</c> and included in metric snapshots.
/// </summary>
/// <param name="depth">The queue depth to record; must be zero or greater.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="depth"/> is negative.
/// </exception>
public void SetGrpcEventStreamQueueDepth(int depth)
{
    // Reject invalid input before taking the lock.
    if (depth is < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(depth), depth, "Queue depth cannot be negative.");
    }

    // The field is written under the same lock used by the readers of this value.
    lock (_syncRoot)
    {
        _grpcEventStreamQueueDepth = depth;
    }
}
/// <summary>
/// Deletes the per-session event count tracked for <paramref name="sessionId"/>.
/// </summary>
/// <remarks>
/// If no count is stored under the given key, the dictionary is left unchanged.
/// </remarks>
/// <param name="sessionId">The key whose entry is removed from the per-session event counts.</param>
public void RemoveSessionEvents(string sessionId)
{
    lock (_syncRoot)
    {
        // The removed/not-removed result is intentionally ignored.
        _ = _eventsBySession.Remove(sessionId);
    }
}
@@ -260,7 +289,8 @@ public sealed class GatewayMetrics : IDisposable
return new GatewayMetricsSnapshot(
OpenSessions: _openSessions,
WorkersRunning: _workersRunning,
EventQueueDepth: _eventQueueDepth,
WorkerEventQueueDepth: _workerEventQueueDepth,
GrpcEventStreamQueueDepth: _grpcEventStreamQueueDepth,
SessionsOpened: _sessionsOpened,
SessionsClosed: _sessionsClosed,
CommandsStarted: _commandsStarted,
@@ -276,6 +306,7 @@ public sealed class GatewayMetrics : IDisposable
RetryAttempts: _retryAttempts,
CommandFailuresByMethod: new Dictionary<string, long>(_commandFailuresByMethod, StringComparer.OrdinalIgnoreCase),
EventsByFamily: new Dictionary<string, long>(_eventsByFamily, StringComparer.OrdinalIgnoreCase),
EventsBySession: new Dictionary<string, long>(_eventsBySession, StringComparer.Ordinal),
RetryAttemptsByArea: new Dictionary<string, long>(_retryAttemptsByArea, StringComparer.OrdinalIgnoreCase));
}
}
@@ -307,11 +338,19 @@ public sealed class GatewayMetrics : IDisposable
}
}
private int GetEventQueueDepth()
private int GetWorkerEventQueueDepth()
{
lock (_syncRoot)
{
return _eventQueueDepth;
return _workerEventQueueDepth;
}
}
/// <summary>
/// Reads the most recently recorded gRPC event stream queue depth while holding <c>_syncRoot</c>.
/// </summary>
/// <returns>The current value of <c>_grpcEventStreamQueueDepth</c>.</returns>
private int GetGrpcEventStreamQueueDepth()
{
    int currentDepth;

    lock (_syncRoot)
    {
        currentDepth = _grpcEventStreamQueueDepth;
    }

    return currentDepth;
}
@@ -3,7 +3,8 @@ namespace MxGateway.Server.Metrics;
public sealed record GatewayMetricsSnapshot(
int OpenSessions,
int WorkersRunning,
int EventQueueDepth,
int WorkerEventQueueDepth,
int GrpcEventStreamQueueDepth,
long SessionsOpened,
long SessionsClosed,
long CommandsStarted,
@@ -19,4 +20,5 @@ public sealed record GatewayMetricsSnapshot(
long RetryAttempts,
IReadOnlyDictionary<string, long> CommandFailuresByMethod,
IReadOnlyDictionary<string, long> EventsByFamily,
IReadOnlyDictionary<string, long> EventsBySession,
IReadOnlyDictionary<string, long> RetryAttemptsByArea);