using System.Collections.Concurrent;
using System.Diagnostics.Metrics;

namespace MxGateway.Server.Metrics;

/// <summary>
/// Records gateway telemetry in two places at once: instruments published on the
/// <see cref="Meter"/> named <see cref="MeterName"/>, and in-process totals that
/// <see cref="GetSnapshot"/> copies out on demand.
/// </summary>
/// <remarks>
/// Most in-process totals are read and written while holding a private lock.
/// <see cref="EventReceived"/> is the exception: it uses <see cref="Interlocked"/> and
/// concurrent dictionaries and never takes that lock.
/// </remarks>
public sealed class GatewayMetrics : IDisposable
{
    /// <summary>Name of the meter that owns every instrument created by this class.</summary>
    public const string MeterName = "MxGateway.Server";

    // Guards the plain int/long totals and the two non-concurrent dictionaries.
    private readonly object _syncRoot = new();

    private readonly Meter _meter;

    private readonly Counter<long> _sessionsOpenedCounter;
    private readonly Counter<long> _sessionsClosedCounter;
    private readonly Counter<long> _commandsStartedCounter;
    private readonly Counter<long> _commandsSucceededCounter;
    private readonly Counter<long> _commandsFailedCounter;
    private readonly Counter<long> _eventsReceivedCounter;
    private readonly Counter<long> _queueOverflowsCounter;
    private readonly Counter<long> _faultsCounter;
    private readonly Counter<long> _workerKillsCounter;
    private readonly Counter<long> _workerExitsCounter;
    private readonly Counter<long> _heartbeatFailuresCounter;
    private readonly Counter<long> _streamDisconnectsCounter;
    private readonly Counter<long> _retryAttemptsCounter;

    private readonly Histogram<double> _workerStartupLatencyHistogram;
    private readonly Histogram<double> _commandLatencyHistogram;
    private readonly Histogram<double> _eventStreamSendLatencyHistogram;

    // Plain dictionaries: only touched while _syncRoot is held.
    private readonly Dictionary<string, long> _commandFailuresByMethod = new(StringComparer.OrdinalIgnoreCase);
    private readonly Dictionary<string, long> _retryAttemptsByArea = new(StringComparer.OrdinalIgnoreCase);

    // Concurrent dictionaries: updated by EventReceived without taking _syncRoot.
    private readonly ConcurrentDictionary<string, long> _eventsByFamily = new(StringComparer.OrdinalIgnoreCase);
    private readonly ConcurrentDictionary<string, long> _eventsBySession = new(StringComparer.Ordinal);

    private int _openSessions;
    private int _workersRunning;
    private int _workerEventQueueDepth;
    private int _grpcEventStreamQueueDepth;

    private long _sessionsOpened;
    private long _sessionsClosed;
    private long _commandsStarted;
    private long _commandsSucceeded;
    private long _commandsFailed;
    private long _eventsReceived;
    private long _queueOverflows;
    private long _faults;
    private long _workerKills;
    private long _workerExits;
    private long _heartbeatFailures;
    private long _streamDisconnects;
    private long _retryAttempts;

    private bool _disposed;

    /// <summary>
    /// Creates the meter (versioned with this assembly's version, when available) and
    /// registers all counters, histograms and observable gauges on it.
    /// </summary>
    public GatewayMetrics()
    {
        _meter = new Meter(MeterName, typeof(GatewayMetrics).Assembly.GetName().Version?.ToString());

        _sessionsOpenedCounter = _meter.CreateCounter<long>("mxgateway.sessions.opened");
        _sessionsClosedCounter = _meter.CreateCounter<long>("mxgateway.sessions.closed");
        _commandsStartedCounter = _meter.CreateCounter<long>("mxgateway.commands.started");
        _commandsSucceededCounter = _meter.CreateCounter<long>("mxgateway.commands.succeeded");
        _commandsFailedCounter = _meter.CreateCounter<long>("mxgateway.commands.failed");
        _eventsReceivedCounter = _meter.CreateCounter<long>("mxgateway.events.received");
        _queueOverflowsCounter = _meter.CreateCounter<long>("mxgateway.queues.overflows");
        _faultsCounter = _meter.CreateCounter<long>("mxgateway.faults");
        _workerKillsCounter = _meter.CreateCounter<long>("mxgateway.workers.killed");
        _workerExitsCounter = _meter.CreateCounter<long>("mxgateway.workers.exited");
        _heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
        _streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
        _retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");

        _workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms");
        _commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms");
        _eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms");

        // The gauges are owned by the meter; no field is needed to keep them.
        _meter.CreateObservableGauge<int>("mxgateway.sessions.open", GetOpenSessions);
        _meter.CreateObservableGauge<int>("mxgateway.workers.running", GetWorkersRunning);
        _meter.CreateObservableGauge<int>("mxgateway.events.worker_queue.depth", GetWorkerEventQueueDepth);
        _meter.CreateObservableGauge<int>("mxgateway.events.grpc_stream_queue.depth", GetGrpcEventStreamQueueDepth);
    }

    /// <summary>Counts a newly opened session and raises the open-session gauge value.</summary>
    public void SessionOpened()
    {
        lock (_syncRoot)
        {
            _openSessions++;
            _sessionsOpened++;
        }

        _sessionsOpenedCounter.Add(1);
    }

    /// <summary>
    /// Counts a closed session and lowers the open-session gauge value, never below zero.
    /// </summary>
    public void SessionClosed()
    {
        lock (_syncRoot)
        {
            if (_openSessions > 0)
            {
                _openSessions--;
            }

            _sessionsClosed++;
        }

        _sessionsClosedCounter.Add(1);
    }

    /// <summary>
    /// Lowers the open-session gauge value (never below zero) without counting a close:
    /// neither the closed total nor the closed counter is touched.
    /// </summary>
    public void SessionRemoved()
    {
        lock (_syncRoot)
        {
            if (_openSessions > 0)
            {
                _openSessions--;
            }
        }
    }

    /// <summary>Counts a running worker and records how long it took to start.</summary>
    /// <param name="startupDuration">Startup time; recorded in milliseconds.</param>
    public void WorkerStarted(TimeSpan startupDuration)
    {
        lock (_syncRoot)
        {
            _workersRunning++;
        }

        _workerStartupLatencyHistogram.Record(startupDuration.TotalMilliseconds);
    }

    /// <summary>
    /// Counts a worker exit and lowers the running-worker gauge value, never below zero.
    /// </summary>
    /// <param name="reason">Value of the <c>reason</c> tag on the exit counter.</param>
    public void WorkerStopped(string reason)
    {
        lock (_syncRoot)
        {
            if (_workersRunning > 0)
            {
                _workersRunning--;
            }

            _workerExits++;
        }

        _workerExitsCounter.Add(1, new KeyValuePair<string, object?>("reason", reason));
    }

    /// <summary>Counts a worker kill. The running-worker gauge value is not changed here.</summary>
    /// <param name="reason">Value of the <c>reason</c> tag on the kill counter.</param>
    public void WorkerKilled(string reason)
    {
        lock (_syncRoot)
        {
            _workerKills++;
        }

        _workerKillsCounter.Add(1, new KeyValuePair<string, object?>("reason", reason));
    }

    /// <summary>Counts a command that has started.</summary>
    /// <param name="method">Value of the <c>method</c> tag.</param>
    public void CommandStarted(string method)
    {
        lock (_syncRoot)
        {
            _commandsStarted++;
        }

        _commandsStartedCounter.Add(1, new KeyValuePair<string, object?>("method", method));
    }

    /// <summary>Counts a successful command and records its latency.</summary>
    /// <param name="method">Value of the <c>method</c> tag.</param>
    /// <param name="duration">Command duration; recorded in milliseconds.</param>
    public void CommandSucceeded(string method, TimeSpan duration)
    {
        lock (_syncRoot)
        {
            _commandsSucceeded++;
        }

        KeyValuePair<string, object?> methodTag = new("method", method);
        _commandsSucceededCounter.Add(1, methodTag);
        _commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag);
    }

    /// <summary>
    /// Counts a failed command, bumps the per-method failure total (method names compare
    /// case-insensitively) and records its latency.
    /// </summary>
    /// <param name="method">Value of the <c>method</c> tag and key of the per-method total.</param>
    /// <param name="category">Value of the <c>category</c> tag.</param>
    /// <param name="duration">Command duration; recorded in milliseconds.</param>
    public void CommandFailed(string method, string category, TimeSpan duration)
    {
        lock (_syncRoot)
        {
            _commandsFailed++;
            Increment(_commandFailuresByMethod, method);
        }

        KeyValuePair<string, object?> methodTag = new("method", method);
        KeyValuePair<string, object?> categoryTag = new("category", category);
        _commandsFailedCounter.Add(1, methodTag, categoryTag);
        _commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag, categoryTag);
    }

    /// <summary>
    /// Counts a received event in the overall, per-family and per-session totals.
    /// This path does not take the lock used by the other recording methods.
    /// </summary>
    /// <param name="sessionId">Key of the per-session total (ordinal comparison).</param>
    /// <param name="family">Key of the per-family total (case-insensitive) and value of the <c>family</c> tag.</param>
    public void EventReceived(string sessionId, string family)
    {
        Interlocked.Increment(ref _eventsReceived);
        Increment(_eventsByFamily, family);
        Increment(_eventsBySession, sessionId);

        // Only the family is emitted as a tag; the session id stays in the in-process totals.
        _eventsReceivedCounter.Add(1, new KeyValuePair<string, object?>("family", family));
    }

    /// <summary>Records how long sending one event to a stream took. Not part of the snapshot.</summary>
    /// <param name="family">Value of the <c>family</c> tag.</param>
    /// <param name="duration">Send duration; recorded in milliseconds.</param>
    public void RecordEventStreamSend(string family, TimeSpan duration)
    {
        _eventStreamSendLatencyHistogram.Record(
            duration.TotalMilliseconds,
            new KeyValuePair<string, object?>("family", family));
    }

    /// <summary>Forwards to <see cref="SetWorkerEventQueueDepth"/>.</summary>
    /// <param name="depth">New worker event queue depth.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="depth"/> is negative.</exception>
    public void SetEventQueueDepth(int depth)
    {
        SetWorkerEventQueueDepth(depth);
    }

    /// <summary>Sets the value reported by the worker event queue depth gauge.</summary>
    /// <param name="depth">New depth.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="depth"/> is negative.</exception>
    public void SetWorkerEventQueueDepth(int depth)
    {
        if (depth < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(depth), depth, "Queue depth cannot be negative.");
        }

        lock (_syncRoot)
        {
            _workerEventQueueDepth = depth;
        }
    }

    /// <summary>Sets the value reported by the gRPC event stream queue depth gauge.</summary>
    /// <param name="depth">New depth.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="depth"/> is negative.</exception>
    public void SetGrpcEventStreamQueueDepth(int depth)
    {
        if (depth < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(depth), depth, "Queue depth cannot be negative.");
        }

        lock (_syncRoot)
        {
            _grpcEventStreamQueueDepth = depth;
        }
    }

    /// <summary>
    /// Adds <paramref name="delta"/> to the gRPC event stream queue depth, clamping the
    /// result to the range 0..<see cref="int.MaxValue"/>.
    /// </summary>
    /// <param name="delta">Signed change to apply.</param>
    public void AdjustGrpcEventStreamQueueDepth(int delta)
    {
        lock (_syncRoot)
        {
            // Sum in 64-bit: a 32-bit add can wrap negative for a large positive delta,
            // which would then be clamped to 0 and silently reset the depth.
            long adjusted = (long)_grpcEventStreamQueueDepth + delta;
            _grpcEventStreamQueueDepth = (int)Math.Clamp(adjusted, 0L, int.MaxValue);
        }
    }

    /// <summary>Drops the per-session event total for <paramref name="sessionId"/>, if present.</summary>
    /// <param name="sessionId">Session whose total is removed.</param>
    public void RemoveSessionEvents(string sessionId)
    {
        _eventsBySession.TryRemove(sessionId, out _);
    }

    /// <summary>Counts a queue overflow.</summary>
    /// <param name="queueName">Value of the <c>queue</c> tag.</param>
    public void QueueOverflow(string queueName)
    {
        lock (_syncRoot)
        {
            _queueOverflows++;
        }

        _queueOverflowsCounter.Add(1, new KeyValuePair<string, object?>("queue", queueName));
    }

    /// <summary>Counts a fault.</summary>
    /// <param name="category">Value of the <c>category</c> tag.</param>
    public void Fault(string category)
    {
        lock (_syncRoot)
        {
            _faults++;
        }

        _faultsCounter.Add(1, new KeyValuePair<string, object?>("category", category));
    }

    /// <summary>Counts a failed heartbeat.</summary>
    /// <param name="sessionId">Value of the <c>session_id</c> tag.</param>
    public void HeartbeatFailed(string sessionId)
    {
        lock (_syncRoot)
        {
            _heartbeatFailures++;
        }

        // NOTE(review): this is the only instrument tagged with a session id, so its tag
        // cardinality grows with the number of sessions; confirm that is intended.
        _heartbeatFailuresCounter.Add(1, new KeyValuePair<string, object?>("session_id", sessionId));
    }

    /// <summary>Counts a disconnected stream.</summary>
    /// <param name="reason">Value of the <c>reason</c> tag.</param>
    public void StreamDisconnected(string reason)
    {
        lock (_syncRoot)
        {
            _streamDisconnects++;
        }

        _streamDisconnectsCounter.Add(1, new KeyValuePair<string, object?>("reason", reason));
    }

    /// <summary>
    /// Counts a retry attempt in the overall total and the per-area total (area names
    /// compare case-insensitively).
    /// </summary>
    /// <param name="area">Value of the <c>area</c> tag and key of the per-area total.</param>
    public void RetryAttempted(string area)
    {
        lock (_syncRoot)
        {
            _retryAttempts++;
            Increment(_retryAttemptsByArea, area);
        }

        _retryAttemptsCounter.Add(1, new KeyValuePair<string, object?>("area", area));
    }

    /// <summary>
    /// Copies the current in-process totals into a new <see cref="GatewayMetricsSnapshot"/>.
    /// Every dictionary in the result is a fresh copy.
    /// </summary>
    /// <remarks>
    /// The copy is taken under the lock, but <see cref="EventReceived"/> does not take that
    /// lock, so the event figures are not guaranteed to be consistent with one another.
    /// </remarks>
    /// <returns>A snapshot of the totals at the time of the call.</returns>
    public GatewayMetricsSnapshot GetSnapshot()
    {
        lock (_syncRoot)
        {
            return new GatewayMetricsSnapshot(
                OpenSessions: _openSessions,
                WorkersRunning: _workersRunning,
                WorkerEventQueueDepth: _workerEventQueueDepth,
                GrpcEventStreamQueueDepth: _grpcEventStreamQueueDepth,
                SessionsOpened: _sessionsOpened,
                SessionsClosed: _sessionsClosed,
                CommandsStarted: _commandsStarted,
                CommandsSucceeded: _commandsSucceeded,
                CommandsFailed: _commandsFailed,
                EventsReceived: Interlocked.Read(ref _eventsReceived),
                QueueOverflows: _queueOverflows,
                Faults: _faults,
                WorkerKills: _workerKills,
                WorkerExits: _workerExits,
                HeartbeatFailures: _heartbeatFailures,
                StreamDisconnects: _streamDisconnects,
                RetryAttempts: _retryAttempts,
                CommandFailuresByMethod: new Dictionary<string, long>(_commandFailuresByMethod, StringComparer.OrdinalIgnoreCase),
                EventsByFamily: new Dictionary<string, long>(_eventsByFamily, StringComparer.OrdinalIgnoreCase),
                EventsBySession: new Dictionary<string, long>(_eventsBySession, StringComparer.Ordinal),
                RetryAttemptsByArea: new Dictionary<string, long>(_retryAttemptsByArea, StringComparer.OrdinalIgnoreCase));
        }
    }

    /// <summary>
    /// Disposes the underlying meter. Calls made after the first has completed return immediately.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _meter.Dispose();
        _disposed = true;
    }

    // Observable-gauge callbacks: each returns the current value under the lock.
    private int GetOpenSessions()
    {
        lock (_syncRoot)
        {
            return _openSessions;
        }
    }

    private int GetWorkersRunning()
    {
        lock (_syncRoot)
        {
            return _workersRunning;
        }
    }

    private int GetWorkerEventQueueDepth()
    {
        lock (_syncRoot)
        {
            return _workerEventQueueDepth;
        }
    }

    private int GetGrpcEventStreamQueueDepth()
    {
        lock (_syncRoot)
        {
            return _grpcEventStreamQueueDepth;
        }
    }

    // Adds one to values[key], treating a missing key as zero. Not synchronized:
    // every call site in this class holds _syncRoot.
    private static void Increment(Dictionary<string, long> values, string key)
    {
        values.TryGetValue(key, out long currentValue);
        values[key] = currentValue + 1;
    }

    // Adds one to values[key], inserting 1 when the key is missing.
    private static void Increment(ConcurrentDictionary<string, long> values, string key)
    {
        values.AddOrUpdate(key, 1, static (_, currentValue) => currentValue + 1);
    }
}