server(metrics): alarm provider mode gauge + provider switch counter (Task 13)

This commit is contained in:
Joseph Doherty
2026-06-13 09:18:11 -04:00
parent f113ca53a1
commit a46ce90e6f
2 changed files with 126 additions and 0 deletions
@@ -1,5 +1,6 @@
using System.Collections.Concurrent;
using System.Diagnostics.Metrics;
using System.Globalization;
namespace ZB.MOM.WW.MxGateway.Server.Metrics;
@@ -22,6 +23,7 @@ public sealed class GatewayMetrics : IDisposable
private readonly Counter<long> _heartbeatFailuresCounter;
private readonly Counter<long> _streamDisconnectsCounter;
private readonly Counter<long> _retryAttemptsCounter;
private readonly Counter<long> _alarmProviderSwitchesCounter;
private readonly Histogram<double> _workerStartupLatencyHistogram;
private readonly Histogram<double> _commandLatencyHistogram;
private readonly Histogram<double> _eventStreamSendLatencyHistogram;
@@ -34,6 +36,7 @@ public sealed class GatewayMetrics : IDisposable
private int _workersRunning;
private int _workerEventQueueDepth;
private int _grpcEventStreamQueueDepth;
private int _alarmProviderMode;
private long _sessionsOpened;
private long _sessionsClosed;
private long _commandsStarted;
@@ -68,6 +71,7 @@ public sealed class GatewayMetrics : IDisposable
_heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
_streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
_retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");
_alarmProviderSwitchesCounter = _meter.CreateCounter<long>("mxgateway.alarms.provider_switches");
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
@@ -76,6 +80,7 @@ public sealed class GatewayMetrics : IDisposable
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
_meter.CreateObservableGauge("mxgateway.events.worker_queue.depth", GetWorkerEventQueueDepth);
_meter.CreateObservableGauge("mxgateway.events.grpc_stream_queue.depth", GetGrpcEventStreamQueueDepth);
_meter.CreateObservableGauge("mxgateway.alarms.provider_mode", GetAlarmProviderMode);
}
/// <summary>
@@ -377,6 +382,26 @@ public sealed class GatewayMetrics : IDisposable
_retryAttemptsCounter.Add(1, new KeyValuePair<string, object?>("area", area));
}
/// <summary>
/// Counts one alarm provider mode switch and stores the new mode as the current provider mode.
/// </summary>
/// <param name="fromMode">Mode the provider was in before switching (1=alarmmgr, 2=subtag, 0=unknown).</param>
/// <param name="toMode">Mode the provider is in after switching (1=alarmmgr, 2=subtag, 0=unknown).</param>
/// <param name="reason">Human-readable reason for the switch; a null value is recorded as an empty string.</param>
public void AlarmProviderSwitched(int fromMode, int toMode, string reason)
{
    // Store the new mode under the shared lock before the counter is bumped.
    lock (_syncRoot)
    {
        _alarmProviderMode = toMode;
    }

    // Mode numbers are rendered culture-invariantly so the tag values do not vary with the host locale.
    var fromTag = new KeyValuePair<string, object?>("from", fromMode.ToString(CultureInfo.InvariantCulture));
    var toTag = new KeyValuePair<string, object?>("to", toMode.ToString(CultureInfo.InvariantCulture));

    // NOTE(review): reason is emitted verbatim as a tag value; free-form text can produce an
    // unbounded number of tag combinations — confirm callers pass a small, fixed set of reasons.
    var reasonTag = new KeyValuePair<string, object?>("reason", reason is null ? string.Empty : reason);

    _alarmProviderSwitchesCounter.Add(1, fromTag, toTag, reasonTag);
}
/// <summary>
/// Returns a snapshot of all current metric values.
/// </summary>
@@ -455,6 +480,14 @@ public sealed class GatewayMetrics : IDisposable
}
}
/// <summary>
/// Reads the current alarm provider mode; used as the callback for the
/// "mxgateway.alarms.provider_mode" observable gauge.
/// </summary>
/// <returns>The most recently stored alarm provider mode value.</returns>
private int GetAlarmProviderMode()
{
    int currentMode;

    // Read under the same lock that AlarmProviderSwitched takes when writing the field.
    lock (_syncRoot)
    {
        currentMode = _alarmProviderMode;
    }

    return currentMode;
}
private static void Increment(Dictionary<string, long> values, string key)
{
values.TryGetValue(key, out long currentValue);