server(metrics): alarm provider mode gauge + provider switch counter (Task 13)

This commit is contained in:
Joseph Doherty
2026-06-13 09:18:11 -04:00
parent f113ca53a1
commit a46ce90e6f
2 changed files with 126 additions and 0 deletions
@@ -1,5 +1,6 @@
using System.Collections.Concurrent;
using System.Diagnostics.Metrics;
using System.Globalization;
namespace ZB.MOM.WW.MxGateway.Server.Metrics;
@@ -22,6 +23,7 @@ public sealed class GatewayMetrics : IDisposable
private readonly Counter<long> _heartbeatFailuresCounter;
private readonly Counter<long> _streamDisconnectsCounter;
private readonly Counter<long> _retryAttemptsCounter;
private readonly Counter<long> _alarmProviderSwitchesCounter;
private readonly Histogram<double> _workerStartupLatencyHistogram;
private readonly Histogram<double> _commandLatencyHistogram;
private readonly Histogram<double> _eventStreamSendLatencyHistogram;
@@ -34,6 +36,7 @@ public sealed class GatewayMetrics : IDisposable
private int _workersRunning;
private int _workerEventQueueDepth;
private int _grpcEventStreamQueueDepth;
private int _alarmProviderMode;
private long _sessionsOpened;
private long _sessionsClosed;
private long _commandsStarted;
@@ -68,6 +71,7 @@ public sealed class GatewayMetrics : IDisposable
_heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
_streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
_retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");
_alarmProviderSwitchesCounter = _meter.CreateCounter<long>("mxgateway.alarms.provider_switches");
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
@@ -76,6 +80,7 @@ public sealed class GatewayMetrics : IDisposable
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
_meter.CreateObservableGauge("mxgateway.events.worker_queue.depth", GetWorkerEventQueueDepth);
_meter.CreateObservableGauge("mxgateway.events.grpc_stream_queue.depth", GetGrpcEventStreamQueueDepth);
_meter.CreateObservableGauge("mxgateway.alarms.provider_mode", GetAlarmProviderMode);
}
/// <summary>
@@ -377,6 +382,26 @@ public sealed class GatewayMetrics : IDisposable
_retryAttemptsCounter.Add(1, new KeyValuePair<string, object?>("area", area));
}
/// <summary>
/// Notes an alarm provider mode change: the field backing the provider mode gauge takes the new
/// mode, and the provider switch counter is incremented once with from/to/reason tags.
/// </summary>
/// <param name="fromMode">Mode the provider was in before switching (1=alarmmgr, 2=subtag, 0=unknown).</param>
/// <param name="toMode">Mode the provider is in after switching (1=alarmmgr, 2=subtag, 0=unknown).</param>
/// <param name="reason">Human-readable explanation of the switch; a null value is recorded as an empty string.</param>
public void AlarmProviderSwitched(int fromMode, int toMode, string reason)
{
    // Mode numbers are emitted as culture-invariant strings so tag values are stable across locales.
    KeyValuePair<string, object?> fromTag = new("from", fromMode.ToString(CultureInfo.InvariantCulture));
    KeyValuePair<string, object?> toTag = new("to", toMode.ToString(CultureInfo.InvariantCulture));
    KeyValuePair<string, object?> reasonTag = new("reason", reason ?? string.Empty);

    // The gauge callback reads this field under the same lock.
    lock (_syncRoot)
    {
        _alarmProviderMode = toMode;
    }

    _alarmProviderSwitchesCounter.Add(1, fromTag, toTag, reasonTag);
}
/// <summary>
/// Returns a snapshot of all current metric values.
/// </summary>
@@ -455,6 +480,14 @@ public sealed class GatewayMetrics : IDisposable
}
}
/// <summary>
/// Reads the stored alarm provider mode while holding <c>_syncRoot</c>. Registered as the
/// observation callback of the <c>mxgateway.alarms.provider_mode</c> gauge.
/// </summary>
/// <returns>The provider mode value currently held in <c>_alarmProviderMode</c>.</returns>
private int GetAlarmProviderMode()
{
    int currentMode;
    lock (_syncRoot)
    {
        currentMode = _alarmProviderMode;
    }

    return currentMode;
}
private static void Increment(Dictionary<string, long> values, string key)
{
values.TryGetValue(key, out long currentValue);
@@ -1,3 +1,4 @@
using System.Diagnostics.Metrics;
using ZB.MOM.WW.MxGateway.Server.Metrics;
namespace ZB.MOM.WW.MxGateway.Tests.Metrics;
@@ -63,6 +64,98 @@ public sealed class GatewayMetricsTests
Assert.Equal("depth", exception.ParamName);
}
/// <summary>
/// Checks that a single call to <see cref="GatewayMetrics.AlarmProviderSwitched"/> adds exactly one to
/// <c>mxgateway.alarms.provider_switches</c> and that the measurement carries the from/to/reason tags
/// passed by the caller. Only instruments belonging to this test's own
/// <see cref="System.Diagnostics.Metrics.Meter"/> instance are observed, so measurements from tests
/// running in parallel cannot leak into the captured values (Tests-027).
/// </summary>
[Fact]
public void AlarmProviderSwitched_IncrementsCounterWithExpectedTags()
{
    const string counterName = "mxgateway.alarms.provider_switches";

    using GatewayMetrics metrics = new();
    using MeterListener listener = new();

    long total = 0;
    string? fromTag = null;
    string? toTag = null;
    string? reasonTag = null;

    // Single definition of "the counter under test", shared by both listener callbacks.
    bool IsSwitchCounter(Instrument candidate) =>
        ReferenceEquals(candidate.Meter, metrics.Meter) && candidate.Name == counterName;

    listener.InstrumentPublished = (instrument, meterListener) =>
    {
        if (!IsSwitchCounter(instrument))
        {
            return;
        }

        meterListener.EnableMeasurementEvents(instrument);
    };

    listener.SetMeasurementEventCallback<long>(
        (instrument, measurement, tags, _) =>
        {
            if (IsSwitchCounter(instrument))
            {
                total += measurement;

                for (int i = 0; i < tags.Length; i++)
                {
                    string key = tags[i].Key;
                    string? text = tags[i].Value as string;

                    if (key == "from")
                    {
                        fromTag = text;
                    }
                    else if (key == "to")
                    {
                        toTag = text;
                    }
                    else if (key == "reason")
                    {
                        reasonTag = text;
                    }
                }
            }
        });

    listener.Start();

    metrics.AlarmProviderSwitched(1, 2, "test");

    Assert.Equal(1, total);
    Assert.Equal("1", fromTag);
    Assert.Equal("2", toTag);
    Assert.Equal("test", reasonTag);
}
/// <summary>
/// Verifies that <see cref="GatewayMetrics.AlarmProviderSwitched"/> updates the
/// <c>mxgateway.alarms.provider_mode</c> observable gauge to the <c>toMode</c> argument it was given.
/// Only the gauge owned by this test's own <see cref="System.Diagnostics.Metrics.Meter"/> instance is observed.
/// </summary>
[Fact]
public void AlarmProviderSwitched_UpdatesProviderModeGauge()
{
    const string gaugeName = "mxgateway.alarms.provider_mode";

    using GatewayMetrics metrics = new();
    using MeterListener listener = new();
    int? capturedMode = null;

    // Matches only the provider mode gauge created by the metrics instance under test.
    bool IsProviderModeGauge(Instrument candidate) =>
        ReferenceEquals(candidate.Meter, metrics.Meter) && candidate.Name == gaugeName;

    listener.InstrumentPublished = (instrument, meterListener) =>
    {
        if (IsProviderModeGauge(instrument))
        {
            meterListener.EnableMeasurementEvents(instrument);
        }
    };

    listener.SetMeasurementEventCallback<int>(
        (instrument, measurement, _, _) =>
        {
            if (IsProviderModeGauge(instrument))
            {
                capturedMode = measurement;
            }
        });

    listener.Start();

    metrics.AlarmProviderSwitched(1, 2, "test");

    // Observable gauges report only when polled, so request an observation explicitly.
    listener.RecordObservableInstruments();

    Assert.Equal(2, capturedMode);
}
/// <summary>Verifies that removing session events only affects that session.</summary>
[Fact]
public void RemoveSessionEvents_RemovesOnlyThatSession()