Merge feat/telemetry-followons: telemetry follow-ons for MxAccessGateway

Metric normalization: meter MxGateway.Server -> ZB.MOM.WW.MxGateway and the 3
duration histograms ms -> s (safe: never Prometheus-exported before). Config-driven
OTLP exporter opt-in (default Prometheus). Metrics.md synced; doc-review artifacts
gitignored.
This commit is contained in:
Joseph Doherty
2026-06-01 17:17:31 -04:00
4 changed files with 24 additions and 14 deletions
+5
View File
@@ -147,3 +147,8 @@ generated-scratch/
# Keep empty directories with .gitkeep files when needed # Keep empty directories with .gitkeep files when needed
!.gitkeep !.gitkeep
# Documentation review artifacts (CommentChecker output)
*-docs-issues.md
*-docs-fixed.md
*-docs-final.md
+6 -6
View File
@@ -4,7 +4,7 @@ The metrics subsystem exposes counters, histograms, and observable gauges that d
## Overview ## Overview
`GatewayMetrics` is a singleton (registered in `GatewayApplication.cs`) that owns a single `Meter` named `ZB.MOM.WW.MxGateway.Server` and a set of synchronised counters, histograms, and observable gauges. Subsystems call typed mutator methods (`SessionOpened`, `CommandFailed`, `EventReceived`, etc.) rather than touching the `Meter` directly, which keeps the OpenTelemetry instrument names and tag conventions in one place. A `lock (_syncRoot)` block guards the scalar fields used by `GetSnapshot`, while per-event maps use `ConcurrentDictionary<string, long>` so the hot event path avoids the lock. `GatewayMetrics` is a singleton (registered in `GatewayApplication.cs`) that owns a single `Meter` named `ZB.MOM.WW.MxGateway` and a set of synchronised counters, histograms, and observable gauges. Subsystems call typed mutator methods (`SessionOpened`, `CommandFailed`, `EventReceived`, etc.) rather than touching the `Meter` directly, which keeps the OpenTelemetry instrument names and tag conventions in one place. A `lock (_syncRoot)` block guards the scalar fields used by `GetSnapshot`, while per-event maps use `ConcurrentDictionary<string, long>` so the hot event path avoids the lock.
## Meter and OpenTelemetry Compatibility ## Meter and OpenTelemetry Compatibility
@@ -13,7 +13,7 @@ The meter name is exposed as a constant so that hosting code can register it wit
```csharp ```csharp
public sealed class GatewayMetrics : IDisposable public sealed class GatewayMetrics : IDisposable
{ {
public const string MeterName = "ZB.MOM.WW.MxGateway.Server"; public const string MeterName = "ZB.MOM.WW.MxGateway";
public GatewayMetrics() public GatewayMetrics()
{ {
@@ -50,12 +50,12 @@ All counters are `Counter<long>`. Tag values come from the call sites listed und
### Histograms ### Histograms
Histograms record durations in milliseconds (the `unit` argument on `CreateHistogram`): Histograms record durations in seconds (the `unit` argument on `CreateHistogram`):
```csharp ```csharp
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms"); _workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms"); _commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms"); _eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
``` ```
| Instrument | Tags | What it measures | | Instrument | Tags | What it measures |
@@ -78,6 +78,11 @@ public static class GatewayApplication
{ {
o.ServiceName = "mxgateway"; o.ServiceName = "mxgateway";
o.Meters = [GatewayMetrics.MeterName]; // "MxGateway.Server" — name unchanged o.Meters = [GatewayMetrics.MeterName]; // "MxGateway.Server" — name unchanged
if (Enum.TryParse<ZbExporter>(builder.Configuration["MxGateway:Telemetry:Exporter"], ignoreCase: true, out var exporter))
o.Exporter = exporter;
var otlp = builder.Configuration["MxGateway:Telemetry:OtlpEndpoint"];
if (!string.IsNullOrWhiteSpace(otlp))
o.OtlpEndpoint = otlp;
}); });
builder.Services.AddSingleton<ILogRedactor, GatewayLogRedactorSeam>(); builder.Services.AddSingleton<ILogRedactor, GatewayLogRedactorSeam>();
builder.Services.AddSingleton<MxAccessGrpcMapper>(); builder.Services.AddSingleton<MxAccessGrpcMapper>();
@@ -5,7 +5,7 @@ namespace ZB.MOM.WW.MxGateway.Server.Metrics;
public sealed class GatewayMetrics : IDisposable public sealed class GatewayMetrics : IDisposable
{ {
public const string MeterName = "MxGateway.Server"; public const string MeterName = "ZB.MOM.WW.MxGateway";
private readonly object _syncRoot = new(); private readonly object _syncRoot = new();
private readonly Meter _meter; private readonly Meter _meter;
@@ -68,9 +68,9 @@ public sealed class GatewayMetrics : IDisposable
_heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed"); _heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
_streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected"); _streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
_retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted"); _retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms"); _workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms"); _commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms"); _eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
_meter.CreateObservableGauge("mxgateway.sessions.open", GetOpenSessions); _meter.CreateObservableGauge("mxgateway.sessions.open", GetOpenSessions);
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning); _meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
@@ -144,7 +144,7 @@ public sealed class GatewayMetrics : IDisposable
_workersRunning++; _workersRunning++;
} }
_workerStartupLatencyHistogram.Record(startupDuration.TotalMilliseconds); _workerStartupLatencyHistogram.Record(startupDuration.TotalSeconds);
} }
/// <summary> /// <summary>
@@ -208,7 +208,7 @@ public sealed class GatewayMetrics : IDisposable
KeyValuePair<string, object?> methodTag = new("method", method); KeyValuePair<string, object?> methodTag = new("method", method);
_commandsSucceededCounter.Add(1, methodTag); _commandsSucceededCounter.Add(1, methodTag);
_commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag); _commandLatencyHistogram.Record(duration.TotalSeconds, methodTag);
} }
/// <summary> /// <summary>
@@ -228,7 +228,7 @@ public sealed class GatewayMetrics : IDisposable
KeyValuePair<string, object?> methodTag = new("method", method); KeyValuePair<string, object?> methodTag = new("method", method);
KeyValuePair<string, object?> categoryTag = new("category", category); KeyValuePair<string, object?> categoryTag = new("category", category);
_commandsFailedCounter.Add(1, methodTag, categoryTag); _commandsFailedCounter.Add(1, methodTag, categoryTag);
_commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag, categoryTag); _commandLatencyHistogram.Record(duration.TotalSeconds, methodTag, categoryTag);
} }
/// <summary> /// <summary>
@@ -255,7 +255,7 @@ public sealed class GatewayMetrics : IDisposable
public void RecordEventStreamSend(string family, TimeSpan duration) public void RecordEventStreamSend(string family, TimeSpan duration)
{ {
_eventStreamSendLatencyHistogram.Record( _eventStreamSendLatencyHistogram.Record(
duration.TotalMilliseconds, duration.TotalSeconds,
new KeyValuePair<string, object?>("family", family)); new KeyValuePair<string, object?>("family", family));
} }