Merge feat/telemetry-followons: telemetry follow-ons for MxAccessGateway
Metric normalization: meter MxGateway.Server -> ZB.MOM.WW.MxGateway and the 3 duration histograms ms -> s (safe: never Prometheus-exported before). Config-driven OTLP exporter opt-in (default Prometheus). Metrics.md synced; doc-review artifacts gitignored.
This commit is contained in:
@@ -147,3 +147,8 @@ generated-scratch/
|
||||
|
||||
# Keep empty directories with .gitkeep files when needed
|
||||
!.gitkeep
|
||||
|
||||
# Documentation review artifacts (CommentChecker output)
|
||||
*-docs-issues.md
|
||||
*-docs-fixed.md
|
||||
*-docs-final.md
|
||||
|
||||
+6
-6
@@ -4,7 +4,7 @@ The metrics subsystem exposes counters, histograms, and observable gauges that d
|
||||
|
||||
## Overview
|
||||
|
||||
`GatewayMetrics` is a singleton (registered in `GatewayApplication.cs`) that owns a single `Meter` named `ZB.MOM.WW.MxGateway.Server` and a set of synchronised counters, histograms, and observable gauges. Subsystems call typed mutator methods (`SessionOpened`, `CommandFailed`, `EventReceived`, etc.) rather than touching the `Meter` directly, which keeps the OpenTelemetry instrument names and tag conventions in one place. A `lock (_syncRoot)` block guards the scalar fields used by `GetSnapshot`, while per-event maps use `ConcurrentDictionary<string, long>` so the hot event path avoids the lock.
|
||||
`GatewayMetrics` is a singleton (registered in `GatewayApplication.cs`) that owns a single `Meter` named `ZB.MOM.WW.MxGateway` and a set of synchronised counters, histograms, and observable gauges. Subsystems call typed mutator methods (`SessionOpened`, `CommandFailed`, `EventReceived`, etc.) rather than touching the `Meter` directly, which keeps the OpenTelemetry instrument names and tag conventions in one place. A `lock (_syncRoot)` block guards the scalar fields used by `GetSnapshot`, while per-event maps use `ConcurrentDictionary<string, long>` so the hot event path avoids the lock.
|
||||
|
||||
## Meter and OpenTelemetry Compatibility
|
||||
|
||||
@@ -13,7 +13,7 @@ The meter name is exposed as a constant so that hosting code can register it wit
|
||||
```csharp
|
||||
public sealed class GatewayMetrics : IDisposable
|
||||
{
|
||||
public const string MeterName = "ZB.MOM.WW.MxGateway.Server";
|
||||
public const string MeterName = "ZB.MOM.WW.MxGateway";
|
||||
|
||||
public GatewayMetrics()
|
||||
{
|
||||
@@ -50,12 +50,12 @@ All counters are `Counter<long>`. Tag values come from the call sites listed und
|
||||
|
||||
### Histograms
|
||||
|
||||
Histograms record durations in milliseconds (the `unit` argument on `CreateHistogram`):
|
||||
Histograms record durations in seconds (the `unit` argument on `CreateHistogram`):
|
||||
|
||||
```csharp
|
||||
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms");
|
||||
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms");
|
||||
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms");
|
||||
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
|
||||
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
|
||||
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
|
||||
```
|
||||
|
||||
| Instrument | Tags | What it measures |
|
||||
|
||||
@@ -78,6 +78,11 @@ public static class GatewayApplication
|
||||
{
|
||||
o.ServiceName = "mxgateway";
|
||||
o.Meters = [GatewayMetrics.MeterName]; // "MxGateway.Server" — name unchanged
|
||||
if (Enum.TryParse<ZbExporter>(builder.Configuration["MxGateway:Telemetry:Exporter"], ignoreCase: true, out var exporter))
|
||||
o.Exporter = exporter;
|
||||
var otlp = builder.Configuration["MxGateway:Telemetry:OtlpEndpoint"];
|
||||
if (!string.IsNullOrWhiteSpace(otlp))
|
||||
o.OtlpEndpoint = otlp;
|
||||
});
|
||||
builder.Services.AddSingleton<ILogRedactor, GatewayLogRedactorSeam>();
|
||||
builder.Services.AddSingleton<MxAccessGrpcMapper>();
|
||||
|
||||
@@ -5,7 +5,7 @@ namespace ZB.MOM.WW.MxGateway.Server.Metrics;
|
||||
|
||||
public sealed class GatewayMetrics : IDisposable
|
||||
{
|
||||
public const string MeterName = "MxGateway.Server";
|
||||
public const string MeterName = "ZB.MOM.WW.MxGateway";
|
||||
|
||||
private readonly object _syncRoot = new();
|
||||
private readonly Meter _meter;
|
||||
@@ -68,9 +68,9 @@ public sealed class GatewayMetrics : IDisposable
|
||||
_heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
|
||||
_streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
|
||||
_retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");
|
||||
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms");
|
||||
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms");
|
||||
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms");
|
||||
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
|
||||
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
|
||||
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
|
||||
|
||||
_meter.CreateObservableGauge("mxgateway.sessions.open", GetOpenSessions);
|
||||
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
|
||||
@@ -144,7 +144,7 @@ public sealed class GatewayMetrics : IDisposable
|
||||
_workersRunning++;
|
||||
}
|
||||
|
||||
_workerStartupLatencyHistogram.Record(startupDuration.TotalMilliseconds);
|
||||
_workerStartupLatencyHistogram.Record(startupDuration.TotalSeconds);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -208,7 +208,7 @@ public sealed class GatewayMetrics : IDisposable
|
||||
|
||||
KeyValuePair<string, object?> methodTag = new("method", method);
|
||||
_commandsSucceededCounter.Add(1, methodTag);
|
||||
_commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag);
|
||||
_commandLatencyHistogram.Record(duration.TotalSeconds, methodTag);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -228,7 +228,7 @@ public sealed class GatewayMetrics : IDisposable
|
||||
KeyValuePair<string, object?> methodTag = new("method", method);
|
||||
KeyValuePair<string, object?> categoryTag = new("category", category);
|
||||
_commandsFailedCounter.Add(1, methodTag, categoryTag);
|
||||
_commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag, categoryTag);
|
||||
_commandLatencyHistogram.Record(duration.TotalSeconds, methodTag, categoryTag);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -255,7 +255,7 @@ public sealed class GatewayMetrics : IDisposable
|
||||
public void RecordEventStreamSend(string family, TimeSpan duration)
|
||||
{
|
||||
_eventStreamSendLatencyHistogram.Record(
|
||||
duration.TotalMilliseconds,
|
||||
duration.TotalSeconds,
|
||||
new KeyValuePair<string, object?>("family", family));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user