Merge feat/telemetry-followons: telemetry follow-ons for MxAccessGateway
Metric normalization: meter MxGateway.Server -> ZB.MOM.WW.MxGateway and the 3 duration histograms ms -> s (safe: never Prometheus-exported before). Config-driven OTLP exporter opt-in (default Prometheus). Metrics.md synced; doc-review artifacts gitignored.
This commit is contained in:
@@ -147,3 +147,8 @@ generated-scratch/
|
|||||||
|
|
||||||
# Keep empty directories with .gitkeep files when needed
|
# Keep empty directories with .gitkeep files when needed
|
||||||
!.gitkeep
|
!.gitkeep
|
||||||
|
|
||||||
|
# Documentation review artifacts (CommentChecker output)
|
||||||
|
*-docs-issues.md
|
||||||
|
*-docs-fixed.md
|
||||||
|
*-docs-final.md
|
||||||
|
|||||||
+6
-6
@@ -4,7 +4,7 @@ The metrics subsystem exposes counters, histograms, and observable gauges that d
|
|||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
`GatewayMetrics` is a singleton (registered in `GatewayApplication.cs`) that owns a single `Meter` named `ZB.MOM.WW.MxGateway.Server` and a set of synchronised counters, histograms, and observable gauges. Subsystems call typed mutator methods (`SessionOpened`, `CommandFailed`, `EventReceived`, etc.) rather than touching the `Meter` directly, which keeps the OpenTelemetry instrument names and tag conventions in one place. A `lock (_syncRoot)` block guards the scalar fields used by `GetSnapshot`, while per-event maps use `ConcurrentDictionary<string, long>` so the hot event path avoids the lock.
|
`GatewayMetrics` is a singleton (registered in `GatewayApplication.cs`) that owns a single `Meter` named `ZB.MOM.WW.MxGateway` and a set of synchronised counters, histograms, and observable gauges. Subsystems call typed mutator methods (`SessionOpened`, `CommandFailed`, `EventReceived`, etc.) rather than touching the `Meter` directly, which keeps the OpenTelemetry instrument names and tag conventions in one place. A `lock (_syncRoot)` block guards the scalar fields used by `GetSnapshot`, while per-event maps use `ConcurrentDictionary<string, long>` so the hot event path avoids the lock.
|
||||||
|
|
||||||
## Meter and OpenTelemetry Compatibility
|
## Meter and OpenTelemetry Compatibility
|
||||||
|
|
||||||
@@ -13,7 +13,7 @@ The meter name is exposed as a constant so that hosting code can register it wit
|
|||||||
```csharp
|
```csharp
|
||||||
public sealed class GatewayMetrics : IDisposable
|
public sealed class GatewayMetrics : IDisposable
|
||||||
{
|
{
|
||||||
public const string MeterName = "ZB.MOM.WW.MxGateway.Server";
|
public const string MeterName = "ZB.MOM.WW.MxGateway";
|
||||||
|
|
||||||
public GatewayMetrics()
|
public GatewayMetrics()
|
||||||
{
|
{
|
||||||
@@ -50,12 +50,12 @@ All counters are `Counter<long>`. Tag values come from the call sites listed und
|
|||||||
|
|
||||||
### Histograms
|
### Histograms
|
||||||
|
|
||||||
Histograms record durations in milliseconds (the `unit` argument on `CreateHistogram`):
|
Histograms record durations in seconds (the `unit` argument on `CreateHistogram`):
|
||||||
|
|
||||||
```csharp
|
```csharp
|
||||||
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms");
|
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
|
||||||
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms");
|
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
|
||||||
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms");
|
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
|
||||||
```
|
```
|
||||||
|
|
||||||
| Instrument | Tags | What it measures |
|
| Instrument | Tags | What it measures |
|
||||||
|
|||||||
@@ -78,6 +78,11 @@ public static class GatewayApplication
|
|||||||
{
|
{
|
||||||
o.ServiceName = "mxgateway";
|
o.ServiceName = "mxgateway";
|
||||||
o.Meters = [GatewayMetrics.MeterName]; // "MxGateway.Server" — name unchanged
|
o.Meters = [GatewayMetrics.MeterName]; // "ZB.MOM.WW.MxGateway"
|
||||||
|
if (Enum.TryParse<ZbExporter>(builder.Configuration["MxGateway:Telemetry:Exporter"], ignoreCase: true, out var exporter))
|
||||||
|
o.Exporter = exporter;
|
||||||
|
var otlp = builder.Configuration["MxGateway:Telemetry:OtlpEndpoint"];
|
||||||
|
if (!string.IsNullOrWhiteSpace(otlp))
|
||||||
|
o.OtlpEndpoint = otlp;
|
||||||
});
|
});
|
||||||
builder.Services.AddSingleton<ILogRedactor, GatewayLogRedactorSeam>();
|
builder.Services.AddSingleton<ILogRedactor, GatewayLogRedactorSeam>();
|
||||||
builder.Services.AddSingleton<MxAccessGrpcMapper>();
|
builder.Services.AddSingleton<MxAccessGrpcMapper>();
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ namespace ZB.MOM.WW.MxGateway.Server.Metrics;
|
|||||||
|
|
||||||
public sealed class GatewayMetrics : IDisposable
|
public sealed class GatewayMetrics : IDisposable
|
||||||
{
|
{
|
||||||
public const string MeterName = "MxGateway.Server";
|
public const string MeterName = "ZB.MOM.WW.MxGateway";
|
||||||
|
|
||||||
private readonly object _syncRoot = new();
|
private readonly object _syncRoot = new();
|
||||||
private readonly Meter _meter;
|
private readonly Meter _meter;
|
||||||
@@ -68,9 +68,9 @@ public sealed class GatewayMetrics : IDisposable
|
|||||||
_heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
|
_heartbeatFailuresCounter = _meter.CreateCounter<long>("mxgateway.heartbeats.failed");
|
||||||
_streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
|
_streamDisconnectsCounter = _meter.CreateCounter<long>("mxgateway.grpc.streams.disconnected");
|
||||||
_retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");
|
_retryAttemptsCounter = _meter.CreateCounter<long>("mxgateway.retries.attempted");
|
||||||
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "ms");
|
_workerStartupLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.workers.startup.duration", "s");
|
||||||
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "ms");
|
_commandLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.commands.duration", "s");
|
||||||
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "ms");
|
_eventStreamSendLatencyHistogram = _meter.CreateHistogram<double>("mxgateway.events.stream_send.duration", "s");
|
||||||
|
|
||||||
_meter.CreateObservableGauge("mxgateway.sessions.open", GetOpenSessions);
|
_meter.CreateObservableGauge("mxgateway.sessions.open", GetOpenSessions);
|
||||||
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
|
_meter.CreateObservableGauge("mxgateway.workers.running", GetWorkersRunning);
|
||||||
@@ -144,7 +144,7 @@ public sealed class GatewayMetrics : IDisposable
|
|||||||
_workersRunning++;
|
_workersRunning++;
|
||||||
}
|
}
|
||||||
|
|
||||||
_workerStartupLatencyHistogram.Record(startupDuration.TotalMilliseconds);
|
_workerStartupLatencyHistogram.Record(startupDuration.TotalSeconds);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -208,7 +208,7 @@ public sealed class GatewayMetrics : IDisposable
|
|||||||
|
|
||||||
KeyValuePair<string, object?> methodTag = new("method", method);
|
KeyValuePair<string, object?> methodTag = new("method", method);
|
||||||
_commandsSucceededCounter.Add(1, methodTag);
|
_commandsSucceededCounter.Add(1, methodTag);
|
||||||
_commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag);
|
_commandLatencyHistogram.Record(duration.TotalSeconds, methodTag);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -228,7 +228,7 @@ public sealed class GatewayMetrics : IDisposable
|
|||||||
KeyValuePair<string, object?> methodTag = new("method", method);
|
KeyValuePair<string, object?> methodTag = new("method", method);
|
||||||
KeyValuePair<string, object?> categoryTag = new("category", category);
|
KeyValuePair<string, object?> categoryTag = new("category", category);
|
||||||
_commandsFailedCounter.Add(1, methodTag, categoryTag);
|
_commandsFailedCounter.Add(1, methodTag, categoryTag);
|
||||||
_commandLatencyHistogram.Record(duration.TotalMilliseconds, methodTag, categoryTag);
|
_commandLatencyHistogram.Record(duration.TotalSeconds, methodTag, categoryTag);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -255,7 +255,7 @@ public sealed class GatewayMetrics : IDisposable
|
|||||||
public void RecordEventStreamSend(string family, TimeSpan duration)
|
public void RecordEventStreamSend(string family, TimeSpan duration)
|
||||||
{
|
{
|
||||||
_eventStreamSendLatencyHistogram.Record(
|
_eventStreamSendLatencyHistogram.Record(
|
||||||
duration.TotalMilliseconds,
|
duration.TotalSeconds,
|
||||||
new KeyValuePair<string, object?>("family", family));
|
new KeyValuePair<string, object?>("family", family));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user