feat: add service export latency tracking with p50/p90/p99 (Gap 9.1)

Add ServiceLatencyTracker with sorted-sample histogram, percentile getters (p50/p90/p99), average/min/max, reset, and immutable snapshot. Wire LatencyTracker and RecordServiceLatency onto Account. Cover with 11 xUnit tests.
This commit is contained in:
Joseph Doherty
2026-02-25 12:52:05 -05:00
parent dcd6b78a89
commit 3107615885
4 changed files with 554 additions and 0 deletions

View File

@@ -0,0 +1,133 @@
namespace NATS.Server.Auth;
/// <summary>
/// Tracks service request latency over a bounded window of the most recently recorded
/// samples. Percentiles are computed on demand by sorting a copy of that window.
/// Go reference: accounts.go serviceLatency / serviceExportLatencyStats.
/// </summary>
/// <remarks>
/// Every public member synchronizes on a private lock, so a single instance may be shared
/// between threads. Sorting is always done on a private copy outside the lock so that
/// readers do not stall <see cref="RecordLatency"/>.
/// </remarks>
public sealed class ServiceLatencyTracker
{
    private readonly Lock _lock = new();

    // FIFO window of retained samples; a queue makes evicting the oldest sample O(1).
    private readonly Queue<double> _samples = new();
    private readonly int _maxSamples;
    private long _totalRequests;

    /// <summary>Creates a tracker that retains at most <paramref name="maxSamples"/> samples.</summary>
    /// <param name="maxSamples">Size of the sample window; must be greater than zero.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="maxSamples"/> is zero or negative.
    /// </exception>
    public ServiceLatencyTracker(int maxSamples = 10000)
    {
        // A non-positive window could never hold a sample; fail here rather than on the
        // first RecordLatency call.
        if (maxSamples <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(maxSamples), maxSamples, "The sample window must hold at least one sample.");
        }

        _maxSamples = maxSamples;
    }

    /// <summary>Records a latency sample in milliseconds.</summary>
    /// <param name="latencyMs">The observed latency, in milliseconds.</param>
    public void RecordLatency(double latencyMs)
    {
        lock (_lock)
        {
            // Window full: drop the oldest sample to make room for the new one.
            if (_samples.Count >= _maxSamples)
            {
                _samples.Dequeue();
            }

            _samples.Enqueue(latencyMs);
            _totalRequests++;
        }
    }

    /// <summary>Returns the 50th-percentile latency in milliseconds, or 0 when no samples are retained.</summary>
    public double GetP50() => GetPercentile(0.50);

    /// <summary>Returns the 90th-percentile latency in milliseconds, or 0 when no samples are retained.</summary>
    public double GetP90() => GetPercentile(0.90);

    /// <summary>Returns the 99th-percentile latency in milliseconds, or 0 when no samples are retained.</summary>
    public double GetP99() => GetPercentile(0.99);

    /// <summary>Returns the value at the given percentile (0.0 to 1.0) over the retained samples.</summary>
    /// <param name="percentile">Fraction in the inclusive range 0.0 to 1.0 (for example 0.99 for p99).</param>
    /// <returns>The selected sample, or 0 when no samples are retained.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="percentile"/> is NaN or outside the range 0.0 to 1.0.
    /// </exception>
    public double GetPercentile(double percentile)
    {
        // Written as a negated range check so that NaN is rejected as well.
        if (!(percentile >= 0.0 && percentile <= 1.0))
        {
            throw new ArgumentOutOfRangeException(
                nameof(percentile), percentile, "Percentile must be between 0.0 and 1.0 inclusive.");
        }

        var sorted = CopySamples();
        Array.Sort(sorted);
        return PercentileOfSorted(sorted, percentile);
    }

    /// <summary>Total number of samples recorded since construction or the last <see cref="Reset"/>.</summary>
    public long TotalRequests
    {
        get
        {
            lock (_lock)
            {
                return _totalRequests;
            }
        }
    }

    /// <summary>Arithmetic mean of the retained samples in milliseconds, or 0 when there are none.</summary>
    public double AverageLatencyMs => Average(CopySamples());

    /// <summary>Smallest retained sample in milliseconds, or 0 when there are none.</summary>
    public double MinLatencyMs
    {
        get
        {
            lock (_lock)
            {
                return _samples.Count == 0 ? 0 : _samples.Min();
            }
        }
    }

    /// <summary>Largest retained sample in milliseconds, or 0 when there are none.</summary>
    public double MaxLatencyMs
    {
        get
        {
            lock (_lock)
            {
                return _samples.Count == 0 ? 0 : _samples.Max();
            }
        }
    }

    /// <summary>Number of samples currently retained in the window.</summary>
    public int SampleCount
    {
        get
        {
            lock (_lock)
            {
                return _samples.Count;
            }
        }
    }

    /// <summary>Clears all samples and resets the total request counter.</summary>
    public void Reset()
    {
        lock (_lock)
        {
            _samples.Clear();
            _totalRequests = 0;
        }
    }

    /// <summary>Returns an immutable snapshot of the current tracker state.</summary>
    /// <returns>
    /// A <see cref="ServiceLatencySnapshot"/> whose fields are all derived from one consistent
    /// copy of the sample window; every latency field is 0 when no samples are retained.
    /// </returns>
    public ServiceLatencySnapshot GetSnapshot()
    {
        double[] window;
        long total;
        lock (_lock)
        {
            window = _samples.ToArray();
            total = _totalRequests;
        }

        // Sum in arrival order, before sorting reorders the copy.
        var average = Average(window);

        // Sort once and derive every order statistic from the same sorted copy.
        Array.Sort(window);
        var hasSamples = window.Length > 0;

        return new ServiceLatencySnapshot(
            TotalRequests: total,
            P50Ms: PercentileOfSorted(window, 0.50),
            P90Ms: PercentileOfSorted(window, 0.90),
            P99Ms: PercentileOfSorted(window, 0.99),
            AverageMs: average,
            MinMs: hasSamples ? window[0] : 0,
            MaxMs: hasSamples ? window[^1] : 0,
            SampleCount: window.Length);
    }

    // Takes the lock just long enough to copy the window, in arrival order.
    private double[] CopySamples()
    {
        lock (_lock)
        {
            return _samples.ToArray();
        }
    }

    // Picks the element at floor(percentile * (n - 1)) of an ascending array; 0 when empty.
    private static double PercentileOfSorted(double[] sorted, double percentile)
    {
        if (sorted.Length == 0)
        {
            return 0;
        }

        var index = (int)(percentile * (sorted.Length - 1));
        return sorted[index];
    }

    // Arithmetic mean of the given samples, summed in array order; 0 when empty.
    private static double Average(double[] samples)
    {
        if (samples.Length == 0)
        {
            return 0;
        }

        var sum = 0.0;
        foreach (var sample in samples)
        {
            sum += sample;
        }

        return sum / samples.Length;
    }
}

/// <summary>
/// Immutable point-in-time view of a <see cref="ServiceLatencyTracker"/>, as produced by
/// <see cref="ServiceLatencyTracker.GetSnapshot"/>.
/// </summary>
/// <param name="TotalRequests">Samples recorded since construction or the last reset.</param>
/// <param name="P50Ms">50th-percentile latency in milliseconds over the retained samples.</param>
/// <param name="P90Ms">90th-percentile latency in milliseconds over the retained samples.</param>
/// <param name="P99Ms">99th-percentile latency in milliseconds over the retained samples.</param>
/// <param name="AverageMs">Arithmetic mean latency in milliseconds over the retained samples.</param>
/// <param name="MinMs">Smallest retained sample in milliseconds.</param>
/// <param name="MaxMs">Largest retained sample in milliseconds.</param>
/// <param name="SampleCount">Number of samples retained when the snapshot was taken.</param>
public sealed record ServiceLatencySnapshot(
    long TotalRequests,
    double P50Ms,
    double P90Ms,
    double P99Ms,
    double AverageMs,
    double MinMs,
    double MaxMs,
    int SampleCount);