feat: add MonitorServer with /healthz and /varz endpoints
This commit is contained in:
119
src/NATS.Server/Monitoring/VarzHandler.cs
Normal file
119
src/NATS.Server/Monitoring/VarzHandler.cs
Normal file
@@ -0,0 +1,119 @@
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.InteropServices;
|
||||
using NATS.Server.Protocol;
|
||||
|
||||
namespace NATS.Server.Monitoring;
|
||||
|
||||
/// <summary>
/// Handles building the Varz response from server state and process metrics.
/// Corresponds to Go server/monitor.go handleVarz function.
/// </summary>
public sealed class VarzHandler
{
    /// <summary>
    /// Minimum spacing between two CPU samples. Requests arriving inside this
    /// window reuse <see cref="_cachedCpuPercent"/> instead of re-sampling.
    /// </summary>
    private static readonly TimeSpan CpuSampleInterval = TimeSpan.FromSeconds(1);

    private readonly NatsServer _server;
    private readonly NatsOptions _options;

    // Serializes snapshot construction so the CPU sampling state below is
    // only ever touched by one request at a time.
    private readonly SemaphoreSlim _varzMu = new(1, 1);

    // CPU sampling state: the instant and cumulative processor time of the
    // previous sample, plus the percentage computed from the last two samples.
    private DateTime _lastCpuSampleTime;
    private TimeSpan _lastCpuUsage;
    private double _cachedCpuPercent;

    /// <summary>
    /// Creates a handler bound to the given server and options, and records the
    /// initial CPU baseline that the first sample will be measured against.
    /// </summary>
    /// <param name="server">Server whose identity, start time, stats and subscriptions are reported.</param>
    /// <param name="options">Options whose listener, TLS and limit settings are reported.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="server"/> or <paramref name="options"/> is <c>null</c>.
    /// </exception>
    public VarzHandler(NatsServer server, NatsOptions options)
    {
        ArgumentNullException.ThrowIfNull(server);
        ArgumentNullException.ThrowIfNull(options);

        _server = server;
        _options = options;

        // Process is IDisposable; release the handle once the baseline has been read.
        using var proc = Process.GetCurrentProcess();
        _lastCpuSampleTime = DateTime.UtcNow;
        _lastCpuUsage = proc.TotalProcessorTime;
    }

    /// <summary>
    /// Builds a <see cref="Varz"/> snapshot from the current options, server
    /// counters and process metrics, and increments the "/varz" request counter.
    /// </summary>
    /// <returns>A newly populated <see cref="Varz"/> instance.</returns>
    public async Task<Varz> HandleVarzAsync()
    {
        await _varzMu.WaitAsync().ConfigureAwait(false);
        try
        {
            // A fresh Process object is taken per request so its metrics are
            // current; it is disposed when this scope exits.
            using var proc = Process.GetCurrentProcess();
            var now = DateTime.UtcNow;
            var uptime = now - _server.StartTime;
            var stats = _server.Stats;

            RefreshCpuSample(proc, now);

            // Track HTTP request count for /varz
            stats.HttpReqStats.AddOrUpdate("/varz", 1, (_, v) => v + 1);

            return new Varz
            {
                Id = _server.ServerId,
                Name = _server.ServerName,
                Version = NatsProtocol.Version,
                Proto = NatsProtocol.ProtoVersion,
                GoVersion = $"dotnet {RuntimeInformation.FrameworkDescription}",
                Host = _options.Host,
                Port = _options.Port,
                HttpHost = _options.MonitorHost,
                HttpPort = _options.MonitorPort,
                HttpBasePath = _options.MonitorBasePath ?? "",
                HttpsPort = _options.MonitorHttpsPort,
                TlsRequired = _options.HasTls && !_options.AllowNonTls,
                TlsVerify = _options.HasTls && _options.TlsVerify,
                TlsTimeout = _options.HasTls ? _options.TlsTimeout.TotalSeconds : 0,
                MaxConnections = _options.MaxConnections,
                MaxPayload = _options.MaxPayload,
                MaxControlLine = _options.MaxControlLine,
                MaxPingsOut = _options.MaxPingsOut,
                PingInterval = (long)_options.PingInterval.TotalNanoseconds,
                Start = _server.StartTime,
                Now = now,
                Uptime = FormatUptime(uptime),
                Mem = proc.WorkingSet64,
                Cpu = Math.Round(_cachedCpuPercent, 2),
                Cores = Environment.ProcessorCount,
                // NOTE(review): the Go server reports GOMAXPROCS in this field;
                // confirm the current thread-pool thread count is the intended analogue.
                MaxProcs = ThreadPool.ThreadCount,
                Connections = _server.ClientCount,
                TotalConnections = (ulong)Interlocked.Read(ref stats.TotalConnections),
                InMsgs = Interlocked.Read(ref stats.InMsgs),
                OutMsgs = Interlocked.Read(ref stats.OutMsgs),
                InBytes = Interlocked.Read(ref stats.InBytes),
                OutBytes = Interlocked.Read(ref stats.OutBytes),
                SlowConsumers = Interlocked.Read(ref stats.SlowConsumers),
                SlowConsumerStats = new SlowConsumersStats
                {
                    Clients = (ulong)Interlocked.Read(ref stats.SlowConsumerClients),
                    Routes = (ulong)Interlocked.Read(ref stats.SlowConsumerRoutes),
                    Gateways = (ulong)Interlocked.Read(ref stats.SlowConsumerGateways),
                    Leafs = (ulong)Interlocked.Read(ref stats.SlowConsumerLeafs),
                },
                Subscriptions = _server.SubList.Count,
                // The server start time is reported as the config load time.
                ConfigLoadTime = _server.StartTime,
                HttpReqStats = stats.HttpReqStats.ToDictionary(kv => kv.Key, kv => (ulong)kv.Value),
            };
        }
        finally
        {
            _varzMu.Release();
        }
    }

    /// <summary>
    /// Recomputes <see cref="_cachedCpuPercent"/> when at least
    /// <see cref="CpuSampleInterval"/> has passed since the previous sample;
    /// otherwise leaves the cached value untouched. Must be called while
    /// holding <see cref="_varzMu"/>.
    /// </summary>
    /// <param name="proc">The current process, used to read cumulative processor time.</param>
    /// <param name="now">The UTC instant this sample is attributed to.</param>
    private void RefreshCpuSample(Process proc, DateTime now)
    {
        var elapsed = now - _lastCpuSampleTime;
        if (elapsed < CpuSampleInterval)
        {
            return;
        }

        var currentCpu = proc.TotalProcessorTime;

        // Processor time consumed over wall-clock time, divided by the core
        // count, expressed as a percentage.
        _cachedCpuPercent = (currentCpu - _lastCpuUsage).TotalMilliseconds
            / elapsed.TotalMilliseconds / Environment.ProcessorCount * 100.0;
        _lastCpuSampleTime = now;
        _lastCpuUsage = currentCpu;
    }

    /// <summary>
    /// Formats a TimeSpan as a human-readable uptime string matching Go server format.
    /// </summary>
    /// <remarks>
    /// Only whole seconds are considered. The largest non-zero unit leads the
    /// string: <c>1y35d2h3m4s</c>, <c>3d4h5m6s</c>, <c>2h0m5s</c>, <c>7m30s</c>, <c>42s</c>.
    /// A year is counted as 365 days.
    /// </remarks>
    /// <param name="ts">Elapsed time since server start.</param>
    /// <returns>The formatted uptime.</returns>
    private static string FormatUptime(TimeSpan ts)
    {
        long totalSeconds = ts.Ticks / TimeSpan.TicksPerSecond;
        long totalMinutes = totalSeconds / 60;
        long totalHours = totalMinutes / 60;
        long totalDays = totalHours / 24;
        long totalYears = totalDays / 365;

        long seconds = totalSeconds % 60;
        long minutes = totalMinutes % 60;
        long hours = totalHours % 24;

        if (totalYears > 0)
        {
            return $"{totalYears}y{totalDays % 365}d{hours}h{minutes}m{seconds}s";
        }

        if (totalDays > 0)
        {
            return $"{totalDays}d{hours}h{minutes}m{seconds}s";
        }

        if (totalHours > 0)
        {
            return $"{totalHours}h{minutes}m{seconds}s";
        }

        if (totalMinutes > 0)
        {
            return $"{totalMinutes}m{seconds}s";
        }

        return $"{totalSeconds}s";
    }
}
|
||||
Reference in New Issue
Block a user