feat: add MonitorServer with /healthz and /varz endpoints

This commit is contained in:
Joseph Doherty
2026-02-22 22:20:44 -05:00
parent f6b38df291
commit f2badc3488
4 changed files with 289 additions and 0 deletions

View File

@@ -0,0 +1,119 @@
using System.Diagnostics;
using System.Runtime.InteropServices;
using NATS.Server.Protocol;
namespace NATS.Server.Monitoring;
/// <summary>
/// Handles building the Varz response from server state and process metrics.
/// Corresponds to Go server/monitor.go handleVarz function.
/// </summary>
/// <remarks>
/// Calls to <see cref="HandleVarzAsync"/> are serialized through an internal semaphore so the
/// cached CPU sample fields are only read and written by one caller at a time.
/// </remarks>
public sealed class VarzHandler
{
    private readonly NatsServer _server;
    private readonly NatsOptions _options;

    // Async-compatible mutex guarding the CPU sampling state below.
    private readonly SemaphoreSlim _varzMu = new(1, 1);

    // Wall-clock time (UTC) and total process CPU time captured at the last CPU sample.
    private DateTime _lastCpuSampleTime;
    private TimeSpan _lastCpuUsage;

    // Most recently computed CPU percentage; reused until at least one second has elapsed.
    private double _cachedCpuPercent;

    /// <summary>
    /// Creates a handler bound to the given server and options and records the initial
    /// CPU baseline (current UTC time and total processor time of this process).
    /// </summary>
    /// <param name="server">Server whose state is reported in the Varz response.</param>
    /// <param name="options">Options whose values are reported in the Varz response.</param>
    public VarzHandler(NatsServer server, NatsOptions options)
    {
        _server = server;
        _options = options;

        // Process is IDisposable; dispose it once the baseline has been read.
        using var proc = Process.GetCurrentProcess();
        _lastCpuSampleTime = DateTime.UtcNow;
        _lastCpuUsage = proc.TotalProcessorTime;
    }

    /// <summary>
    /// Builds a <see cref="Varz"/> snapshot from the server, its options, its stats and the
    /// current process metrics, and increments the "/varz" entry of the HTTP request stats.
    /// </summary>
    /// <returns>A newly populated <see cref="Varz"/> instance.</returns>
    public async Task<Varz> HandleVarzAsync()
    {
        await _varzMu.WaitAsync().ConfigureAwait(false);
        try
        {
            // A fresh Process object is obtained per call; dispose it when the snapshot is
            // built so each request does not leave a process handle for the finalizer.
            using var proc = Process.GetCurrentProcess();
            var now = DateTime.UtcNow;
            var uptime = now - _server.StartTime;
            var stats = _server.Stats;

            // CPU sampling with 1-second cache to avoid excessive sampling.
            // The >= 1.0s guard also guarantees a non-zero divisor below.
            if ((now - _lastCpuSampleTime).TotalSeconds >= 1.0)
            {
                var currentCpu = proc.TotalProcessorTime;
                var elapsed = now - _lastCpuSampleTime;

                // CPU time consumed since the last sample, relative to wall time elapsed,
                // normalized by processor count and expressed as a percentage.
                _cachedCpuPercent = (currentCpu - _lastCpuUsage).TotalMilliseconds
                    / elapsed.TotalMilliseconds / Environment.ProcessorCount * 100.0;
                _lastCpuSampleTime = now;
                _lastCpuUsage = currentCpu;
            }

            // Track HTTP request count for /varz
            stats.HttpReqStats.AddOrUpdate("/varz", 1, (_, v) => v + 1);

            return new Varz
            {
                Id = _server.ServerId,
                Name = _server.ServerName,
                Version = NatsProtocol.Version,
                Proto = NatsProtocol.ProtoVersion,
                GoVersion = $"dotnet {RuntimeInformation.FrameworkDescription}",
                Host = _options.Host,
                Port = _options.Port,
                HttpHost = _options.MonitorHost,
                HttpPort = _options.MonitorPort,
                HttpBasePath = _options.MonitorBasePath ?? "",
                HttpsPort = _options.MonitorHttpsPort,
                TlsRequired = _options.HasTls && !_options.AllowNonTls,
                TlsVerify = _options.HasTls && _options.TlsVerify,
                TlsTimeout = _options.HasTls ? _options.TlsTimeout.TotalSeconds : 0,
                MaxConnections = _options.MaxConnections,
                MaxPayload = _options.MaxPayload,
                MaxControlLine = _options.MaxControlLine,
                MaxPingsOut = _options.MaxPingsOut,
                // Ping interval is reported in nanoseconds.
                PingInterval = (long)_options.PingInterval.TotalNanoseconds,
                Start = _server.StartTime,
                Now = now,
                Uptime = FormatUptime(uptime),
                Mem = proc.WorkingSet64,
                Cpu = Math.Round(_cachedCpuPercent, 2),
                Cores = Environment.ProcessorCount,
                // NOTE(review): this reports the current thread-pool thread count; confirm
                // that is the intended counterpart of the Go server's max_procs value.
                MaxProcs = ThreadPool.ThreadCount,
                Connections = _server.ClientCount,
                TotalConnections = (ulong)Interlocked.Read(ref stats.TotalConnections),
                InMsgs = Interlocked.Read(ref stats.InMsgs),
                OutMsgs = Interlocked.Read(ref stats.OutMsgs),
                InBytes = Interlocked.Read(ref stats.InBytes),
                OutBytes = Interlocked.Read(ref stats.OutBytes),
                SlowConsumers = Interlocked.Read(ref stats.SlowConsumers),
                SlowConsumerStats = new SlowConsumersStats
                {
                    Clients = (ulong)Interlocked.Read(ref stats.SlowConsumerClients),
                    Routes = (ulong)Interlocked.Read(ref stats.SlowConsumerRoutes),
                    Gateways = (ulong)Interlocked.Read(ref stats.SlowConsumerGateways),
                    Leafs = (ulong)Interlocked.Read(ref stats.SlowConsumerLeafs),
                },
                Subscriptions = _server.SubList.Count,
                // Config load time is reported as the server start time.
                ConfigLoadTime = _server.StartTime,
                // Copy the counters so the response holds its own dictionary.
                HttpReqStats = stats.HttpReqStats.ToDictionary(kv => kv.Key, kv => (ulong)kv.Value),
            };
        }
        finally
        {
            _varzMu.Release();
        }
    }

    /// <summary>
    /// Formats a TimeSpan as a human-readable uptime string matching Go server format.
    /// </summary>
    /// <param name="ts">Elapsed time to format.</param>
    /// <returns>
    /// A string of the form <c>1d2h3m4s</c>, <c>2h3m4s</c>, <c>3m4s</c> or <c>4s</c>,
    /// starting at the largest unit that is at least one.
    /// </returns>
    private static string FormatUptime(TimeSpan ts)
    {
        if (ts.TotalDays >= 1)
        {
            return $"{(int)ts.TotalDays}d{ts.Hours}h{ts.Minutes}m{ts.Seconds}s";
        }

        if (ts.TotalHours >= 1)
        {
            return $"{(int)ts.TotalHours}h{ts.Minutes}m{ts.Seconds}s";
        }

        if (ts.TotalMinutes >= 1)
        {
            return $"{(int)ts.TotalMinutes}m{ts.Seconds}s";
        }

        return $"{(int)ts.TotalSeconds}s";
    }
}