Files
natsdotnet/src/NATS.Server/Monitoring/VarzHandler.cs
Joseph Doherty 67a3881c7c feat: populate TLS certificate expiry and OCSP peer verify in /varz
Load the server TLS certificate from disk during each /varz request to
read its NotAfter date and expose it as tls_cert_not_after. Also wire
OcspPeerVerify from NatsOptions into the tls_ocsp_peer_verify field.
Both fields were already declared in the Varz model but left unpopulated.
2026-02-23 04:26:45 -05:00

151 lines
6.3 KiB
C#

using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Security.Cryptography.X509Certificates;
using NATS.Server.Protocol;
namespace NATS.Server.Monitoring;
/// <summary>
/// Handles building the Varz response from server state and process metrics.
/// Corresponds to Go server/monitor.go handleVarz function.
/// </summary>
public sealed class VarzHandler : IDisposable
{
private readonly NatsServer _server;
private readonly NatsOptions _options;
private readonly SemaphoreSlim _varzMu = new(1, 1);
private DateTime _lastCpuSampleTime;
private TimeSpan _lastCpuUsage;
private double _cachedCpuPercent;
public VarzHandler(NatsServer server, NatsOptions options)
{
_server = server;
_options = options;
using var proc = Process.GetCurrentProcess();
_lastCpuSampleTime = DateTime.UtcNow;
_lastCpuUsage = proc.TotalProcessorTime;
}
public async Task<Varz> HandleVarzAsync(CancellationToken ct = default)
{
await _varzMu.WaitAsync(ct);
try
{
using var proc = Process.GetCurrentProcess();
var now = DateTime.UtcNow;
var uptime = now - _server.StartTime;
var stats = _server.Stats;
// CPU sampling with 1-second cache to avoid excessive sampling
if ((now - _lastCpuSampleTime).TotalSeconds >= 1.0)
{
var currentCpu = proc.TotalProcessorTime;
var elapsed = now - _lastCpuSampleTime;
_cachedCpuPercent = (currentCpu - _lastCpuUsage).TotalMilliseconds
/ elapsed.TotalMilliseconds / Environment.ProcessorCount * 100.0;
_lastCpuSampleTime = now;
_lastCpuUsage = currentCpu;
}
// Load the TLS certificate to report its expiry date in /varz.
// Corresponds to Go server/monitor.go handleVarz populating TLSCertExpiry.
DateTime? tlsCertExpiry = null;
if (_options.HasTls && !string.IsNullOrEmpty(_options.TlsCert))
{
try
{
using var cert = X509CertificateLoader.LoadCertificateFromFile(_options.TlsCert);
tlsCertExpiry = cert.NotAfter;
}
catch
{
// cert load failure — leave field as default
}
}
return new Varz
{
Id = _server.ServerId,
Name = _server.ServerName,
Version = NatsProtocol.Version,
Proto = NatsProtocol.ProtoVersion,
GoVersion = $"dotnet {RuntimeInformation.FrameworkDescription}",
Host = _options.Host,
Port = _options.Port,
HttpHost = _options.MonitorHost,
HttpPort = _options.MonitorPort,
HttpBasePath = _options.MonitorBasePath ?? "",
HttpsPort = _options.MonitorHttpsPort,
TlsRequired = _options.HasTls && !_options.AllowNonTls,
TlsVerify = _options.HasTls && _options.TlsVerify,
TlsTimeout = _options.HasTls ? _options.TlsTimeout.TotalSeconds : 0,
TlsCertNotAfter = tlsCertExpiry ?? default,
TlsOcspPeerVerify = _options.OcspPeerVerify,
MaxConnections = _options.MaxConnections,
MaxPayload = _options.MaxPayload,
MaxControlLine = _options.MaxControlLine,
MaxPending = _options.MaxPending,
WriteDeadline = (long)_options.WriteDeadline.TotalNanoseconds,
MaxPingsOut = _options.MaxPingsOut,
PingInterval = (long)_options.PingInterval.TotalNanoseconds,
Start = _server.StartTime,
Now = now,
Uptime = FormatUptime(uptime),
Mem = proc.WorkingSet64,
Cpu = Math.Round(_cachedCpuPercent, 2),
Cores = Environment.ProcessorCount,
MaxProcs = ThreadPool.ThreadCount,
Connections = _server.ClientCount,
TotalConnections = (ulong)Interlocked.Read(ref stats.TotalConnections),
InMsgs = Interlocked.Read(ref stats.InMsgs),
OutMsgs = Interlocked.Read(ref stats.OutMsgs),
InBytes = Interlocked.Read(ref stats.InBytes),
OutBytes = Interlocked.Read(ref stats.OutBytes),
SlowConsumers = Interlocked.Read(ref stats.SlowConsumers),
SlowConsumerStats = new SlowConsumersStats
{
Clients = (ulong)Interlocked.Read(ref stats.SlowConsumerClients),
Routes = (ulong)Interlocked.Read(ref stats.SlowConsumerRoutes),
Gateways = (ulong)Interlocked.Read(ref stats.SlowConsumerGateways),
Leafs = (ulong)Interlocked.Read(ref stats.SlowConsumerLeafs),
},
StaleConnections = Interlocked.Read(ref stats.StaleConnections),
StaleConnectionStatsDetail = new StaleConnectionStats
{
Clients = (ulong)Interlocked.Read(ref stats.StaleConnectionClients),
Routes = (ulong)Interlocked.Read(ref stats.StaleConnectionRoutes),
Gateways = (ulong)Interlocked.Read(ref stats.StaleConnectionGateways),
Leafs = (ulong)Interlocked.Read(ref stats.StaleConnectionLeafs),
},
Subscriptions = _server.SubList.Count,
ConfigLoadTime = _server.StartTime,
HttpReqStats = stats.HttpReqStats.ToDictionary(kv => kv.Key, kv => (ulong)kv.Value),
};
}
finally
{
_varzMu.Release();
}
}
public void Dispose()
{
_varzMu.Dispose();
}
/// <summary>
/// Formats a TimeSpan as a human-readable uptime string matching Go server format.
/// </summary>
private static string FormatUptime(TimeSpan ts)
{
if (ts.TotalDays >= 1)
return $"{(int)ts.TotalDays}d{ts.Hours}h{ts.Minutes}m{ts.Seconds}s";
if (ts.TotalHours >= 1)
return $"{(int)ts.TotalHours}h{ts.Minutes}m{ts.Seconds}s";
if (ts.TotalMinutes >= 1)
return $"{(int)ts.TotalMinutes}m{ts.Seconds}s";
return $"{(int)ts.TotalSeconds}s";
}
}