diff --git a/docs/plans/2026-02-23-sections-7-10-gaps-plan.md b/docs/plans/2026-02-23-sections-7-10-gaps-plan.md new file mode 100644 index 0000000..9bf9c71 --- /dev/null +++ b/docs/plans/2026-02-23-sections-7-10-gaps-plan.md @@ -0,0 +1,1911 @@ +# Sections 7-10 Gaps Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task. + +**Goal:** Achieve Go parity for monitoring (subz, connz closed state, sorting), TLS cert mapping, logging infrastructure, and ping/pong RTT tracking. + +**Architecture:** Four independent work streams touching separate files. Stream 1 (Monitoring) adds subz endpoint, closed-connection tracking, and missing connz sort/filter options. Stream 2 (TLS) adds cert-to-user mapping via X500DistinguishedName. Stream 3 (Logging) adds file rotation, debug/trace modes, color output, timestamp control, and log reopening. Stream 4 (Ping/Pong) adds RTT measurement, first-PING delay, stale connection stats, and ByRtt sorting. + +**Tech Stack:** .NET 10, C# 14, xUnit 3, Shouldly, Serilog (Sinks.File, Sinks.SyslogMessages), System.Security.Cryptography.X509Certificates + +--- + +### Task 0: Add NuGet dependencies for logging sinks + +**Files:** +- Modify: `Directory.Packages.props:6-26` +- Modify: `src/NATS.Server.Host/NATS.Server.Host.csproj:11-14` + +**Step 1: Add package versions to Directory.Packages.props** + +Add under the `` section after line 10: + +```xml + + +``` + +**Step 2: Add package references to Host csproj** + +Add to the existing `` with Serilog packages in `src/NATS.Server.Host/NATS.Server.Host.csproj`: + +```xml + + +``` + +**Step 3: Restore packages** + +Run: `dotnet restore` +Expected: Success, no errors + +**Step 4: Commit** + +```bash +git add Directory.Packages.props src/NATS.Server.Host/NATS.Server.Host.csproj +git commit -m "chore: add Serilog.Sinks.File and SyslogMessages packages" +``` + +--- + +### Task 1: Add logging and ping/pong options to NatsOptions + +**Files:** +- Modify: `src/NATS.Server/NatsOptions.cs:6-68` + +**Step 1: Add logging options after the `ConfigFile` property (line 48)** + +```csharp +// Logging +public string? LogFile { get; set; } +public long LogSizeLimit { get; set; } +public int LogMaxFiles { get; set; } +public bool Debug { get; set; } +public bool Trace { get; set; } +public bool Logtime { get; set; } = true; +public bool LogtimeUTC { get; set; } +public bool Syslog { get; set; } +public string? RemoteSyslog { get; set; } +``` + +**Step 2: Verify build** + +Run: `dotnet build src/NATS.Server/NATS.Server.csproj` +Expected: Build succeeded + +**Step 3: Commit** + +```bash +git add src/NATS.Server/NatsOptions.cs +git commit -m "feat: add logging and timestamp options to NatsOptions" +``` + +--- + +### Task 2: Add CLI flag parsing for logging and debug/trace modes + +**Files:** +- Modify: `src/NATS.Server.Host/Program.cs:12-59` + +**Step 1: Add CLI flag cases inside the `switch` block** + +Add after the existing `--tlsverify` case (line 57): + +```csharp +case "-D": + options.Debug = true; + break; +case "-V" or "-T": + options.Trace = true; + break; +case "-DV": + options.Debug = true; + options.Trace = true; + break; +case "--log_file" when i + 1 < args.Length: + options.LogFile = args[++i]; + break; +case "--log_size_limit" when i + 1 < args.Length: + options.LogSizeLimit = long.Parse(args[++i]); + break; +case "--log_max_files" when i + 1 < args.Length: + options.LogMaxFiles = int.Parse(args[++i]); + break; +case "--logtime" when i + 1 < args.Length: + options.Logtime = bool.Parse(args[++i]); + break; +case "--logtime_utc": + options.LogtimeUTC = true; + break; +case "--syslog": + options.Syslog = true; + break; +case "--remote_syslog" when i + 1 < args.Length: + options.RemoteSyslog = args[++i]; + break; +``` + +**Step 2: Rebuild Serilog logger configuration based on options** + +Replace the existing Serilog configuration block (lines 4-8) with: + +```csharp +// Parse options first to configure logging from them +var options = new NatsOptions(); + +for (int i = 0; i < args.Length; i++) +{ + switch (args[i]) + { + // ... all existing cases plus the new ones above ... + } +} + +// Build Serilog configuration from options +var logConfig = new LoggerConfiguration() + .Enrich.FromLogContext(); + +// Set minimum level based on flags +if (options.Trace) + logConfig.MinimumLevel.Verbose(); +else if (options.Debug) + logConfig.MinimumLevel.Debug(); +else + logConfig.MinimumLevel.Information(); + +// Build output template +var timestampFormat = options.LogtimeUTC + ? "{Timestamp:yyyy/MM/dd HH:mm:ss.ffffff} " + : "{Timestamp:HH:mm:ss} "; +var template = options.Logtime + ? $"[{timestampFormat}{{Level:u3}}] {{Message:lj}}{{NewLine}}{{Exception}}" + : "[{Level:u3}] {Message:lj}{NewLine}{Exception}"; + +// Console sink with color auto-detection +if (!Console.IsOutputRedirected) + logConfig.WriteTo.Console(outputTemplate: template, theme: Serilog.Sinks.SystemConsole.Themes.AnsiConsoleTheme.Code); +else + logConfig.WriteTo.Console(outputTemplate: template); + +// File sink with rotation +if (!string.IsNullOrEmpty(options.LogFile)) +{ + logConfig.WriteTo.File( + options.LogFile, + fileSizeLimitBytes: options.LogSizeLimit > 0 ? options.LogSizeLimit : null, + retainedFileCountLimit: options.LogMaxFiles > 0 ? options.LogMaxFiles : null, + rollOnFileSizeLimit: options.LogSizeLimit > 0, + outputTemplate: template); +} + +// Syslog sink +if (!string.IsNullOrEmpty(options.RemoteSyslog)) +{ + logConfig.WriteTo.UdpSyslog(options.RemoteSyslog); +} +else if (options.Syslog) +{ + logConfig.WriteTo.LocalSyslog("nats-server"); +} + +Log.Logger = logConfig.CreateLogger(); +``` + +Also add the required `using` at the top of the file: + +```csharp +using Serilog.Sinks.SystemConsole.Themes; +``` + +And remove the duplicate `var options = new NatsOptions();` line and CLI parsing block that now comes before logger setup (the options parsing and logger build are combined at the top). + +**Step 3: Verify build** + +Run: `dotnet build src/NATS.Server.Host/NATS.Server.Host.csproj` +Expected: Build succeeded + +**Step 4: Commit** + +```bash +git add src/NATS.Server.Host/Program.cs +git commit -m "feat: add CLI flags for debug/trace modes, file logging, syslog, color, timestamps" +``` + +--- + +### Task 3: Implement log reopening on SIGUSR1 + +**Files:** +- Modify: `src/NATS.Server/NatsServer.cs:197-235` (HandleSignals) +- Modify: `src/NATS.Server.Host/Program.cs` + +**Step 1: Add `ReOpenLogFile` callback property on NatsServer** + +Add after the `IsLameDuckMode` property (around line 63 in NatsServer.cs): + +```csharp +public Action? ReOpenLogFile { get; set; } +``` + +**Step 2: Update SIGUSR1 handler in HandleSignals** + +Replace the existing SIGUSR1 handler (lines 222-226) with: + +```csharp +_signalRegistrations.Add(PosixSignalRegistration.Create((PosixSignal)10, ctx => +{ + ctx.Cancel = true; + _logger.LogInformation("Trapped SIGUSR1 signal — reopening log file"); + ReOpenLogFile?.Invoke(); +})); +``` + +**Step 3: Wire up the callback in Program.cs** + +After `server.HandleSignals();` add: + +```csharp +server.ReOpenLogFile = () => +{ + Log.Information("Reopening log file"); + Log.CloseAndFlush(); + // Rebuild logger with same configuration + // (The logConfig variable must be captured in closure or rebuilt) + Log.Logger = logConfig.CreateLogger(); + Log.Information("File log re-opened"); +}; +``` + +Note: The `logConfig` variable from Task 2 must be extracted to a scope visible here. Move the `LoggerConfiguration` build into a local function or store `logConfig` before `CreateLogger()`. + +**Step 4: Verify build** + +Run: `dotnet build` +Expected: Build succeeded + +**Step 5: Commit** + +```bash +git add src/NATS.Server/NatsServer.cs src/NATS.Server.Host/Program.cs +git commit -m "feat: implement log reopening on SIGUSR1 signal" +``` + +--- + +### Task 4: Add RTT tracking to NatsClient + +**Files:** +- Modify: `src/NATS.Server/NatsClient.cs:72-74` (ping state), `322-324` (pong handler), `607-639` (ping timer) + +**Step 1: Write failing test** + +Create `tests/NATS.Server.Tests/RttTests.cs`: + +```csharp +using System.Net; +using System.Net.Http.Json; +using System.Net.Sockets; +using Microsoft.Extensions.Logging.Abstractions; +using NATS.Server.Monitoring; + +namespace NATS.Server.Tests; + +public class RttTests : IAsyncLifetime +{ + private readonly NatsServer _server; + private readonly int _natsPort; + private readonly int _monitorPort; + private readonly CancellationTokenSource _cts = new(); + private readonly HttpClient _http = new(); + + public RttTests() + { + _natsPort = GetFreePort(); + _monitorPort = GetFreePort(); + _server = new NatsServer( + new NatsOptions + { + Port = _natsPort, + MonitorPort = _monitorPort, + PingInterval = TimeSpan.FromMilliseconds(200), + MaxPingsOut = 4, + }, + NullLoggerFactory.Instance); + } + + public async Task InitializeAsync() + { + _ = _server.StartAsync(_cts.Token); + await _server.WaitForReadyAsync(); + for (int i = 0; i < 50; i++) + { + try + { + var resp = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/healthz"); + if (resp.IsSuccessStatusCode) break; + } + catch (HttpRequestException) { } + await Task.Delay(50); + } + } + + public async Task DisposeAsync() + { + _http.Dispose(); + await _cts.CancelAsync(); + _server.Dispose(); + } + + [Fact] + public async Task Rtt_populated_after_ping_pong_cycle() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + using var stream = new NetworkStream(sock); + var buf = new byte[4096]; + _ = await stream.ReadAsync(buf); // INFO + + // Send CONNECT + PING (triggers firstPongSent) + await stream.WriteAsync("CONNECT {}\r\nPING\r\n"u8.ToArray()); + await stream.FlushAsync(); + _ = await stream.ReadAsync(buf); // PONG + + // Wait for server's PING cycle + await Task.Delay(500); + + // Read server PING and respond with PONG + var received = new byte[4096]; + int totalRead = 0; + bool gotPing = false; + using var readCts = new CancellationTokenSource(2000); + while (!gotPing && !readCts.IsCancellationRequested) + { + var n = await stream.ReadAsync(received.AsMemory(totalRead), readCts.Token); + totalRead += n; + var text = System.Text.Encoding.ASCII.GetString(received, 0, totalRead); + if (text.Contains("PING")) + { + gotPing = true; + await stream.WriteAsync("PONG\r\n"u8.ToArray()); + await stream.FlushAsync(); + } + } + + gotPing.ShouldBeTrue("Server should have sent PING"); + + // Wait for RTT to be computed + await Task.Delay(200); + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/connz"); + var connz = await response.Content.ReadFromJsonAsync(); + connz.ShouldNotBeNull(); + var conn = connz.Conns.FirstOrDefault(c => c.Rtt != ""); + conn.ShouldNotBeNull("At least one connection should have RTT populated"); + } + + [Fact] + public async Task Connz_sort_by_rtt() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + using var stream = new NetworkStream(sock); + var buf = new byte[4096]; + _ = await stream.ReadAsync(buf); + await stream.WriteAsync("CONNECT {}\r\n"u8.ToArray()); + await Task.Delay(200); + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/connz?sort=rtt"); + response.StatusCode.ShouldBe(System.Net.HttpStatusCode.OK); + } + + private static int GetFreePort() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + sock.Bind(new IPEndPoint(IPAddress.Loopback, 0)); + return ((IPEndPoint)sock.LocalEndPoint!).Port; + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~RttTests" -v normal` +Expected: Tests fail (RTT never populated, `sort=rtt` not recognized) + +**Step 3: Add RTT fields to NatsClient** + +In `src/NATS.Server/NatsClient.cs`, add after line 74 (`private long _lastIn;`): + +```csharp +// RTT tracking +private long _rttStartTicks; +private long _rtt; +public TimeSpan Rtt => new(Interlocked.Read(ref _rtt)); +``` + +**Step 4: Update PONG handler to compute RTT** + +Replace the `CommandType.Pong` case at line 322-323: + +```csharp +case CommandType.Pong: + Interlocked.Exchange(ref _pingsOut, 0); + var rttStart = Interlocked.Read(ref _rttStartTicks); + if (rttStart > 0) + { + var elapsed = DateTime.UtcNow.Ticks - rttStart; + if (elapsed <= 0) elapsed = 1; // min 1 tick for Windows granularity + Interlocked.Exchange(ref _rtt, elapsed); + } + _flags.SetFlag(ClientFlags.FirstPongSent); + break; +``` + +**Step 5: Record RTT start time when sending PING** + +In `RunPingTimerAsync`, add before `WriteProtocol(NatsProtocol.PingBytes);` (line 632): + +```csharp +Interlocked.Exchange(ref _rttStartTicks, DateTime.UtcNow.Ticks); +``` + +**Step 6: Add first-PING delay logic** + +In `RunPingTimerAsync`, add at the top of the `while` loop body (after `while (await timer.WaitForNextTickAsync(ct))` at line 612), before the elapsed check: + +```csharp +// Delay first PING until client has responded with PONG or 2 seconds elapsed +if (!_flags.HasFlag(ClientFlags.FirstPongSent) + && (DateTime.UtcNow - StartTime).TotalSeconds < 2) +{ + continue; +} +``` + +**Step 7: Run tests** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~RttTests.Rtt_populated" -v normal` +Expected: Still fails — ByRtt sort not added yet (that's task 6) + +**Step 8: Commit the RTT tracking (independent of sort)** + +```bash +git add src/NATS.Server/NatsClient.cs tests/NATS.Server.Tests/RttTests.cs +git commit -m "feat: add RTT tracking and first-PING delay to NatsClient" +``` + +--- + +### Task 5: Add stale connection stats and expose in varz + +**Files:** +- Modify: `src/NATS.Server/ServerStats.cs:1-20` +- Modify: `src/NATS.Server/Monitoring/Varz.cs:153-161` +- Modify: `src/NATS.Server/Monitoring/VarzHandler.cs:86-93` +- Modify: `src/NATS.Server/NatsClient.cs:646-665` (MarkClosed) + +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/ServerStatsTests.cs`: + +```csharp +[Fact] +public void StaleConnection_stats_incremented_on_mark_closed() +{ + var stats = new ServerStats(); + stats.StaleConnectionClients.ShouldBe(0); + + Interlocked.Increment(ref stats.StaleConnectionClients); + stats.StaleConnectionClients.ShouldBe(1); +} +``` + +**Step 2: Add stale connection fields to ServerStats** + +In `src/NATS.Server/ServerStats.cs`, add after `SlowConsumerGateways` (line 16): + +```csharp +public long StaleConnectionClients; +public long StaleConnectionRoutes; +public long StaleConnectionLeafs; +public long StaleConnectionGateways; +``` + +**Step 3: Add StaleConnectionStats model to Varz.cs** + +Add after the `SlowConsumersStats` class (after line 220): + +```csharp +/// +/// Statistics about stale connections by connection type. +/// Corresponds to Go server/monitor.go StaleConnectionStats struct. +/// +public sealed class StaleConnectionStats +{ + [JsonPropertyName("clients")] + public ulong Clients { get; set; } + + [JsonPropertyName("routes")] + public ulong Routes { get; set; } + + [JsonPropertyName("gateways")] + public ulong Gateways { get; set; } + + [JsonPropertyName("leafs")] + public ulong Leafs { get; set; } +} +``` + +Add to the `Varz` class after `SlowConsumerStats` (after line 158): + +```csharp +[JsonPropertyName("stale_connections")] +public long StaleConnections { get; set; } + +[JsonPropertyName("stale_connection_stats")] +public StaleConnectionStats StaleConnectionStatsDetail { get; set; } = new(); +``` + +**Step 4: Populate in VarzHandler** + +In `src/NATS.Server/Monitoring/VarzHandler.cs`, add after line 93 (after `SlowConsumerStats` block): + +```csharp +StaleConnections = Interlocked.Read(ref stats.StaleConnections), +StaleConnectionStatsDetail = new StaleConnectionStats +{ + Clients = (ulong)Interlocked.Read(ref stats.StaleConnectionClients), + Routes = (ulong)Interlocked.Read(ref stats.StaleConnectionRoutes), + Gateways = (ulong)Interlocked.Read(ref stats.StaleConnectionGateways), + Leafs = (ulong)Interlocked.Read(ref stats.StaleConnectionLeafs), +}, +``` + +**Step 5: Increment stale stats in NatsClient.MarkClosed** + +In `src/NATS.Server/NatsClient.cs`, inside the `MarkClosed` method, add after the existing switch (around line 662): + +```csharp +if (reason == ClientClosedReason.StaleConnection) +{ + Interlocked.Increment(ref _serverStats.StaleConnections); + Interlocked.Increment(ref _serverStats.StaleConnectionClients); +} +``` + +**Step 6: Run tests** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~StaleConnection" -v normal` +Expected: PASS + +**Step 7: Commit** + +```bash +git add src/NATS.Server/ServerStats.cs src/NATS.Server/Monitoring/Varz.cs src/NATS.Server/Monitoring/VarzHandler.cs src/NATS.Server/NatsClient.cs tests/NATS.Server.Tests/ServerStatsTests.cs +git commit -m "feat: add stale connection stats tracking and varz exposure" +``` + +--- + +### Task 6: Add closed connection tracking and connz state filtering + +**Files:** +- Create: `src/NATS.Server/Monitoring/ClosedClient.cs` +- Modify: `src/NATS.Server/NatsServer.cs:20-21,64,575-582` +- Modify: `src/NATS.Server/Monitoring/ConnzHandler.cs:11-49,99-138` +- Modify: `src/NATS.Server/Monitoring/Connz.cs:158-171` + +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/MonitorTests.cs` inside the `MonitorTests` class: + +```csharp +[Fact] +public async Task Connz_state_closed_returns_disconnected_clients() +{ + // Connect then disconnect a client + var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + using var stream = new NetworkStream(sock); + var buf = new byte[4096]; + _ = await stream.ReadAsync(buf); + await stream.WriteAsync("CONNECT {\"name\":\"closing-client\"}\r\n"u8.ToArray()); + await Task.Delay(200); + sock.Shutdown(SocketShutdown.Both); + sock.Dispose(); + await Task.Delay(500); // Wait for server to detect disconnect + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/connz?state=closed"); + var connz = await response.Content.ReadFromJsonAsync(); + connz.ShouldNotBeNull(); + connz.Conns.ShouldContain(c => c.Name == "closing-client"); + var closed = connz.Conns.First(c => c.Name == "closing-client"); + closed.Stop.ShouldNotBeNull(); + closed.Reason.ShouldNotBeNullOrEmpty(); +} + +[Fact] +public async Task Connz_sort_by_stop_requires_closed_state() +{ + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/connz?sort=stop&state=open"); + // Should return 400 or ignore invalid sort for open state + var connz = await response.Content.ReadFromJsonAsync(); + // Fallback to ByCid when sort is invalid for state + connz.ShouldNotBeNull(); +} + +[Fact] +public async Task Connz_sort_by_reason() +{ + // Connect then disconnect a client + var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + var buf = new byte[4096]; + _ = await sock.ReceiveAsync(buf); + sock.Shutdown(SocketShutdown.Both); + sock.Dispose(); + await Task.Delay(500); + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/connz?sort=reason&state=closed"); + response.StatusCode.ShouldBe(HttpStatusCode.OK); +} +``` + +**Step 2: Run test to verify it fails** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~Connz_state_closed" -v normal` +Expected: FAIL + +**Step 3: Create ClosedClient record** + +Create `src/NATS.Server/Monitoring/ClosedClient.cs`: + +```csharp +namespace NATS.Server.Monitoring; + +/// +/// Snapshot of a closed client connection for /connz reporting. +/// Corresponds to Go server/monitor.go closedClient struct. +/// +public sealed record ClosedClient +{ + public required ulong Cid { get; init; } + public string Ip { get; init; } = ""; + public int Port { get; init; } + public DateTime Start { get; init; } + public DateTime Stop { get; init; } + public string Reason { get; init; } = ""; + public string Name { get; init; } = ""; + public string Lang { get; init; } = ""; + public string Version { get; init; } = ""; + public long InMsgs { get; init; } + public long OutMsgs { get; init; } + public long InBytes { get; init; } + public long OutBytes { get; init; } + public uint NumSubs { get; init; } + public TimeSpan Rtt { get; init; } + public string TlsVersion { get; init; } = ""; + public string TlsCipherSuite { get; init; } = ""; +} +``` + +**Step 4: Add closed client tracking to NatsServer** + +In `src/NATS.Server/NatsServer.cs`, add field after `_nextClientId` (line 35): + +```csharp +private readonly ConcurrentQueue _closedClients = new(); +private const int MaxClosedClients = 10_000; +``` + +Add public accessor after `GetClients()` (line 64): + +```csharp +public IEnumerable GetClosedClients() => _closedClients; +public IEnumerable GetAccounts() => _accounts.Values; +``` + +Add import at top: + +```csharp +using NATS.Server.Monitoring; +``` + +**Step 5: Populate closed client snapshot in RemoveClient** + +Replace `RemoveClient` method (lines 575-582): + +```csharp +public void RemoveClient(NatsClient client) +{ + _clients.TryRemove(client.Id, out _); + _logger.LogDebug("Removed client {ClientId}", client.Id); + + // Snapshot for closed-connections tracking + _closedClients.Enqueue(new ClosedClient + { + Cid = client.Id, + Ip = client.RemoteIp ?? "", + Port = client.RemotePort, + Start = client.StartTime, + Stop = DateTime.UtcNow, + Reason = client.CloseReason.ToReasonString(), + Name = client.ClientOpts?.Name ?? "", + Lang = client.ClientOpts?.Lang ?? "", + Version = client.ClientOpts?.Version ?? "", + InMsgs = Interlocked.Read(ref client.InMsgs), + OutMsgs = Interlocked.Read(ref client.OutMsgs), + InBytes = Interlocked.Read(ref client.InBytes), + OutBytes = Interlocked.Read(ref client.OutBytes), + NumSubs = (uint)client.Subscriptions.Count, + Rtt = client.Rtt, + TlsVersion = client.TlsState?.TlsVersion ?? "", + TlsCipherSuite = client.TlsState?.CipherSuite ?? "", + }); + + // Cap closed clients list + while (_closedClients.Count > MaxClosedClients) + _closedClients.TryDequeue(out _); + + var subList = client.Account?.SubList ?? _globalAccount.SubList; + client.RemoveAllSubscriptions(subList); + client.Account?.RemoveClient(client.Id); +} +``` + +**Step 6: Add ByStop, ByReason, ByRtt to SortOpt enum** + +In `src/NATS.Server/Monitoring/Connz.cs`, update the `SortOpt` enum: + +```csharp +public enum SortOpt +{ + ByCid, + ByStart, + BySubs, + ByPending, + ByMsgsTo, + ByMsgsFrom, + ByBytesTo, + ByBytesFrom, + ByLast, + ByIdle, + ByUptime, + ByStop, + ByReason, + ByRtt, +} +``` + +**Step 7: Update ConnzHandler to support state filtering and new sorts** + +Replace `HandleConnz` method in `src/NATS.Server/Monitoring/ConnzHandler.cs`: + +```csharp +public Connz HandleConnz(HttpContext ctx) +{ + var opts = ParseQueryParams(ctx); + var now = DateTime.UtcNow; + + var connInfos = new List(); + + // Collect open connections + if (opts.State is ConnState.Open or ConnState.All) + { + var clients = server.GetClients().ToArray(); + connInfos.AddRange(clients.Select(c => BuildConnInfo(c, now, opts))); + } + + // Collect closed connections + if (opts.State is ConnState.Closed or ConnState.All) + { + connInfos.AddRange(server.GetClosedClients().Select(c => BuildClosedConnInfo(c, now, opts))); + } + + // Validate sort options that require closed state + if (opts.Sort is SortOpt.ByStop or SortOpt.ByReason && opts.State == ConnState.Open) + opts.Sort = SortOpt.ByCid; // Fallback + + // Sort + connInfos = opts.Sort switch + { + SortOpt.ByCid => connInfos.OrderBy(c => c.Cid).ToList(), + SortOpt.ByStart => connInfos.OrderBy(c => c.Start).ToList(), + SortOpt.BySubs => connInfos.OrderByDescending(c => c.NumSubs).ToList(), + SortOpt.ByPending => connInfos.OrderByDescending(c => c.Pending).ToList(), + SortOpt.ByMsgsTo => connInfos.OrderByDescending(c => c.OutMsgs).ToList(), + SortOpt.ByMsgsFrom => connInfos.OrderByDescending(c => c.InMsgs).ToList(), + SortOpt.ByBytesTo => connInfos.OrderByDescending(c => c.OutBytes).ToList(), + SortOpt.ByBytesFrom => connInfos.OrderByDescending(c => c.InBytes).ToList(), + SortOpt.ByLast => connInfos.OrderByDescending(c => c.LastActivity).ToList(), + SortOpt.ByIdle => connInfos.OrderByDescending(c => now - c.LastActivity).ToList(), + SortOpt.ByUptime => connInfos.OrderByDescending(c => now - c.Start).ToList(), + SortOpt.ByStop => connInfos.OrderByDescending(c => c.Stop ?? DateTime.MinValue).ToList(), + SortOpt.ByReason => connInfos.OrderBy(c => c.Reason).ToList(), + SortOpt.ByRtt => connInfos.OrderByDescending(c => c.Rtt).ToList(), + _ => connInfos.OrderBy(c => c.Cid).ToList(), + }; + + var total = connInfos.Count; + var paged = connInfos.Skip(opts.Offset).Take(opts.Limit).ToArray(); + + return new Connz + { + Id = server.ServerId, + Now = now, + NumConns = paged.Length, + Total = total, + Offset = opts.Offset, + Limit = opts.Limit, + Conns = paged, + }; +} +``` + +**Step 8: Add BuildClosedConnInfo helper** + +Add after `BuildConnInfo` in `ConnzHandler.cs`: + +```csharp +private static ConnInfo BuildClosedConnInfo(ClosedClient closed, DateTime now, ConnzOptions opts) +{ + return new ConnInfo + { + Cid = closed.Cid, + Kind = "Client", + Type = "Client", + Ip = closed.Ip, + Port = closed.Port, + Start = closed.Start, + Stop = closed.Stop, + LastActivity = closed.Stop, + Uptime = FormatDuration(closed.Stop - closed.Start), + Idle = FormatDuration(now - closed.Stop), + InMsgs = closed.InMsgs, + OutMsgs = closed.OutMsgs, + InBytes = closed.InBytes, + OutBytes = closed.OutBytes, + NumSubs = closed.NumSubs, + Name = closed.Name, + Lang = closed.Lang, + Version = closed.Version, + Reason = closed.Reason, + Rtt = FormatRtt(closed.Rtt), + TlsVersion = closed.TlsVersion, + TlsCipherSuite = closed.TlsCipherSuite, + }; +} + +private static string FormatRtt(TimeSpan rtt) +{ + if (rtt == TimeSpan.Zero) return ""; + if (rtt.TotalMilliseconds < 1) + return $"{rtt.TotalMicroseconds:F3}µs"; + if (rtt.TotalSeconds < 1) + return $"{rtt.TotalMilliseconds:F3}ms"; + return $"{rtt.TotalSeconds:F3}s"; +} +``` + +**Step 9: Update BuildConnInfo to include RTT** + +In the existing `BuildConnInfo` method, add after `TlsCipherSuite` (line 75): + +```csharp +Rtt = FormatRtt(client.Rtt), +``` + +**Step 10: Update ParseQueryParams for new sort options and state filter** + +In `ParseQueryParams`, add the new sort cases and state parsing: + +```csharp +// Add to the sort switch: +"stop" => SortOpt.ByStop, +"reason" => SortOpt.ByReason, +"rtt" => SortOpt.ByRtt, + +// Add state parsing after the existing limit parsing: +if (q.TryGetValue("state", out var state)) +{ + opts.State = state.ToString().ToLowerInvariant() switch + { + "open" => ConnState.Open, + "closed" => ConnState.Closed, + "all" => ConnState.All, + _ => ConnState.Open, + }; +} +``` + +**Step 11: Run tests** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~Connz_state_closed|FullyQualifiedName~Connz_sort_by_stop|FullyQualifiedName~Connz_sort_by_reason|FullyQualifiedName~RttTests" -v normal` +Expected: All PASS + +**Step 12: Commit** + +```bash +git add src/NATS.Server/Monitoring/ClosedClient.cs src/NATS.Server/NatsServer.cs src/NATS.Server/Monitoring/ConnzHandler.cs src/NATS.Server/Monitoring/Connz.cs tests/NATS.Server.Tests/MonitorTests.cs tests/NATS.Server.Tests/RttTests.cs +git commit -m "feat: add closed connection tracking, state filtering, ByStop/ByReason/ByRtt sorting" +``` + +--- + +### Task 7: Implement /subz endpoint + +**Files:** +- Create: `src/NATS.Server/Monitoring/Subsz.cs` +- Create: `src/NATS.Server/Monitoring/SubszHandler.cs` +- Modify: `src/NATS.Server/Monitoring/MonitorServer.cs:78-87` +- Modify: `src/NATS.Server/Subscriptions/SubList.cs` (add Stats method) + +**Step 1: Write failing test** + +Create `tests/NATS.Server.Tests/SubszTests.cs`: + +```csharp +using System.Net; +using System.Net.Http.Json; +using System.Net.Sockets; +using Microsoft.Extensions.Logging.Abstractions; +using NATS.Server.Monitoring; + +namespace NATS.Server.Tests; + +public class SubszTests : IAsyncLifetime +{ + private readonly NatsServer _server; + private readonly int _natsPort; + private readonly int _monitorPort; + private readonly CancellationTokenSource _cts = new(); + private readonly HttpClient _http = new(); + + public SubszTests() + { + _natsPort = GetFreePort(); + _monitorPort = GetFreePort(); + _server = new NatsServer( + new NatsOptions { Port = _natsPort, MonitorPort = _monitorPort }, + NullLoggerFactory.Instance); + } + + public async Task InitializeAsync() + { + _ = _server.StartAsync(_cts.Token); + await _server.WaitForReadyAsync(); + for (int i = 0; i < 50; i++) + { + try + { + var resp = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/healthz"); + if (resp.IsSuccessStatusCode) break; + } + catch (HttpRequestException) { } + await Task.Delay(50); + } + } + + public async Task DisposeAsync() + { + _http.Dispose(); + await _cts.CancelAsync(); + _server.Dispose(); + } + + [Fact] + public async Task Subz_returns_empty_when_no_subscriptions() + { + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/subz"); + response.StatusCode.ShouldBe(HttpStatusCode.OK); + + var subz = await response.Content.ReadFromJsonAsync(); + subz.ShouldNotBeNull(); + subz.NumSubs.ShouldBe(0u); + } + + [Fact] + public async Task Subz_returns_count_with_subscriptions() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + using var stream = new NetworkStream(sock); + var buf = new byte[4096]; + _ = await stream.ReadAsync(buf); + await stream.WriteAsync("CONNECT {}\r\nSUB foo 1\r\nSUB bar 2\r\nSUB baz.* 3\r\n"u8.ToArray()); + await Task.Delay(200); + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/subz"); + var subz = await response.Content.ReadFromJsonAsync(); + subz.ShouldNotBeNull(); + subz.NumSubs.ShouldBeGreaterThanOrEqualTo(3u); + } + + [Fact] + public async Task Subz_subs_true_returns_subscription_details() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + using var stream = new NetworkStream(sock); + var buf = new byte[4096]; + _ = await stream.ReadAsync(buf); + await stream.WriteAsync("CONNECT {}\r\nSUB foo 1\r\n"u8.ToArray()); + await Task.Delay(200); + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/subz?subs=true"); + var subz = await response.Content.ReadFromJsonAsync(); + subz.ShouldNotBeNull(); + subz.Subs.ShouldNotBeEmpty(); + subz.Subs.ShouldContain(s => s.Subject == "foo"); + } + + [Fact] + public async Task Subz_test_subject_filters_matching_subs() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + using var stream = new NetworkStream(sock); + var buf = new byte[4096]; + _ = await stream.ReadAsync(buf); + await stream.WriteAsync("CONNECT {}\r\nSUB foo.* 1\r\nSUB bar 2\r\n"u8.ToArray()); + await Task.Delay(200); + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/subz?subs=true&test=foo.hello"); + var subz = await response.Content.ReadFromJsonAsync(); + subz.ShouldNotBeNull(); + subz.Subs.ShouldContain(s => s.Subject == "foo.*"); + subz.Subs.ShouldNotContain(s => s.Subject == "bar"); + } + + [Fact] + public async Task Subz_pagination() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sock.ConnectAsync(new IPEndPoint(IPAddress.Loopback, _natsPort)); + using var stream = new NetworkStream(sock); + var buf = new byte[4096]; + _ = await stream.ReadAsync(buf); + await stream.WriteAsync("CONNECT {}\r\nSUB a 1\r\nSUB b 2\r\nSUB c 3\r\n"u8.ToArray()); + await Task.Delay(200); + + var response = await _http.GetAsync($"http://127.0.0.1:{_monitorPort}/subz?subs=true&offset=0&limit=2"); + var subz = await response.Content.ReadFromJsonAsync(); + subz.ShouldNotBeNull(); + subz.Subs.Length.ShouldBe(2); + subz.Total.ShouldBeGreaterThanOrEqualTo(3); + } + + private static int GetFreePort() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + sock.Bind(new IPEndPoint(IPAddress.Loopback, 0)); + return ((IPEndPoint)sock.LocalEndPoint!).Port; + } +} +``` + +**Step 2: Run tests to verify they fail** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~SubszTests" -v normal` +Expected: FAIL (subz returns empty `{}`) + +**Step 3: Add SubList.GetAllSubscriptions() method** + +In `src/NATS.Server/Subscriptions/SubList.cs`, add a public method to enumerate all subscriptions. Add after the `Count` property (line 34): + +```csharp +/// +/// Returns all subscriptions in the trie. For monitoring only. +/// +public List GetAllSubscriptions() +{ + _lock.EnterReadLock(); + try + { + var result = new List(); + CollectAll(_root, result); + return result; + } + finally + { + _lock.ExitReadLock(); + } +} + +private static void CollectAll(TrieLevel level, List result) +{ + foreach (var (_, node) in level.Nodes) + { + foreach (var sub in node.PlainSubs) result.Add(sub); + foreach (var (_, qset) in node.QueueSubs) + foreach (var sub in qset) result.Add(sub); + if (node.Next != null) CollectAll(node.Next, result); + } + if (level.Pwc != null) + { + foreach (var sub in level.Pwc.PlainSubs) result.Add(sub); + foreach (var (_, qset) in level.Pwc.QueueSubs) + foreach (var sub in qset) result.Add(sub); + if (level.Pwc.Next != null) CollectAll(level.Pwc.Next, result); + } + if (level.Fwc != null) + { + foreach (var sub in level.Fwc.PlainSubs) result.Add(sub); + foreach (var (_, qset) in level.Fwc.QueueSubs) + foreach (var sub in qset) result.Add(sub); + if (level.Fwc.Next != null) CollectAll(level.Fwc.Next, result); + } +} + +/// +/// Returns the current number of entries in the cache. +/// +public int CacheCount +{ + get + { + _lock.EnterReadLock(); + try { return _cache?.Count ?? 0; } + finally { _lock.ExitReadLock(); } + } +} +``` + +**Step 4: Create Subsz model** + +Create `src/NATS.Server/Monitoring/Subsz.cs`: + +```csharp +using System.Text.Json.Serialization; + +namespace NATS.Server.Monitoring; + +/// +/// Subscription information response. Corresponds to Go server/monitor.go Subsz struct. +/// +public sealed class Subsz +{ + [JsonPropertyName("server_id")] + public string Id { get; set; } = ""; + + [JsonPropertyName("now")] + public DateTime Now { get; set; } + + [JsonPropertyName("num_subscriptions")] + public uint NumSubs { get; set; } + + [JsonPropertyName("num_cache")] + public int NumCache { get; set; } + + [JsonPropertyName("total")] + public int Total { get; set; } + + [JsonPropertyName("offset")] + public int Offset { get; set; } + + [JsonPropertyName("limit")] + public int Limit { get; set; } + + [JsonPropertyName("subscriptions")] + public SubDetail[] Subs { get; set; } = []; +} + +/// +/// Options passed to Subsz() for filtering. +/// Corresponds to Go server/monitor.go SubszOptions struct. +/// +public sealed class SubszOptions +{ + public int Offset { get; set; } + public int Limit { get; set; } = 1024; + public bool Subscriptions { get; set; } + public string Account { get; set; } = ""; + public string Test { get; set; } = ""; +} +``` + +**Step 5: Create SubszHandler** + +Create `src/NATS.Server/Monitoring/SubszHandler.cs`: + +```csharp +using Microsoft.AspNetCore.Http; +using NATS.Server.Subscriptions; + +namespace NATS.Server.Monitoring; + +/// +/// Handles /subz endpoint requests, returning subscription information. +/// Corresponds to Go server/monitor.go handleSubsz function. +/// +public sealed class SubszHandler(NatsServer server) +{ + public Subsz HandleSubsz(HttpContext ctx) + { + var opts = ParseQueryParams(ctx); + var now = DateTime.UtcNow; + + // Collect subscriptions from all accounts (or filtered) + var allSubs = new List(); + foreach (var account in server.GetAccounts()) + { + if (!string.IsNullOrEmpty(opts.Account) && account.Name != opts.Account) + continue; + allSubs.AddRange(account.SubList.GetAllSubscriptions()); + } + + // Filter by test subject if provided + if (!string.IsNullOrEmpty(opts.Test)) + { + allSubs = allSubs.Where(s => SubjectMatch.MatchLiteral(opts.Test, s.Subject)).ToList(); + } + + var total = allSubs.Count; + var numSubs = server.GetAccounts() + .Where(a => string.IsNullOrEmpty(opts.Account) || a.Name == opts.Account) + .Aggregate(0u, (sum, a) => sum + a.SubList.Count); + var numCache = server.GetAccounts() + .Where(a => string.IsNullOrEmpty(opts.Account) || a.Name == opts.Account) + .Sum(a => a.SubList.CacheCount); + + SubDetail[] details = []; + if (opts.Subscriptions) + { + details = allSubs + .Skip(opts.Offset) + .Take(opts.Limit) + .Select(s => new SubDetail + { + Subject = s.Subject, + Queue = s.Queue ?? "", + Sid = s.Sid, + Msgs = Interlocked.Read(ref s.MessageCount), + Max = s.MaxMessages, + Cid = s.Client?.Id ?? 0, + }) + .ToArray(); + } + + return new Subsz + { + Id = server.ServerId, + Now = now, + NumSubs = numSubs, + NumCache = numCache, + Total = total, + Offset = opts.Offset, + Limit = opts.Limit, + Subs = details, + }; + } + + private static SubszOptions ParseQueryParams(HttpContext ctx) + { + var q = ctx.Request.Query; + var opts = new SubszOptions(); + + if (q.TryGetValue("subs", out var subs)) + opts.Subscriptions = subs == "true" || subs == "1" || subs == "detail"; + + if (q.TryGetValue("offset", out var offset) && int.TryParse(offset, out var o)) + opts.Offset = o; + + if (q.TryGetValue("limit", out var limit) && int.TryParse(limit, out var l)) + opts.Limit = l; + + if (q.TryGetValue("acc", out var acc)) + opts.Account = acc.ToString(); + + if (q.TryGetValue("test", out var test)) + opts.Test = test.ToString(); + + return opts; + } +} +``` + +**Step 6: Wire into MonitorServer** + +In `src/NATS.Server/Monitoring/MonitorServer.cs`: + +Add field after `_connzHandler` (line 17): + +```csharp +private readonly SubszHandler _subszHandler; +``` + +Initialize in constructor after `_connzHandler` (line 31): + +```csharp +_subszHandler = new SubszHandler(server); +``` + +Replace the two subz stubs (lines 78-87) with: + +```csharp +_app.MapGet(basePath + "/subz", (HttpContext ctx) => +{ + stats.HttpReqStats.AddOrUpdate("/subz", 1, (_, v) => v + 1); + return Results.Ok(_subszHandler.HandleSubsz(ctx)); +}); +_app.MapGet(basePath + "/subscriptionsz", (HttpContext ctx) => +{ + stats.HttpReqStats.AddOrUpdate("/subscriptionsz", 1, (_, v) => v + 1); + return Results.Ok(_subszHandler.HandleSubsz(ctx)); +}); +``` + +**Step 7: Run tests** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~SubszTests" -v normal` +Expected: All PASS + +**Step 8: Commit** + +```bash +git add src/NATS.Server/Monitoring/Subsz.cs src/NATS.Server/Monitoring/SubszHandler.cs src/NATS.Server/Monitoring/MonitorServer.cs src/NATS.Server/Subscriptions/SubList.cs tests/NATS.Server.Tests/SubszTests.cs +git commit -m "feat: implement /subz endpoint with account filter, test subject, and pagination" +``` + +--- + +### Task 8: Implement TLS cert-to-user mapping (TlsMap) + +**Files:** +- Create: `src/NATS.Server/Auth/TlsMapAuthenticator.cs` +- Modify: `src/NATS.Server/Auth/IAuthenticator.cs:10-14` +- Modify: `src/NATS.Server/Auth/AuthService.cs:30-67` +- Modify: `src/NATS.Server/NatsClient.cs:345-403` + +**Step 1: Write failing test** + +Create `tests/NATS.Server.Tests/TlsMapAuthenticatorTests.cs`: + +```csharp +using System.Security.Cryptography; +using System.Security.Cryptography.X509Certificates; +using NATS.Server.Auth; + +namespace NATS.Server.Tests; + +public class TlsMapAuthenticatorTests +{ + private static X509Certificate2 CreateSelfSignedCert(string cn) + { + using var rsa = RSA.Create(2048); + var req = new CertificateRequest($"CN={cn}", rsa, HashAlgorithmName.SHA256, RSASignaturePadding.Pkcs1); + return req.CreateSelfSigned(DateTimeOffset.UtcNow, DateTimeOffset.UtcNow.AddYears(1)); + } + + private static X509Certificate2 CreateCertWithDn(string dn) + { + using var rsa = RSA.Create(2048); + var req = new CertificateRequest(dn, rsa, HashAlgorithmName.SHA256, RSASignaturePadding.Pkcs1); + return req.CreateSelfSigned(DateTimeOffset.UtcNow, DateTimeOffset.UtcNow.AddYears(1)); + } + + [Fact] + public void Matches_user_by_cn() + { + var users = new List + { + new() { Username = "alice", Password = "" }, + }; + var auth = new TlsMapAuthenticator(users); + var cert = CreateSelfSignedCert("alice"); + + var ctx = new ClientAuthContext + { + Opts = new Protocol.ClientOptions(), + Nonce = [], + ClientCertificate = cert, + }; + + var result = auth.Authenticate(ctx); + result.ShouldNotBeNull(); + result.Identity.ShouldBe("alice"); + } + + [Fact] + public void Returns_null_when_no_cert() + { + var users = new List + { + new() { Username = "alice", Password = "" }, + }; + var auth = new TlsMapAuthenticator(users); + + var ctx = new ClientAuthContext + { + Opts = new Protocol.ClientOptions(), + Nonce = [], + ClientCertificate = null, + }; + + var result = auth.Authenticate(ctx); + result.ShouldBeNull(); + } + + [Fact] + public void Returns_null_when_cn_doesnt_match() + { + var users = new List + { + new() { Username = "alice", Password = "" }, + }; + var auth = new TlsMapAuthenticator(users); + var cert = CreateSelfSignedCert("bob"); + + var ctx = new ClientAuthContext + { + Opts = new Protocol.ClientOptions(), + Nonce = [], + ClientCertificate = cert, + }; + + var result = auth.Authenticate(ctx); + result.ShouldBeNull(); + } + + [Fact] + public void Matches_by_full_dn_string() + { + var users = new List + { + new() { Username = "CN=alice, O=TestOrg", Password = "" }, + }; + var auth = new TlsMapAuthenticator(users); + var cert = CreateCertWithDn("CN=alice, O=TestOrg"); + + var ctx = new ClientAuthContext + { + Opts = new Protocol.ClientOptions(), + Nonce = [], + ClientCertificate = cert, + }; + + var result = auth.Authenticate(ctx); + result.ShouldNotBeNull(); + result.Identity.ShouldBe("CN=alice, O=TestOrg"); + } + + [Fact] + public void Returns_permissions_from_matched_user() + { + var perms = new Permissions + { + Publish = new SubjectPermission { Allow = ["foo.>"] }, + }; + var users = new List + { + new() { Username = "alice", Password = "", Permissions = perms }, + }; + var auth = new TlsMapAuthenticator(users); + var cert = CreateSelfSignedCert("alice"); + + var ctx = new ClientAuthContext + { + Opts = new Protocol.ClientOptions(), + Nonce = [], + ClientCertificate = cert, + }; + + var result = auth.Authenticate(ctx); + result.ShouldNotBeNull(); + result.Permissions.ShouldNotBeNull(); + result.Permissions.Publish!.Allow.ShouldContain("foo.>"); + } +} +``` + +**Step 2: Run test to verify failure** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~TlsMapAuthenticatorTests" -v normal` +Expected: Compilation error — `TlsMapAuthenticator` and `ClientCertificate` don't exist + +**Step 3: Add ClientCertificate to ClientAuthContext** + +In `src/NATS.Server/Auth/IAuthenticator.cs`, add to `ClientAuthContext`: + +```csharp +using System.Security.Cryptography.X509Certificates; +using NATS.Server.Protocol; + +namespace NATS.Server.Auth; + +public interface IAuthenticator +{ + AuthResult? Authenticate(ClientAuthContext context); +} + +public sealed class ClientAuthContext +{ + public required ClientOptions Opts { get; init; } + public required byte[] Nonce { get; init; } + public X509Certificate2? ClientCertificate { get; init; } +} +``` + +**Step 4: Create TlsMapAuthenticator** + +Create `src/NATS.Server/Auth/TlsMapAuthenticator.cs`: + +```csharp +using System.Security.Cryptography.X509Certificates; + +namespace NATS.Server.Auth; + +/// +/// Authenticates clients by mapping TLS certificate subject DN to configured users. +/// Corresponds to Go server/auth.go checkClientTLSCertSubject. +/// +public sealed class TlsMapAuthenticator : IAuthenticator +{ + private readonly Dictionary _usersByDn; + private readonly Dictionary _usersByCn; + + public TlsMapAuthenticator(IReadOnlyList users) + { + _usersByDn = new Dictionary(StringComparer.OrdinalIgnoreCase); + _usersByCn = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var user in users) + { + _usersByDn[user.Username] = user; + // Also index by just the username as a potential CN match + _usersByCn[user.Username] = user; + } + } + + public AuthResult? Authenticate(ClientAuthContext context) + { + var cert = context.ClientCertificate; + if (cert == null) + return null; + + var dn = cert.SubjectName; + var dnString = dn.Name; // RFC 2253 format: "CN=alice, O=TestOrg" + + // Try exact DN match first + if (_usersByDn.TryGetValue(dnString, out var user)) + { + return BuildResult(user); + } + + // Try CN extraction + var cn = ExtractCn(dn); + if (cn != null && _usersByCn.TryGetValue(cn, out user)) + { + return BuildResult(user); + } + + return null; + } + + private static string? ExtractCn(X500DistinguishedName dn) + { + // Parse the DN to extract the CN component + var dnString = dn.Name; + foreach (var rdn in dnString.Split(',', StringSplitOptions.TrimEntries)) + { + if (rdn.StartsWith("CN=", StringComparison.OrdinalIgnoreCase)) + { + return rdn[3..]; + } + } + return null; + } + + private static AuthResult BuildResult(User user) + { + return new AuthResult + { + Identity = user.Username, + AccountName = user.Account, + Permissions = user.Permissions, + Expiry = user.ConnectionDeadline, + }; + } +} +``` + +**Step 5: Wire TlsMap into AuthService** + +In `src/NATS.Server/Auth/AuthService.cs`, add TlsMap authenticator in `Build()`. Add before the NKeys check (before line 39): + +```csharp +// TLS certificate mapping (highest priority when enabled) +if (options.TlsMap && options.TlsVerify && options.Users is { Count: > 0 }) +{ + authenticators.Add(new TlsMapAuthenticator(options.Users)); + authRequired = true; +} +``` + +**Step 6: Pass client certificate to auth context** + +In `src/NATS.Server/NatsClient.cs`, in `ProcessConnectAsync`, update the `ClientAuthContext` construction (around line 353): + +```csharp +var context = new ClientAuthContext +{ + Opts = ClientOpts, + Nonce = _nonce ?? [], + ClientCertificate = TlsState?.PeerCert, +}; +``` + +**Step 7: Run tests** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~TlsMapAuthenticatorTests" -v normal` +Expected: All PASS + +**Step 8: Run full test suite to verify no regressions** + +Run: `dotnet test tests/NATS.Server.Tests -v normal` +Expected: All existing tests pass + +**Step 9: Commit** + +```bash +git add src/NATS.Server/Auth/TlsMapAuthenticator.cs src/NATS.Server/Auth/IAuthenticator.cs src/NATS.Server/Auth/AuthService.cs src/NATS.Server/NatsClient.cs tests/NATS.Server.Tests/TlsMapAuthenticatorTests.cs +git commit -m "feat: implement TLS cert-to-user mapping via X500 DN matching" +``` + +--- + +### Task 9: Add TLS rate limiter test + +**Files:** +- Create: `tests/NATS.Server.Tests/TlsRateLimiterTests.cs` + +**Step 1: Write test** + +```csharp +using NATS.Server.Tls; + +namespace NATS.Server.Tests; + +public class TlsRateLimiterTests +{ + [Fact] + public async Task Rate_limiter_allows_configured_tokens_per_second() + { + using var limiter = new TlsRateLimiter(5); + + // Should allow 5 tokens immediately + for (int i = 0; i < 5; i++) + { + using var cts = new CancellationTokenSource(100); + await limiter.WaitAsync(cts.Token); // Should not throw + } + + // 6th token should block (no refill yet) + using var blockCts = new CancellationTokenSource(200); + var blocked = false; + try + { + await limiter.WaitAsync(blockCts.Token); + } + catch (OperationCanceledException) + { + blocked = true; + } + blocked.ShouldBeTrue("6th token should be blocked before refill"); + } + + [Fact] + public async Task Rate_limiter_refills_after_one_second() + { + using var limiter = new TlsRateLimiter(2); + + // Consume all tokens + await limiter.WaitAsync(CancellationToken.None); + await limiter.WaitAsync(CancellationToken.None); + + // Wait for refill + await Task.Delay(1200); + + // Should have tokens again + using var cts = new CancellationTokenSource(200); + await limiter.WaitAsync(cts.Token); // Should not throw + } +} +``` + +**Step 2: Run tests** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~TlsRateLimiterTests" -v normal` +Expected: All PASS + +**Step 3: Commit** + +```bash +git add tests/NATS.Server.Tests/TlsRateLimiterTests.cs +git commit -m "test: add TLS rate limiter unit tests" +``` + +--- + +### Task 10: File logging tests + +**Files:** +- Create: `tests/NATS.Server.Tests/LoggingTests.cs` + +**Step 1: Write test** + +```csharp +using Serilog; + +namespace NATS.Server.Tests; + +public class LoggingTests : IDisposable +{ + private readonly string _logDir; + + public LoggingTests() + { + _logDir = Path.Combine(Path.GetTempPath(), $"nats-log-test-{Guid.NewGuid():N}"); + Directory.CreateDirectory(_logDir); + } + + public void Dispose() + { + try { Directory.Delete(_logDir, true); } catch { } + } + + [Fact] + public void File_sink_creates_log_file() + { + var logPath = Path.Combine(_logDir, "test.log"); + + using var logger = new LoggerConfiguration() + .WriteTo.File(logPath) + .CreateLogger(); + + logger.Information("Hello from test"); + logger.Dispose(); + + File.Exists(logPath).ShouldBeTrue(); + var content = File.ReadAllText(logPath); + content.ShouldContain("Hello from test"); + } + + [Fact] + public void File_sink_rotates_on_size_limit() + { + var logPath = Path.Combine(_logDir, "rotate.log"); + + using var logger = new LoggerConfiguration() + .WriteTo.File( + logPath, + fileSizeLimitBytes: 200, + rollOnFileSizeLimit: true, + retainedFileCountLimit: 3) + .CreateLogger(); + + // Write enough to trigger rotation + for (int i = 0; i < 50; i++) + logger.Information("Log message number {Number} with some padding text", i); + + logger.Dispose(); + + // Should have created rotated files + var logFiles = Directory.GetFiles(_logDir, "rotate*.log"); + logFiles.Length.ShouldBeGreaterThan(1); + } +} +``` + +**Step 2: Add Serilog.Sinks.File to test project** + +In `tests/NATS.Server.Tests/NATS.Server.Tests.csproj`, add: + +```xml + +``` + +**Step 3: Run tests** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~LoggingTests" -v normal` +Expected: All PASS + +**Step 4: Commit** + +```bash +git add tests/NATS.Server.Tests/LoggingTests.cs tests/NATS.Server.Tests/NATS.Server.Tests.csproj +git commit -m "test: add file logging and rotation tests" +``` + +--- + +### Task 11: Run full test suite and verify + +**Step 1: Run all tests** + +Run: `dotnet test tests/NATS.Server.Tests -v normal` +Expected: All tests pass with no regressions + +**Step 2: Build the solution** + +Run: `dotnet build` +Expected: Build succeeded, no warnings + +**Step 3: Commit any fixups** + +If any tests need adjustments, fix and commit: + +```bash +git add -A +git commit -m "fix: resolve test issues from sections 7-10 implementation" +``` + +--- + +### Task 12: Update differences.md to reflect implementation + +**Files:** +- Modify: `differences.md` + +**Step 1: Update Section 7 (Monitoring)** + +- `/subz` / `/subscriptionsz`: Change from `Stub` to `Y` +- Connz sorting: Add `ByStop`, `ByReason`, `ByRtt` as implemented — change "ByStop, ByReason" from missing to Y +- Connz state filtering: Note `state=open|closed|all` now supported +- Connz subscription detail: Note populated (was already in code but now confirmed) + +**Step 2: Update Section 8 (TLS)** + +- TLS rate limiting: Change "Property only" to `Y` with note about semaphore implementation +- Cert subject→user mapping: Change `N` to `Y` with note about X500DistinguishedName + +**Step 3: Update Section 9 (Logging)** + +- File logging with rotation: Change `N` to `Y` +- Trace mode: Change `N` to `Y` with note about `-V` flag +- Debug mode: Change `N` to `Y` with note about `-D` flag +- Color output on TTY: Change `N` to `Y` +- Timestamp format control: Change `N` to `Y` +- Log reopening (SIGUSR1): Change `N` to `Y` +- Syslog: Change `N` to `Y` + +**Step 4: Update Section 10 (Ping/Pong)** + +- RTT-based first PING delay: Change `N` to `Y` +- RTT tracking: Change `N` to `Y` +- Stale connection watcher: Change `N` to `Y` with note about PeriodicTimer approach + stale stats + +**Step 5: Update Summary section** + +Remove items that are now implemented from the critical gaps list. + +**Step 6: Commit** + +```bash +git add differences.md +git commit -m "docs: update differences.md sections 7-10 to reflect implemented features" +``` + +--- + +## Parallelization Notes + +These tasks can be parallelized in groups: + +**Group A (can run in parallel):** +- Task 0 (NuGet deps) — must complete first +- Task 1 (NatsOptions) — must complete first + +**Group B (after Group A, can run in parallel):** +- Tasks 2-3 (Logging CLI + SIGUSR1) +- Tasks 4-5 (RTT + Stale stats) +- Task 7 (Subz endpoint) +- Task 8 (TlsMap auth) +- Task 9 (TLS rate limiter test) + +**Group C (after Group B):** +- Task 6 (Closed connections + connz) — depends on Task 4 (RTT) for `client.Rtt` +- Task 10 (Logging tests) + +**Group D (after all):** +- Task 11 (Full test suite) +- Task 12 (Update differences.md) diff --git a/docs/plans/2026-02-23-sections-7-10-gaps-plan.md.tasks.json b/docs/plans/2026-02-23-sections-7-10-gaps-plan.md.tasks.json new file mode 100644 index 0000000..3a9d029 --- /dev/null +++ b/docs/plans/2026-02-23-sections-7-10-gaps-plan.md.tasks.json @@ -0,0 +1,19 @@ +{ + "planPath": "docs/plans/2026-02-23-sections-7-10-gaps-plan.md", + "tasks": [ + {"id": 6, "subject": "Task 0: Add NuGet dependencies for logging sinks", "status": "pending"}, + {"id": 7, "subject": "Task 1: Add logging and ping/pong options to NatsOptions", "status": "pending", "blockedBy": [6]}, + {"id": 8, "subject": "Task 2: Add CLI flag parsing for logging and debug/trace", "status": "pending", "blockedBy": [6, 7]}, + {"id": 9, "subject": "Task 3: Implement log reopening on SIGUSR1", "status": "pending", "blockedBy": [6, 7]}, + {"id": 10, "subject": "Task 4: Add RTT tracking to NatsClient", "status": "pending", "blockedBy": [6, 7]}, + {"id": 11, "subject": "Task 5: Add stale connection stats and expose in varz", "status": "pending", "blockedBy": [6, 7]}, + {"id": 12, "subject": "Task 6: Add closed connection tracking and connz state filtering", "status": "pending", "blockedBy": [10]}, + {"id": 13, "subject": "Task 7: Implement /subz endpoint", "status": "pending", "blockedBy": [6, 7]}, + {"id": 14, "subject": "Task 8: Implement TLS cert-to-user mapping (TlsMap)", "status": "pending", "blockedBy": [6, 7]}, + {"id": 15, "subject": "Task 9: Add TLS rate limiter test", "status": "pending", "blockedBy": [6, 7]}, + {"id": 16, "subject": "Task 10: File logging tests", "status": "pending", "blockedBy": [6, 7, 8]}, + {"id": 17, "subject": "Task 11: Run full test suite and verify", "status": "pending", "blockedBy": [8, 9, 10, 11, 12, 13, 14, 15, 16]}, + {"id": 18, "subject": "Task 12: Update differences.md", "status": "pending", "blockedBy": [17]} + ], + "lastUpdated": "2026-02-23T00:00:00Z" +}