From 149c8525109dda993f5bfd8c06110348fa75f9a7 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 22 Feb 2026 23:31:01 -0500 Subject: [PATCH] docs: add core lifecycle implementation plan with 12 tasks Detailed step-by-step plan covering ClosedState enum, close reason tracking, ephemeral port, graceful shutdown, flush-before-close, lame duck mode, PID/ports files, NKey stubs, signal handling, and differences.md update. --- docs/plans/2026-02-22-core-lifecycle-plan.md | 1631 +++++++++++++++++ ...26-02-22-core-lifecycle-plan.md.tasks.json | 18 + 2 files changed, 1649 insertions(+) create mode 100644 docs/plans/2026-02-22-core-lifecycle-plan.md create mode 100644 docs/plans/2026-02-22-core-lifecycle-plan.md.tasks.json diff --git a/docs/plans/2026-02-22-core-lifecycle-plan.md b/docs/plans/2026-02-22-core-lifecycle-plan.md new file mode 100644 index 0000000..3a1a8ec --- /dev/null +++ b/docs/plans/2026-02-22-core-lifecycle-plan.md @@ -0,0 +1,1631 @@ +# Core Server Lifecycle Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task. + +**Goal:** Close all section 1 (Core Server Lifecycle) gaps from `differences.md` — ClosedState enum, accept loop backoff, ephemeral port, graceful shutdown, task tracking, flush-before-close, lame duck mode, PID/ports files, signal handling, and stubs for NKey identity, $SYS account, config file, and profiling. + +**Architecture:** Add shutdown coordination infrastructure to `NatsServer` (quit CTS, shutdown TCS, active-client tracking). Add `ClosedState` enum and close-reason tracking to `NatsClient`. Add `LameDuckShutdownAsync` for graceful draining. Wire Unix signal handling in the host via `PosixSignalRegistration`. 
+ +**Tech Stack:** .NET 10 / C# 14, xUnit 3, Shouldly, NATS.NKeys (already referenced), System.IO.Pipelines + +--- + +### Task 0: Create ClosedState enum + +**Files:** +- Create: `src/NATS.Server/ClosedState.cs` + +**Step 1: Create ClosedState.cs with full Go enum** + +```csharp +namespace NATS.Server; + +/// +/// Reason a client connection was closed. Ported from Go client.go:188-228. +/// +public enum ClosedState +{ + ClientClosed = 1, + AuthenticationTimeout, + AuthenticationViolation, + TLSHandshakeError, + SlowConsumerPendingBytes, + SlowConsumerWriteDeadline, + WriteError, + ReadError, + ParseError, + StaleConnection, + ProtocolViolation, + BadClientProtocolVersion, + WrongPort, + MaxAccountConnectionsExceeded, + MaxConnectionsExceeded, + MaxPayloadExceeded, + MaxControlLineExceeded, + MaxSubscriptionsExceeded, + DuplicateRoute, + RouteRemoved, + ServerShutdown, + AuthenticationExpired, + WrongGateway, + MissingAccount, + Revocation, + InternalClient, + MsgHeaderViolation, + NoRespondersRequiresHeaders, + ClusterNameConflict, + DuplicateRemoteLeafnodeConnection, + DuplicateClientID, + DuplicateServerName, + MinimumVersionRequired, + ClusterNamesIdentical, + Kicked, + ProxyNotTrusted, + ProxyRequired, +} +``` + +**Step 2: Verify it compiles** + +Run: `dotnet build src/NATS.Server` +Expected: Build succeeded + +**Step 3: Commit** + +```bash +git add src/NATS.Server/ClosedState.cs +git commit -m "feat: add ClosedState enum ported from Go client.go" +``` + +--- + +### Task 1: Add close reason tracking to NatsClient + +**Files:** +- Modify: `src/NATS.Server/NatsClient.cs` + +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/ServerTests.cs`: + +```csharp +public class CloseReasonTests : IAsyncLifetime +{ + private readonly NatsServer _server; + private readonly int _port; + private readonly CancellationTokenSource _cts = new(); + + public CloseReasonTests() + { + _port = GetFreePort(); + _server = new NatsServer(new NatsOptions { Port = _port }, 
NullLoggerFactory.Instance); + } + + public async Task InitializeAsync() + { + _ = _server.StartAsync(_cts.Token); + await _server.WaitForReadyAsync(); + } + + public async Task DisposeAsync() + { + await _cts.CancelAsync(); + _server.Dispose(); + } + + private static int GetFreePort() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + sock.Bind(new IPEndPoint(IPAddress.Loopback, 0)); + return ((IPEndPoint)sock.LocalEndPoint!).Port; + } + + [Fact] + public async Task Client_close_reason_set_on_normal_disconnect() + { + var client = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await client.ConnectAsync(IPAddress.Loopback, _port); + var buf = new byte[4096]; + await client.ReceiveAsync(buf, SocketFlags.None); // INFO + await client.SendAsync("CONNECT {}\r\nPING\r\n"u8.ToArray()); + await client.ReceiveAsync(buf, SocketFlags.None); // PONG + + // Get the NatsClient instance + var natsClient = _server.GetClients().First(); + + // Close the TCP connection (simulates client disconnect) + client.Shutdown(SocketShutdown.Both); + client.Dispose(); + + // Wait for server to detect the disconnect + await Task.Delay(500); + + natsClient.CloseReason.ShouldBe(ClosedState.ClientClosed); + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~Client_close_reason_set_on_normal_disconnect" -v normal` +Expected: FAIL — `CloseReason` property does not exist + +**Step 3: Add CloseReason property and MarkClosed method to NatsClient** + +In `src/NATS.Server/NatsClient.cs`, add fields after the `_lastIn` field (line 67): + +```csharp + // Close reason tracking + private int _closeReason; // stores ClosedState as int for atomics + private int _skipFlushOnClose; + public ClosedState CloseReason => (ClosedState)Volatile.Read(ref _closeReason); +``` + +Add the `MarkClosed` method before `RemoveAllSubscriptions` (before line 525): + 
+
+```csharp
+    /// <summary>
+    /// Marks this connection as closed with the given reason.
+    /// Sets skip-flush flag for error-related reasons.
+    /// Thread-safe — only the first call sets the reason.
+    /// </summary>
+    public void MarkClosed(ClosedState reason)
+    {
+        // Only the first caller sets the reason
+        if (Interlocked.CompareExchange(ref _closeReason, (int)reason, 0) != 0)
+            return;
+
+        switch (reason)
+        {
+            case ClosedState.ReadError:
+            case ClosedState.WriteError:
+            case ClosedState.SlowConsumerPendingBytes:
+            case ClosedState.SlowConsumerWriteDeadline:
+            case ClosedState.TLSHandshakeError:
+                Volatile.Write(ref _skipFlushOnClose, 1);
+                break;
+        }
+
+        _logger.LogDebug("Client {ClientId} connection closed: {CloseReason}", Id, reason);
+    }
+
+    public bool ShouldSkipFlush => Volatile.Read(ref _skipFlushOnClose) != 0;
+```
+
+Update the `RunAsync` method's catch/finally blocks to set close reasons:
+
+Replace the existing catch/finally in RunAsync (lines 139-153):
+
+```csharp
+    catch (OperationCanceledException)
+    {
+        _logger.LogDebug("Client {ClientId} operation cancelled", Id);
+        // If no close reason set yet, this was a server shutdown
+        MarkClosed(ClosedState.ServerShutdown);
+    }
+    catch (IOException)
+    {
+        MarkClosed(ClosedState.ReadError);
+    }
+    catch (SocketException)
+    {
+        MarkClosed(ClosedState.ReadError);
+    }
+    catch (Exception ex)
+    {
+        _logger.LogDebug(ex, "Client {ClientId} connection error", Id);
+        MarkClosed(ClosedState.ReadError);
+    }
+    finally
+    {
+        // If FillPipeAsync ended with EOF (client disconnected), mark as ClientClosed
+        MarkClosed(ClosedState.ClientClosed);
+        try { _socket.Shutdown(SocketShutdown.Both); }
+        catch (SocketException) { }
+        catch (ObjectDisposedException) { }
+        Router?.RemoveClient(this);
+    }
+```
+
+**Step 4: Run test to verify it passes**
+
+Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~Client_close_reason_set_on_normal_disconnect" -v normal`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add 
src/NATS.Server/NatsClient.cs tests/NATS.Server.Tests/ServerTests.cs +git commit -m "feat: add close reason tracking to NatsClient" +``` + +--- + +### Task 2: Add NatsOptions for new lifecycle features + +**Files:** +- Modify: `src/NATS.Server/NatsOptions.cs` + +**Step 1: Add new option fields** + +Add after the MonitorHttpsPort line (after line 37): + +```csharp + // Lifecycle + public TimeSpan LameDuckDuration { get; set; } = TimeSpan.FromMinutes(2); + public TimeSpan LameDuckGracePeriod { get; set; } = TimeSpan.FromSeconds(10); + public string? PidFile { get; set; } + public string? PortsFileDir { get; set; } + + // Stubs + public string? ConfigFile { get; set; } + public int ProfPort { get; set; } +``` + +**Step 2: Verify it compiles** + +Run: `dotnet build src/NATS.Server` +Expected: Build succeeded + +**Step 3: Commit** + +```bash +git add src/NATS.Server/NatsOptions.cs +git commit -m "feat: add lifecycle options (lame duck, PID file, ports file, config stub)" +``` + +--- + +### Task 3: Ephemeral port support + +**Files:** +- Modify: `src/NATS.Server/NatsServer.cs` +- Test: `tests/NATS.Server.Tests/ServerTests.cs` + +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/ServerTests.cs`: + +```csharp +public class EphemeralPortTests +{ + [Fact] + public async Task Server_resolves_ephemeral_port() + { + using var cts = new CancellationTokenSource(); + var server = new NatsServer(new NatsOptions { Port = 0 }, NullLoggerFactory.Instance); + _ = server.StartAsync(cts.Token); + await server.WaitForReadyAsync(); + + try + { + server.Port.ShouldBeGreaterThan(0); + + // Verify we can actually connect to the resolved port + using var client = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await client.ConnectAsync(IPAddress.Loopback, server.Port); + var buf = new byte[4096]; + var n = await client.ReceiveAsync(buf, SocketFlags.None); + Encoding.ASCII.GetString(buf, 0, n).ShouldStartWith("INFO "); + } + finally + { + await 
cts.CancelAsync(); + server.Dispose(); + } + } +} +``` + +**Step 2: Run test to verify it fails** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~Server_resolves_ephemeral_port" -v normal` +Expected: FAIL — `Port` property does not exist on NatsServer + +**Step 3: Add Port property and ephemeral port resolution to NatsServer** + +Add `Port` property after `ClientCount` (after line 40 in NatsServer.cs): + +```csharp + public int Port => _options.Port; +``` + +In `StartAsync`, after `_listener.Listen(128);` and before `_listeningStarted.TrySetResult();` (after line 84), add: + +```csharp + // Resolve ephemeral port if port=0 + if (_options.Port == 0) + { + var actualPort = ((IPEndPoint)_listener.LocalEndPoint!).Port; + _options.Port = actualPort; + _serverInfo.Port = actualPort; + } +``` + +**Step 4: Run test to verify it passes** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~Server_resolves_ephemeral_port" -v normal` +Expected: PASS + +**Step 5: Commit** + +```bash +git add src/NATS.Server/NatsServer.cs tests/NATS.Server.Tests/ServerTests.cs +git commit -m "feat: add ephemeral port (port=0) support" +``` + +--- + +### Task 4: Graceful shutdown infrastructure + +**Files:** +- Modify: `src/NATS.Server/NatsServer.cs` +- Test: `tests/NATS.Server.Tests/ServerTests.cs` + +This is the largest task — it adds `_quitCts`, `_shutdownComplete`, `_activeClientCount`, `ShutdownAsync()`, `WaitForShutdown()`, and refactors the accept loop and client lifecycle. 
+ +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/ServerTests.cs`: + +```csharp +public class GracefulShutdownTests +{ + private static int GetFreePort() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + sock.Bind(new IPEndPoint(IPAddress.Loopback, 0)); + return ((IPEndPoint)sock.LocalEndPoint!).Port; + } + + [Fact] + public async Task ShutdownAsync_disconnects_all_clients() + { + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions { Port = port }, NullLoggerFactory.Instance); + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + // Connect two clients + var client1 = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await client1.ConnectAsync(IPAddress.Loopback, port); + var buf = new byte[4096]; + await client1.ReceiveAsync(buf, SocketFlags.None); // INFO + await client1.SendAsync("CONNECT {}\r\n"u8.ToArray()); + + var client2 = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await client2.ConnectAsync(IPAddress.Loopback, port); + await client2.ReceiveAsync(buf, SocketFlags.None); // INFO + await client2.SendAsync("CONNECT {}\r\n"u8.ToArray()); + + // Allow clients to register + await Task.Delay(200); + server.ClientCount.ShouldBe(2); + + // Shutdown + await server.ShutdownAsync(); + + // Both clients should receive EOF (connection closed) + var n1 = await client1.ReceiveAsync(buf, SocketFlags.None); + var n2 = await client2.ReceiveAsync(buf, SocketFlags.None); + + // 0 bytes = connection closed, or they get -ERR data then close + // Either way, connection should be dead + server.ClientCount.ShouldBe(0); + + client1.Dispose(); + client2.Dispose(); + server.Dispose(); + } + + [Fact] + public async Task WaitForShutdown_blocks_until_shutdown() + { + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions { Port = port }, NullLoggerFactory.Instance); + _ = 
server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + var waitTask = Task.Run(() => server.WaitForShutdown()); + + // Should not complete yet + await Task.Delay(200); + waitTask.IsCompleted.ShouldBeFalse(); + + // Trigger shutdown + await server.ShutdownAsync(); + + // Now it should complete promptly + await waitTask.WaitAsync(TimeSpan.FromSeconds(5)); + waitTask.IsCompletedSuccessfully.ShouldBeTrue(); + + server.Dispose(); + } + + [Fact] + public async Task ShutdownAsync_is_idempotent() + { + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions { Port = port }, NullLoggerFactory.Instance); + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + // Multiple shutdowns should not throw + await server.ShutdownAsync(); + await server.ShutdownAsync(); + await server.ShutdownAsync(); + + server.Dispose(); + } + + [Fact] + public async Task Accept_loop_waits_for_active_clients() + { + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions { Port = port }, NullLoggerFactory.Instance); + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + // Connect a client + var client = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await client.ConnectAsync(IPAddress.Loopback, port); + var buf = new byte[4096]; + await client.ReceiveAsync(buf, SocketFlags.None); // INFO + await client.SendAsync("CONNECT {}\r\n"u8.ToArray()); + await Task.Delay(200); + + // Shutdown should complete even with connected client + var shutdownTask = server.ShutdownAsync(); + var completed = await Task.WhenAny(shutdownTask, Task.Delay(TimeSpan.FromSeconds(10))); + completed.ShouldBe(shutdownTask, "Shutdown should complete within timeout"); + + client.Dispose(); + server.Dispose(); + } +} +``` + +**Step 2: Run tests to verify they fail** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~GracefulShutdownTests" -v normal` 
+Expected: FAIL — `ShutdownAsync` and `WaitForShutdown` do not exist + +**Step 3: Add shutdown infrastructure to NatsServer** + +Add new fields after `_startTimeTicks` (after line 33): + +```csharp + private readonly CancellationTokenSource _quitCts = new(); + private readonly TaskCompletionSource _shutdownComplete = new(TaskCreationOptions.RunContinuationsAsynchronously); + private readonly TaskCompletionSource _acceptLoopExited = new(TaskCreationOptions.RunContinuationsAsynchronously); + private int _shutdown; + private int _lameDuck; + private int _activeClientCount; +``` + +Add public properties and methods after `WaitForReadyAsync`: + +```csharp + public bool IsShuttingDown => Volatile.Read(ref _shutdown) != 0; + public bool IsLameDuckMode => Volatile.Read(ref _lameDuck) != 0; + + public void WaitForShutdown() => _shutdownComplete.Task.GetAwaiter().GetResult(); + + public async Task ShutdownAsync() + { + if (Interlocked.CompareExchange(ref _shutdown, 1, 0) != 0) + return; // Already shutting down + + _logger.LogInformation("Initiating Shutdown..."); + + // Signal all internal loops to stop + await _quitCts.CancelAsync(); + + // Close listener to stop accept loop + _listener?.Close(); + + // Wait for accept loop to exit + await _acceptLoopExited.Task.WaitAsync(TimeSpan.FromSeconds(5)).ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing); + + // Close all client connections + foreach (var client in _clients.Values) + { + client.MarkClosed(ClosedState.ServerShutdown); + } + + // Wait for active client tasks to drain (with timeout) + if (Volatile.Read(ref _activeClientCount) > 0) + { + using var drainCts = new CancellationTokenSource(TimeSpan.FromSeconds(10)); + try + { + while (Volatile.Read(ref _activeClientCount) > 0 && !drainCts.IsCancellationRequested) + await Task.Delay(50, drainCts.Token); + } + catch (OperationCanceledException) { } + } + + // Stop monitor server + if (_monitorServer != null) + await _monitorServer.DisposeAsync(); + + // Delete PID and 
ports files + DeletePidFile(); + DeletePortsFile(); + + _logger.LogInformation("Server Exiting.."); + _shutdownComplete.TrySetResult(); + } +``` + +Refactor `StartAsync` to use `_quitCts` and track the accept loop: + +Replace the entire `StartAsync` body with: + +```csharp + public async Task StartAsync(CancellationToken ct) + { + // Link the external CT to our internal quit signal + using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct, _quitCts.Token); + + _listener = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + _listener.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, true); + _listener.Bind(new IPEndPoint( + _options.Host == "0.0.0.0" ? IPAddress.Any : IPAddress.Parse(_options.Host), + _options.Port)); + + // Resolve ephemeral port if port=0 + if (_options.Port == 0) + { + var actualPort = ((IPEndPoint)_listener.LocalEndPoint!).Port; + _options.Port = actualPort; + _serverInfo.Port = actualPort; + } + + Interlocked.Exchange(ref _startTimeTicks, DateTime.UtcNow.Ticks); + _listener.Listen(128); + _listeningStarted.TrySetResult(); + + _logger.LogInformation("Listening for client connections on {Host}:{Port}", _options.Host, _options.Port); + + // Warn about stub features + if (_options.ConfigFile != null) + _logger.LogWarning("Config file parsing not yet supported (file: {ConfigFile})", _options.ConfigFile); + if (_options.ProfPort > 0) + _logger.LogWarning("Profiling endpoint not yet supported (port: {ProfPort})", _options.ProfPort); + + // Write PID and ports files + WritePidFile(); + WritePortsFile(); + + if (_options.MonitorPort > 0) + { + _monitorServer = new MonitorServer(this, _options, _stats, _loggerFactory); + await _monitorServer.StartAsync(linked.Token); + } + + var tmpDelay = AcceptMinSleep; + + try + { + while (!linked.Token.IsCancellationRequested) + { + Socket socket; + try + { + socket = await _listener.AcceptAsync(linked.Token); + tmpDelay = AcceptMinSleep; // Reset on 
success + } + catch (OperationCanceledException) + { + break; + } + catch (ObjectDisposedException) + { + // Listener was closed during shutdown or lame duck + break; + } + catch (SocketException ex) + { + if (IsShuttingDown || IsLameDuckMode) + break; + + _logger.LogError(ex, "Temporary accept error, sleeping {Delay}ms", tmpDelay.TotalMilliseconds); + try + { + await Task.Delay(tmpDelay, linked.Token); + } + catch (OperationCanceledException) { break; } + + tmpDelay = TimeSpan.FromTicks(Math.Min(tmpDelay.Ticks * 2, AcceptMaxSleep.Ticks)); + continue; + } + + // Check MaxConnections before creating the client + if (_options.MaxConnections > 0 && _clients.Count >= _options.MaxConnections) + { + _logger.LogWarning("Client connection rejected: maximum connections ({MaxConnections}) exceeded", + _options.MaxConnections); + try + { + var stream = new NetworkStream(socket, ownsSocket: false); + var errBytes = Encoding.ASCII.GetBytes( + $"-ERR '{NatsProtocol.ErrMaxConnectionsExceeded}'\r\n"); + await stream.WriteAsync(errBytes, linked.Token); + await stream.FlushAsync(linked.Token); + stream.Dispose(); + } + catch (Exception ex) + { + _logger.LogDebug(ex, "Failed to send -ERR to rejected client"); + } + finally + { + socket.Dispose(); + } + continue; + } + + var clientId = Interlocked.Increment(ref _nextClientId); + Interlocked.Increment(ref _stats.TotalConnections); + Interlocked.Increment(ref _activeClientCount); + + _logger.LogDebug("Client {ClientId} connected from {RemoteEndpoint}", clientId, socket.RemoteEndPoint); + + _ = AcceptClientAsync(socket, clientId, linked.Token); + } + } + catch (OperationCanceledException) + { + _logger.LogDebug("Accept loop cancelled, server shutting down"); + } + finally + { + _acceptLoopExited.TrySetResult(); + } + } +``` + +Update `RunClientAsync` to decrement active client count (replace existing): + +```csharp + private async Task RunClientAsync(NatsClient client, CancellationToken ct) + { + try + { + await client.RunAsync(ct); + } 
+ catch (Exception ex) + { + _logger.LogDebug(ex, "Client {ClientId} disconnected with error", client.Id); + } + finally + { + _logger.LogDebug("Client {ClientId} disconnected (reason: {CloseReason})", client.Id, client.CloseReason); + RemoveClient(client); + Interlocked.Decrement(ref _activeClientCount); + } + } +``` + +Update `Dispose` to call `ShutdownAsync`: + +```csharp + public void Dispose() + { + if (!IsShuttingDown) + ShutdownAsync().GetAwaiter().GetResult(); + _quitCts.Dispose(); + _tlsRateLimiter?.Dispose(); + _listener?.Dispose(); + foreach (var client in _clients.Values) + client.Dispose(); + foreach (var account in _accounts.Values) + account.Dispose(); + } +``` + +Add accept loop constants as static fields at the top of the class: + +```csharp + private static readonly TimeSpan AcceptMinSleep = TimeSpan.FromMilliseconds(10); + private static readonly TimeSpan AcceptMaxSleep = TimeSpan.FromSeconds(1); +``` + +**Step 4: Run tests to verify they pass** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~GracefulShutdownTests" -v normal` +Expected: PASS + +**Step 5: Run all existing tests to verify no regressions** + +Run: `dotnet test tests/NATS.Server.Tests -v normal` +Expected: All tests pass + +**Step 6: Commit** + +```bash +git add src/NATS.Server/NatsServer.cs tests/NATS.Server.Tests/ServerTests.cs +git commit -m "feat: add graceful shutdown, accept loop backoff, and task tracking" +``` + +--- + +### Task 5: Flush pending data before close + +**Files:** +- Modify: `src/NATS.Server/NatsClient.cs` +- Modify: `src/NATS.Server/NatsServer.cs` +- Test: `tests/NATS.Server.Tests/ServerTests.cs` + +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/ServerTests.cs`: + +```csharp +public class FlushBeforeCloseTests +{ + private static int GetFreePort() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + sock.Bind(new IPEndPoint(IPAddress.Loopback, 0)); + return 
((IPEndPoint)sock.LocalEndPoint!).Port; + } + + [Fact] + public async Task Shutdown_flushes_pending_data_to_clients() + { + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions { Port = port }, NullLoggerFactory.Instance); + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + // Connect subscriber + var sub = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await sub.ConnectAsync(IPAddress.Loopback, port); + var buf = new byte[4096]; + await sub.ReceiveAsync(buf, SocketFlags.None); // INFO + await sub.SendAsync("CONNECT {}\r\nSUB foo 1\r\nPING\r\n"u8.ToArray()); + // Read PONG + var pong = new StringBuilder(); + while (!pong.ToString().Contains("PONG")) + { + var n2 = await sub.ReceiveAsync(buf, SocketFlags.None); + pong.Append(Encoding.ASCII.GetString(buf, 0, n2)); + } + + // Connect publisher and publish a message + var pub = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await pub.ConnectAsync(IPAddress.Loopback, port); + await pub.ReceiveAsync(buf, SocketFlags.None); // INFO + await pub.SendAsync("CONNECT {}\r\nPUB foo 5\r\nHello\r\n"u8.ToArray()); + await Task.Delay(100); + + // The subscriber should receive the MSG + // Read all data from subscriber until connection closes + var allData = new StringBuilder(); + try + { + using var readCts = new CancellationTokenSource(TimeSpan.FromSeconds(5)); + + // First, read the MSG that should already be queued + while (true) + { + var n = await sub.ReceiveAsync(buf, SocketFlags.None, readCts.Token); + if (n == 0) break; + allData.Append(Encoding.ASCII.GetString(buf, 0, n)); + if (allData.ToString().Contains("Hello")) + break; + } + } + catch (OperationCanceledException) { } + + allData.ToString().ShouldContain("MSG foo 1 5\r\nHello\r\n"); + + pub.Dispose(); + sub.Dispose(); + server.Dispose(); + } +} +``` + +**Step 2: Run test to verify it passes (baseline — data already reaches subscriber before 
shutdown)**
+
+Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~Shutdown_flushes_pending_data" -v normal`
+Expected: This test should PASS with current code since the msg was delivered before shutdown. This establishes the baseline.
+
+**Step 3: Add FlushAndCloseAsync to NatsClient**
+
+In `NatsClient.cs`, add before `RemoveAllSubscriptions`:
+
+```csharp
+    /// <summary>
+    /// Flushes pending data (unless skip-flush is set) and closes the connection.
+    /// </summary>
+    public async Task FlushAndCloseAsync(bool minimalFlush = false)
+    {
+        if (!ShouldSkipFlush)
+        {
+            try
+            {
+                using var flushCts = new CancellationTokenSource(minimalFlush
+                    ? TimeSpan.FromMilliseconds(100)
+                    : TimeSpan.FromSeconds(1));
+                await _stream.FlushAsync(flushCts.Token);
+            }
+            catch (Exception)
+            {
+                // Best effort flush — don't let it prevent close
+            }
+        }
+
+        try { _socket.Shutdown(SocketShutdown.Both); }
+        catch (SocketException) { }
+        catch (ObjectDisposedException) { }
+    }
+```
+
+**Step 4: Wire FlushAndCloseAsync into ShutdownAsync**
+
+In `NatsServer.ShutdownAsync`, replace the client close loop:
+
+```csharp
+        // Close all client connections — flush first, then mark closed
+        var flushTasks = new List<Task>();
+        foreach (var client in _clients.Values)
+        {
+            client.MarkClosed(ClosedState.ServerShutdown);
+            flushTasks.Add(client.FlushAndCloseAsync(minimalFlush: true));
+        }
+        await Task.WhenAll(flushTasks).WaitAsync(TimeSpan.FromSeconds(2)).ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing);
+```
+
+**Step 5: Run all tests**
+
+Run: `dotnet test tests/NATS.Server.Tests -v normal`
+Expected: All tests pass
+
+**Step 6: Commit**
+
+```bash
+git add src/NATS.Server/NatsClient.cs src/NATS.Server/NatsServer.cs tests/NATS.Server.Tests/ServerTests.cs
+git commit -m "feat: add flush-before-close for graceful client shutdown"
+```
+
+---
+
+### Task 6: Lame duck mode
+
+**Files:**
+- Modify: `src/NATS.Server/NatsServer.cs`
+- Test: `tests/NATS.Server.Tests/ServerTests.cs`
+
+**Step 1: Write failing 
test** + +Add to `tests/NATS.Server.Tests/ServerTests.cs`: + +```csharp +public class LameDuckTests +{ + private static int GetFreePort() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + sock.Bind(new IPEndPoint(IPAddress.Loopback, 0)); + return ((IPEndPoint)sock.LocalEndPoint!).Port; + } + + [Fact] + public async Task LameDuckShutdown_stops_accepting_new_connections() + { + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions + { + Port = port, + LameDuckDuration = TimeSpan.FromSeconds(3), + LameDuckGracePeriod = TimeSpan.FromMilliseconds(500), + }, NullLoggerFactory.Instance); + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + // Connect a client before lame duck + var client1 = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await client1.ConnectAsync(IPAddress.Loopback, port); + var buf = new byte[4096]; + await client1.ReceiveAsync(buf, SocketFlags.None); // INFO + await client1.SendAsync("CONNECT {}\r\n"u8.ToArray()); + await Task.Delay(200); + + // Start lame duck mode (don't await — it runs in background) + var lameDuckTask = server.LameDuckShutdownAsync(); + + // Small delay to let lame duck close the listener + await Task.Delay(200); + + server.IsLameDuckMode.ShouldBeTrue(); + + // New connection attempt should fail + var client2 = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + var connectTask = client2.ConnectAsync(IPAddress.Loopback, port); + + // Should either throw or timeout quickly since listener is closed + var threw = false; + try + { + await connectTask.WaitAsync(TimeSpan.FromSeconds(2)); + } + catch + { + threw = true; + } + threw.ShouldBeTrue("New connection should be rejected"); + + // Wait for lame duck to finish + await lameDuckTask.WaitAsync(TimeSpan.FromSeconds(15)); + + client1.Dispose(); + client2.Dispose(); + server.Dispose(); + } + + [Fact] + public async Task 
LameDuckShutdown_eventually_closes_all_clients()
+    {
+        var port = GetFreePort();
+        var server = new NatsServer(new NatsOptions
+        {
+            Port = port,
+            LameDuckDuration = TimeSpan.FromSeconds(2),
+            LameDuckGracePeriod = TimeSpan.FromMilliseconds(200),
+        }, NullLoggerFactory.Instance);
+        _ = server.StartAsync(CancellationToken.None);
+        await server.WaitForReadyAsync();
+
+        // Connect clients
+        var clients = new List<Socket>();
+        for (int i = 0; i < 3; i++)
+        {
+            var c = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
+            await c.ConnectAsync(IPAddress.Loopback, port);
+            var buf2 = new byte[4096];
+            await c.ReceiveAsync(buf2, SocketFlags.None);
+            await c.SendAsync("CONNECT {}\r\n"u8.ToArray());
+            clients.Add(c);
+        }
+        await Task.Delay(200);
+        server.ClientCount.ShouldBe(3);
+
+        // Run lame duck shutdown
+        await server.LameDuckShutdownAsync();
+
+        // All clients should be disconnected
+        server.ClientCount.ShouldBe(0);
+
+        foreach (var c in clients) c.Dispose();
+        server.Dispose();
+    }
+}
+```
+
+**Step 2: Run tests to verify they fail**
+
+Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~LameDuckTests" -v normal`
+Expected: FAIL — `LameDuckShutdownAsync` does not exist
+
+**Step 3: Implement LameDuckShutdownAsync in NatsServer**
+
+Add after `ShutdownAsync`:
+
+```csharp
+    public async Task LameDuckShutdownAsync()
+    {
+        if (IsShuttingDown || Interlocked.CompareExchange(ref _lameDuck, 1, 0) != 0)
+            return;
+
+        _logger.LogInformation("Entering lame duck mode, stop accepting new clients");
+
+        // Close listener to stop accepting new connections
+        _listener?.Close();
+
+        // Wait for accept loop to exit
+        await _acceptLoopExited.Task.WaitAsync(TimeSpan.FromSeconds(5)).ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing);
+
+        var gracePeriod = _options.LameDuckGracePeriod;
+        if (gracePeriod < TimeSpan.Zero) gracePeriod = -gracePeriod;
+
+        // If no clients, go straight to shutdown
+        if (_clients.IsEmpty)
+        {
+            await ShutdownAsync();
+            
return; + } + + // Wait grace period for clients to drain naturally + _logger.LogInformation("Waiting {GracePeriod}ms grace period", gracePeriod.TotalMilliseconds); + try + { + await Task.Delay(gracePeriod, _quitCts.Token); + } + catch (OperationCanceledException) { return; } + + if (_clients.IsEmpty) + { + await ShutdownAsync(); + return; + } + + // Stagger-close remaining clients + var dur = _options.LameDuckDuration - gracePeriod; + if (dur <= TimeSpan.Zero) dur = TimeSpan.FromSeconds(1); + + var clients = _clients.Values.ToList(); + var numClients = clients.Count; + + if (numClients > 0) + { + _logger.LogInformation("Closing {Count} existing clients over {Duration}ms", + numClients, dur.TotalMilliseconds); + + var sleepInterval = dur.Ticks / numClients; + if (sleepInterval < TimeSpan.TicksPerMillisecond) + sleepInterval = TimeSpan.TicksPerMillisecond; + if (sleepInterval > TimeSpan.TicksPerSecond) + sleepInterval = TimeSpan.TicksPerSecond; + + for (int i = 0; i < clients.Count; i++) + { + clients[i].MarkClosed(ClosedState.ServerShutdown); + await clients[i].FlushAndCloseAsync(minimalFlush: true); + + if (i < clients.Count - 1) + { + // Randomize slightly to avoid reconnect storms + var jitter = Random.Shared.NextInt64(sleepInterval / 2, sleepInterval); + try + { + await Task.Delay(TimeSpan.FromTicks(jitter), _quitCts.Token); + } + catch (OperationCanceledException) { break; } + } + } + } + + await ShutdownAsync(); + } +``` + +**Step 4: Run tests to verify they pass** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~LameDuckTests" -v normal` +Expected: PASS + +**Step 5: Run all tests** + +Run: `dotnet test tests/NATS.Server.Tests -v normal` +Expected: All tests pass + +**Step 6: Commit** + +```bash +git add src/NATS.Server/NatsServer.cs tests/NATS.Server.Tests/ServerTests.cs +git commit -m "feat: add lame duck mode with staggered client shutdown" +``` + +--- + +### Task 7: PID file and ports file + +**Files:** +- Modify: 
`src/NATS.Server/NatsServer.cs` +- Test: `tests/NATS.Server.Tests/ServerTests.cs` + +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/ServerTests.cs`: + +```csharp +public class PidFileTests : IDisposable +{ + private readonly string _tempDir = Path.Combine(Path.GetTempPath(), $"nats-test-{Guid.NewGuid():N}"); + + public PidFileTests() => Directory.CreateDirectory(_tempDir); + + public void Dispose() + { + if (Directory.Exists(_tempDir)) + Directory.Delete(_tempDir, recursive: true); + } + + private static int GetFreePort() + { + using var sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + sock.Bind(new IPEndPoint(IPAddress.Loopback, 0)); + return ((IPEndPoint)sock.LocalEndPoint!).Port; + } + + [Fact] + public async Task Server_writes_pid_file_on_startup() + { + var pidFile = Path.Combine(_tempDir, "nats.pid"); + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions { Port = port, PidFile = pidFile }, NullLoggerFactory.Instance); + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + File.Exists(pidFile).ShouldBeTrue(); + var content = await File.ReadAllTextAsync(pidFile); + int.Parse(content).ShouldBe(Environment.ProcessId); + + await server.ShutdownAsync(); + + // PID file should be deleted on shutdown + File.Exists(pidFile).ShouldBeFalse(); + + server.Dispose(); + } + + [Fact] + public async Task Server_writes_ports_file_on_startup() + { + var port = GetFreePort(); + var server = new NatsServer(new NatsOptions { Port = port, PortsFileDir = _tempDir }, NullLoggerFactory.Instance); + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + + // Find the ports file + var portsFiles = Directory.GetFiles(_tempDir, "*.ports"); + portsFiles.Length.ShouldBe(1); + + var content = await File.ReadAllTextAsync(portsFiles[0]); + content.ShouldContain($"\"client\":{port}"); + + await server.ShutdownAsync(); + + // Ports file should be deleted on 
shutdown + Directory.GetFiles(_tempDir, "*.ports").Length.ShouldBe(0); + + server.Dispose(); + } +} +``` + +**Step 2: Run tests to verify they fail** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~PidFileTests" -v normal` +Expected: FAIL — `WritePidFile`/`DeletePidFile` methods don't exist yet (called in ShutdownAsync but not implemented) + +**Step 3: Add PID file and ports file methods to NatsServer** + +Add `using System.Diagnostics;` and `using System.Text.Json;` to the top of NatsServer.cs. + +Add private methods before `Dispose`: + +```csharp + private void WritePidFile() + { + if (string.IsNullOrEmpty(_options.PidFile)) return; + try + { + File.WriteAllText(_options.PidFile, Environment.ProcessId.ToString()); + _logger.LogDebug("Wrote PID file {PidFile}", _options.PidFile); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error writing PID file {PidFile}", _options.PidFile); + } + } + + private void DeletePidFile() + { + if (string.IsNullOrEmpty(_options.PidFile)) return; + try + { + if (File.Exists(_options.PidFile)) + File.Delete(_options.PidFile); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error deleting PID file {PidFile}", _options.PidFile); + } + } + + private void WritePortsFile() + { + if (string.IsNullOrEmpty(_options.PortsFileDir)) return; + try + { + var exeName = Path.GetFileNameWithoutExtension(Environment.ProcessPath ?? "nats-server"); + var fileName = $"{exeName}_{Environment.ProcessId}.ports"; + _portsFilePath = Path.Combine(_options.PortsFileDir, fileName); + + var ports = new + { + client = _options.Port, + monitor = _options.MonitorPort > 0 ? 
_options.MonitorPort : (int?)null, + }; + var json = JsonSerializer.Serialize(ports); + File.WriteAllText(_portsFilePath, json); + _logger.LogDebug("Wrote ports file {PortsFile}", _portsFilePath); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error writing ports file to {PortsFileDir}", _options.PortsFileDir); + } + } + + private void DeletePortsFile() + { + if (_portsFilePath == null) return; + try + { + if (File.Exists(_portsFilePath)) + File.Delete(_portsFilePath); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error deleting ports file {PortsFile}", _portsFilePath); + } + } +``` + +Add the field for ports file tracking near the other fields: + +```csharp + private string? _portsFilePath; +``` + +**Step 4: Run tests to verify they pass** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~PidFileTests" -v normal` +Expected: PASS + +**Step 5: Commit** + +```bash +git add src/NATS.Server/NatsServer.cs tests/NATS.Server.Tests/ServerTests.cs +git commit -m "feat: add PID file and ports file support" +``` + +--- + +### Task 8: System account and NKey identity stubs + +**Files:** +- Modify: `src/NATS.Server/NatsServer.cs` +- Test: `tests/NATS.Server.Tests/ServerTests.cs` + +**Step 1: Write failing test** + +Add to `tests/NATS.Server.Tests/ServerTests.cs`: + +```csharp +public class ServerIdentityTests +{ + [Fact] + public void Server_creates_system_account() + { + var server = new NatsServer(new NatsOptions { Port = 0 }, NullLoggerFactory.Instance); + server.SystemAccount.ShouldNotBeNull(); + server.SystemAccount.Name.ShouldBe("$SYS"); + server.Dispose(); + } + + [Fact] + public void Server_generates_nkey_identity() + { + var server = new NatsServer(new NatsOptions { Port = 0 }, NullLoggerFactory.Instance); + server.ServerNKey.ShouldNotBeNullOrEmpty(); + // Server NKey public keys start with 'N' + server.ServerNKey[0].ShouldBe('N'); + server.Dispose(); + } +} +``` + +**Step 2: Run tests to verify they fail** + +Run: `dotnet test 
tests/NATS.Server.Tests --filter "FullyQualifiedName~ServerIdentityTests" -v normal` +Expected: FAIL — `SystemAccount` and `ServerNKey` properties do not exist + +**Step 3: Add system account and NKey generation to NatsServer constructor** + +Add to NatsServer fields: + +```csharp + private readonly Account _systemAccount; + public Account SystemAccount => _systemAccount; + public string ServerNKey { get; } +``` + +In the constructor, after `_accounts[Account.GlobalAccountName] = _globalAccount;`: + +```csharp + // Create $SYS system account (stub — no internal subscriptions yet) + _systemAccount = new Account("$SYS"); + _accounts["$SYS"] = _systemAccount; + + // Generate server identity NKey (Ed25519) + var kp = NATS.NKeys.KeyPair.CreatePair(NATS.NKeys.PrefixByte.Server); + ServerNKey = kp.GetPublicKey(); +``` + +Note: Check the NATS.NKeys API — the exact method names may vary. If the API is different, adjust accordingly. The important thing is to generate a server-type key pair and store the public key. + +**Step 4: Run tests to verify they pass** + +Run: `dotnet test tests/NATS.Server.Tests --filter "FullyQualifiedName~ServerIdentityTests" -v normal` +Expected: PASS + +**Step 5: Commit** + +```bash +git add src/NATS.Server/NatsServer.cs tests/NATS.Server.Tests/ServerTests.cs +git commit -m "feat: add system account ($SYS) and server NKey identity stubs" +``` + +--- + +### Task 9: Signal handling and CLI stubs + +**Files:** +- Modify: `src/NATS.Server.Host/Program.cs` +- Modify: `src/NATS.Server/NatsServer.cs` (add `HandleSignals` method) + +**Step 1: Add signal handling to NatsServer** + +Add to `NatsServer` after `LameDuckShutdownAsync`: + +```csharp + /// <summary> + /// Registers Unix signal handlers. Call from the host process after server creation. + /// SIGTERM → shutdown, SIGUSR2 → lame duck, SIGUSR1 → log reopen (stub), SIGHUP → reload (stub). 
+ /// </summary> + public void HandleSignals() + { + PosixSignalRegistration.Create(PosixSignal.SIGTERM, ctx => + { + ctx.Cancel = true; + _logger.LogInformation("Trapped SIGTERM signal"); + _ = Task.Run(async () => + { + await ShutdownAsync(); + }); + }); + + PosixSignalRegistration.Create(PosixSignal.SIGQUIT, ctx => + { + ctx.Cancel = true; + _logger.LogInformation("Trapped SIGQUIT signal"); + _ = Task.Run(async () => + { + await ShutdownAsync(); + }); + }); + + // SIGUSR1 and SIGUSR2 — only available on Unix + if (!OperatingSystem.IsWindows()) + { + // SIGUSR1 = reopen log files (stub) + PosixSignalRegistration.Create((PosixSignal)10, ctx => // SIGUSR1 = 10 on Linux; NOTE: 30 on macOS/BSD — verify per-platform + { + ctx.Cancel = true; + _logger.LogWarning("Trapped SIGUSR1 signal — log reopen not yet supported"); + }); + + // SIGUSR2 = lame duck mode + PosixSignalRegistration.Create((PosixSignal)12, ctx => // SIGUSR2 = 12 on Linux; NOTE: 31 on macOS/BSD — verify per-platform + { + ctx.Cancel = true; + _logger.LogInformation("Trapped SIGUSR2 signal — entering lame duck mode"); + _ = Task.Run(async () => + { + await LameDuckShutdownAsync(); + }); + }); + } + + PosixSignalRegistration.Create(PosixSignal.SIGHUP, ctx => + { + ctx.Cancel = true; + _logger.LogWarning("Trapped SIGHUP signal — config reload not yet supported"); + }); + } +``` + +**Step 2: Update Program.cs with signal handling and new CLI flags** + +Replace the entire `Program.cs` with: + +```csharp +using NATS.Server; +using Serilog; + +Log.Logger = new LoggerConfiguration() + .MinimumLevel.Debug() + .Enrich.FromLogContext() + .WriteTo.Console(outputTemplate: "[{Timestamp:HH:mm:ss} {Level:u3}] {Message:lj}{NewLine}{Exception}") + .CreateLogger(); + +var options = new NatsOptions(); + +// Simple CLI argument parsing +for (int i = 0; i < args.Length; i++) +{ + switch (args[i]) + { + case "-p" or "--port" when i + 1 < args.Length: + options.Port = int.Parse(args[++i]); + break; + case "-a" or "--addr" when i + 1 < args.Length: + options.Host = args[++i]; + break; + case "-n" or "--name" when i + 1 < args.Length: + 
options.ServerName = args[++i]; + break; + case "-m" or "--http_port" when i + 1 < args.Length: + options.MonitorPort = int.Parse(args[++i]); + break; + case "--http_base_path" when i + 1 < args.Length: + options.MonitorBasePath = args[++i]; + break; + case "--https_port" when i + 1 < args.Length: + options.MonitorHttpsPort = int.Parse(args[++i]); + break; + case "-c" when i + 1 < args.Length: + options.ConfigFile = args[++i]; + break; + case "--pid" when i + 1 < args.Length: + options.PidFile = args[++i]; + break; + case "--ports_file_dir" when i + 1 < args.Length: + options.PortsFileDir = args[++i]; + break; + case "--tls": + break; + case "--tlscert" when i + 1 < args.Length: + options.TlsCert = args[++i]; + break; + case "--tlskey" when i + 1 < args.Length: + options.TlsKey = args[++i]; + break; + case "--tlscacert" when i + 1 < args.Length: + options.TlsCaCert = args[++i]; + break; + case "--tlsverify": + options.TlsVerify = true; + break; + } +} + +using var loggerFactory = new Serilog.Extensions.Logging.SerilogLoggerFactory(Log.Logger); +var server = new NatsServer(options, loggerFactory); + +// Register Unix signal handlers +server.HandleSignals(); + +// Ctrl+C triggers graceful shutdown +Console.CancelKeyPress += (_, e) => +{ + e.Cancel = true; + Log.Information("Trapped SIGINT signal"); + _ = Task.Run(async () => + { + await server.ShutdownAsync(); + }); +}; + +try +{ + _ = server.StartAsync(CancellationToken.None); + await server.WaitForReadyAsync(); + server.WaitForShutdown(); +} +catch (OperationCanceledException) +{ + Log.Information("Server shutdown requested"); +} +finally +{ + Log.CloseAndFlush(); +} +``` + +**Step 3: Verify it compiles** + +Run: `dotnet build` +Expected: Build succeeded + +**Step 4: Run all tests** + +Run: `dotnet test tests/NATS.Server.Tests -v normal` +Expected: All tests pass + +**Step 5: Commit** + +```bash +git add src/NATS.Server/NatsServer.cs src/NATS.Server.Host/Program.cs +git commit -m "feat: add signal handling 
(SIGTERM, SIGUSR2, SIGHUP) and CLI stubs" +``` + +--- + +### Task 10: Update differences.md + +**Files:** +- Modify: `differences.md` + +**Step 1: Run full test suite to verify all implementations** + +Run: `dotnet test tests/NATS.Server.Tests -v normal` +Expected: All tests pass + +**Step 2: Update differences.md section 1 tables** + +Update each row in section 1 that was implemented. Change `.NET` column from `N` to `Y` (or `Stub` for stub items). Update `Notes` column with implementation details. + +**Server Initialization table:** +| Feature | Go | .NET | Notes | +|---------|:--:|:----:|-------| +| NKey generation (server identity) | Y | Stub | Generates Ed25519 key at startup, not used in protocol yet | +| System account setup | Y | Stub | Creates `$SYS` account, no internal subscriptions | +| Config file validation on startup | Y | Stub | `-c` flag parsed, logs warning that parsing not yet supported | +| PID file writing | Y | Y | `--pid` flag, writes/deletes on startup/shutdown | +| Profiling HTTP endpoint (`/debug/pprof`) | Y | Stub | `ProfPort` option, logs warning if set | +| Ports file output | Y | Y | `--ports_file_dir` flag, JSON with client/monitor ports | + +**Accept Loop table:** +| Feature | Go | .NET | Notes | +|---------|:--:|:----:|-------| +| Exponential backoff on accept errors | Y | Y | 10ms→1s doubling, resets on success | +| Config reload lock during client creation | Y | Stub | No config reload yet | +| Goroutine/task tracking (WaitGroup) | Y | Y | `_activeClientCount` with Interlocked, waits on shutdown | +| Callback-based error handling | Y | N | .NET uses structured exception handling instead | +| Random/ephemeral port (port=0) | Y | Y | Resolves actual port after bind | + +**Shutdown table:** +| Feature | Go | .NET | Notes | +|---------|:--:|:----:|-------| +| Graceful shutdown with `WaitForShutdown()` | Y | Y | `ShutdownAsync()` + `WaitForShutdown()` | +| Close reason tracking per connection | Y | Y | Full 37-value `ClosedState` enum 
| Lame duck mode (stop new, drain existing) | Y | Y | `LameDuckShutdownAsync()` with staggered close | +| Wait for accept loop completion | Y | Y | `_acceptLoopExited` TaskCompletionSource | +| Flush pending data before close | Y | Y | `FlushAndCloseAsync()` with skip-flush for error reasons | + +**Signal Handling table:** +| Signal | Go | .NET | Notes | +|--------|:--:|:----:|-------| +| SIGINT (Ctrl+C) | Y | Y | Triggers `ShutdownAsync` | +| SIGTERM | Y | Y | Via `PosixSignalRegistration` | +| SIGUSR1 (reopen logs) | Y | Stub | Logs warning, no log rotation support yet | +| SIGUSR2 (lame duck mode) | Y | Y | Triggers `LameDuckShutdownAsync` | +| SIGHUP (config reload) | Y | Stub | Logs warning, no config reload support yet | +| Windows Service integration | Y | N | | + +**Step 3: Update the summary section** + +Remove items from "Critical Gaps" that are now implemented. Move to a new "Recently Implemented" section or adjust status. + +**Step 4: Commit** + +```bash +git add differences.md +git commit -m "docs: update differences.md section 1 — mark implemented lifecycle gaps" +``` + +--- + +### Task 11: Final verification + +**Step 1: Run full test suite** + +Run: `dotnet test tests/NATS.Server.Tests -v normal` +Expected: All tests pass + +**Step 2: Run build to verify no warnings** + +Run: `dotnet build -warnaserror` +Expected: Build succeeded (or only pre-existing warnings) + +**Step 3: Verify git status is clean** + +Run: `git status` +Expected: Clean working tree (all changes committed) diff --git a/docs/plans/2026-02-22-core-lifecycle-plan.md.tasks.json b/docs/plans/2026-02-22-core-lifecycle-plan.md.tasks.json new file mode 100644 index 0000000..383cdb5 --- /dev/null +++ b/docs/plans/2026-02-22-core-lifecycle-plan.md.tasks.json @@ -0,0 +1,18 @@ +{ + "planPath": "docs/plans/2026-02-22-core-lifecycle-plan.md", + "tasks": [ + {"id": 5, "subject": "Task 0: Create ClosedState enum", "status": "pending"}, + {"id": 6, "subject": "Task 1: Add close reason 
tracking to NatsClient", "status": "pending", "blockedBy": [5]}, + {"id": 7, "subject": "Task 2: Add NatsOptions for lifecycle features", "status": "pending"}, + {"id": 8, "subject": "Task 3: Ephemeral port support", "status": "pending"}, + {"id": 9, "subject": "Task 4: Graceful shutdown infrastructure", "status": "pending", "blockedBy": [5, 6, 7, 8]}, + {"id": 10, "subject": "Task 5: Flush pending data before close", "status": "pending", "blockedBy": [9]}, + {"id": 11, "subject": "Task 6: Lame duck mode", "status": "pending", "blockedBy": [9]}, + {"id": 12, "subject": "Task 7: PID file and ports file", "status": "pending", "blockedBy": [9]}, + {"id": 13, "subject": "Task 8: System account and NKey identity stubs", "status": "pending"}, + {"id": 14, "subject": "Task 9: Signal handling and CLI stubs", "status": "pending", "blockedBy": [11]}, + {"id": 15, "subject": "Task 10: Update differences.md", "status": "pending", "blockedBy": [14]}, + {"id": 16, "subject": "Task 11: Final verification", "status": "pending", "blockedBy": [15]} + ], + "lastUpdated": "2026-02-22T00:00:00Z" +}