From 99399ac917a6bc22a1973ed84c42dfc347097396 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sun, 1 Mar 2026 14:52:35 -0500 Subject: [PATCH] docs: add server boot parity implementation plan 5-task plan for wiring Start() to full subsystem startup sequence: 1. Add CheckAuthForWarnings() stub 2. Add StartDelayedApiResponder() stub 3. Guard MQTT StartMqtt() to not throw 4. Wire up Start() body to match Go sequence 5. Write server boot validation integration tests --- .../2026-03-01-server-boot-parity-plan.md | 601 ++++++++++++++++++ ...3-01-server-boot-parity-plan.md.tasks.json | 11 + reports/current.md | 2 +- 3 files changed, 613 insertions(+), 1 deletion(-) create mode 100644 docs/plans/2026-03-01-server-boot-parity-plan.md create mode 100644 docs/plans/2026-03-01-server-boot-parity-plan.md.tasks.json diff --git a/docs/plans/2026-03-01-server-boot-parity-plan.md b/docs/plans/2026-03-01-server-boot-parity-plan.md new file mode 100644 index 0000000..5cd6dcb --- /dev/null +++ b/docs/plans/2026-03-01-server-boot-parity-plan.md @@ -0,0 +1,601 @@ +# Server Boot Parity Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers-extended-cc:executing-plans to implement this plan task-by-task. + +**Goal:** Wire up `NatsServer.Start()` to call all subsystem startup methods matching Go's `Server.Start()` sequence, enabling a fully booting server that accepts client connections. + +**Architecture:** The .NET port already has every subsystem startup method ported. The gap is that `Start()` stops after system account setup instead of continuing through the full startup sequence. The fix is pure wiring — calling existing methods in order — plus 2 trivial no-op stubs and an MQTT guard. + +**Tech Stack:** .NET 10, C# latest, xUnit, Shouldly, NATS.Client.Core 2.7.2 + +--- + +## Task 1: Add `CheckAuthForWarnings()` stub + +**Files:** +- Modify: `dotnet/src/ZB.MOM.NatsNet.Server/NatsServer.Auth.cs` (add method at end of class, before closing `}`) + +**Step 1: Write the failing test** + +No dedicated test needed — this is a no-op stub. The build itself will verify the method exists when Task 3 calls it from `Start()`. + +**Step 2: Add the stub method** + +Add the following to `NatsServer.Auth.cs`, at the end of the partial class body (before the final `}`): + +```csharp + // ========================================================================= + // CheckAuthForWarnings (feature 3049 — Start parity) + // ========================================================================= + + /// + /// Checks for insecure auth configurations and logs warnings. + /// Mirrors Go Server.checkAuthforWarnings() in server/server.go. + /// Stub — full implementation deferred. + /// + internal void CheckAuthForWarnings() + { + // No-op stub. Go logs warnings about: + // - Password auth without TLS + // - Token auth without TLS + // - NKey auth without TLS + // These are informational warnings only. + } +``` + +**Step 3: Build to verify it compiles** + +Run: `dotnet build dotnet/src/ZB.MOM.NatsNet.Server/ZB.MOM.NatsNet.Server.csproj` +Expected: Build succeeded. 0 Error(s) + +**Step 4: Commit** + +```bash +git add dotnet/src/ZB.MOM.NatsNet.Server/NatsServer.Auth.cs +git commit -m "feat: add CheckAuthForWarnings stub for Start() parity" +``` + +--- + +## Task 2: Add `StartDelayedApiResponder()` stub + +**Files:** +- Modify: `dotnet/src/ZB.MOM.NatsNet.Server/NatsServer.JetStreamCore.cs` (add method) + +**Step 1: Add the stub method** + +Add the following to `NatsServer.JetStreamCore.cs`, inside the partial class: + +```csharp + // ========================================================================= + // Delayed API responder (feature 3049 — Start parity) + // ========================================================================= + + /// + /// Starts the delayed JetStream API response handler goroutine. + /// Started regardless of JetStream being enabled (can be enabled via config reload). + /// Mirrors Go Server.delayedAPIResponder() in server/jetstream_api.go. + /// Stub — full implementation deferred. + /// + internal void StartDelayedApiResponder() + { + StartGoRoutine(() => + { + // No-op: exits when quit is signaled. + _quitCts.Token.WaitHandle.WaitOne(); + }); + } +``` + +**Step 2: Build to verify it compiles** + +Run: `dotnet build dotnet/src/ZB.MOM.NatsNet.Server/ZB.MOM.NatsNet.Server.csproj` +Expected: Build succeeded. 0 Error(s) + +**Step 3: Commit** + +```bash +git add dotnet/src/ZB.MOM.NatsNet.Server/NatsServer.JetStreamCore.cs +git commit -m "feat: add StartDelayedApiResponder stub for Start() parity" +``` + +--- + +## Task 3: Guard MQTT `StartMqtt()` to not throw + +**Files:** +- Modify: `dotnet/src/ZB.MOM.NatsNet.Server/Mqtt/MqttHandler.cs:136–137` + +**Step 1: Change StartMqtt from throwing to logging a warning** + +Replace the existing line at `MqttHandler.cs:136-137`: + +```csharp + public static void StartMqtt(this NatsServer server) => + throw new NotImplementedException("TODO: session 22"); +``` + +With: + +```csharp + public static void StartMqtt(this NatsServer server) + { + server.Warnf("MQTT listener not yet implemented; skipping MQTT startup"); + } +``` + +Note: `Warnf` is a public method on `NatsServer` (verified in `NatsServer.Logging.cs`). + +**Step 2: Build to verify it compiles** + +Run: `dotnet build dotnet/src/ZB.MOM.NatsNet.Server/ZB.MOM.NatsNet.Server.csproj` +Expected: Build succeeded. 0 Error(s) + +**Step 3: Commit** + +```bash +git add dotnet/src/ZB.MOM.NatsNet.Server/Mqtt/MqttHandler.cs +git commit -m "fix: guard StartMqtt to warn instead of throwing NotImplementedException" +``` + +--- + +## Task 4: Wire up `Start()` body to match Go's sequence + +This is the core task. Replace the body of `Start()` in `NatsServer.Init.cs:976–1037` with the full Go-parity startup sequence. + +**Files:** +- Modify: `dotnet/src/ZB.MOM.NatsNet.Server/NatsServer.Init.cs:970–1037` + +**Step 1: Replace the Start() method** + +Replace lines 970–1037 (the `/// ` through the closing `}` of `Start()`) with the complete implementation below. The new code mirrors Go `server.go:2263–2575` line-by-line: + +```csharp + /// + /// Starts the server (non-blocking). Writes startup log lines, starts all + /// subsystems (monitoring, JetStream, gateways, websocket, leafnodes, routes, + /// MQTT), then begins the client accept loop. + /// Mirrors Go Server.Start in server.go:2263–2575. + /// + public void Start() + { + Noticef("Starting nats-server"); + + var gc = string.IsNullOrEmpty(ServerConstants.GitCommit) ? "not set" : ServerConstants.GitCommit; + var opts = GetOpts(); + + _mu.EnterReadLock(); + var leafNoCluster = _leafNoCluster; + _mu.ExitReadLock(); + + var clusterName = leafNoCluster ? string.Empty : ClusterName(); + + Noticef(" Version: {0}", ServerConstants.Version); + Noticef(" Git: [{0}]", gc); + if (!string.IsNullOrEmpty(clusterName)) + Noticef(" Cluster: {0}", clusterName); + Noticef(" Name: {0}", _info.Name); + if (opts.JetStream) + Noticef(" Node: {0}", GetHash(_info.Name)); + Noticef(" ID: {0}", _info.Id); + + // Check for insecure configurations. + CheckAuthForWarnings(); + + // Avoid RACE between Start() and Shutdown(). + Interlocked.Exchange(ref _running, 1); + + _mu.EnterWriteLock(); + _leafNodeEnabled = opts.LeafNode.Port != 0 || opts.LeafNode.Remotes.Count > 0; + _mu.ExitWriteLock(); + + lock (_grMu) { _grRunning = true; } + + StartRateLimitLogExpiration(); + + // Pprof http endpoint for the profiler. + if (opts.ProfPort != 0) + { + StartProfiler(); + } + + if (!string.IsNullOrEmpty(opts.ConfigFile)) + { + Noticef("Using configuration file: {0}", opts.ConfigFile); + } + + var hasOperators = opts.TrustedOperators.Count > 0; + if (hasOperators) + { + Noticef("Trusted Operators"); + } + if (hasOperators && string.IsNullOrEmpty(opts.SystemAccount)) + { + Warnf("Trusted Operators should utilize a System Account"); + } + if (opts.MaxPayload > ServerConstants.MaxPayloadMaxSize) + { + Warnf("Maximum payloads over {0} are generally discouraged and could lead to poor performance", + ServerConstants.MaxPayloadMaxSize); + } + + // Log the pid to a file. + if (!string.IsNullOrEmpty(opts.PidFile)) + { + var pidErr = LogPid(); + if (pidErr != null) + { + Fatalf("Could not write pidfile: {0}", pidErr); + return; + } + } + + // Setup system account which will start the eventing stack. + if (!string.IsNullOrEmpty(opts.SystemAccount)) + { + var saErr = SetSystemAccount(opts.SystemAccount); + if (saErr != null) + { + Fatalf("Can't set system account: {0}", saErr); + return; + } + } + else if (!opts.NoSystemAccount) + { + SetDefaultSystemAccount(); + } + + // Start monitoring before enabling other subsystems. + var monErr = StartMonitoring(); + if (monErr != null) + { + Fatalf("Can't start monitoring: {0}", monErr); + return; + } + + // Start up resolver machinery. + var ar = AccountResolver(); + if (ar != null) + { + var arErr = ar.Start(this); + if (arErr != null) + { + Fatalf("Could not start resolver: {0}", arErr); + return; + } + } + + // Start expiration of mapped GW replies. + StartGWReplyMapExpiration(); + + // Check if JetStream has been enabled. + if (opts.JetStream) + { + var sa = SystemAccount(); + if (sa != null && sa.JsLimitsCount() > 0) + { + Fatalf("Not allowed to enable JetStream on the system account"); + return; + } + + var cfg = new JetStreamConfig + { + StoreDir = opts.StoreDir, + SyncInterval = opts.SyncInterval, + SyncAlways = opts.SyncAlways, + MaxMemory = opts.JetStreamMaxMemory, + MaxStore = opts.JetStreamMaxStore, + Domain = opts.JetStreamDomain, + CompressOk = true, + UniqueTag = opts.JetStreamUniqueTag, + }; + + var jsErr = EnableJetStream(cfg); + if (jsErr != null) + { + Fatalf("Can't start JetStream: {0}", jsErr); + return; + } + } + + // Delayed API response handling. + StartDelayedApiResponder(); + + // Start OCSP Stapling monitoring. + StartOCSPMonitoring(); + + // Configure OCSP Response Cache. + StartOCSPResponseCache(); + + // Start up gateway if needed. + if (opts.Gateway.Port != 0) + { + var gwErr = StartGateways(); + if (gwErr != null) + { + Fatalf("Can't start gateways: {0}", gwErr); + return; + } + } + + // Start websocket server if needed. + if (opts.Websocket.Port != 0) + { + StartWebsocketServer(); + } + + // Start up listen if we want to accept leaf node connections. + if (opts.LeafNode.Port != 0) + { + var lnErr = StartLeafNodeAcceptLoop(); + if (lnErr != null) + { + Fatalf("Can't start leaf node listener: {0}", lnErr); + return; + } + } + + // Solicit remote servers for leaf node connections. + if (opts.LeafNode.Remotes.Count > 0) + { + SolicitLeafNodeRemotes(opts.LeafNode.Remotes); + } + + // The Routing routine needs to wait for the client listen + // port to be opened and potential ephemeral port selected. + var clientListenReady = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + + // MQTT + if (opts.Mqtt.Port != 0) + { + this.StartMqtt(); + } + + // Start up routing as well if needed. + if (opts.Cluster.Port != 0) + { + StartGoRoutine(() => + { + StartRouting(); + }); + } + + if (!string.IsNullOrEmpty(opts.PortsFileDir)) + { + LogPorts(); + } + + // We've finished starting up. + _startupComplete.TrySetResult(); + + // Wait for clients. + if (!opts.DontListen) + { + AcceptLoop(clientListenReady); + } + + // Bring OCSP Response cache online after accept loop started. + StartOCSPResponseCache(); + + Noticef("Server is ready"); + } +``` + +**Important notes for the implementer:** +- `GetHash` is a static method in `NatsServer` — verify it exists. If not found, skip the `Noticef(" Node: ...")` line. +- `SystemAccount()` is a public method on `NatsServer` — verified exists in `NatsServer.Accounts.cs`. +- `JsLimitsCount()` must exist on `Account`. If not, use a check like `sa.HasJsLimits()` or skip that guard with a TODO comment. +- `StartRouting()` in .NET does not take a `clientListenReady` parameter (Go's does). The `clientListenReady` TCS is created for forward-compatibility but is currently only passed to `AcceptLoop`. +- The `CompressOk` property on `JetStreamConfig` — verify exact property name. May be `CompressOK`. +- The `UniqueTag` property on `JetStreamConfig` — verify exists. If not, omit. + +**Step 2: Build to verify it compiles** + +Run: `dotnet build dotnet/src/ZB.MOM.NatsNet.Server/ZB.MOM.NatsNet.Server.csproj` +Expected: Build succeeded. 0 Error(s) + +If there are compilation errors (e.g., missing properties like `CompressOk`, `UniqueTag`, `JsLimitsCount`), fix by: +- Using the correct property name (check the class definition) +- Adding a stub property if it's truly missing +- Commenting out the line with a `// TODO` if it references unavailable API + +**Step 3: Run existing unit tests to verify no regressions** + +Run: `dotnet test dotnet/tests/ZB.MOM.NatsNet.Server.Tests/ --no-build` +Expected: 2659 passed, ~53 skipped, 0 failed + +**Step 4: Run existing integration tests to verify no regressions** + +Run: `dotnet test dotnet/tests/ZB.MOM.NatsNet.Server.IntegrationTests/ --no-build` +Expected: ~113 passed, ~854 skipped, 0 failed + +**Step 5: Commit** + +```bash +git add dotnet/src/ZB.MOM.NatsNet.Server/NatsServer.Init.cs +git commit -m "feat: complete Start() with full subsystem startup sequence + +Wire up Start() to call all subsystem startup methods matching +Go's Server.Start() sequence: monitoring, resolver, JetStream, +gateways, websocket, leafnodes, MQTT, routing, accept loop." +``` + +--- + +## Task 5: Write server boot validation integration test + +**Files:** +- Create: `dotnet/tests/ZB.MOM.NatsNet.Server.IntegrationTests/ServerBootTests.cs` + +**Step 1: Write the test** + +Create `ServerBootTests.cs`: + +```csharp +// Copyright 2013-2026 The NATS Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.Net; +using System.Text; +using NATS.Client.Core; +using Shouldly; +using ZB.MOM.NatsNet.Server; + +namespace ZB.MOM.NatsNet.Server.IntegrationTests; + +/// +/// End-to-end server boot tests that validate the full Start() → AcceptLoop → client connection lifecycle. +/// +[Trait("Category", "Integration")] +public sealed class ServerBootTests : IDisposable +{ + private readonly string _storeDir; + + public ServerBootTests() + { + _storeDir = Path.Combine(Path.GetTempPath(), $"natsnet-test-{Guid.NewGuid():N}"); + } + + public void Dispose() + { + try { Directory.Delete(_storeDir, true); } catch { } + } + + /// + /// Validates that a server can boot, accept a TCP connection, and exchange + /// a NATS protocol handshake (INFO line). This proves the full Start() → + /// AcceptLoop → CreateClient pipeline works end-to-end. + /// + [Fact] + public async Task ServerBoot_AcceptsClientConnection_ShouldSucceed() + { + // Arrange — create server with ephemeral port + var opts = new ServerOptions + { + Host = "127.0.0.1", + Port = 0, // ephemeral + }; + + var (server, err) = NatsServer.NewServer(opts); + err.ShouldBeNull("NewServer should succeed"); + server.ShouldNotBeNull(); + + try + { + // Act — start the server + server!.Start(); + + // Get the actual bound port + var addr = server.Addr() as IPEndPoint; + addr.ShouldNotBeNull("Server should have a listener address after Start()"); + addr!.Port.ShouldBeGreaterThan(0); + + // Connect a raw TCP client and read the INFO line + using var tcp = new System.Net.Sockets.TcpClient(); + await tcp.ConnectAsync(addr.Address, addr.Port); + + using var stream = tcp.GetStream(); + stream.ReadTimeout = 5000; + + var buffer = new byte[4096]; + var bytesRead = await stream.ReadAsync(buffer.AsMemory(0, buffer.Length)); + bytesRead.ShouldBeGreaterThan(0, "Should receive data from server"); + + var response = Encoding.UTF8.GetString(buffer, 0, bytesRead); + response.ShouldStartWith("INFO ", "Server should send INFO line on connect"); + } + finally + { + server!.Shutdown(); + } + } + + /// + /// Validates that Shutdown() after Start() completes cleanly without errors. + /// + [Fact] + public void ServerBoot_StartAndShutdown_ShouldSucceed() + { + var opts = new ServerOptions + { + Host = "127.0.0.1", + Port = 0, + DontListen = true, // Don't open TCP listener — just test lifecycle + }; + + var (server, err) = NatsServer.NewServer(opts); + err.ShouldBeNull(); + server.ShouldNotBeNull(); + + // Should not throw + server!.Start(); + server.Running().ShouldBeTrue(); + + server.Shutdown(); + server.Running().ShouldBeFalse(); + } +} +``` + +**Step 2: Build the integration tests** + +Run: `dotnet build dotnet/tests/ZB.MOM.NatsNet.Server.IntegrationTests/` +Expected: Build succeeded. 0 Error(s) + +**Step 3: Run the new tests** + +Run: `dotnet test dotnet/tests/ZB.MOM.NatsNet.Server.IntegrationTests/ --filter "FullyQualifiedName~ServerBootTests" --no-build -v normal` +Expected: 2 passed, 0 failed + +If `ServerBoot_AcceptsClientConnection_ShouldSucceed` fails: +- Check if `AcceptLoop` is actually being called (add a `Noticef` before the call) +- Check if the listener is null after `AcceptLoop` returns +- Check if `CreateClient` throws — look at the error in the test output +- If the INFO line isn't received, the client-side protocol handshake may be hanging — check `CreateClient` and the initial write path + +If `ServerBoot_StartAndShutdown_ShouldSucceed` fails: +- Check if `Start()` throws an exception from one of the subsystem calls +- The `DontListen = true` flag should skip `AcceptLoop` — verify the `if (!opts.DontListen)` guard + +**Step 4: Run the full test suite to confirm no regressions** + +Run: `dotnet test dotnet/tests/ZB.MOM.NatsNet.Server.Tests/` +Expected: 2659 passed, ~53 skipped, 0 failed + +Run: `dotnet test dotnet/tests/ZB.MOM.NatsNet.Server.IntegrationTests/` +Expected: ~115 passed (113 old + 2 new), ~854 skipped, 0 failed + +**Step 5: Commit** + +```bash +git add dotnet/tests/ZB.MOM.NatsNet.Server.IntegrationTests/ServerBootTests.cs +git commit -m "test: add server boot validation integration tests + +Two tests verifying full Start() lifecycle: +- ServerBoot_AcceptsClientConnection: connects TCP, reads INFO line +- ServerBoot_StartAndShutdown: verifies clean lifecycle with DontListen" +``` + +--- + +## Verification Checklist + +After all tasks are complete: + +- [ ] `dotnet build` — 0 errors +- [ ] Unit tests — 2659 pass, ~53 skip, 0 fail +- [ ] Integration tests — ~115 pass, ~854 skip, 0 fail (2 new tests pass) +- [ ] New `ServerBoot_AcceptsClientConnection_ShouldSucceed` — PASSES +- [ ] New `ServerBoot_StartAndShutdown_ShouldSucceed` — PASSES +- [ ] `Start()` method body matches Go's `Server.Start()` call sequence diff --git a/docs/plans/2026-03-01-server-boot-parity-plan.md.tasks.json b/docs/plans/2026-03-01-server-boot-parity-plan.md.tasks.json new file mode 100644 index 0000000..495128a --- /dev/null +++ b/docs/plans/2026-03-01-server-boot-parity-plan.md.tasks.json @@ -0,0 +1,11 @@ +{ + "planPath": "docs/plans/2026-03-01-server-boot-parity-plan.md", + "tasks": [ + {"id": 33, "subject": "Task 1: Add CheckAuthForWarnings() stub", "status": "pending"}, + {"id": 34, "subject": "Task 2: Add StartDelayedApiResponder() stub", "status": "pending"}, + {"id": 35, "subject": "Task 3: Guard MQTT StartMqtt() to not throw", "status": "pending"}, + {"id": 36, "subject": "Task 4: Wire up Start() body to match Go sequence", "status": "pending", "blockedBy": [33, 34, 35]}, + {"id": 37, "subject": "Task 5: Write server boot validation integration tests", "status": "pending", "blockedBy": [36]} + ], + "lastUpdated": "2026-03-01T00:00:00Z" +} diff --git a/reports/current.md b/reports/current.md index d2ebd51..654b5bd 100644 --- a/reports/current.md +++ b/reports/current.md @@ -1,6 +1,6 @@ # NATS .NET Porting Status Report -Generated: 2026-03-01 19:47:52 UTC +Generated: 2026-03-01 19:52:36 UTC ## Modules (12 total)