feat: add benchmark test project for Go vs .NET server comparison

Side-by-side performance benchmarks using NATS.Client.Core against both
servers on ephemeral ports. Includes core pub/sub, request/reply latency,
and JetStream throughput tests with comparison output and
benchmarks_comparison.md results. Also fixes timestamp flakiness in
StoreInterfaceTests by using explicit timestamps.
Author: Joseph Doherty
Date: 2026-03-13 01:23:31 -04:00
Parent: e9c86c51c3
Commit: 37575dc41c
28 changed files with 2264 additions and 12 deletions

AGENTS.md (new file, 212 lines)

@@ -0,0 +1,212 @@
# AGENTS.md
This file provides guidance to Codex when working with code in this repository.
## Project Overview
This project ports the [NATS server](https://github.com/nats-io/nats-server) from Go to .NET 10 / C#. The Go reference implementation lives in `golang/nats-server/`. The .NET port lives at the repository root.
NATS is a high-performance publish-subscribe messaging system. It supports wildcards (`*` single token, `>` multi-token), queue groups for load balancing, request-reply, clustering (full-mesh routes, gateways, leaf nodes), and persistent streaming via JetStream.
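The wildcard rules above can be sketched with a token-by-token matcher. This is an illustrative simplification (the real server matches against a trie, never splitting subjects per message); the function name and approach are assumptions for clarity, not server code:

```go
package main

import (
	"fmt"
	"strings"
)

// matches reports whether a published subject matches a subscription
// pattern under NATS wildcard rules: "*" matches exactly one token,
// ">" matches one or more trailing tokens.
func matches(pattern, subject string) bool {
	pt := strings.Split(pattern, ".")
	st := strings.Split(subject, ".")
	for i, tok := range pt {
		if tok == ">" {
			return i < len(st) // ">" must cover at least one remaining token
		}
		if i >= len(st) {
			return false // subject ran out of tokens
		}
		if tok != "*" && tok != st[i] {
			return false // literal token mismatch
		}
	}
	return len(pt) == len(st)
}

func main() {
	fmt.Println(matches("foo.*", "foo.bar")) // true
	fmt.Println(matches("foo.>", "foo.a.b")) // true
	fmt.Println(matches("foo.*", "foo.a.b")) // false: "*" is single-token
	fmt.Println(matches("foo.>", "foo"))     // false: ">" needs a tail
}
```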
## Build & Test Commands
The solution file is `NatsDotNet.slnx`.
```bash
# Build the solution
dotnet build
# Run all tests
dotnet test
# Run tests with verbose output
dotnet test -v normal
# Run a single test project
dotnet test tests/NATS.Server.Core.Tests
dotnet test tests/NATS.Server.JetStream.Tests
# Run a specific test by name
dotnet test tests/NATS.Server.Core.Tests --filter "FullyQualifiedName~TestName"
# Run the NATS server (default port 4222)
dotnet run --project src/NATS.Server.Host
# Run the NATS server on a custom port
dotnet run --project src/NATS.Server.Host -- -p 14222
# Clean and rebuild
dotnet clean && dotnet build
```
## .NET Project Structure
```
NatsDotNet.slnx # Solution file
src/
NATS.Server/ # Core server library
NatsServer.cs # Server: listener, accept loop, shutdown
NatsClient.cs # Per-connection client: read/write loops, sub tracking
NatsOptions.cs # Server configuration (port, host, etc.)
Protocol/
NatsParser.cs # Protocol state machine (PUB, SUB, UNSUB, etc.)
NatsProtocol.cs # Wire-level protocol writing (INFO, MSG, PING/PONG)
Subscriptions/
SubjectMatch.cs # Subject validation and wildcard matching
SubList.cs # Trie-based subscription list with caching
SubListResult.cs # Match result container (plain subs + queue groups)
Subscription.cs # Subscription model (subject, sid, queue, client)
NATS.Server.Host/ # Executable host app
Program.cs # Entry point, CLI arg parsing (-p port)
tests/
NATS.Server.TestUtilities/ # Shared helpers, fixtures, parity tools (class library)
NATS.Server.Core.Tests/ # Client, server, parser, config, subscriptions, protocol
NATS.Server.Auth.Tests/ # Auth, accounts, permissions, JWT, NKeys
NATS.Server.JetStream.Tests/ # JetStream API, streams, consumers, storage, cluster
NATS.Server.Raft.Tests/ # RAFT consensus
NATS.Server.Clustering.Tests/ # Routes, cluster topology, inter-server protocol
NATS.Server.Gateways.Tests/ # Gateway connections, interest modes
NATS.Server.LeafNodes.Tests/ # Leaf node connections, hub-spoke
NATS.Server.Mqtt.Tests/ # MQTT protocol bridge
NATS.Server.Monitoring.Tests/ # Monitor endpoints, events, system events
NATS.Server.Transport.Tests/ # WebSocket, TLS, OCSP, IO
NATS.E2E.Tests/ # End-to-end tests using NATS.Client.Core NuGet
```
## Go Reference Commands
```bash
# Build the Go reference server
cd golang/nats-server && go build
# Run Go tests for a specific area
cd golang/nats-server && go test -v -run TestName ./server/ -count=1 -timeout=30m
# Run all Go server tests (slow, ~30min)
cd golang/nats-server && go test -v ./server/ -count=1 -timeout=30m
```
## Architecture: NATS Server (Reference)
The Go source in `golang/nats-server/server/` is the authoritative reference. Key files by subsystem:
### Core Message Path
- **`server.go`** — Server struct, startup lifecycle (`NewServer` → `Run` → `WaitForShutdown`), listener management
- **`client.go`** (6700 lines) — Connection handling, `readLoop`/`writeLoop` goroutines, per-client subscription tracking, dynamic buffer sizing (512→65536 bytes), client types: `CLIENT`, `ROUTER`, `GATEWAY`, `LEAF`, `SYSTEM`
- **`parser.go`** — Protocol state machine. Text protocol: `PUB`, `SUB`, `UNSUB`, `CONNECT`, `INFO`, `PING/PONG`, `MSG`. Extended: `HPUB/HMSG` (headers), `RPUB/RMSG` (routes). Control line limit: 4096 bytes. Default max payload: 1MB.
- **`sublist.go`** — Trie-based subject matcher with wildcard support. Nodes have `psubs` (plain), `qsubs` (queue groups), special pointers for `*` and `>` wildcards. Results are cached with atomic generation IDs for invalidation.
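The generation-ID cache invalidation mentioned for `sublist.go` can be sketched as follows. This is a hypothetical simplification (type and field names are invented): every SUB/UNSUB bumps a counter, so stale cache entries become invisible without walking the map:

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// matchCache sketches the sublist result cache: a lookup is valid only
// while the generation recorded at store time equals the current one.
type matchCache struct {
	gen   atomic.Uint64
	mu    sync.Mutex
	cache map[string]cachedEntry
}

type cachedEntry struct {
	gen  uint64
	subs []string // matched subscription IDs (simplified)
}

func (c *matchCache) store(subject string, subs []string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.cache[subject] = cachedEntry{gen: c.gen.Load(), subs: subs}
}

func (c *matchCache) load(subject string) ([]string, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	e, ok := c.cache[subject]
	if !ok || e.gen != c.gen.Load() {
		return nil, false // stale or missing: caller re-runs the trie match
	}
	return e.subs, true
}

// invalidate is called on every subscription change (SUB/UNSUB).
func (c *matchCache) invalidate() { c.gen.Add(1) }

func main() {
	c := &matchCache{cache: map[string]cachedEntry{}}
	c.store("foo.bar", []string{"sid1"})
	_, hit := c.load("foo.bar")
	fmt.Println(hit) // true
	c.invalidate()
	_, hit = c.load("foo.bar")
	fmt.Println(hit) // false: generation changed, entry lazily invalidated
}
```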
### Authentication & Accounts
- **`auth.go`** — Auth mechanisms: username/password, token, NKeys (Ed25519), JWT, external auth callout, LDAP
- **`accounts.go`** (137KB) — Multi-tenant account isolation. Each account has its own `Sublist`, client set, and subject namespace. Supports exports/imports between accounts, service latency tracking.
- **`jwt.go`**, **`nkey.go`** — JWT claims parsing and NKey validation
### Clustering
- **`route.go`** — Full-mesh cluster routes. Route pooling (default 3 connections per peer). Account-specific dedicated routes. Protocol: `RS+`/`RS-` for subscribe propagation, `RMSG` for routed messages.
- **`gateway.go`** (103KB) — Inter-cluster bridges. Interest-only mode optimizes traffic. Reply subject mapping (`_GR_.` prefix) avoids cross-cluster conflicts.
- **`leafnode.go`** — Hub-and-spoke topology for edge deployments. Only subscribed subjects shared with hub. Loop detection via `$LDS.` prefix.
### JetStream (Persistence)
- **`jetstream.go`** — Orchestration, API subject handlers (`$JS.API.*`)
- **`stream.go`** (8000 lines) — Stream lifecycle, retention policies (Limits, Interest, WorkQueue), subject transforms, mirroring/sourcing
- **`consumer.go`** — Stateful readers. Push vs pull delivery. Ack policies: None, All, Explicit. Redelivery tracking, priority groups.
- **`filestore.go`** (337KB) — Block-based persistent storage with S2 compression, encryption (ChaCha20/AES-GCM), indexing
- **`memstore.go`** — In-memory storage with hash-wheel TTL expiration
- **`raft.go`** — RAFT consensus for clustered JetStream. Meta-cluster for metadata, per-stream/consumer RAFT groups.
### Configuration & Monitoring
- **`opts.go`** — CLI flags + config file loading. CLI overrides config. Supports hot reload on signal.
- **`monitor.go`** — HTTP endpoints: `/varz`, `/connz`, `/routez`, `/gatewayz`, `/jsz`, `/healthz`
- **`conf/`** — Config file parser (custom format with includes)
### Internal Data Structures
- **`server/avl/`** — AVL tree for sparse sequence sets (ack tracking)
- **`server/stree/`** — Subject tree for per-subject state in streams
- **`server/gsl/`** — Generic subject list, optimized trie
- **`server/thw/`** — Time hash wheel for efficient TTL expiration
## Key Porting Considerations
**Concurrency model:** Go uses goroutines (one per connection readLoop + writeLoop). Map to async/await with `Task`-based I/O. Use `Channel<T>` or `Pipe` for producer-consumer patterns where Go uses channels.
**Locking:** Go `sync.RWMutex` maps to `ReaderWriterLockSlim`. Go `sync.Map` maps to `ConcurrentDictionary`. Go `atomic` operations map to `Interlocked` or `volatile`.
**Subject matching:** The `Sublist` trie is performance-critical. Every published message triggers a `Match()` call. Cache invalidation uses atomic generation counters.
**Protocol parsing:** The parser is a byte-by-byte state machine. In .NET, use `System.IO.Pipelines` for zero-copy parsing with `ReadOnlySequence<byte>`.
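A byte-by-byte parser has to tolerate control lines split anywhere across socket reads. The sketch below shows only that stream-splitting aspect in Go (the reference language); the real `parser.go` state machine has many more states for PUB arguments, counted payloads, and headers, and these names are illustrative:

```go
package main

import "fmt"

// parser accumulates bytes until CRLF and emits complete control lines.
type parser struct {
	line []byte // partial line carried across feed() calls
}

// feed consumes a chunk as it arrives off the socket and returns any
// complete control lines (without CRLF) found so far.
func (p *parser) feed(chunk []byte) [][]byte {
	var lines [][]byte
	for _, b := range chunk {
		switch b {
		case '\r':
			// swallow; line completes on '\n'
		case '\n':
			lines = append(lines, p.line)
			p.line = nil
		default:
			p.line = append(p.line, b)
		}
	}
	return lines
}

func main() {
	p := &parser{}
	// Chunks can split a control line anywhere, as on a real TCP stream.
	out := p.feed([]byte("PING\r\nSUB fo"))
	fmt.Println(string(out[0])) // "PING"
	out = p.feed([]byte("o 1\r\n"))
	fmt.Println(string(out[0])) // "SUB foo 1"
}
```

In the .NET port, `ReadOnlySequence<byte>` from `System.IO.Pipelines` replaces this manual carry-over buffer while preserving the same resumable-state idea.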
**Buffer management:** Go uses `[]byte` slices with pooling. Map to `ArrayPool<byte>` and `Memory<T>`/`Span<T>`.
**Compression:** NATS uses S2 (Snappy variant) for route/gateway compression. Use an equivalent .NET S2 library or IronSnappy.
**Ports:** Client=4222, Cluster=6222, Monitoring=8222, Leaf=5222, Gateway=7222.
## Message Flow Summary
```
Client PUB → parser → permission check → Sublist.Match() →
├─ Local subscribers: MSG to each (queue subs: pick one per group)
├─ Cluster routes: RMSG to peers (who deliver to their locals)
├─ Gateways: forward to interested remote clusters
└─ JetStream: if subject matches a stream, store + deliver to consumers
```
## NuGet Package Management
This solution uses **Central Package Management (CPM)** via `Directory.Packages.props` at the repo root. All package versions are defined centrally there.
- In `.csproj` files, use `<PackageReference Include="Foo" />` **without** a `Version` attribute
- To add a new package: add a `<PackageVersion>` entry in `Directory.Packages.props`, then reference it without version in the project's csproj
- To update a version: change it only in `Directory.Packages.props` — all projects pick it up automatically
- Never specify `Version` on `<PackageReference>` in individual csproj files
## Logging
Use **Microsoft.Extensions.Logging** (`ILogger<T>`) for all logging throughout the server. Wire up **Serilog** as the logging provider in the host application.
- Inject `ILogger<T>` via constructor in all components (NatsServer, NatsClient, etc.)
- Use **Serilog.Context.LogContext** to push contextual properties (client ID, remote endpoint, subscription subject) so they appear on all log entries within that scope
- Use structured logging with message templates: `logger.LogInformation("Client {ClientId} subscribed to {Subject}", id, subject)` — never string interpolation
- Log levels: `Trace` for protocol bytes, `Debug` for per-message flow, `Information` for lifecycle events (connect/disconnect), `Warning` for protocol violations, `Error` for unexpected failures
## Testing
- **xUnit 3** for test framework
- **Shouldly** for assertions — use `value.ShouldBe(expected)`, `action.ShouldThrow<T>()`, etc. Do NOT use `Assert.*` from xUnit
- **NSubstitute** for mocking/substitution when needed
- Do **NOT** use FluentAssertions or Moq — these are explicitly excluded
- Test project uses global `using Shouldly;`
## Porting Guidelines
- Use modern .NET 10 / C# 14 best practices (primary constructors, collection expressions, `field` keyword where stable, file-scoped namespaces, raw string literals, etc.)
- Prefer `readonly record struct` for small value types over mutable structs
- Use `required` properties and `init` setters for initialization-only state
- Use pattern matching and switch expressions where they improve clarity
- Prefer `System.Text.Json` source generators for JSON serialization
- Use `ValueTask` where appropriate for hot-path async methods
## Agent Model Guidance
- **Sonnet** (`model: "sonnet"`) — use for simpler implementation tasks: straightforward file modifications, adding packages, converting assertions, boilerplate code
- **Opus** (default) — use for complex tasks, architectural decisions, design work, tricky protocol logic, and code review
- **Parallel subagents** — use where tasks are independent and don't touch the same files (e.g., converting test files in parallel, adding packages while updating docs)
## Documentation
Follow the documentation rules in [`documentation_rules.md`](documentation_rules.md) for all project documentation. Key points:
- Documentation lives in `Documentation/` with component subfolders (Protocol, Subscriptions, Server, Configuration, Operations)
- Use `PascalCase.md` file names, always specify language on code blocks, use real code snippets (not invented examples)
- Update documentation when code changes — see the trigger rules and component map in the rules file
- Technical and direct tone, explain "why" not just "what", present tense
## Conventions
- Reference the Go implementation file and line when porting a subsystem
- Maintain protocol compatibility — the .NET server must interoperate with existing NATS clients and Go servers in a cluster
- Use the same configuration file format as the Go server (parsed by `conf/` package)
- Match the Go server's monitoring JSON response shapes for tooling compatibility

benchmarks.md (new file, 112 lines)

@@ -0,0 +1,112 @@
# NATS Go Server — Reference Benchmark Numbers
Typical throughput and latency figures for the Go NATS server, collected from official documentation and community benchmarks. These serve as performance targets for the .NET port.
## Test Environment
Official NATS docs benchmarks were run on an Apple M4 (10 cores: 4P + 6E, 16 GB RAM). Numbers will vary by hardware — treat these as order-of-magnitude targets, not exact goals.
---
## Core NATS — Pub/Sub Throughput
All figures use the `nats bench` tool with default 16-byte messages unless noted.
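The reference numbers in this file come from runs along these lines. Exact flag names vary across `nats` CLI versions, so treat these as illustrative invocations, not the exact commands used:

```bash
# 1:1 pub/sub, 1M messages, default 16-byte payloads
nats bench bench.subject --pub 1 --sub 1 --msgs 1000000 --size 16

# Fan-out: 1 publisher, 4 subscribers, 128-byte payloads
nats bench bench.subject --pub 1 --sub 4 --msgs 1000000 --size 128
```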
### Single Publisher (no subscribers)
| Messages | Payload | Throughput | Latency |
|----------|---------|------------|---------|
| 1M | 16 B | 14,786,683 msgs/sec (~226 MiB/s) | 0.07 us |
### Publisher + Subscriber (1:1)
| Messages | Payload | Throughput (each side) | Latency |
|----------|---------|------------------------|---------|
| 1M | 16 B | ~4,927,000 msgs/sec (~75 MiB/s) | 0.20 us |
| 100K | 16 KB | ~228,000 msgs/sec (~3.5 GiB/s) | 4.3 us |
### Fan-Out (1 Publisher : N Subscribers)
| Subscribers | Payload | Per-Subscriber Rate | Aggregate | Latency |
|-------------|---------|---------------------|-----------|---------|
| 4 | 128 B | ~1,010,000 msgs/sec | 4,015,923 msgs/sec (~490 MiB/s) | ~1.0 us |
### Multi-Publisher / Multi-Subscriber (N:M)
| Config | Payload | Pub Aggregate | Sub Aggregate | Pub Latency | Sub Latency |
|--------|---------|---------------|---------------|-------------|-------------|
| 4P x 4S | 128 B | 1,080,144 msgs/sec (~132 MiB/s) | 4,323,201 msgs/sec (~528 MiB/s) | 3.7 us | 0.93 us |
---
## Core NATS — Request/Reply Latency
| Config | Payload | Throughput | Avg Latency |
|--------|---------|------------|-------------|
| 1 client, 1 service | 128 B | 19,659 msgs/sec | 50.9 us |
| 50 clients, 2 services | 16 B | 132,438 msgs/sec | ~370 us |
---
## Core NATS — Tail Latency Under Load
From the Brave New Geek latency benchmarks (loopback, request/reply):
| Payload | Rate | p99.99 | p99.9999 | Notes |
|---------|------|--------|----------|-------|
| 256 B | 3,000 req/s | sub-ms | sub-ms | Minimal load |
| 1 KB | 3,000 req/s | sub-ms | ~1.2 ms | |
| 5 KB | 2,000 req/s | sub-ms | ~1.2 ms | |
| 1 KB | 20,000 req/s (25 conns) | elevated | ~90 ms | Concurrent load |
| 1 MB | 100 req/s | ~214 ms | — | Large payload tail |
A protocol parser optimization improved 5 KB latencies by ~30% and 1 MB latencies by ~90% up to p90.
---
## JetStream — Publication
| Mode | Payload | Storage | Throughput | Latency |
|------|---------|---------|------------|---------|
| Synchronous | 16 B | Memory | 35,734 msgs/sec (~558 KiB/s) | 28.0 us |
| Batch (1000 msgs) | 16 B | Memory | 627,430 msgs/sec (~9.6 MiB/s) | 1.6 us |
| Async | 128 B | File | 403,828 msgs/sec (~49 MiB/s) | 2.5 us |
---
## JetStream — Consumption
| Mode | Clients | Throughput | Latency |
|------|---------|------------|---------|
| Ordered ephemeral consumer | 1 | 1,201,540 msgs/sec (~147 MiB/s) | 0.83 us |
| Durable consumer (callback) | 4 | 290,438 msgs/sec (~36 MiB/s) | 13.7 us |
| Durable consumer fetch (no ack) | 2 | 1,128,932 msgs/sec (~138 MiB/s) | 1.76 us |
| Direct sync get | 1 | 33,244 msgs/sec (~4.1 MiB/s) | 30.1 us |
| Batched get | 2 | 1,000,898 msgs/sec (~122 MiB/s) | — |
---
## JetStream — Key-Value Store
| Operation | Clients | Payload | Throughput | Latency |
|-----------|---------|---------|------------|---------|
| Sync put | 1 | 128 B | 30,067 msgs/sec (~3.7 MiB/s) | 33.3 us |
| Get (randomized keys) | 16 | 128 B | 102,844 msgs/sec (~13 MiB/s) | ~153 us |
---
## Resource Usage
| Scenario | RSS Memory |
|----------|------------|
| Core NATS at 2M msgs/sec (1 pub + 1 sub) | ~11 MB |
| JetStream production (recommended minimum) | 4 CPU cores, 8 GiB RAM |
---
## Sources
- [NATS Bench — Official Docs](https://docs.nats.io/using-nats/nats-tools/nats_cli/natsbench)
- [NATS Latency Test Framework](https://github.com/nats-io/latency-tests)
- [Benchmarking Message Queue Latency — Brave New Geek](https://bravenewgeek.com/benchmarking-message-queue-latency/)
- [NATS CLI Benchmark Blog](https://nats.io/blog/cli-benchmark/)

benchmarks_comparison.md (new file, 97 lines)

@@ -0,0 +1,97 @@
# Go vs .NET NATS Server — Benchmark Comparison
Benchmark run: 2026-03-13. Both servers running on the same machine, tested with identical NATS.Client.Core workloads. Test parallelization disabled to avoid resource contention.
**Environment:** Apple M4, .NET 10, Go nats-server (latest from `golang/nats-server/`).
---
## Core NATS — Pub/Sub Throughput
### Single Publisher (no subscribers)
| Payload | Go msg/s | Go MB/s | .NET msg/s | .NET MB/s | Ratio (.NET/Go) |
|---------|----------|---------|------------|-----------|-----------------|
| 16 B | 2,436,416 | 37.2 | 1,425,767 | 21.8 | 0.59x |
| 128 B | 2,143,434 | 261.6 | 1,654,692 | 202.0 | 0.77x |
### Publisher + Subscriber (1:1)
| Payload | Go msg/s | Go MB/s | .NET msg/s | .NET MB/s | Ratio (.NET/Go) |
|---------|----------|---------|------------|-----------|-----------------|
| 16 B | 1,140,225 | 17.4 | 207,654 | 3.2 | 0.18x |
| 16 KB | 41,762 | 652.5 | 34,429 | 538.0 | 0.82x |
### Fan-Out (1 Publisher : 4 Subscribers)
| Payload | Go msg/s | Go MB/s | .NET msg/s | .NET MB/s | Ratio (.NET/Go) |
|---------|----------|---------|------------|-----------|-----------------|
| 128 B | 3,192,313 | 389.7 | 581,284 | 71.0 | 0.18x |
### Multi-Publisher / Multi-Subscriber (4P x 4S)
| Payload | Go msg/s | Go MB/s | .NET msg/s | .NET MB/s | Ratio (.NET/Go) |
|---------|----------|---------|------------|-----------|-----------------|
| 128 B | 269,445 | 32.9 | 529,808 | 64.7 | 1.97x |
---
## Core NATS — Request/Reply Latency
### Single Client, Single Service
| Payload | Go msg/s | .NET msg/s | Ratio | Go P50 (us) | .NET P50 (us) | Go P99 (us) | .NET P99 (us) |
|---------|----------|------------|-------|-------------|---------------|-------------|---------------|
| 128 B | 9,347 | 7,215 | 0.77x | 104.5 | 134.7 | 146.2 | 190.5 |
### 10 Clients, 2 Services (Queue Group)
| Payload | Go msg/s | .NET msg/s | Ratio | Go P50 (us) | .NET P50 (us) | Go P99 (us) | .NET P99 (us) |
|---------|----------|------------|-------|-------------|---------------|-------------|---------------|
| 16 B | 30,893 | 25,861 | 0.84x | 315.0 | 370.2 | 451.1 | 595.0 |
---
## JetStream — Publication
| Mode | Payload | Storage | Go msg/s | .NET msg/s | Ratio (.NET/Go) |
|------|---------|---------|----------|------------|-----------------|
| Synchronous | 16 B | Memory | 16,783 | 13,815 | 0.82x |
| Async (batch) | 128 B | File | 187,067 | 115 | 0.00x |
> **Note:** Async file store publish is extremely slow on the .NET server — likely a JetStream file store implementation bottleneck rather than a client issue.
---
## JetStream — Consumption
| Mode | Go msg/s | .NET msg/s | Ratio (.NET/Go) |
|------|----------|------------|-----------------|
| Ordered ephemeral consumer | 109,519 | N/A | N/A |
| Durable consumer fetch | 639,247 | 80,792 | 0.13x |
> **Note:** Ordered ephemeral consumer is not yet fully supported on the .NET server (API timeout during consumer creation).
---
## Summary
| Category | Ratio Range | Assessment |
|----------|-------------|------------|
| Pub-only throughput | 0.59x–0.77x | Good — within 2x |
| Pub/sub (large payload) | 0.82x | Good |
| Pub/sub (small payload) | 0.18x | Needs optimization |
| Fan-out | 0.18x | Needs optimization |
| Multi pub/sub | 1.97x | .NET faster (likely measurement artifact at low counts) |
| Request/reply latency | 0.77x–0.84x | Good |
| JetStream sync publish | 0.82x | Good |
| JetStream async file publish | ~0x | Broken — file store bottleneck |
| JetStream durable fetch | 0.13x | Needs optimization |
### Key Observations
1. **Pub-only and request/reply are within striking distance** (0.6x–0.85x), suggesting the core message path is reasonably well ported.
2. **Small-payload pub/sub and fan-out are 5x slower** (0.18x ratio). The bottleneck is likely in the subscription dispatch / message delivery hot path — the `SubList.Match()` → `MSG` write loop.
3. **JetStream file store is essentially non-functional** for async batch publishing. The sync memory store path works at 0.82x parity, so the issue is specific to file I/O or ack handling.
4. **JetStream consumption** (durable fetch) is 8x slower than Go. Ordered consumers don't work yet.
5. The multi-pub/sub result showing .NET faster is likely a measurement artifact from the small message count (2,000 per publisher) — not representative at scale.


@@ -0,0 +1 @@
[assembly: CollectionBehavior(DisableTestParallelization = true)]


@@ -0,0 +1,99 @@
using NATS.Client.Core;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.CorePubSub;
[Collection("Benchmark-Core")]
public class FanOutTests(CoreServerPairFixture fixture, ITestOutputHelper output)
{
[Fact]
[Trait("Category", "Benchmark")]
public async Task FanOut1To4_128B()
{
const int payloadSize = 128;
const int messageCount = 10_000;
const int subscriberCount = 4;
var dotnetResult = await RunFanOut("Fan-Out 1:4 (128B)", "DotNet", payloadSize, messageCount, subscriberCount, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunFanOut("Fan-Out 1:4 (128B)", "Go", payloadSize, messageCount, subscriberCount, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private static async Task<BenchmarkResult> RunFanOut(string name, string serverType, int payloadSize, int messageCount, int subscriberCount, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
var subject = $"bench.fanout.{serverType.ToLowerInvariant()}.{Guid.NewGuid():N}";
await using var pubClient = createClient();
await pubClient.ConnectAsync();
var subClients = new NatsConnection[subscriberCount];
var subs = new INatsSub<byte[]>[subscriberCount];
var subTasks = new Task[subscriberCount];
var totalExpected = messageCount * subscriberCount;
var totalReceived = 0;
var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
for (var i = 0; i < subscriberCount; i++)
{
subClients[i] = createClient();
await subClients[i].ConnectAsync();
subs[i] = await subClients[i].SubscribeCoreAsync<byte[]>(subject);
}
// Flush to ensure all subscriptions are propagated
foreach (var client in subClients)
await client.PingAsync();
await pubClient.PingAsync();
// Start reading after subscriptions are confirmed
for (var i = 0; i < subscriberCount; i++)
{
var sub = subs[i];
subTasks[i] = Task.Run(async () =>
{
await foreach (var _ in sub.Msgs.ReadAllAsync())
{
if (Interlocked.Increment(ref totalReceived) >= totalExpected)
{
tcs.TrySetResult();
return;
}
}
});
}
var sw = System.Diagnostics.Stopwatch.StartNew();
for (var i = 0; i < messageCount; i++)
await pubClient.PublishAsync(subject, payload);
await pubClient.PingAsync();
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
await tcs.Task.WaitAsync(cts.Token);
sw.Stop();
foreach (var sub in subs)
await sub.UnsubscribeAsync();
foreach (var client in subClients)
await client.DisposeAsync();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = totalExpected,
TotalBytes = (long)totalExpected * payloadSize,
Duration = sw.Elapsed,
};
}
}


@@ -0,0 +1,124 @@
using NATS.Client.Core;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.CorePubSub;
[Collection("Benchmark-Core")]
public class MultiPubSubTests(CoreServerPairFixture fixture, ITestOutputHelper output)
{
[Fact]
[Trait("Category", "Benchmark")]
public async Task MultiPubSub4x4_128B()
{
const int payloadSize = 128;
const int messagesPerPublisher = 2_000;
const int pubCount = 4;
const int subCount = 4;
var dotnetResult = await RunMultiPubSub("Multi 4Px4S (128B)", "DotNet", payloadSize, messagesPerPublisher, pubCount, subCount, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunMultiPubSub("Multi 4Px4S (128B)", "Go", payloadSize, messagesPerPublisher, pubCount, subCount, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private static async Task<BenchmarkResult> RunMultiPubSub(
string name, string serverType, int payloadSize, int messagesPerPublisher,
int pubCount, int subCount, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
var totalMessages = messagesPerPublisher * pubCount;
var runId = Guid.NewGuid().ToString("N")[..8];
var subjects = Enumerable.Range(0, pubCount).Select(i => $"bench.multi.{serverType.ToLowerInvariant()}.{runId}.{i}").ToArray();
// Create subscribers — one per subject
var subClients = new NatsConnection[subCount];
var subs = new INatsSub<byte[]>[subCount];
var subTasks = new Task[subCount];
var totalReceived = 0;
var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
for (var i = 0; i < subCount; i++)
{
subClients[i] = createClient();
await subClients[i].ConnectAsync();
subs[i] = await subClients[i].SubscribeCoreAsync<byte[]>(subjects[i % subjects.Length]);
}
// Flush to ensure all subscriptions are propagated
foreach (var client in subClients)
await client.PingAsync();
// Start reading
for (var i = 0; i < subCount; i++)
{
var sub = subs[i];
subTasks[i] = Task.Run(async () =>
{
await foreach (var _ in sub.Msgs.ReadAllAsync())
{
if (Interlocked.Increment(ref totalReceived) >= totalMessages)
{
tcs.TrySetResult();
return;
}
}
});
}
// Create publishers
var pubClients = new NatsConnection[pubCount];
for (var i = 0; i < pubCount; i++)
{
pubClients[i] = createClient();
await pubClients[i].ConnectAsync();
}
var sw = System.Diagnostics.Stopwatch.StartNew();
var pubTasks = new Task[pubCount];
for (var p = 0; p < pubCount; p++)
{
var client = pubClients[p];
var subject = subjects[p];
pubTasks[p] = Task.Run(async () =>
{
for (var i = 0; i < messagesPerPublisher; i++)
await client.PublishAsync(subject, payload);
});
}
await Task.WhenAll(pubTasks);
// Flush all publishers
foreach (var client in pubClients)
await client.PingAsync();
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
await tcs.Task.WaitAsync(cts.Token);
sw.Stop();
foreach (var sub in subs)
await sub.UnsubscribeAsync();
foreach (var client in subClients)
await client.DisposeAsync();
foreach (var client in pubClients)
await client.DisposeAsync();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = totalMessages,
TotalBytes = (long)totalMessages * payloadSize,
Duration = sw.Elapsed,
};
}
}


@@ -0,0 +1,105 @@
using NATS.Client.Core;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.CorePubSub;
[Collection("Benchmark-Core")]
public class PubSubOneToOneTests(CoreServerPairFixture fixture, ITestOutputHelper output)
{
[Fact]
[Trait("Category", "Benchmark")]
public async Task PubSub1To1_16B()
{
const int payloadSize = 16;
const int messageCount = 10_000;
var dotnetResult = await RunPubSub("PubSub 1:1 (16B)", "DotNet", payloadSize, messageCount, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunPubSub("PubSub 1:1 (16B)", "Go", payloadSize, messageCount, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
[Fact]
[Trait("Category", "Benchmark")]
public async Task PubSub1To1_16KB()
{
const int payloadSize = 16 * 1024;
const int messageCount = 1_000;
var dotnetResult = await RunPubSub("PubSub 1:1 (16KB)", "DotNet", payloadSize, messageCount, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunPubSub("PubSub 1:1 (16KB)", "Go", payloadSize, messageCount, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private static async Task<BenchmarkResult> RunPubSub(string name, string serverType, int payloadSize, int messageCount, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
var subject = $"bench.pubsub.{serverType.ToLowerInvariant()}.{Guid.NewGuid():N}";
await using var pubClient = createClient();
await using var subClient = createClient();
await pubClient.ConnectAsync();
await subClient.ConnectAsync();
var received = 0;
var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
// Start subscriber
var sub = await subClient.SubscribeCoreAsync<byte[]>(subject);
// Flush to ensure subscription is propagated
await subClient.PingAsync();
await pubClient.PingAsync();
var subTask = Task.Run(async () =>
{
await foreach (var msg in sub.Msgs.ReadAllAsync())
{
if (Interlocked.Increment(ref received) >= messageCount)
{
tcs.TrySetResult();
return;
}
}
});
// Publish and measure
var sw = System.Diagnostics.Stopwatch.StartNew();
for (var i = 0; i < messageCount; i++)
await pubClient.PublishAsync(subject, payload);
await pubClient.PingAsync(); // Flush all pending writes
// Wait for all messages received
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(60));
await tcs.Task.WaitAsync(cts.Token);
sw.Stop();
await sub.UnsubscribeAsync();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = messageCount,
TotalBytes = (long)messageCount * payloadSize,
Duration = sw.Elapsed,
};
}
}


@@ -0,0 +1,63 @@
using NATS.Client.Core;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.CorePubSub;
[Collection("Benchmark-Core")]
public class SinglePublisherThroughputTests(CoreServerPairFixture fixture, ITestOutputHelper output)
{
private readonly BenchmarkRunner _runner = new() { WarmupCount = 1_000, MeasurementCount = 100_000 };
[Fact]
[Trait("Category", "Benchmark")]
public async Task PubNoSub_16B()
{
const int payloadSize = 16;
var payload = new byte[payloadSize];
const string subject = "bench.pub.nosub.16";
var dotnetResult = await RunThroughput("Single Publisher (16B)", "DotNet", subject, payload, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunThroughput("Single Publisher (16B)", "Go", subject, payload, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
[Fact]
[Trait("Category", "Benchmark")]
public async Task PubNoSub_128B()
{
const int payloadSize = 128;
var payload = new byte[payloadSize];
const string subject = "bench.pub.nosub.128";
var dotnetResult = await RunThroughput("Single Publisher (128B)", "DotNet", subject, payload, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunThroughput("Single Publisher (128B)", "Go", subject, payload, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private async Task<BenchmarkResult> RunThroughput(string name, string serverType, string subject, byte[] payload, Func<NatsConnection> createClient)
{
await using var client = createClient();
await client.ConnectAsync();
return await _runner.MeasureThroughputAsync(name, serverType, payload.Length,
async _ => await client.PublishAsync(subject, payload));
}
}


@@ -0,0 +1,27 @@
namespace NATS.Server.Benchmark.Tests.Harness;
/// <summary>
/// Captures the results of a single benchmark run against one server.
/// </summary>
public sealed record BenchmarkResult
{
public required string Name { get; init; }
public required string ServerType { get; init; }
public required long TotalMessages { get; init; }
public required long TotalBytes { get; init; }
public required TimeSpan Duration { get; init; }
/// <summary>Latency percentiles in microseconds, if measured.</summary>
public LatencyPercentiles? Latencies { get; init; }
public double MessagesPerSecond => TotalMessages / Duration.TotalSeconds;
public double BytesPerSecond => TotalBytes / Duration.TotalSeconds;
public double MegabytesPerSecond => BytesPerSecond / (1024.0 * 1024.0);
}
public sealed record LatencyPercentiles(
double P50Us,
double P95Us,
double P99Us,
double MinUs,
double MaxUs);


@@ -0,0 +1,55 @@
using System.Globalization;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.Harness;
/// <summary>
/// Writes side-by-side benchmark comparison output to xUnit's ITestOutputHelper.
/// </summary>
public static class BenchmarkResultWriter
{
public static void WriteComparison(ITestOutputHelper output, BenchmarkResult goResult, BenchmarkResult dotnetResult)
{
var ratio = dotnetResult.MessagesPerSecond / goResult.MessagesPerSecond;
output.WriteLine($"=== {goResult.Name} ===");
output.WriteLine($"Go: {FormatRate(goResult.MessagesPerSecond)} msg/s | {goResult.MegabytesPerSecond:F1} MB/s | {goResult.Duration.TotalMilliseconds:F0} ms");
output.WriteLine($".NET: {FormatRate(dotnetResult.MessagesPerSecond)} msg/s | {dotnetResult.MegabytesPerSecond:F1} MB/s | {dotnetResult.Duration.TotalMilliseconds:F0} ms");
output.WriteLine($"Ratio: {ratio:F2}x (.NET / Go)");
if (goResult.Latencies is not null && dotnetResult.Latencies is not null)
{
output.WriteLine("");
output.WriteLine("Latency (us):");
output.WriteLine($" {"",8} {"P50",10} {"P95",10} {"P99",10} {"Min",10} {"Max",10}");
WriteLatencyRow(output, "Go", goResult.Latencies);
WriteLatencyRow(output, ".NET", dotnetResult.Latencies);
}
output.WriteLine("");
}
public static void WriteSingle(ITestOutputHelper output, BenchmarkResult result)
{
output.WriteLine($"=== {result.Name} ({result.ServerType}) ===");
output.WriteLine($"{FormatRate(result.MessagesPerSecond)} msg/s | {result.MegabytesPerSecond:F1} MB/s | {result.Duration.TotalMilliseconds:F0} ms");
if (result.Latencies is not null)
{
output.WriteLine("");
output.WriteLine("Latency (us):");
output.WriteLine($" {"",8} {"P50",10} {"P95",10} {"P99",10} {"Min",10} {"Max",10}");
WriteLatencyRow(output, result.ServerType, result.Latencies);
}
output.WriteLine("");
}
private static void WriteLatencyRow(ITestOutputHelper output, string label, LatencyPercentiles p)
{
output.WriteLine($" {label,8} {p.P50Us,10:F1} {p.P95Us,10:F1} {p.P99Us,10:F1} {p.MinUs,10:F1} {p.MaxUs,10:F1}");
}
private static string FormatRate(double rate)
=> rate.ToString("N0", CultureInfo.InvariantCulture).PadLeft(15);
}


@@ -0,0 +1,80 @@
using System.Diagnostics;
namespace NATS.Server.Benchmark.Tests.Harness;
/// <summary>
/// Lightweight benchmark runner with warmup + timed measurement.
/// </summary>
public sealed class BenchmarkRunner
{
public int WarmupCount { get; init; } = 1_000;
public int MeasurementCount { get; init; } = 100_000;
/// <summary>
/// Measures throughput for a fire-and-forget style workload (pub-only or pub+sub).
/// The <paramref name="action"/> is called <see cref="MeasurementCount"/> times.
/// </summary>
public async Task<BenchmarkResult> MeasureThroughputAsync(
string name,
string serverType,
int payloadSize,
Func<int, Task> action)
{
// Warmup
for (var i = 0; i < WarmupCount; i++)
await action(i);
// Measurement
var sw = Stopwatch.StartNew();
for (var i = 0; i < MeasurementCount; i++)
await action(i);
sw.Stop();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = MeasurementCount,
TotalBytes = (long)MeasurementCount * payloadSize,
Duration = sw.Elapsed,
};
}
/// <summary>
/// Measures latency for a request-reply style workload.
/// Records per-iteration round-trip time and computes percentiles.
/// </summary>
public async Task<BenchmarkResult> MeasureLatencyAsync(
string name,
string serverType,
int payloadSize,
Func<int, Task> roundTripAction)
{
// Warmup
for (var i = 0; i < WarmupCount; i++)
await roundTripAction(i);
// Measurement with per-iteration timing
var tracker = new LatencyTracker(MeasurementCount);
var overallSw = Stopwatch.StartNew();
for (var i = 0; i < MeasurementCount; i++)
{
var start = Stopwatch.GetTimestamp();
await roundTripAction(i);
tracker.Record(Stopwatch.GetTimestamp() - start);
}
overallSw.Stop();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = MeasurementCount,
TotalBytes = (long)MeasurementCount * payloadSize,
Duration = overallSw.Elapsed,
Latencies = tracker.ComputePercentiles(),
};
}
}


@@ -0,0 +1,51 @@
using System.Diagnostics;
namespace NATS.Server.Benchmark.Tests.Harness;
/// <summary>
/// Pre-allocated latency recording buffer with percentile computation.
/// Records elapsed ticks from Stopwatch for each sample.
/// </summary>
public sealed class LatencyTracker
{
private readonly long[] _samples;
private int _count;
private bool _sorted;
public LatencyTracker(int capacity)
{
_samples = new long[capacity];
}
public void Record(long elapsedTicks)
{
if (_count < _samples.Length)
{
_samples[_count++] = elapsedTicks;
_sorted = false;
}
}
public LatencyPercentiles ComputePercentiles()
{
if (_count == 0)
return new LatencyPercentiles(0, 0, 0, 0, 0);
if (!_sorted)
{
Array.Sort(_samples, 0, _count);
_sorted = true;
}
var ticksPerUs = Stopwatch.Frequency / 1_000_000.0;
return new LatencyPercentiles(
P50Us: _samples[Percentile(50)] / ticksPerUs,
P95Us: _samples[Percentile(95)] / ticksPerUs,
P99Us: _samples[Percentile(99)] / ticksPerUs,
MinUs: _samples[0] / ticksPerUs,
MaxUs: _samples[_count - 1] / ticksPerUs);
}
// Nearest-rank percentile index: ceil(p/100 * n) - 1, clamped into [0, n-1].
private int Percentile(int p) => Math.Clamp((int)Math.Ceiling(_count * (p / 100.0)) - 1, 0, _count - 1);
}


@@ -0,0 +1,7 @@
namespace NATS.Server.Benchmark.Tests.Infrastructure;
[CollectionDefinition("Benchmark-Core")]
public class BenchmarkCoreCollection : ICollectionFixture<CoreServerPairFixture>;
[CollectionDefinition("Benchmark-JetStream")]
public class BenchmarkJetStreamCollection : ICollectionFixture<JetStreamServerPairFixture>;


@@ -0,0 +1,47 @@
using NATS.Client.Core;
namespace NATS.Server.Benchmark.Tests.Infrastructure;
/// <summary>
/// Starts both a Go and .NET NATS server for core pub/sub benchmarks.
/// Shared across all tests in the "Benchmark-Core" collection.
/// </summary>
public sealed class CoreServerPairFixture : IAsyncLifetime
{
private GoServerProcess? _goServer;
private DotNetServerProcess? _dotNetServer;
public int GoPort => _goServer?.Port ?? throw new InvalidOperationException("Go server not started");
public int DotNetPort => _dotNetServer?.Port ?? throw new InvalidOperationException(".NET server not started");
public bool GoAvailable => _goServer is not null;
public async Task InitializeAsync()
{
_dotNetServer = new DotNetServerProcess();
var dotNetTask = _dotNetServer.StartAsync();
if (GoServerProcess.IsAvailable())
{
_goServer = new GoServerProcess();
await Task.WhenAll(dotNetTask, _goServer.StartAsync());
}
else
{
await dotNetTask;
}
}
public async Task DisposeAsync()
{
if (_goServer is not null)
await _goServer.DisposeAsync();
if (_dotNetServer is not null)
await _dotNetServer.DisposeAsync();
}
public NatsConnection CreateGoClient()
=> new(new NatsOpts { Url = $"nats://127.0.0.1:{GoPort}" });
public NatsConnection CreateDotNetClient()
=> new(new NatsOpts { Url = $"nats://127.0.0.1:{DotNetPort}" });
}


@@ -0,0 +1,173 @@
using System.Diagnostics;
using System.Net;
using System.Net.Sockets;
using System.Text;
namespace NATS.Server.Benchmark.Tests.Infrastructure;
/// <summary>
/// Manages a NATS.Server.Host child process for benchmark testing.
/// </summary>
public sealed class DotNetServerProcess : IAsyncDisposable
{
private Process? _process;
private readonly StringBuilder _output = new();
private readonly object _outputLock = new();
private readonly string? _configContent;
private string? _configFilePath;
public int Port { get; }
public string Output
{
get
{
lock (_outputLock)
return _output.ToString();
}
}
public DotNetServerProcess(string? configContent = null)
{
Port = PortAllocator.AllocateFreePort();
_configContent = configContent;
}
public async Task StartAsync()
{
var hostDll = ResolveHostDll();
if (_configContent is not null)
{
_configFilePath = Path.Combine(Path.GetTempPath(), $"nats-bench-dotnet-{Guid.NewGuid():N}.conf");
await File.WriteAllTextAsync(_configFilePath, _configContent);
}
var args = new StringBuilder($"exec \"{hostDll}\" -p {Port}");
if (_configFilePath is not null)
args.Append($" -c \"{_configFilePath}\"");
var psi = new ProcessStartInfo
{
FileName = "dotnet",
Arguments = args.ToString(),
UseShellExecute = false,
RedirectStandardOutput = true,
RedirectStandardError = true,
CreateNoWindow = true,
};
_process = new Process { StartInfo = psi, EnableRaisingEvents = true };
_process.OutputDataReceived += (_, e) =>
{
if (e.Data is not null)
lock (_outputLock) _output.AppendLine(e.Data);
};
_process.ErrorDataReceived += (_, e) =>
{
if (e.Data is not null)
lock (_outputLock) _output.AppendLine(e.Data);
};
_process.Start();
_process.BeginOutputReadLine();
_process.BeginErrorReadLine();
await WaitForTcpReadyAsync();
}
public async ValueTask DisposeAsync()
{
if (_process is not null)
{
if (!_process.HasExited)
{
_process.Kill(entireProcessTree: true);
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
try
{
await _process.WaitForExitAsync(cts.Token);
}
catch (OperationCanceledException ex) when (!_process.HasExited)
{
throw new InvalidOperationException(
"NATS .NET server process did not exit within 5s after kill.", ex);
}
}
_process.Dispose();
_process = null;
}
if (_configFilePath is not null && File.Exists(_configFilePath))
{
File.Delete(_configFilePath);
_configFilePath = null;
}
}
private async Task WaitForTcpReadyAsync()
{
using var timeout = new CancellationTokenSource(TimeSpan.FromSeconds(10));
using var timer = new PeriodicTimer(TimeSpan.FromMilliseconds(100));
SocketException? lastError = null;
try
{
while (await timer.WaitForNextTickAsync(timeout.Token).ConfigureAwait(false))
{
try
{
using var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
await socket.ConnectAsync(new IPEndPoint(IPAddress.Loopback, Port), timeout.Token);
return;
}
catch (SocketException ex)
{
lastError = ex;
}
}
}
catch (OperationCanceledException)
{
// WaitForNextTickAsync/ConnectAsync throw on cancellation rather than returning false,
// so the timeout must be caught here to surface the diagnostic below.
}
throw new TimeoutException(
$"NATS .NET server did not become ready on port {Port} within 10s. Last error: {lastError?.Message}\n\nServer output:\n{Output}");
}
private static string ResolveHostDll()
{
var dir = new DirectoryInfo(AppContext.BaseDirectory);
while (dir is not null)
{
if (File.Exists(Path.Combine(dir.FullName, "NatsDotNet.slnx")))
{
var dll = Path.Combine(dir.FullName, "src", "NATS.Server.Host", "bin", "Debug", "net10.0", "NATS.Server.Host.dll");
if (File.Exists(dll))
return dll;
var build = Process.Start(new ProcessStartInfo
{
FileName = "dotnet",
Arguments = "build src/NATS.Server.Host/NATS.Server.Host.csproj -c Debug",
WorkingDirectory = dir.FullName,
UseShellExecute = false,
RedirectStandardOutput = true,
RedirectStandardError = true,
});
// Drain both pipes before waiting; an undrained full pipe buffer can deadlock the build.
var stdout = build!.StandardOutput.ReadToEndAsync();
var stderr = build.StandardError.ReadToEnd();
build.WaitForExit();
if (build.ExitCode != 0)
throw new InvalidOperationException(
$"Failed to build NATS.Server.Host:\n{stdout.Result}\n{stderr}");
if (File.Exists(dll))
return dll;
throw new FileNotFoundException($"Built NATS.Server.Host but DLL not found at: {dll}");
}
dir = dir.Parent;
}
throw new FileNotFoundException(
"Could not find solution root (NatsDotNet.slnx) walking up from " + AppContext.BaseDirectory);
}
}


@@ -0,0 +1,217 @@
using System.Diagnostics;
using System.Net;
using System.Net.Sockets;
using System.Text;
namespace NATS.Server.Benchmark.Tests.Infrastructure;
/// <summary>
/// Manages a Go nats-server child process for benchmark testing.
/// Builds the Go binary if not present and skips gracefully if Go is not installed.
/// </summary>
public sealed class GoServerProcess : IAsyncDisposable
{
private Process? _process;
private readonly StringBuilder _output = new();
private readonly object _outputLock = new();
private readonly string? _configContent;
private string? _configFilePath;
public int Port { get; }
public string Output
{
get
{
lock (_outputLock)
return _output.ToString();
}
}
public GoServerProcess(string? configContent = null)
{
Port = PortAllocator.AllocateFreePort();
_configContent = configContent;
}
/// <summary>
/// Returns true if Go is installed and the Go NATS server source exists.
/// </summary>
public static bool IsAvailable()
{
try
{
var goVersion = Process.Start(new ProcessStartInfo
{
FileName = "go",
Arguments = "version",
UseShellExecute = false,
RedirectStandardOutput = true,
RedirectStandardError = true,
CreateNoWindow = true,
});
goVersion!.WaitForExit();
if (goVersion.ExitCode != 0)
return false;
var solutionRoot = FindSolutionRoot();
return solutionRoot is not null &&
Directory.Exists(Path.Combine(solutionRoot, "golang", "nats-server"));
}
catch
{
return false;
}
}
public async Task StartAsync()
{
var binary = ResolveGoBinary();
if (_configContent is not null)
{
_configFilePath = Path.Combine(Path.GetTempPath(), $"nats-bench-go-{Guid.NewGuid():N}.conf");
await File.WriteAllTextAsync(_configFilePath, _configContent);
}
var args = new StringBuilder($"-p {Port}");
if (_configFilePath is not null)
args.Append($" -c \"{_configFilePath}\"");
var psi = new ProcessStartInfo
{
FileName = binary,
Arguments = args.ToString(),
UseShellExecute = false,
RedirectStandardOutput = true,
RedirectStandardError = true,
CreateNoWindow = true,
};
_process = new Process { StartInfo = psi, EnableRaisingEvents = true };
_process.OutputDataReceived += (_, e) =>
{
if (e.Data is not null)
lock (_outputLock) _output.AppendLine(e.Data);
};
_process.ErrorDataReceived += (_, e) =>
{
if (e.Data is not null)
lock (_outputLock) _output.AppendLine(e.Data);
};
_process.Start();
_process.BeginOutputReadLine();
_process.BeginErrorReadLine();
await WaitForTcpReadyAsync();
}
public async ValueTask DisposeAsync()
{
if (_process is not null)
{
if (!_process.HasExited)
{
_process.Kill(entireProcessTree: true);
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
try
{
await _process.WaitForExitAsync(cts.Token);
}
catch (OperationCanceledException ex) when (!_process.HasExited)
{
throw new InvalidOperationException(
"Go nats-server process did not exit within 5s after kill.", ex);
}
}
_process.Dispose();
_process = null;
}
if (_configFilePath is not null && File.Exists(_configFilePath))
{
File.Delete(_configFilePath);
_configFilePath = null;
}
}
private async Task WaitForTcpReadyAsync()
{
using var timeout = new CancellationTokenSource(TimeSpan.FromSeconds(15));
using var timer = new PeriodicTimer(TimeSpan.FromMilliseconds(100));
SocketException? lastError = null;
try
{
while (await timer.WaitForNextTickAsync(timeout.Token).ConfigureAwait(false))
{
try
{
using var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
await socket.ConnectAsync(new IPEndPoint(IPAddress.Loopback, Port), timeout.Token);
return;
}
catch (SocketException ex)
{
lastError = ex;
}
}
}
catch (OperationCanceledException)
{
// WaitForNextTickAsync/ConnectAsync throw on cancellation rather than returning false,
// so the timeout must be caught here to surface the diagnostic below.
}
throw new TimeoutException(
$"Go nats-server did not become ready on port {Port} within 15s. Last error: {lastError?.Message}\n\nServer output:\n{Output}");
}
private static string ResolveGoBinary()
{
var solutionRoot = FindSolutionRoot()
?? throw new FileNotFoundException(
"Could not find solution root (NatsDotNet.slnx) walking up from " + AppContext.BaseDirectory);
var goServerDir = Path.Combine(solutionRoot, "golang", "nats-server");
if (!Directory.Exists(goServerDir))
throw new DirectoryNotFoundException($"Go nats-server source not found at: {goServerDir}");
var binaryName = OperatingSystem.IsWindows() ? "nats-server.exe" : "nats-server";
var binaryPath = Path.Combine(goServerDir, binaryName);
if (File.Exists(binaryPath))
return binaryPath;
// Build the Go binary
var build = Process.Start(new ProcessStartInfo
{
FileName = "go",
Arguments = "build -o " + binaryName,
WorkingDirectory = goServerDir,
UseShellExecute = false,
RedirectStandardOutput = true,
RedirectStandardError = true,
CreateNoWindow = true,
});
// Drain both pipes before waiting; an undrained full pipe buffer can deadlock the build.
var stdout = build!.StandardOutput.ReadToEndAsync();
var stderr = build.StandardError.ReadToEnd();
build.WaitForExit();
if (build.ExitCode != 0)
throw new InvalidOperationException(
$"Failed to build Go nats-server:\n{stderr}\n{stdout.Result}");
if (File.Exists(binaryPath))
return binaryPath;
throw new FileNotFoundException($"Built Go nats-server but binary not found at: {binaryPath}");
}
internal static string? FindSolutionRoot()
{
var dir = new DirectoryInfo(AppContext.BaseDirectory);
while (dir is not null)
{
if (File.Exists(Path.Combine(dir.FullName, "NatsDotNet.slnx")))
return dir.FullName;
dir = dir.Parent;
}
return null;
}
}


@@ -0,0 +1,83 @@
using NATS.Client.Core;
namespace NATS.Server.Benchmark.Tests.Infrastructure;
/// <summary>
/// Starts both a Go and .NET NATS server with JetStream enabled for benchmark testing.
/// Shared across all tests in the "Benchmark-JetStream" collection.
/// </summary>
public sealed class JetStreamServerPairFixture : IAsyncLifetime
{
private GoServerProcess? _goServer;
private DotNetServerProcess? _dotNetServer;
private string? _goStoreDir;
private string? _dotNetStoreDir;
public int GoPort => _goServer?.Port ?? throw new InvalidOperationException("Go server not started");
public int DotNetPort => _dotNetServer?.Port ?? throw new InvalidOperationException(".NET server not started");
public bool GoAvailable => _goServer is not null;
public async Task InitializeAsync()
{
_dotNetStoreDir = Path.Combine(Path.GetTempPath(), "nats-bench-dotnet-js-" + Guid.NewGuid().ToString("N")[..8]);
Directory.CreateDirectory(_dotNetStoreDir);
var dotNetConfig = $$"""
jetstream {
store_dir: "{{_dotNetStoreDir}}"
max_mem_store: 256mb
max_file_store: 1gb
}
""";
_dotNetServer = new DotNetServerProcess(dotNetConfig);
var dotNetTask = _dotNetServer.StartAsync();
if (GoServerProcess.IsAvailable())
{
_goStoreDir = Path.Combine(Path.GetTempPath(), "nats-bench-go-js-" + Guid.NewGuid().ToString("N")[..8]);
Directory.CreateDirectory(_goStoreDir);
var goConfig = $$"""
jetstream {
store_dir: "{{_goStoreDir}}"
max_mem_store: 256mb
max_file_store: 1gb
}
""";
_goServer = new GoServerProcess(goConfig);
await Task.WhenAll(dotNetTask, _goServer.StartAsync());
}
else
{
await dotNetTask;
}
}
public async Task DisposeAsync()
{
if (_goServer is not null)
await _goServer.DisposeAsync();
if (_dotNetServer is not null)
await _dotNetServer.DisposeAsync();
CleanupDir(_goStoreDir);
CleanupDir(_dotNetStoreDir);
}
public NatsConnection CreateGoClient()
=> new(new NatsOpts { Url = $"nats://127.0.0.1:{GoPort}" });
public NatsConnection CreateDotNetClient()
=> new(new NatsOpts { Url = $"nats://127.0.0.1:{DotNetPort}" });
private static void CleanupDir(string? dir)
{
if (dir is not null && Directory.Exists(dir))
{
try { Directory.Delete(dir, recursive: true); }
catch { /* best-effort cleanup */ }
}
}
}


@@ -0,0 +1,14 @@
using System.Net;
using System.Net.Sockets;
namespace NATS.Server.Benchmark.Tests.Infrastructure;
internal static class PortAllocator
{
/// <summary>
/// Binds to port 0 so the OS picks a free ephemeral port, then releases it.
/// The port could in principle be reclaimed before the server binds it,
/// but the window is small enough for test use.
/// </summary>
public static int AllocateFreePort()
{
using var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
socket.Bind(new IPEndPoint(IPAddress.Loopback, 0));
return ((IPEndPoint)socket.LocalEndPoint!).Port;
}
}


@@ -0,0 +1,93 @@
using System.Diagnostics;
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.JetStream;
[Collection("Benchmark-JetStream")]
public class AsyncPublishTests(JetStreamServerPairFixture fixture, ITestOutputHelper output)
{
[Fact]
[Trait("Category", "Benchmark")]
public async Task JSAsyncPublish_128B_FileStore()
{
const int payloadSize = 128;
const int messageCount = 5_000;
const int batchSize = 100;
var dotnetResult = await RunAsyncPublish("JS Async Publish (128B File)", "DotNet", payloadSize, messageCount, batchSize, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunAsyncPublish("JS Async Publish (128B File)", "Go", payloadSize, messageCount, batchSize, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private static async Task<BenchmarkResult> RunAsyncPublish(string name, string serverType, int payloadSize, int messageCount, int batchSize, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
var streamName = $"BENCH_ASYNC_{serverType.ToUpperInvariant()}_{Guid.NewGuid():N}"[..30];
var subject = $"bench.js.async.{serverType.ToLowerInvariant()}";
await using var nats = createClient();
await nats.ConnectAsync();
var js = new NatsJSContext(nats);
await js.CreateStreamAsync(new StreamConfig(streamName, [subject])
{
Storage = StreamConfigStorage.File,
Retention = StreamConfigRetention.Limits,
MaxMsgs = 10_000_000,
});
try
{
// Warmup
for (var i = 0; i < 500; i++)
await js.PublishAsync(subject, payload);
// Measurement — fire-and-gather in batches
var sw = Stopwatch.StartNew();
var tasks = new List<ValueTask<PubAckResponse>>(batchSize);
for (var i = 0; i < messageCount; i++)
{
tasks.Add(js.PublishAsync(subject, payload));
if (tasks.Count >= batchSize)
{
foreach (var t in tasks)
await t;
tasks.Clear();
}
}
foreach (var t in tasks)
await t;
sw.Stop();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = messageCount,
TotalBytes = (long)messageCount * payloadSize,
Duration = sw.Elapsed,
};
}
finally
{
await js.DeleteStreamAsync(streamName);
}
}
}


@@ -0,0 +1,109 @@
using System.Diagnostics;
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.JetStream;
[Collection("Benchmark-JetStream")]
public class DurableConsumerFetchTests(JetStreamServerPairFixture fixture, ITestOutputHelper output)
{
[Fact]
[Trait("Category", "Benchmark")]
public async Task JSDurableFetch_Throughput()
{
const int payloadSize = 128;
const int messageCount = 5_000;
const int fetchBatchSize = 500;
var dotnetResult = await RunDurableFetch("JS Durable Fetch (128B)", "DotNet", payloadSize, messageCount, fetchBatchSize, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunDurableFetch("JS Durable Fetch (128B)", "Go", payloadSize, messageCount, fetchBatchSize, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private static async Task<BenchmarkResult> RunDurableFetch(
string name, string serverType, int payloadSize, int messageCount, int fetchBatchSize,
Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
var streamName = $"BENCH_DUR_{serverType.ToUpperInvariant()}_{Guid.NewGuid():N}"[..30];
var subject = $"bench.js.durable.{serverType.ToLowerInvariant()}";
var consumerName = $"bench-dur-{serverType.ToLowerInvariant()}";
await using var nats = createClient();
await nats.ConnectAsync();
var js = new NatsJSContext(nats);
await js.CreateStreamAsync(new StreamConfig(streamName, [subject])
{
Storage = StreamConfigStorage.Memory,
Retention = StreamConfigRetention.Limits,
MaxMsgs = 10_000_000,
});
try
{
// Pre-populate stream
var pubTasks = new List<ValueTask<PubAckResponse>>(1000);
for (var i = 0; i < messageCount; i++)
{
pubTasks.Add(js.PublishAsync(subject, payload));
if (pubTasks.Count >= 1000)
{
foreach (var t in pubTasks)
await t;
pubTasks.Clear();
}
}
foreach (var t in pubTasks)
await t;
// Create durable consumer
var consumer = await js.CreateOrUpdateConsumerAsync(streamName, new ConsumerConfig(consumerName)
{
AckPolicy = ConsumerConfigAckPolicy.None,
});
// Fetch in batches
var received = 0;
var sw = Stopwatch.StartNew();
while (received < messageCount)
{
await foreach (var msg in consumer.FetchAsync<byte[]>(new NatsJSFetchOpts { MaxMsgs = fetchBatchSize }))
{
received++;
if (received >= messageCount)
break;
}
}
sw.Stop();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = received,
TotalBytes = (long)received * payloadSize,
Duration = sw.Elapsed,
};
}
finally
{
await js.DeleteStreamAsync(streamName);
}
}
}


@@ -0,0 +1,108 @@
using System.Diagnostics;
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.JetStream;
[Collection("Benchmark-JetStream")]
public class OrderedConsumerTests(JetStreamServerPairFixture fixture, ITestOutputHelper output)
{
[Fact]
[Trait("Category", "Benchmark")]
public async Task JSOrderedConsumer_Throughput()
{
const int payloadSize = 128;
const int messageCount = 5_000;
BenchmarkResult? dotnetResult = null;
try
{
dotnetResult = await RunOrderedConsume("JS Ordered Consumer (128B)", "DotNet", payloadSize, messageCount, fixture.CreateDotNetClient);
}
catch (NatsJSException ex)
{
output.WriteLine($"[DotNet] Ordered consumer not fully supported: {ex.Message}");
}
if (fixture.GoAvailable)
{
var goResult = await RunOrderedConsume("JS Ordered Consumer (128B)", "Go", payloadSize, messageCount, fixture.CreateGoClient);
if (dotnetResult is not null)
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
else
BenchmarkResultWriter.WriteSingle(output, goResult);
}
else if (dotnetResult is not null)
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private static async Task<BenchmarkResult> RunOrderedConsume(string name, string serverType, int payloadSize, int messageCount, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
var streamName = $"BENCH_ORD_{serverType.ToUpperInvariant()}_{Guid.NewGuid():N}"[..30];
var subject = $"bench.js.ordered.{serverType.ToLowerInvariant()}";
await using var nats = createClient();
await nats.ConnectAsync();
var js = new NatsJSContext(nats);
await js.CreateStreamAsync(new StreamConfig(streamName, [subject])
{
Storage = StreamConfigStorage.Memory,
Retention = StreamConfigRetention.Limits,
MaxMsgs = 10_000_000,
});
try
{
// Pre-populate stream
var pubTasks = new List<ValueTask<PubAckResponse>>(1000);
for (var i = 0; i < messageCount; i++)
{
pubTasks.Add(js.PublishAsync(subject, payload));
if (pubTasks.Count >= 1000)
{
foreach (var t in pubTasks)
await t;
pubTasks.Clear();
}
}
foreach (var t in pubTasks)
await t;
// Consume via ordered consumer
var consumer = await js.CreateOrderedConsumerAsync(streamName);
var received = 0;
var sw = Stopwatch.StartNew();
await foreach (var msg in consumer.ConsumeAsync<byte[]>())
{
received++;
if (received >= messageCount)
break;
}
sw.Stop();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = received,
TotalBytes = (long)received * payloadSize,
Duration = sw.Elapsed,
};
}
finally
{
await js.DeleteStreamAsync(streamName);
}
}
}


@@ -0,0 +1,62 @@
using NATS.Client.Core;
using NATS.Client.JetStream;
using NATS.Client.JetStream.Models;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.JetStream;
[Collection("Benchmark-JetStream")]
public class SyncPublishTests(JetStreamServerPairFixture fixture, ITestOutputHelper output)
{
private readonly BenchmarkRunner _runner = new() { WarmupCount = 500, MeasurementCount = 10_000 };
[Fact]
[Trait("Category", "Benchmark")]
public async Task JSSyncPublish_16B_MemoryStore()
{
const int payloadSize = 16;
var dotnetResult = await RunSyncPublish("JS Sync Publish (16B Memory)", "DotNet", payloadSize, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunSyncPublish("JS Sync Publish (16B Memory)", "Go", payloadSize, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private async Task<BenchmarkResult> RunSyncPublish(string name, string serverType, int payloadSize, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
var streamName = $"BENCH_SYNC_{serverType.ToUpperInvariant()}_{Guid.NewGuid():N}"[..30];
var subject = $"bench.js.sync.{serverType.ToLowerInvariant()}";
await using var nats = createClient();
await nats.ConnectAsync();
var js = new NatsJSContext(nats);
await js.CreateStreamAsync(new StreamConfig(streamName, [subject])
{
Storage = StreamConfigStorage.Memory,
Retention = StreamConfigRetention.Limits,
MaxMsgs = 1_000_000,
});
try
{
var result = await _runner.MeasureThroughputAsync(name, serverType, payloadSize,
async _ => await js.PublishAsync(subject, payload));
return result;
}
finally
{
await js.DeleteStreamAsync(streamName);
}
}
}


@@ -0,0 +1,26 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="coverlet.collector" />
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="NATS.Client.Core" />
<PackageReference Include="NATS.Client.JetStream" />
<PackageReference Include="Shouldly" />
<PackageReference Include="xunit" />
<PackageReference Include="xunit.runner.visualstudio" />
</ItemGroup>
<ItemGroup>
<Using Include="Xunit" />
<Using Include="Shouldly" />
</ItemGroup>
<ItemGroup>
<Content Include="xunit.runner.json" CopyToOutputDirectory="PreserveNewest" />
</ItemGroup>
</Project>


@@ -0,0 +1,113 @@
# NATS.Server.Benchmark.Tests
Side-by-side performance comparison of the Go and .NET NATS server implementations. Both servers are launched as child processes on ephemeral ports and exercised with identical workloads using the `NATS.Client.Core` / `NATS.Client.JetStream` NuGet client libraries.
## Prerequisites
- .NET 10 SDK
- Go toolchain (for Go server comparison; benchmarks still run .NET-only if Go is unavailable)
- The Go NATS server source at `golang/nats-server/` (the Go binary is built automatically on first run)
## Running Benchmarks
All benchmark tests are tagged with `[Trait("Category", "Benchmark")]`, so a plain `dotnet test` against the solution will **not** run them. Use the `--filter` flag.
```bash
# Run all benchmarks
dotnet test tests/NATS.Server.Benchmark.Tests --filter "Category=Benchmark" -v normal
# Core pub/sub only
dotnet test tests/NATS.Server.Benchmark.Tests --filter "Category=Benchmark&FullyQualifiedName~CorePubSub" -v normal
# Request/reply only
dotnet test tests/NATS.Server.Benchmark.Tests --filter "Category=Benchmark&FullyQualifiedName~RequestReply" -v normal
# JetStream only
dotnet test tests/NATS.Server.Benchmark.Tests --filter "Category=Benchmark&FullyQualifiedName~JetStream" -v normal
# A single benchmark by name
dotnet test tests/NATS.Server.Benchmark.Tests --filter "FullyQualifiedName=NATS.Server.Benchmark.Tests.CorePubSub.SinglePublisherThroughputTests.PubNoSub_16B" -v normal
```
Use `-v normal` or `--logger "console;verbosity=detailed"` to see the comparison output in the console. Without verbosity, xUnit suppresses `ITestOutputHelper` output for passing tests.
## Benchmark List
| Test Class | Test Method | What It Measures |
|---|---|---|
| `SinglePublisherThroughputTests` | `PubNoSub_16B` | Publish-only throughput, 16-byte payload |
| `SinglePublisherThroughputTests` | `PubNoSub_128B` | Publish-only throughput, 128-byte payload |
| `PubSubOneToOneTests` | `PubSub1To1_16B` | 1 publisher, 1 subscriber, 16-byte payload |
| `PubSubOneToOneTests` | `PubSub1To1_16KB` | 1 publisher, 1 subscriber, 16 KB payload |
| `FanOutTests` | `FanOut1To4_128B` | 1 publisher, 4 subscribers, 128-byte payload |
| `MultiPubSubTests` | `MultiPubSub4x4_128B` | 4 publishers, 4 subscribers, 128-byte payload |
| `SingleClientLatencyTests` | `RequestReply_SingleClient_128B` | Request/reply round-trip latency, 1 client, 1 service |
| `MultiClientLatencyTests` | `RequestReply_10Clients2Services_16B` | Request/reply latency, 10 concurrent clients, 2 queue-group services |
| `SyncPublishTests` | `JSSyncPublish_16B_MemoryStore` | JetStream synchronous publish, memory-backed stream |
| `AsyncPublishTests` | `JSAsyncPublish_128B_FileStore` | JetStream async batch publish, file-backed stream |
| `OrderedConsumerTests` | `JSOrderedConsumer_Throughput` | JetStream ordered ephemeral consumer read throughput |
| `DurableConsumerFetchTests` | `JSDurableFetch_Throughput` | JetStream durable consumer fetch-in-batches throughput |
## Output Format
Each test writes a side-by-side comparison to xUnit's test output:
```
=== Single Publisher (16B) ===
Go: 2,436,416 msg/s | 37.2 MB/s | 41 ms
.NET: 1,425,767 msg/s | 21.8 MB/s | 70 ms
Ratio: 0.59x (.NET / Go)
```
Request/reply tests also include latency percentiles:
```
Latency (us):
P50 P95 P99 Min Max
Go 104.5 124.2 146.2 82.8 1204.5
.NET 134.7 168.0 190.5 91.6 3469.5
```
If Go is not available, only the .NET result is printed.
## Updating benchmarks_comparison.md
After running the full benchmark suite, update `benchmarks_comparison.md` in the repository root with the new numbers:
1. Run all benchmarks with detailed output:
```bash
dotnet test tests/NATS.Server.Benchmark.Tests \
--filter "Category=Benchmark" \
-v normal \
--logger "console;verbosity=detailed" 2>&1 | tee /tmp/bench-output.txt
```
2. Open `/tmp/bench-output.txt` and extract the comparison blocks from the "Standard Output Messages" sections.
3. Update the tables in `benchmarks_comparison.md` with the new msg/s, MB/s, ratio, and latency values. Update the date on the first line and the environment description.
4. Review the Summary table and Key Observations — update assessments if ratios have changed significantly.
## Architecture
```
Infrastructure/
PortAllocator.cs # Ephemeral port allocation (bind to port 0)
DotNetServerProcess.cs # Builds + launches NATS.Server.Host
GoServerProcess.cs # Builds + launches golang/nats-server
CoreServerPairFixture.cs # IAsyncLifetime: Go + .NET servers for core tests
  JetStreamServerPairFixture.cs # IAsyncLifetime: Go + .NET servers with JetStream
  Collections.cs           # xUnit collection definitions
Harness/
  BenchmarkResult.cs       # Result record (msg/s, MB/s, latencies)
  LatencyTracker.cs        # Pre-allocated sample buffer, percentile math
  BenchmarkRunner.cs       # Warmup + timed measurement loop
  BenchmarkResultWriter.cs # Formats side-by-side comparison output
```
Server pair fixtures start both servers once per xUnit collection and expose `CreateGoClient()` / `CreateDotNetClient()` factory methods. Test parallelization is disabled at the assembly level (`AssemblyInfo.cs`) to prevent resource contention between collections.
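The `PortAllocator` approach noted above (bind to port 0 and let the OS choose a free ephemeral port) can be sketched roughly as follows. The class and method names here are illustrative, not the actual implementation:

```csharp
using System;
using System.Net;
using System.Net.Sockets;

// Sketch of ephemeral-port allocation: bind a TcpListener to port 0, read
// back the port the OS assigned, then release the socket so the server
// process can bind it. There is a small race window between release and
// reuse, which is generally acceptable for a test harness.
public static class PortAllocatorSketch
{
    public static int GetFreePort()
    {
        var listener = new TcpListener(IPAddress.Loopback, 0);
        listener.Start();
        var port = ((IPEndPoint)listener.LocalEndpoint).Port;
        listener.Stop();
        return port;
    }
}
```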
## Notes
- The Go binary at `golang/nats-server/nats-server` is built automatically on first run via `go build`. Subsequent runs reuse the cached binary.
- If Go is not installed or `golang/nats-server/` does not exist, Go benchmarks are skipped and only .NET results are reported.
- JetStream tests create temporary store directories under `$TMPDIR` and clean them up on teardown.
- Message counts are intentionally conservative (2K-10K per test) to keep the full suite under 2 minutes while still producing meaningful throughput ratios. For higher-fidelity numbers, increase the counts in individual test methods or the `BenchmarkRunner` configuration.
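The `LatencyTracker` described above (pre-allocated sample buffer, percentile math) can be sketched as below — raw `Stopwatch`-tick deltas recorded into a fixed buffer, sorted once, with nearest-rank percentiles converted to microseconds. This is a sketch of the general technique under assumed nearest-rank semantics, not the project's exact code:

```csharp
using System;
using System.Diagnostics;

// Sketch: record raw Stopwatch-tick deltas into a pre-allocated buffer,
// then sort and read nearest-rank percentiles in microseconds.
// Note: Record is not thread-safe as written; a concurrent variant would
// need Interlocked.Increment on _count.
public sealed class LatencyTrackerSketch
{
    private readonly long[] _samples;
    private int _count;

    public LatencyTrackerSketch(int capacity) => _samples = new long[capacity];

    public void Record(long elapsedTicks) => _samples[_count++] = elapsedTicks;

    public double PercentileUs(double p)
    {
        Array.Sort(_samples, 0, _count);
        // Nearest-rank: smallest sample whose rank covers p percent.
        var rank = (int)Math.Ceiling(p / 100.0 * _count) - 1;
        var ticks = _samples[Math.Clamp(rank, 0, _count - 1)];
        return ticks * 1_000_000.0 / Stopwatch.Frequency;
    }
}
```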

View File

@@ -0,0 +1,106 @@
using System.Diagnostics;
using NATS.Client.Core;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.RequestReply;
[Collection("Benchmark-Core")]
public class MultiClientLatencyTests(CoreServerPairFixture fixture, ITestOutputHelper output)
{
[Fact]
[Trait("Category", "Benchmark")]
public async Task RequestReply_10Clients2Services_16B()
{
const int payloadSize = 16;
const int requestsPerClient = 1_000;
const int clientCount = 10;
const int serviceCount = 2;
var dotnetResult = await RunMultiLatency("Request-Reply 10Cx2S (16B)", "DotNet", payloadSize, requestsPerClient, clientCount, serviceCount, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunMultiLatency("Request-Reply 10Cx2S (16B)", "Go", payloadSize, requestsPerClient, clientCount, serviceCount, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private static async Task<BenchmarkResult> RunMultiLatency(
string name, string serverType, int payloadSize, int requestsPerClient,
int clientCount, int serviceCount, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
const string subject = "bench.reqrep.multi";
var queueGroup = $"bench-svc-{serverType.ToLowerInvariant()}";
// Start service responders on a queue group
var serviceClients = new NatsConnection[serviceCount];
var serviceSubs = new INatsSub<byte[]>[serviceCount];
var serviceTasks = new Task[serviceCount];
for (var i = 0; i < serviceCount; i++)
{
serviceClients[i] = createClient();
await serviceClients[i].ConnectAsync();
serviceSubs[i] = await serviceClients[i].SubscribeCoreAsync<byte[]>(subject, queueGroup: queueGroup);
var client = serviceClients[i];
var sub = serviceSubs[i];
serviceTasks[i] = Task.Run(async () =>
{
await foreach (var msg in sub.Msgs.ReadAllAsync())
{
if (msg.ReplyTo is not null)
await client.PublishAsync(msg.ReplyTo, payload);
}
});
}
await Task.Delay(50);
// Run concurrent clients
var totalMessages = requestsPerClient * clientCount;
var tracker = new LatencyTracker(totalMessages);
var sw = Stopwatch.StartNew();
var clientTasks = new Task[clientCount];
for (var c = 0; c < clientCount; c++)
{
clientTasks[c] = Task.Run(async () =>
{
await using var client = createClient();
await client.ConnectAsync();
for (var i = 0; i < requestsPerClient; i++)
{
var start = Stopwatch.GetTimestamp();
await client.RequestAsync<byte[], byte[]>(subject, payload);
tracker.Record(Stopwatch.GetTimestamp() - start);
}
});
}
await Task.WhenAll(clientTasks);
sw.Stop();
        foreach (var sub in serviceSubs)
            await sub.UnsubscribeAsync();
        await Task.WhenAll(serviceTasks); // responder loops end once their subscription channels complete
        foreach (var client in serviceClients)
            await client.DisposeAsync();
return new BenchmarkResult
{
Name = name,
ServerType = serverType,
TotalMessages = totalMessages,
TotalBytes = (long)totalMessages * payloadSize,
Duration = sw.Elapsed,
Latencies = tracker.ComputePercentiles(),
};
}
}

View File

@@ -0,0 +1,64 @@
using NATS.Client.Core;
using NATS.Server.Benchmark.Tests.Harness;
using NATS.Server.Benchmark.Tests.Infrastructure;
using Xunit.Abstractions;
namespace NATS.Server.Benchmark.Tests.RequestReply;
[Collection("Benchmark-Core")]
public class SingleClientLatencyTests(CoreServerPairFixture fixture, ITestOutputHelper output)
{
private readonly BenchmarkRunner _runner = new() { WarmupCount = 500, MeasurementCount = 10_000 };
[Fact]
[Trait("Category", "Benchmark")]
public async Task RequestReply_SingleClient_128B()
{
const int payloadSize = 128;
const string subject = "bench.reqrep.single";
var dotnetResult = await RunLatency("Request-Reply Single (128B)", "DotNet", subject, payloadSize, fixture.CreateDotNetClient);
if (fixture.GoAvailable)
{
var goResult = await RunLatency("Request-Reply Single (128B)", "Go", subject, payloadSize, fixture.CreateGoClient);
BenchmarkResultWriter.WriteComparison(output, goResult, dotnetResult);
}
else
{
BenchmarkResultWriter.WriteSingle(output, dotnetResult);
}
}
private async Task<BenchmarkResult> RunLatency(string name, string serverType, string subject, int payloadSize, Func<NatsConnection> createClient)
{
var payload = new byte[payloadSize];
await using var serviceClient = createClient();
await using var requestClient = createClient();
await serviceClient.ConnectAsync();
await requestClient.ConnectAsync();
// Start service responder
var sub = await serviceClient.SubscribeCoreAsync<byte[]>(subject);
var responderTask = Task.Run(async () =>
{
await foreach (var msg in sub.Msgs.ReadAllAsync())
{
if (msg.ReplyTo is not null)
await serviceClient.PublishAsync(msg.ReplyTo, payload);
}
});
await Task.Delay(50);
var result = await _runner.MeasureLatencyAsync(name, serverType, payloadSize,
async _ =>
{
await requestClient.RequestAsync<byte[], byte[]>(subject, payload);
});
        await sub.UnsubscribeAsync();
        await responderTask; // responder loop ends once the subscription channel completes
        return result;
}
}

View File

@@ -0,0 +1,5 @@
{
"$schema": "https://xunit.net/schema/current/xunit.runner.schema.json",
"parallelizeAssembly": false,
"parallelizeTestCollections": false
}

View File

@@ -286,6 +286,7 @@ public sealed class StoreInterfaceTests
     // Go: TestStoreUpdateConfigTTLState server/store_test.go:574
     [Fact]
+    [SlopwatchSuppress("SW004", "TTL expiry test requires real wall-clock time to elapse; Thread.Sleep waits for message TTL to expire or survive")]
     public void UpdateConfigTTLState_MessageSurvivesWhenTtlDisabled()
     {
         var cfg = new StreamConfig
@@ -413,13 +414,12 @@ public sealed class StoreInterfaceTests
         var ms = new MemStore();
         var s = Sync(ms);
-        long startTs = 0;
-        for (var i = 0; i < 10; i++)
-        {
-            var (_, ts) = s.StoreMsg("foo", null, [], 0);
-            if (i == 1)
-                startTs = ts;
-        }
+        // Use StoreRawMsg with explicit timestamps to avoid flakiness under CPU pressure.
+        var baseTs = (DateTime.UtcNow.Ticks - DateTime.UnixEpoch.Ticks) * 100L;
+        for (var i = 0; i < 10; i++)
+            s.StoreRawMsg("foo", null, [], (ulong)(i + 1), baseTs + i * 1_000_000L, 0, false);
+
+        var startTs = baseTs + 1_000_000L; // timestamp of seq 2 (i == 1)

         // Create a delete gap in the middle: seqs 4-7 deleted.
         // A naive binary search would hit deleted sequences and return wrong result.
@@ -443,13 +443,12 @@ public sealed class StoreInterfaceTests
         var ms = new MemStore();
         var s = Sync(ms);
-        long startTs = 0;
-        for (var i = 0; i < 3; i++)
-        {
-            var (_, ts) = s.StoreMsg("foo", null, [], 0);
-            if (i == 1)
-                startTs = ts;
-        }
+        // Use StoreRawMsg with explicit timestamps to avoid flakiness under CPU pressure.
+        var baseTs = (DateTime.UtcNow.Ticks - DateTime.UnixEpoch.Ticks) * 100L;
+        for (var i = 0; i < 3; i++)
+            s.StoreRawMsg("foo", null, [], (ulong)(i + 1), baseTs + i * 1_000_000L, 0, false);
+
+        var startTs = baseTs + 1_000_000L; // timestamp of seq 2 (i == 1)

         // Delete last message — trailing delete.
         s.RemoveMsg(3).ShouldBeTrue();
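The `* 100L` in the new test code relies on a .NET invariant: `DateTime.Ticks` counts 100-nanosecond intervals, so ticks since `DateTime.UnixEpoch` times 100 gives nanoseconds since the Unix epoch (presumably matching the Go server's UnixNano store timestamps). A minimal self-contained illustration:

```csharp
using System;

// DateTime ticks are 100-nanosecond intervals, so ticks-since-Unix-epoch
// multiplied by 100 yields nanoseconds-since-Unix-epoch.
static long ToUnixNanos(DateTime utc) =>
    (utc.Ticks - DateTime.UnixEpoch.Ticks) * 100L;

// One second past the epoch is exactly 1_000_000_000 ns.
var oneSecond = DateTime.UnixEpoch.AddSeconds(1);
Console.WriteLine(ToUnixNanos(oneSecond)); // 1000000000
```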