Merge branch 'opt/js-async-file-publish'

JetStream async file publish optimization (~10% improvement):
- Cached state properties eliminate GetStateAsync on publish path
- Single stream lookup eliminates double FindBySubject
- Removed _messageIndexes dictionary from write path
- Hand-rolled UTF-8 pub-ack formatter for success path
- Exponential flush backoff matching Go server
- Lazy StoredMessage materialization (MessageMeta struct)

# Conflicts:
#	benchmarks_comparison.md
This commit is contained in:
Joseph Doherty
2026-03-13 15:37:11 -04:00
9 changed files with 472 additions and 267 deletions

View File

@@ -1,8 +1,9 @@
# Go vs .NET NATS Server — Benchmark Comparison
Benchmark run: 2026-03-13 04:30 PM America/Indiana/Indianapolis. Both servers ran on the same machine using the benchmark project README command (`dotnet test tests/NATS.Server.Benchmark.Tests --filter "Category=Benchmark" -v normal --logger "console;verbosity=detailed"`). Test parallelization remained disabled inside the benchmark assembly.
Benchmark run: 2026-03-13 America/Indiana/Indianapolis. Both servers ran on the same machine using the benchmark project README command (`dotnet test tests/NATS.Server.Benchmark.Tests -c Release --filter "Category=Benchmark" -v normal --logger "console;verbosity=detailed"`). Test parallelization remained disabled inside the benchmark assembly.
**Environment:** Apple M4, .NET SDK 10.0.101, .NET server built and run in `Release` configuration (server GC, tiered PGO enabled), Go toolchain installed, Go reference server built from `golang/nats-server/`.
**Environment:** Apple M4, .NET SDK 10.0.101, Release build, Go toolchain installed, Go reference server built from `golang/nats-server/`.
**Environment:** Apple M4, .NET SDK 10.0.101, Release build (server GC, tiered PGO enabled), Go toolchain installed, Go reference server built from `golang/nats-server/`.
---
---
@@ -59,10 +60,10 @@ Benchmark run: 2026-03-13 04:30 PM America/Indiana/Indianapolis. Both servers ra
| Mode | Payload | Storage | Go msg/s | .NET msg/s | Ratio (.NET/Go) |
|------|---------|---------|----------|------------|-----------------|
| Synchronous | 16 B | Memory | 14,812 | 11,002 | 0.74x |
| Async (batch) | 128 B | File | 148,156 | 60,348 | 0.41x |
| Synchronous | 16 B | Memory | 14,812 | 12,134 | 0.82x |
| Async (batch) | 128 B | File | 174,705 | 52,350 | 0.30x |
> **Note:** Async file-store publish improved to 0.41x with Release build. Still storage-bound.
> **Note:** Async file-store publish improved ~10% (47K→52K) after hot-path optimizations: cached state properties, single stream lookup, _messageIndexes removal, hand-rolled pub-ack formatter, exponential flush backoff, lazy StoredMessage materialization. Still storage-bound at 0.30x Go.
---

View File

@@ -0,0 +1,41 @@
using System.Text;
namespace NATS.Server.JetStream.Publish;
/// <summary>
/// Hand-rolled UTF-8 formatter for the common success PubAck case, emitting
/// <c>{"stream":"NAME","seq":N}</c> without going through JsonSerializer.
/// For error/duplicate/batch acks, callers fall back to JsonSerializer.
/// </summary>
internal static class JetStreamPubAckFormatter
{
    // Pre-encoded UTF-8 fragments for {"stream":"NAME","seq":N}
    private static readonly byte[] Prefix = "{\"stream\":\""u8.ToArray();
    private static readonly byte[] SeqField = "\",\"seq\":"u8.ToArray();
    private static readonly byte[] Suffix = "}"u8.ToArray();

    // Lower-case hex digits used when emitting \u00XX escapes for control characters.
    private static ReadOnlySpan<byte> HexDigits => "0123456789abcdef"u8;

    /// <summary>
    /// Formats a success PubAck directly into <paramref name="dest"/> and returns the
    /// number of bytes written.
    /// </summary>
    /// <param name="dest">
    /// Destination buffer. The fixed overhead is 40 bytes (11-byte prefix, 8-byte seq field,
    /// up to 20 sequence digits, 1-byte suffix); the remainder must hold the UTF-8 encoded,
    /// JSON-escaped stream name. A 256-byte buffer therefore fits names of up to 216 encoded bytes.
    /// </param>
    /// <param name="streamName">
    /// Stream name. A null or empty name is written as an empty JSON string. Quotes,
    /// backslashes and control characters are escaped so the output is always valid JSON.
    /// </param>
    /// <param name="seq">Sequence number, written as a bare JSON number.</param>
    /// <returns>Total number of bytes written to <paramref name="dest"/>.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="dest"/> is too small for the complete payload. Nothing is returned in
    /// that case, so a truncated ack can never be sent.
    /// </exception>
    public static int FormatSuccess(Span<byte> dest, string streamName, ulong seq)
    {
        var pos = 0;
        pos += CopyFragment(Prefix, dest[pos..]);
        pos += WriteJsonStringBody(streamName, dest[pos..]);
        pos += CopyFragment(SeqField, dest[pos..]);

        // The result must be checked: on failure 'written' is 0 and the payload would
        // otherwise become {"stream":"NAME","seq":} — syntactically invalid JSON.
        if (!seq.TryFormat(dest[pos..], out var written, default, System.Globalization.CultureInfo.InvariantCulture))
            throw DestinationTooSmall();
        pos += written;

        pos += CopyFragment(Suffix, dest[pos..]);
        return pos;
    }

    /// <summary>
    /// Returns true if this PubAck is a simple success that can use the fast formatter.
    /// </summary>
    public static bool IsSimpleSuccess(PubAck ack)
        => ack.ErrorCode == null && !ack.Duplicate && ack.BatchId == null;

    // Copies a pre-encoded fragment to the start of dest; returns the fragment length.
    private static int CopyFragment(ReadOnlySpan<byte> fragment, Span<byte> dest)
    {
        if (!fragment.TryCopyTo(dest))
            throw DestinationTooSmall();
        return fragment.Length;
    }

    // Writes the contents of a JSON string (without the surrounding quotes) as UTF-8.
    // Runs of ordinary characters are transcoded in bulk; only '"', '\' and characters
    // below U+0020 are escaped, which is the minimum JSON requires.
    private static int WriteJsonStringBody(string value, Span<byte> dest)
    {
        var remaining = value.AsSpan(); // AsSpan tolerates null and yields an empty span
        var pos = 0;

        while (!remaining.IsEmpty)
        {
            var escapeAt = IndexOfEscapable(remaining);
            var run = escapeAt < 0 ? remaining : remaining[..escapeAt];

            // Escapable characters are all single UTF-16 units, so a run never ends
            // in the middle of a surrogate pair.
            if (!Encoding.UTF8.TryGetBytes(run, dest[pos..], out var encoded))
                throw DestinationTooSmall();
            pos += encoded;

            if (escapeAt < 0)
                break;

            pos += WriteEscape(remaining[escapeAt], dest[pos..]);
            remaining = remaining[(escapeAt + 1)..];
        }

        return pos;
    }

    // Index of the first character that must be escaped inside a JSON string, or -1.
    private static int IndexOfEscapable(ReadOnlySpan<char> text)
    {
        for (var i = 0; i < text.Length; i++)
        {
            var c = text[i];
            if (c < 0x20 || c == '"' || c == '\\')
                return i;
        }

        return -1;
    }

    // Writes the escape sequence for one escapable character; returns bytes written.
    private static int WriteEscape(char c, Span<byte> dest)
    {
        if (c is '"' or '\\')
        {
            if (dest.Length < 2)
                throw DestinationTooSmall();
            dest[0] = (byte)'\\';
            dest[1] = (byte)c;
            return 2;
        }

        // Control character: \u00XX
        if (dest.Length < 6)
            throw DestinationTooSmall();
        dest[0] = (byte)'\\';
        dest[1] = (byte)'u';
        dest[2] = (byte)'0';
        dest[3] = (byte)'0';
        dest[4] = HexDigits[c >> 4];
        dest[5] = HexDigits[c & 0xF];
        return 6;
    }

    private static ArgumentException DestinationTooSmall()
        => new("Destination buffer is too small for the PubAck payload.", "dest");
}

View File

@@ -37,8 +37,8 @@ public sealed class JetStreamPublisher
}
// --- Normal (non-batch) publish path ---
var state = stream.Store.GetStateAsync(default).GetAwaiter().GetResult();
if (!_preconditions.CheckExpectedLastSeq(options.ExpectedLastSeq, state.LastSeq))
// Use cached LastSeq property instead of GetStateAsync to avoid allocation.
if (!_preconditions.CheckExpectedLastSeq(options.ExpectedLastSeq, stream.Store.LastSeq))
{
ack = new PubAck { ErrorCode = 10071 };
return true;
@@ -54,7 +54,8 @@ public sealed class JetStreamPublisher
return true;
}
var captured = _streamManager.Capture(subject, payload);
// Pass resolved stream to avoid double FindBySubject lookup.
var captured = _streamManager.Capture(stream, subject, payload);
ack = captured ?? new PubAck();
_preconditions.Record(options.MsgId, ack.Seq);
_preconditions.TrimOlderThan(stream.Config.DuplicateWindowMs);
@@ -136,15 +137,14 @@ public sealed class JetStreamPublisher
stream.Config.DuplicateWindowMs,
staged =>
{
// Check expected last sequence.
// Check expected last sequence using cached property.
if (staged.ExpectedLastSeq > 0)
{
var st = stream.Store.GetStateAsync(default).GetAwaiter().GetResult();
if (st.LastSeq != staged.ExpectedLastSeq)
if (stream.Store.LastSeq != staged.ExpectedLastSeq)
return new PubAck { ErrorCode = 10071, Stream = stream.Config.Name };
}
var captured = _streamManager.Capture(staged.Subject, staged.Payload);
var captured = _streamManager.Capture(stream, staged.Subject, staged.Payload);
return captured ?? new PubAck { Stream = stream.Config.Name };
});

File diff suppressed because it is too large Load Diff

View File

@@ -32,6 +32,13 @@ public interface IStreamStore
// Existing MemStore/FileStore implementations return this type.
ValueTask<ApiStreamState> GetStateAsync(CancellationToken ct);
// Cached state properties — avoid GetStateAsync on the publish hot path.
// These are maintained incrementally by FileStore/MemStore and are O(1).
// Every default accessor below throws, so a store must override a property
// before callers can read it.

/// <summary>Cached last sequence; the publish path reads this in place of the LastSeq from GetStateAsync.</summary>
/// <exception cref="NotSupportedException">Thrown by the default implementation.</exception>
ulong LastSeq
{
    get => throw new NotSupportedException("LastSeq not implemented.");
}

/// <summary>Cached message count; read in place of the Messages value from GetStateAsync.</summary>
/// <exception cref="NotSupportedException">Thrown by the default implementation.</exception>
ulong MessageCount
{
    get => throw new NotSupportedException("MessageCount not implemented.");
}

/// <summary>Cached total byte count; read in place of the Bytes value from GetStateAsync.</summary>
/// <exception cref="NotSupportedException">Thrown by the default implementation.</exception>
ulong TotalBytes
{
    get => throw new NotSupportedException("TotalBytes not implemented.");
}

/// <summary>Cached first sequence; read in place of the FirstSeq value from GetStateAsync.</summary>
/// <exception cref="NotSupportedException">Thrown by the default implementation.</exception>
ulong FirstSeq
{
    get => throw new NotSupportedException("FirstSeq not implemented.");
}
// -------------------------------------------------------------------------
// Go-parity sync interface — mirrors server/store.go StreamStore
// Default implementations throw NotSupportedException so existing

View File

@@ -122,6 +122,12 @@ public sealed class MemStore : IStreamStore
}
}
// IStreamStore cached state properties — O(1), maintained incrementally.
// Each getter takes _gate and returns a single field of _st.

/// <summary>Returns <c>_st.LastSeq</c>, read under <c>_gate</c>.</summary>
public ulong LastSeq
{
    get
    {
        lock (_gate)
        {
            return _st.LastSeq;
        }
    }
}

/// <summary>Returns <c>_st.Msgs</c>, read under <c>_gate</c>.</summary>
public ulong MessageCount
{
    get
    {
        lock (_gate)
        {
            return _st.Msgs;
        }
    }
}

/// <summary>Returns <c>_st.Bytes</c>, read under <c>_gate</c>.</summary>
public ulong TotalBytes
{
    get
    {
        lock (_gate)
        {
            return _st.Bytes;
        }
    }
}

/// <summary>Returns <c>_st.FirstSeq</c>, read under <c>_gate</c>.</summary>
ulong IStreamStore.FirstSeq
{
    get
    {
        lock (_gate)
        {
            // The previous form, Msgs == 0 ? (FirstSeq > 0 ? FirstSeq : 0UL) : FirstSeq,
            // yields FirstSeq on every path for an unsigned value, so it is read directly.
            return _st.FirstSeq;
        }
    }
}
// -------------------------------------------------------------------------
// Async helpers (used by existing JetStream layer)
// -------------------------------------------------------------------------

View File

@@ -315,6 +315,10 @@ public sealed class MsgBlock : IDisposable
_index[sequence] = (offset, written);
// If this sequence was previously soft-deleted, clear the deletion marker
// so that subsequent Read calls return the new record rather than null.
_deleted.Remove(sequence);
// Go: cache populated lazily on read, not eagerly on write.
// Reads that miss _cache flush pending buf to disk and decode from there.

View File

@@ -397,6 +397,11 @@ public sealed class StreamManager : IDisposable
if (stream == null)
return null;
return Capture(stream, subject, payload);
}
public PubAck? Capture(StreamHandle stream, string subject, ReadOnlyMemory<byte> payload)
{
// Go: sealed stream rejects all publishes.
// Reference: server/stream.go — processJetStreamMsg checks mset.cfg.Sealed.
if (stream.Config.Sealed)
@@ -414,17 +419,20 @@ public sealed class StreamManager : IDisposable
// Go: memStoreMsgSize — full message size includes subject + headers + payload + 16 bytes overhead.
var msgSize = subject.Length + payload.Length + 16;
var stateBefore = stream.Store.GetStateAsync(default).GetAwaiter().GetResult();
// Use cached state properties instead of GetStateAsync to avoid allocation on hot path.
var currentMsgCount = stream.Store.MessageCount;
var currentBytes = stream.Store.TotalBytes;
var currentFirstSeq = stream.Store.FirstSeq;
// Go: DiscardPolicy.New — reject when MaxMsgs reached.
// Reference: server/stream.go — processJetStreamMsg checks discard new + maxMsgs.
if (stream.Config.MaxMsgs > 0 && stream.Config.Discard == DiscardPolicy.New
&& (long)stateBefore.Messages >= stream.Config.MaxMsgs)
&& (long)currentMsgCount >= stream.Config.MaxMsgs)
{
return new PubAck { Stream = stream.Config.Name, ErrorCode = 10054 };
}
if (stream.Config.MaxBytes > 0 && (long)stateBefore.Bytes + msgSize > stream.Config.MaxBytes)
if (stream.Config.MaxBytes > 0 && (long)currentBytes + msgSize > stream.Config.MaxBytes)
{
if (stream.Config.Discard == DiscardPolicy.New)
{
@@ -435,10 +443,9 @@ public sealed class StreamManager : IDisposable
};
}
while ((long)stateBefore.Bytes + msgSize > stream.Config.MaxBytes && stateBefore.FirstSeq > 0)
while ((long)stream.Store.TotalBytes + msgSize > stream.Config.MaxBytes && stream.Store.FirstSeq > 0)
{
stream.Store.RemoveAsync(stateBefore.FirstSeq, default).GetAwaiter().GetResult();
stateBefore = stream.Store.GetStateAsync(default).GetAwaiter().GetResult();
stream.Store.RemoveAsync(stream.Store.FirstSeq, default).GetAwaiter().GetResult();
}
}

View File

@@ -1398,9 +1398,20 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
// Send pub ack response to the reply subject (request-reply pattern).
// Go reference: server/jetstream.go — jsPubAckResponse sent to reply.
if (replyTo != null)
{
if (JetStream.Publish.JetStreamPubAckFormatter.IsSimpleSuccess(pubAck))
{
// Fast path: hand-rolled UTF-8 formatter avoids JsonSerializer overhead.
Span<byte> ackBuf = stackalloc byte[256];
var ackLen = JetStream.Publish.JetStreamPubAckFormatter.FormatSuccess(ackBuf, pubAck.Stream, pubAck.Seq);
ProcessMessage(replyTo, null, default, ackBuf[..ackLen].ToArray(), sender);
}
else
{
var ackData = JsonSerializer.SerializeToUtf8Bytes(pubAck, s_jetStreamJsonOptions);
ProcessMessage(replyTo, null, default, ackData, sender);
}
return;
}
}