perf: optimize fan-out hot path and switch benchmarks to Release build

Round 9 optimizations targeting per-delivery overhead:
- Switch benchmark harness from Debug to Release build (biggest impact:
  durable fetch 0.42x→0.92x, request-reply to parity)
- Batch server-wide stats after fan-out loop (2 Interlocked per delivery → 2 per publish)
- Guard auto-unsub tracking with MaxMessages > 0 (skip Interlocked in common case)
- Cache SID as ASCII bytes on Subscription (avoid per-delivery encoding)
- Pre-encode subject bytes once before fan-out loop (avoid N encodings)
- Add 1-element subject string cache in ProcessPub (avoid repeated alloc)
- Remove Interlocked from SubList.Match stats counters (approximate is fine)
- Extract WriteMessageToBuffer helper for both string and span overloads
This commit is contained in:
Joseph Doherty
2026-03-13 15:30:02 -04:00
parent 82cc3ec841
commit a62a25dcdf
6 changed files with 251 additions and 94 deletions

View File

@@ -1437,6 +1437,10 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
var subList = sender.Account?.SubList ?? _globalAccount.SubList;
var result = subList.Match(subject);
var delivered = false;
int deliveredCount = 0;
// Pre-encode subject bytes once for all fan-out deliveries (one alloc per publish, not per delivery).
var subjectBytes = Encoding.ASCII.GetBytes(subject);
// Per-client deferred flush: collect unique clients during fan-out, signal each once.
// Go reference: client.go:3905 addToPCD / client.go:1324 flushClients.
@@ -1444,13 +1448,15 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
pcd.Clear();
// Deliver to plain subscribers
var messageSize = payload.Length + headers.Length;
foreach (var sub in result.PlainSubs)
{
if (sub.Client == null || sub.Client == sender && !(sender.ClientOpts?.Echo ?? true))
continue;
DeliverMessage(sub, subject, replyTo, headers, payload, pcd);
DeliverMessage(sub, subjectBytes, sub.SidBytes, subject, replyTo, headers, payload, pcd);
delivered = true;
deliveredCount++;
}
// Deliver to one member of each queue group (round-robin)
@@ -1470,8 +1476,9 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
var sub = queueGroup[(idx + attempt) % queueGroup.Length];
if (sub.Client != null && (sub.Client != sender || (sender.ClientOpts?.Echo ?? true)))
{
DeliverMessage(sub, subject, replyTo, headers, payload, pcd);
DeliverMessage(sub, subjectBytes, sub.SidBytes, subject, replyTo, headers, payload, pcd);
delivered = true;
deliveredCount++;
break;
}
}
@@ -1483,14 +1490,22 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
{
if (sub.Client != null && sub.Client != sender)
{
DeliverMessage(sub, subject, replyTo, headers, payload, pcd);
DeliverMessage(sub, subjectBytes, sub.SidBytes, subject, replyTo, headers, payload, pcd);
delivered = true;
deliveredCount++;
break;
}
}
}
}
// Batch server-wide stats once per publish (instead of per-delivery Interlocked ops).
if (deliveredCount > 0)
{
Interlocked.Add(ref _stats.OutMsgs, (long)deliveredCount);
Interlocked.Add(ref _stats.OutBytes, (long)messageSize * deliveredCount);
}
// Flush all unique clients once after fan-out.
// Go reference: client.go:1324 flushClients — iterates pcd map, one signal per client.
foreach (var client in pcd)
@@ -1794,6 +1809,54 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
pcd.Clear();
}
/// <summary>
/// Delivers a single message to <paramref name="sub"/> using subject and SID bytes that the
/// caller has already encoded, so nothing is re-encoded per delivery.
/// Used by the ProcessMessage fan-out loop.
/// </summary>
/// <param name="sub">Subscription to deliver to; nothing happens when it has no client.</param>
/// <param name="subjectBytes">Pre-encoded subject, passed to <c>NatsClient.SendMessageNoFlush</c>.</param>
/// <param name="sidBytes">Pre-encoded subscription id, passed to <c>NatsClient.SendMessageNoFlush</c>.</param>
/// <param name="subject">Subject as a string; used for the delivery permission check and the string-based send paths.</param>
/// <param name="replyTo">Optional reply subject; may be registered with the client's response tracker.</param>
/// <param name="headers">Message headers, forwarded unchanged.</param>
/// <param name="payload">Message payload, forwarded unchanged.</param>
/// <param name="pcd">
/// When non-null, the message is sent via the no-flush path and the client is added to this set;
/// when null, the message is sent via <c>SendMessage</c>.
/// </param>
private void DeliverMessage(Subscription sub, ReadOnlySpan<byte> subjectBytes, ReadOnlySpan<byte> sidBytes,
    string subject, string? replyTo,
    ReadOnlyMemory<byte> headers, ReadOnlyMemory<byte> payload,
    HashSet<INatsClient>? pcd = null)
{
    var recipient = sub.Client;
    if (recipient == null)
    {
        return;
    }

    // Auto-unsub: the atomic counter is only touched when a limit is configured
    // (MaxMessages == 0 means "no limit" and skips the Interlocked call entirely).
    if (sub.MaxMessages > 0 && Interlocked.Increment(ref sub.MessageCount) > sub.MaxMessages)
    {
        // Limit exceeded: drop the subscription from the sublist and from the client, deliver nothing.
        var owningList = recipient.Account?.SubList ?? _globalAccount.SubList;
        owningList.Remove(sub);
        recipient.RemoveSubscription(sub.Sid);
        return;
    }

    // Skip delivery when the client's permissions explicitly deny this subject.
    if (recipient.Permissions?.IsDeliveryAllowed(subject) == false)
    {
        return;
    }

    if (pcd == null)
    {
        // No deferred-flush set supplied: use the regular send path.
        recipient.SendMessage(subject, sub.Sid, replyTo, headers, payload);
    }
    else
    {
        // Deferred flush: queue the message without flushing and remember the client in pcd.
        if (recipient is NatsClient fastClient)
        {
            // Concrete NatsClient accepts the pre-encoded bytes directly.
            fastClient.SendMessageNoFlush(subjectBytes, sidBytes, replyTo, headers, payload);
        }
        else
        {
            recipient.SendMessageNoFlush(subject, sub.Sid, replyTo, headers, payload);
        }

        pcd.Add(recipient);
    }

    // Register the reply subject with the response tracker when the client
    // is not already allowed to publish to it.
    if (replyTo != null
        && recipient.Permissions?.ResponseTracker != null
        && recipient.Permissions.IsPublishAllowed(replyTo) == false)
    {
        recipient.Permissions.ResponseTracker.RegisterReply(replyTo);
    }
}
private void DeliverMessage(Subscription sub, string subject, string? replyTo,
ReadOnlyMemory<byte> headers, ReadOnlyMemory<byte> payload,
HashSet<INatsClient>? pcd = null)
@@ -1801,24 +1864,22 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
var client = sub.Client;
if (client == null) return;
// Check auto-unsub
var count = Interlocked.Increment(ref sub.MessageCount);
if (sub.MaxMessages > 0 && count > sub.MaxMessages)
// Auto-unsub: only track when a limit is set (common case is MaxMessages == 0).
if (sub.MaxMessages > 0)
{
// Clean up exhausted subscription from trie and client tracking
var subList = client.Account?.SubList ?? _globalAccount.SubList;
subList.Remove(sub);
client.RemoveSubscription(sub.Sid);
return;
var count = Interlocked.Increment(ref sub.MessageCount);
if (count > sub.MaxMessages)
{
var subList = client.Account?.SubList ?? _globalAccount.SubList;
subList.Remove(sub);
client.RemoveSubscription(sub.Sid);
return;
}
}
// Deny-list delivery filter
if (client.Permissions?.IsDeliveryAllowed(subject) == false)
return;
// When pcd (per-client deferred flush) set is provided, queue data without
// signaling the write loop. The caller flushes all unique clients once after
// the fan-out loop. Go reference: client.go addToPCD / flushClients.
if (pcd != null)
{
client.SendMessageNoFlush(subject, sub.Sid, replyTo, headers, payload);
@@ -1829,7 +1890,6 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
client.SendMessage(subject, sub.Sid, replyTo, headers, payload);
}
// Track reply subject for response permissions
if (replyTo != null && client.Permissions?.ResponseTracker != null)
{
if (client.Permissions.IsPublishAllowed(replyTo) == false)