feat(networking): expand gateway reply mapper and add leaf solicited connections (D4+D5)

D4: Add hash segment support to ReplyMapper (_GR_.{cluster}.{hash}.{reply}),
FNV-1a ComputeReplyHash, TryExtractClusterId/Hash, legacy format compat.
D5: Add ConnectSolicitedAsync with exponential backoff (1s-60s cap),
JetStreamDomain propagation in LEAF handshake, LeafNodeOptions.JetStreamDomain.
This commit is contained in:
Joseph Doherty
2026-02-24 15:22:24 -05:00
parent 7116988d03
commit efd053ba60
6 changed files with 677 additions and 29 deletions

View File

@@ -4,6 +4,12 @@ using NATS.Server.Subscriptions;
namespace NATS.Server.LeafNodes;
/// <summary>
/// Represents a single leaf node connection (inbound or outbound).
/// Handles LEAF handshake, LS+/LS- interest propagation, and LMSG forwarding.
/// The JetStreamDomain property is propagated during handshake for domain-aware routing.
/// Go reference: leafnode.go.
/// </summary>
public sealed class LeafConnection(Socket socket) : IAsyncDisposable
{
private readonly NetworkStream _stream = new(socket, ownsSocket: true);
@@ -16,18 +22,32 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
public Func<RemoteSubscription, Task>? RemoteSubscriptionReceived { get; set; }
public Func<LeafMessage, Task>? MessageReceived { get; set; }
/// <summary>
/// JetStream domain for this leaf connection. When set to a non-empty value, it is
/// appended to the outgoing LEAF handshake line as a <c>domain=</c> attribute.
/// Go reference: leafnode.go — jsClusterDomain field in leafInfo.
/// </summary>
/// <remarks>
/// NOTE(review): an earlier comment also stated the domain is included in LMSG frames;
/// that is not visible in the handshake code here — confirm against the LMSG writer.
/// </remarks>
public string? JetStreamDomain { get; set; }
/// <summary>
/// The JetStream domain advertised by the remote side during handshake.
/// Assigned while parsing the peer's LEAF line when it carries a <c>domain=</c>
/// attribute; otherwise left at its default (null).
/// </summary>
public string? RemoteJetStreamDomain { get; private set; }
/// <summary>
/// Performs the outbound (solicited) side of the LEAF handshake: sends this server's
/// handshake line exactly once, then reads and parses the peer's reply.
/// </summary>
/// <param name="serverId">Identifier of the local server, sent in the LEAF line.</param>
/// <param name="ct">Token used to cancel the write and the read.</param>
/// <exception cref="InvalidOperationException">
/// The peer's reply is not a valid LEAF handshake line or carries no server id.
/// </exception>
public async Task PerformOutboundHandshakeAsync(string serverId, CancellationToken ct)
{
    // Send a single handshake line. Writing both the bare "LEAF {id}" form and the
    // domain-aware form would put two LEAF lines on the wire and desynchronise the peer.
    var handshakeLine = BuildHandshakeLine(serverId);
    await WriteLineAsync(handshakeLine, ct);

    // ParseHandshakeResponse records both RemoteId and RemoteJetStreamDomain, so no
    // separate id-only parse is needed.
    var line = await ReadLineAsync(ct);
    ParseHandshakeResponse(line);
}
/// <summary>
/// Performs the inbound (accepted) side of the LEAF handshake: reads and parses the
/// peer's handshake line, then answers with this server's handshake line exactly once.
/// </summary>
/// <param name="serverId">Identifier of the local server, sent in the LEAF reply.</param>
/// <param name="ct">Token used to cancel the read and the write.</param>
/// <exception cref="InvalidOperationException">
/// The peer's line is not a valid LEAF handshake line or carries no server id.
/// </exception>
public async Task PerformInboundHandshakeAsync(string serverId, CancellationToken ct)
{
    // Parse once with the domain-aware parser; an id-only parse would treat a trailing
    // "domain=..." attribute as part of the remote id.
    var line = await ReadLineAsync(ct);
    ParseHandshakeResponse(line);

    // Reply with a single handshake line (including the domain attribute when configured).
    var handshakeLine = BuildHandshakeLine(serverId);
    await WriteLineAsync(handshakeLine, ct);
}
public void StartLoop(CancellationToken ct)
@@ -77,6 +97,39 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
await _stream.DisposeAsync();
}
/// <summary>
/// Builds the LEAF handshake line for this connection. The line is
/// "LEAF {serverId}" and gains a " domain={JetStreamDomain}" suffix when a
/// non-empty JetStream domain is configured on this connection.
/// </summary>
/// <param name="serverId">Identifier of the local server.</param>
/// <returns>The handshake line, without a line terminator.</returns>
private string BuildHandshakeLine(string serverId)
{
    var domain = JetStreamDomain;
    return string.IsNullOrEmpty(domain)
        ? $"LEAF {serverId}"
        : $"LEAF {serverId} domain={domain}";
}
/// <summary>
/// Parses a peer handshake line of the form "LEAF {serverId} [attr ...]" and records
/// the remote server id in <c>RemoteId</c>. If any attribute has the form
/// "domain={name}", the name is stored in <c>RemoteJetStreamDomain</c>.
/// </summary>
/// <param name="line">Raw handshake line received from the peer.</param>
/// <exception cref="InvalidOperationException">
/// The line does not start with "LEAF " (case-insensitive) or contains no server id.
/// </exception>
private void ParseHandshakeResponse(string line)
{
    const string verb = "LEAF ";
    const string domainPrefix = "domain=";

    if (!line.StartsWith(verb, StringComparison.OrdinalIgnoreCase))
        throw new InvalidOperationException("Invalid leaf handshake");

    // Tokenise on any whitespace so repeated spaces or tabs between the id and its
    // attributes neither leak into the id nor hide the domain attribute.
    var tokens = line[verb.Length..].Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length == 0)
        throw new InvalidOperationException("Leaf handshake missing id");

    RemoteId = tokens[0];

    // Scan every attribute rather than only the first, and take just the matching
    // token so later attributes are not swallowed into the domain value.
    for (var i = 1; i < tokens.Length; i++)
    {
        var attr = tokens[i];
        if (attr.StartsWith(domainPrefix, StringComparison.OrdinalIgnoreCase))
        {
            RemoteJetStreamDomain = attr[domainPrefix.Length..];
            break;
        }
    }
}
private async Task ReadLoopAsync(CancellationToken ct)
{
while (!ct.IsCancellationRequested)
@@ -198,17 +251,6 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
return Encoding.ASCII.GetString([.. bytes]);
}
/// <summary>
/// Extracts the remote id from a "LEAF {id}" handshake line. Everything after the
/// "LEAF " prefix, trimmed of surrounding whitespace, is returned as the id.
/// </summary>
/// <param name="line">Raw handshake line received from the peer.</param>
/// <returns>The trimmed text following the "LEAF " prefix.</returns>
/// <exception cref="InvalidOperationException">
/// The line does not start with "LEAF " (case-insensitive) or the id is empty.
/// </exception>
private static string ParseHandshake(string line)
{
    const string verb = "LEAF ";

    var hasVerb = line.StartsWith(verb, StringComparison.OrdinalIgnoreCase);
    if (!hasVerb)
    {
        throw new InvalidOperationException("Invalid leaf handshake");
    }

    var remoteId = line[verb.Length..].Trim();
    return remoteId.Length > 0
        ? remoteId
        : throw new InvalidOperationException("Leaf handshake missing id");
}
private static bool TryParseAccountScopedInterest(string[] parts, out string account, out string subject, out string? queue)
{
account = "$G";

View File

@@ -7,6 +7,11 @@ using NATS.Server.Subscriptions;
namespace NATS.Server.LeafNodes;
/// <summary>
/// Manages leaf node connections — both inbound (accepted) and outbound (solicited).
/// Outbound connections use exponential backoff retry: 1s, 2s, 4s, ..., capped at 60s.
/// Go reference: leafnode.go.
/// </summary>
public sealed class LeafNodeManager : IAsyncDisposable
{
private readonly LeafNodeOptions _options;
@@ -21,6 +26,17 @@ public sealed class LeafNodeManager : IAsyncDisposable
private Socket? _listener;
private Task? _acceptLoopTask;
/// <summary>
/// Initial retry delay for solicited connections (1 second).
/// Used as the base of the exponential backoff computed by ComputeBackoff.
/// Go reference: leafnode.go — DEFAULT_LEAF_NODE_RECONNECT constant.
/// </summary>
internal static readonly TimeSpan InitialRetryDelay = TimeSpan.FromSeconds(1);
/// <summary>
/// Maximum retry delay for solicited connections (60 seconds).
/// ComputeBackoff never returns a delay larger than this value.
/// </summary>
internal static readonly TimeSpan MaxRetryDelay = TimeSpan.FromSeconds(60);
/// <summary>
/// The configured listen endpoint formatted as "{Host}:{Port}" from the leaf node options.
/// </summary>
public string ListenEndpoint => $"{_options.Host}:{_options.Port}";
public LeafNodeManager(
@@ -52,12 +68,41 @@ public sealed class LeafNodeManager : IAsyncDisposable
_acceptLoopTask = Task.Run(() => AcceptLoopAsync(_cts.Token));
foreach (var remote in _options.Remotes.Distinct(StringComparer.OrdinalIgnoreCase))
_ = Task.Run(() => ConnectWithRetryAsync(remote, _cts.Token));
_ = Task.Run(() => ConnectSolicitedWithRetryAsync(remote, _options.JetStreamDomain, _cts.Token));
_logger.LogDebug("Leaf manager started (listen={Host}:{Port})", _options.Host, _options.Port);
return Task.CompletedTask;
}
/// <summary>
/// Opens one solicited (outbound) leaf connection to <paramref name="url"/>:
/// connects a TCP socket, runs the outbound LEAF handshake with the JetStream domain
/// taken from the manager's options, and registers the resulting connection.
/// No retry is performed here; any failure disposes the socket and propagates.
/// Go reference: leafnode.go — connectSolicited.
/// </summary>
/// <param name="url">Remote endpoint, in the form accepted by ParseEndpoint.</param>
/// <param name="account">
/// Account name; used only in the debug log message ("$G" is logged when null).
/// </param>
/// <param name="ct">Token used to cancel the connect and the handshake.</param>
/// <returns>The registered leaf connection.</returns>
public async Task<LeafConnection> ConnectSolicitedAsync(string url, string? account, CancellationToken ct)
{
    var target = ParseEndpoint(url);
    var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
    var established = false;
    try
    {
        await socket.ConnectAsync(target.Address, target.Port, ct);

        var leaf = new LeafConnection(socket);
        leaf.JetStreamDomain = _options.JetStreamDomain;
        await leaf.PerformOutboundHandshakeAsync(_serverId, ct);
        Register(leaf);

        _logger.LogDebug("Solicited leaf connection established to {Url} (account={Account})", url, account ?? "$G");
        established = true;
        return leaf;
    }
    finally
    {
        // Release the socket on every failure path; on success the connection keeps it.
        if (!established)
            socket.Dispose();
    }
}
public async Task ForwardMessageAsync(string account, string subject, string? replyTo, ReadOnlyMemory<byte> payload, CancellationToken ct)
{
foreach (var connection in _connections.Values)
@@ -95,6 +140,17 @@ public sealed class LeafNodeManager : IAsyncDisposable
_logger.LogDebug("Leaf manager stopped");
}
/// <summary>
/// Returns the retry delay for the given zero-based attempt number: the initial
/// retry delay doubled once per attempt, capped at the maximum retry delay.
/// Resulting sequence: 1s, 2s, 4s, 8s, 16s, 32s, 60s, 60s, ...
/// </summary>
/// <param name="attempt">Zero-based attempt index; negative values are treated as 0.</param>
/// <returns>The delay to wait before the next connection attempt.</returns>
internal static TimeSpan ComputeBackoff(int attempt)
{
    var exponent = attempt < 0 ? 0 : attempt;
    var uncappedSeconds = InitialRetryDelay.TotalSeconds * Math.Pow(2, exponent);
    var cappedSeconds = Math.Min(uncappedSeconds, MaxRetryDelay.TotalSeconds);
    return TimeSpan.FromSeconds(cappedSeconds);
}
private async Task AcceptLoopAsync(CancellationToken ct)
{
while (!ct.IsCancellationRequested)
@@ -115,7 +171,10 @@ public sealed class LeafNodeManager : IAsyncDisposable
private async Task HandleInboundAsync(Socket socket, CancellationToken ct)
{
var connection = new LeafConnection(socket);
var connection = new LeafConnection(socket)
{
JetStreamDomain = _options.JetStreamDomain,
};
try
{
await connection.PerformInboundHandshakeAsync(_serverId, ct);
@@ -127,19 +186,32 @@ public sealed class LeafNodeManager : IAsyncDisposable
}
}
private async Task ConnectWithRetryAsync(string remote, CancellationToken ct)
private async Task ConnectSolicitedWithRetryAsync(string remote, string? jetStreamDomain, CancellationToken ct)
{
var attempt = 0;
while (!ct.IsCancellationRequested)
{
try
{
var endPoint = ParseEndpoint(remote);
var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
await socket.ConnectAsync(endPoint.Address, endPoint.Port, ct);
var connection = new LeafConnection(socket);
await connection.PerformOutboundHandshakeAsync(_serverId, ct);
Register(connection);
return;
try
{
await socket.ConnectAsync(endPoint.Address, endPoint.Port, ct);
var connection = new LeafConnection(socket)
{
JetStreamDomain = jetStreamDomain,
};
await connection.PerformOutboundHandshakeAsync(_serverId, ct);
Register(connection);
_logger.LogDebug("Solicited leaf connection established to {Remote}", remote);
return;
}
catch
{
socket.Dispose();
throw;
}
}
catch (OperationCanceledException)
{
@@ -147,12 +219,14 @@ public sealed class LeafNodeManager : IAsyncDisposable
}
catch (Exception ex)
{
_logger.LogDebug(ex, "Leaf connect retry for {Remote}", remote);
_logger.LogDebug(ex, "Leaf connect retry for {Remote} (attempt {Attempt})", remote, attempt);
}
var delay = ComputeBackoff(attempt);
attempt++;
try
{
await Task.Delay(250, ct);
await Task.Delay(delay, ct);
}
catch (OperationCanceledException)
{