Fix E2E test gaps and add comprehensive E2E + parity test suites

- Fix pull consumer fetch: send original stream subject in HMSG (not inbox)
  so NATS client distinguishes data messages from control messages
- Fix MaxAge expiry: add background timer in StreamManager for periodic pruning
- Fix JetStream wire format: Go-compatible anonymous objects with string enums,
  proper offset-based pagination for stream/consumer list APIs
- Add 42 E2E black-box tests (core messaging, auth, TLS, accounts, JetStream)
- Add ~1000 parity tests across all subsystems (gap closure)
- Update gap inventory docs to reflect implementation status
This commit is contained in:
Joseph Doherty
2026-03-12 14:09:23 -04:00
parent 79c1ee8776
commit c30e67a69d
226 changed files with 17801 additions and 709 deletions

View File

@@ -0,0 +1,40 @@
using System.Text.Json.Serialization;
namespace NATS.Server.LeafNodes;
/// <summary>
/// CONNECT payload sent on solicited leaf connections.
/// Go reference: leafnode.go leafConnectInfo.
/// </summary>
/// <remarks>
/// Every member is init-only and carries an explicit JSON name via <c>JsonPropertyName</c>.
/// System.Text.Json writes members in declaration order, so reordering them changes the payload text.
/// NOTE(review): the JSON names below are the ones chosen in this port; confirm them against the
/// struct tags on Go's leafConnectInfo before relying on wire compatibility with a Go server.
/// </remarks>
public sealed class LeafConnectInfo
{
/// <summary>Written as the <c>jwt</c> JSON member. Presumably the JWT credential for the link; confirm against callers.</summary>
[JsonPropertyName("jwt")]
public string? Jwt { get; init; }
/// <summary>Written as the <c>nkey</c> JSON member. Presumably the public NKey; confirm against callers.</summary>
[JsonPropertyName("nkey")]
public string? Nkey { get; init; }
/// <summary>Written as the <c>sig</c> JSON member. Presumably a signature accompanying the credential; confirm against callers.</summary>
[JsonPropertyName("sig")]
public string? Sig { get; init; }
/// <summary>Written as the <c>hub</c> JSON member (boolean, defaults to false).</summary>
[JsonPropertyName("hub")]
public bool Hub { get; init; }
/// <summary>Written as the <c>cluster</c> JSON member; null when not set.</summary>
[JsonPropertyName("cluster")]
public string? Cluster { get; init; }
/// <summary>Written as the <c>headers</c> JSON member (boolean, defaults to false).</summary>
[JsonPropertyName("headers")]
public bool Headers { get; init; }
/// <summary>Written as the <c>jetstream</c> JSON member (boolean, defaults to false).</summary>
[JsonPropertyName("jetstream")]
public bool JetStream { get; init; }
/// <summary>Written as the <c>compression</c> JSON member; null when not set.</summary>
[JsonPropertyName("compression")]
public string? Compression { get; init; }
/// <summary>Written as the <c>remote_account</c> JSON member; null when not set.</summary>
[JsonPropertyName("remote_account")]
public string? RemoteAccount { get; init; }
/// <summary>Written as the <c>proto</c> JSON member (integer, defaults to 0).</summary>
[JsonPropertyName("proto")]
public int Proto { get; init; }
}

View File

@@ -1,5 +1,6 @@
using System.Net.Sockets;
using System.Text;
using System.Text.Json;
using NATS.Server.Subscriptions;
namespace NATS.Server.LeafNodes;
@@ -15,6 +16,8 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
private readonly NetworkStream _stream = new(socket, ownsSocket: true);
private readonly SemaphoreSlim _writeGate = new(1, 1);
private readonly CancellationTokenSource _closedCts = new();
private TimeSpan _connectDelay;
private string? _remoteCluster;
private Task? _loopTask;
public string? RemoteId { get; internal set; }
@@ -22,6 +25,24 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
public Func<RemoteSubscription, Task>? RemoteSubscriptionReceived { get; set; }
public Func<LeafMessage, Task>? MessageReceived { get; set; }
/// <summary>
/// True when this connection was solicited (outbound dial) rather than accepted inbound.
/// Go reference: isSolicitedLeafNode.
/// </summary>
public bool IsSolicited { get; internal set; }
/// <summary>
/// True when this connection is a spoke-side leaf connection.
/// Go reference: isSpokeLeafNode / isHubLeafNode.
/// </summary>
public bool IsSpoke { get; set; }
/// <summary>
/// True when this leaf connection is isolated from hub propagation.
/// Go reference: isIsolatedLeafNode.
/// </summary>
public bool Isolated { get; set; }
/// <summary>
/// JetStream domain for this leaf connection. When set, the domain is propagated
/// in the LEAF handshake and included in LMSG frames for domain-aware routing.
@@ -58,6 +79,12 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
/// </summary>
public bool PermsSynced { get; private set; }
/// <summary>
/// Reports the reconnect delay currently recorded on this connection.
/// Go reference: leafnode.go setLeafConnectDelayIfSoliciting.
/// </summary>
/// <returns>The stored delay value.</returns>
public TimeSpan GetConnectDelay()
{
    return _connectDelay;
}
/// <summary>
/// Sets the allowed publish and subscribe subjects for this connection and marks
/// permissions as synced. Passing null for either list clears that list.
@@ -106,11 +133,35 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
=> _loopTask?.WaitAsync(ct) ?? Task.CompletedTask;
public Task SendLsPlusAsync(string account, string subject, string? queue, CancellationToken ct)
=> WriteLineAsync(queue is { Length: > 0 } ? $"LS+ {account} {subject} {queue}" : $"LS+ {account} {subject}", ct);
=> SendLsPlusAsync(account, subject, queue, queueWeight: 0, ct);
/// <summary>
/// Writes an LS+ line for the given account and subject. The queue name is appended only when it is
/// non-empty, and the weight is appended only when a queue is present and the weight is positive.
/// </summary>
public Task SendLsPlusAsync(string account, string subject, string? queue, int queueWeight, CancellationToken ct)
{
    var hasQueue = queue is { Length: > 0 };
    var line = (hasQueue, queueWeight > 0) switch
    {
        (true, true) => $"LS+ {account} {subject} {queue} {queueWeight}",
        (true, false) => $"LS+ {account} {subject} {queue}",
        _ => $"LS+ {account} {subject}",
    };

    return WriteLineAsync(line, ct);
}
/// <summary>
/// Writes an LS- line for the given account and subject, appending the queue name when it is non-empty.
/// </summary>
public Task SendLsMinusAsync(string account, string subject, string? queue, CancellationToken ct)
{
    if (queue is { Length: > 0 })
        return WriteLineAsync($"LS- {account} {subject} {queue}", ct);

    return WriteLineAsync($"LS- {account} {subject}", ct);
}
/// <summary>
/// Serializes <paramref name="connectInfo"/> to JSON with the default serializer options and writes it
/// as a single <c>CONNECT</c> protocol line.
/// Go reference: leafnode.go sendLeafConnect.
/// </summary>
/// <param name="connectInfo">Payload to serialize; must not be null.</param>
/// <param name="ct">Token passed through to the line writer.</param>
/// <exception cref="ArgumentNullException"><paramref name="connectInfo"/> is null.</exception>
public Task SendLeafConnectAsync(LeafConnectInfo connectInfo, CancellationToken ct)
{
    if (connectInfo is null)
        throw new ArgumentNullException(nameof(connectInfo));

    var line = "CONNECT " + JsonSerializer.Serialize(connectInfo);
    return WriteLineAsync(line, ct);
}
public async Task SendMessageAsync(string account, string subject, string? replyTo, ReadOnlyMemory<byte> payload, CancellationToken ct)
{
var reply = string.IsNullOrEmpty(replyTo) ? "-" : replyTo;
@@ -148,6 +199,63 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
return $"LEAF {serverId}";
}
/// <summary>Method form of <see cref="IsSolicited"/>.</summary>
public bool IsSolicitedLeafNode() => IsSolicited;
/// <summary>Method form of <see cref="IsSpoke"/>.</summary>
public bool IsSpokeLeafNode() => IsSpoke;
/// <summary>Negation of <see cref="IsSpoke"/>: any connection not flagged as spoke is reported as hub.</summary>
public bool IsHubLeafNode() => !IsSpoke;
/// <summary>Method form of <see cref="Isolated"/>.</summary>
public bool IsIsolatedLeafNode() => Isolated;
/// <summary>Cluster name taken from a <c>cluster=</c> handshake attribute, or null when none was seen.</summary>
public string? RemoteCluster() => _remoteCluster;
/// <summary>
/// Records <paramref name="delay"/> as this connection's connect delay, but only when the
/// connection is solicited; otherwise the call does nothing.
/// Go reference: leafnode.go setLeafConnectDelayIfSoliciting.
/// </summary>
/// <param name="delay">Delay to store.</param>
public void SetLeafConnectDelayIfSoliciting(TimeSpan delay)
{
    if (!IsSolicited)
        return;

    _connectDelay = delay;
}
/// <summary>
/// Inspects the text of a remote ERR and, when it matches a known category, applies the
/// corresponding reconnect delay via <see cref="SetLeafConnectDelayIfSoliciting"/>.
/// Go reference: leafnode.go leafProcessErr.
/// </summary>
/// <remarks>
/// Matching is case-insensitive substring search, checked in this order (first match wins):
/// "permission", then "loop", then both "cluster name" and "same". Null, empty, whitespace-only
/// or unrecognised text leaves the delay untouched.
/// </remarks>
/// <param name="errStr">Error text received from the remote side.</param>
public void LeafProcessErr(string errStr)
{
    if (string.IsNullOrWhiteSpace(errStr))
        return;

    bool Mentions(string fragment) => errStr.Contains(fragment, StringComparison.OrdinalIgnoreCase);

    TimeSpan? hint = null;
    if (Mentions("permission"))
        hint = LeafNodeManager.LeafNodeReconnectAfterPermViolation;
    else if (Mentions("loop"))
        hint = LeafNodeManager.LeafNodeReconnectDelayAfterLoopDetected;
    else if (Mentions("cluster name") && Mentions("same"))
        hint = LeafNodeManager.LeafNodeReconnectDelayAfterClusterNameSame;

    if (hint is { } delay)
        SetLeafConnectDelayIfSoliciting(delay);
}
/// <summary>
/// Reports a subscribe-side permission violation by forwarding to
/// <see cref="LeafPermViolation"/> with <c>pub</c> set to false.
/// Go reference: leafnode.go leafSubPermViolation.
/// </summary>
/// <param name="subj">Subject involved in the violation.</param>
public void LeafSubPermViolation(string subj)
{
    LeafPermViolation(pub: false, subj);
}
/// <summary>
/// Reacts to a publish or subscribe permission violation by applying the permission-violation
/// reconnect delay (effective only on solicited connections).
/// Go reference: leafnode.go leafPermViolation.
/// </summary>
/// <param name="pub">True for a publish violation, false for subscribe; not consulted by this implementation.</param>
/// <param name="subj">Subject involved in the violation; not consulted by this implementation.</param>
public void LeafPermViolation(bool pub, string subj)
{
    SetLeafConnectDelayIfSoliciting(LeafNodeManager.LeafNodeReconnectAfterPermViolation);
}
private void ParseHandshakeResponse(string line)
{
if (!line.StartsWith("LEAF ", StringComparison.OrdinalIgnoreCase))
@@ -163,9 +271,19 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
{
RemoteId = rest[..spaceIdx];
var attrs = rest[(spaceIdx + 1)..];
const string domainPrefix = "domain=";
if (attrs.StartsWith(domainPrefix, StringComparison.OrdinalIgnoreCase))
RemoteJetStreamDomain = attrs[domainPrefix.Length..].Trim();
foreach (var token in attrs.Split(' ', StringSplitOptions.RemoveEmptyEntries))
{
const string domainPrefix = "domain=";
if (token.StartsWith(domainPrefix, StringComparison.OrdinalIgnoreCase))
{
RemoteJetStreamDomain = token[domainPrefix.Length..].Trim();
continue;
}
const string clusterPrefix = "cluster=";
if (token.StartsWith(clusterPrefix, StringComparison.OrdinalIgnoreCase))
_remoteCluster = token[clusterPrefix.Length..].Trim();
}
}
else
{
@@ -190,9 +308,10 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
if (line.StartsWith("LS+ ", StringComparison.Ordinal))
{
var parts = line.Split(' ', StringSplitOptions.RemoveEmptyEntries);
if (RemoteSubscriptionReceived != null && TryParseAccountScopedInterest(parts, out var parsedAccount, out var parsedSubject, out var queue))
if (RemoteSubscriptionReceived != null &&
TryParseAccountScopedInterest(parts, out var parsedAccount, out var parsedSubject, out var queue, out var queueWeight))
{
await RemoteSubscriptionReceived(new RemoteSubscription(parsedSubject, queue, RemoteId ?? string.Empty, parsedAccount));
await RemoteSubscriptionReceived(new RemoteSubscription(parsedSubject, queue, RemoteId ?? string.Empty, parsedAccount, QueueWeight: queueWeight));
}
continue;
}
@@ -200,7 +319,8 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
if (line.StartsWith("LS- ", StringComparison.Ordinal))
{
var parts = line.Split(' ', StringSplitOptions.RemoveEmptyEntries);
if (RemoteSubscriptionReceived != null && TryParseAccountScopedInterest(parts, out var parsedAccount, out var parsedSubject, out var queue))
if (RemoteSubscriptionReceived != null &&
TryParseAccountScopedInterest(parts, out var parsedAccount, out var parsedSubject, out var queue, out _))
{
await RemoteSubscriptionReceived(RemoteSubscription.Removal(parsedSubject, queue, RemoteId ?? string.Empty, parsedAccount));
}
@@ -294,11 +414,12 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
return Encoding.ASCII.GetString([.. bytes]);
}
private static bool TryParseAccountScopedInterest(string[] parts, out string account, out string subject, out string? queue)
private static bool TryParseAccountScopedInterest(string[] parts, out string account, out string subject, out string? queue, out int queueWeight)
{
account = "$G";
subject = string.Empty;
queue = null;
queueWeight = 1;
if (parts.Length < 2)
return false;
@@ -310,11 +431,15 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
account = parts[1];
subject = parts[2];
queue = parts.Length >= 4 ? parts[3] : null;
if (queue is { Length: > 0 } && parts.Length >= 5)
queueWeight = ParseQueueWeight(parts[4]);
return true;
}
subject = parts[1];
queue = parts.Length >= 3 ? parts[2] : null;
if (queue is { Length: > 0 } && parts.Length >= 4)
queueWeight = ParseQueueWeight(parts[3]);
return true;
}
@@ -322,6 +447,9 @@ public sealed class LeafConnection(Socket socket) : IAsyncDisposable
=> token.Contains('.', StringComparison.Ordinal)
|| token.Contains('*', StringComparison.Ordinal)
|| token.Contains('>', StringComparison.Ordinal);
/// <summary>
/// Parses the queue-weight token that may follow the queue name in an interest frame.
/// </summary>
/// <remarks>
/// The token comes off the wire, so it is parsed with the invariant culture rather than the
/// process's current culture; the accepted syntax (optional surrounding whitespace and leading
/// sign) is unchanged.
/// </remarks>
/// <param name="token">Raw token text.</param>
/// <returns>The parsed value when it is a positive integer; otherwise 1.</returns>
private static int ParseQueueWeight(string token)
    => int.TryParse(
           token,
           System.Globalization.NumberStyles.Integer,
           System.Globalization.CultureInfo.InvariantCulture,
           out var parsed) && parsed > 0
        ? parsed
        : 1;
}
/// <summary>
/// Message value passed to the <c>LeafConnection.MessageReceived</c> callback: the subject, an
/// optional reply subject, the payload bytes, and the account name (defaults to <c>"$G"</c>).
/// </summary>
public sealed record LeafMessage(string Subject, string? ReplyTo, ReadOnlyMemory<byte> Payload, string Account = "$G");

View File

@@ -3,6 +3,7 @@ using System.Net;
using System.Net.Sockets;
using Microsoft.Extensions.Logging;
using NATS.Server.Configuration;
using NATS.Server.Gateways;
using NATS.Server.Subscriptions;
namespace NATS.Server.LeafNodes;
@@ -16,6 +17,11 @@ namespace NATS.Server.LeafNodes;
/// </summary>
public sealed class LeafNodeManager : IAsyncDisposable
{
/// <summary>Reconnect delay (30 seconds) applied by <c>LeafConnection.LeafProcessErr</c> when the remote error text mentions a loop.</summary>
public static readonly TimeSpan LeafNodeReconnectDelayAfterLoopDetected = TimeSpan.FromSeconds(30);
/// <summary>Reconnect delay (30 seconds) applied on permission violations and permission-related remote errors.</summary>
public static readonly TimeSpan LeafNodeReconnectAfterPermViolation = TimeSpan.FromSeconds(30);
/// <summary>Reconnect delay (30 seconds) applied when the remote error text reports the same cluster name.</summary>
public static readonly TimeSpan LeafNodeReconnectDelayAfterClusterNameSame = TimeSpan.FromSeconds(30);
/// <summary>Five-second wait. NOTE(review): its consumer is not visible here; presumably the grace period before a leaf connection is closed — confirm.</summary>
public static readonly TimeSpan LeafNodeWaitBeforeClose = TimeSpan.FromSeconds(5);
private readonly LeafNodeOptions _options;
private readonly ServerStats _stats;
private readonly string _serverId;
@@ -90,6 +96,27 @@ public sealed class LeafNodeManager : IAsyncDisposable
public bool IsLeafConnectDisabled(string remoteUrl)
{
    // The global switch wins; otherwise the URL must be individually registered as disabled.
    if (IsGloballyDisabled)
        return true;

    return _disabledRemotes.ContainsKey(remoteUrl);
}
/// <summary>
/// Tells whether <paramref name="remoteUrl"/> should still be dialled: it must not be disabled
/// and must appear (case-insensitively) either in the flat remotes list or among the URLs of a
/// configured remote leaf.
/// Go reference: leafnode.go remoteLeafNodeStillValid.
/// </summary>
/// <param name="remoteUrl">Remote URL to look up.</param>
/// <returns>True when the URL is configured and not disabled; otherwise false.</returns>
internal bool RemoteLeafNodeStillValid(string remoteUrl)
{
    if (IsLeafConnectDisabled(remoteUrl))
        return false;

    bool IsSameUrl(string candidate)
        => string.Equals(candidate, remoteUrl, StringComparison.OrdinalIgnoreCase);

    return _options.Remotes.Any(url => IsSameUrl(url))
        || _options.RemoteLeaves.Any(leaf => leaf.Urls.Any(url => IsSameUrl(url)));
}
/// <summary>
/// Disables outbound leaf connections to the specified remote URL.
/// Has no effect if the remote is already disabled.
@@ -232,6 +259,8 @@ public sealed class LeafNodeManager : IAsyncDisposable
var connection = new LeafConnection(socket)
{
JetStreamDomain = _options.JetStreamDomain,
IsSolicited = true,
IsSpoke = true,
};
await connection.PerformOutboundHandshakeAsync(_serverId, ct);
Register(connection);
@@ -263,6 +292,9 @@ public sealed class LeafNodeManager : IAsyncDisposable
}
public void PropagateLocalSubscription(string account, string subject, string? queue)
{
    // Delegates to the weighted overload with a weight of 0.
    PropagateLocalSubscription(account, subject, queue, queueWeight: 0);
}
public void PropagateLocalSubscription(string account, string subject, string? queue, int queueWeight)
{
// Subscription propagation is also subject to export filtering:
// we don't propagate subscriptions for subjects that are denied.
@@ -273,7 +305,18 @@ public sealed class LeafNodeManager : IAsyncDisposable
}
foreach (var connection in _connections.Values)
_ = connection.SendLsPlusAsync(account, subject, queue, _cts?.Token ?? CancellationToken.None);
{
if (!CanSpokeSendSubscription(connection, subject))
{
_logger.LogDebug(
"Leaf subscription propagation denied for spoke connection {RemoteId} and subject {Subject} (subscribe permissions)",
connection.RemoteId ?? "<unknown>",
subject);
continue;
}
_ = connection.SendLsPlusAsync(account, subject, queue, queueWeight, _cts?.Token ?? CancellationToken.None);
}
}
public void PropagateLocalUnsubscription(string account, string subject, string? queue)
@@ -585,6 +628,9 @@ public sealed class LeafNodeManager : IAsyncDisposable
var attempt = 0;
while (!ct.IsCancellationRequested)
{
if (!RemoteLeafNodeStillValid(remote))
return;
try
{
var endPoint = ParseEndpoint(remote);
@@ -595,6 +641,8 @@ public sealed class LeafNodeManager : IAsyncDisposable
var connection = new LeafConnection(socket)
{
JetStreamDomain = jetStreamDomain,
IsSolicited = true,
IsSpoke = true,
};
await connection.PerformOutboundHandshakeAsync(_serverId, ct);
Register(connection);
@@ -736,6 +784,39 @@ public sealed class LeafNodeManager : IAsyncDisposable
return null;
}
/// <summary>
/// Decides whether a subscription on <paramref name="subject"/> may be propagated over
/// <paramref name="connection"/>.
/// </summary>
/// <remarks>
/// Propagation is allowed when the connection is not a spoke, when the subject is exempted by
/// <see cref="ShouldBypassSpokeSubscribePermission"/>, when permissions have not been synced or
/// the allow-list is empty, or when the subject matches at least one allow-list entry.
/// </remarks>
private static bool CanSpokeSendSubscription(LeafConnection connection, string subject)
{
    if (!connection.IsSpokeLeafNode() || ShouldBypassSpokeSubscribePermission(subject))
        return true;

    // Without synced permissions there is nothing to enforce yet.
    if (!connection.PermsSynced)
        return true;

    var allowList = connection.AllowedSubscribeSubjects;
    if (allowList.Count == 0)
        return true;

    for (var index = 0; index < allowList.Count; index++)
    {
        if (SubjectMatch.MatchLiteral(subject, allowList[index]))
            return true;
    }

    return false;
}
/// <summary>
/// Tells whether <paramref name="subject"/> is exempt from the spoke subscribe-permission check:
/// it must start with <c>$LDS.</c> or with one of the two gateway reply prefixes from
/// <c>ReplyMapper</c>.
/// </summary>
private static bool ShouldBypassSpokeSubscribePermission(string subject)
{
    // Pre-filter: nothing is exempt unless the subject begins with '$' or '_'.
    if (string.IsNullOrEmpty(subject) || subject[0] is not ('$' or '_'))
        return false;

    if (subject.StartsWith("$LDS.", StringComparison.Ordinal))
        return true;

    if (subject.StartsWith(ReplyMapper.GatewayReplyPrefix, StringComparison.Ordinal))
        return true;

    return subject.StartsWith(ReplyMapper.OldGatewayReplyPrefix, StringComparison.Ordinal);
}
private static IPEndPoint ParseEndpoint(string endpoint)
{
var parts = endpoint.Split(':', 2, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);

View File

@@ -0,0 +1,46 @@
using NATS.Server.Subscriptions;
namespace NATS.Server.LeafNodes;
/// <summary>
/// Key-building helpers for leaf-node subscription maps.
/// Go reference: server/leafnode.go keyFromSub / keyFromSubWithOrigin.
/// </summary>
public static class LeafSubKey
{
    public const string KeyRoutedSub = "R";
    public const byte KeyRoutedSubByte = (byte)'R';
    public const string KeyRoutedLeafSub = "L";
    public const byte KeyRoutedLeafSubByte = (byte)'L';

    public static readonly TimeSpan SharedSysAccDelay = TimeSpan.FromMilliseconds(250);
    public static readonly TimeSpan ConnectProcessTimeout = TimeSpan.FromSeconds(2);

    /// <summary>
    /// Returns the subject alone, or "subject queue" when the subscription has a non-empty queue.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="sub"/> is null.</exception>
    public static string KeyFromSub(Subscription sub)
    {
        ArgumentNullException.ThrowIfNull(sub);

        if (sub.Queue is not { Length: > 0 })
            return sub.Subject;

        return $"{sub.Subject} {sub.Queue}";
    }

    /// <summary>
    /// Builds a space-separated key of the form "prefix subject [queue] [origin]". The prefix is
    /// <see cref="KeyRoutedLeafSub"/> when a non-empty origin is given and <see cref="KeyRoutedSub"/>
    /// otherwise; queue and origin are appended only when non-empty.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="sub"/> is null.</exception>
    public static string KeyFromSubWithOrigin(Subscription sub, string? origin = null)
    {
        ArgumentNullException.ThrowIfNull(sub);

        var fromLeaf = !string.IsNullOrEmpty(origin);
        var key = (fromLeaf ? KeyRoutedLeafSub : KeyRoutedSub) + " " + sub.Subject;

        if (sub.Queue is { Length: > 0 })
            key += " " + sub.Queue;

        if (fromLeaf)
            key += " " + origin;

        return key;
    }
}