Fix E2E test gaps and add comprehensive E2E + parity test suites

- Fix pull consumer fetch: send original stream subject in HMSG (not inbox)
  so NATS client distinguishes data messages from control messages
- Fix MaxAge expiry: add background timer in StreamManager for periodic pruning
- Fix JetStream wire format: Go-compatible anonymous objects with string enums,
  proper offset-based pagination for stream/consumer list APIs
- Add 42 E2E black-box tests (core messaging, auth, TLS, accounts, JetStream)
- Add ~1000 parity tests across all subsystems (gap closure)
- Update gap inventory docs to reflect implementation status
This commit is contained in:
Joseph Doherty
2026-03-12 14:09:23 -04:00
parent 79c1ee8776
commit c30e67a69d
226 changed files with 17801 additions and 709 deletions

View File

@@ -1,5 +1,6 @@
using System.Collections.Concurrent;
using System.Net;
using System.Net.NetworkInformation;
using System.Net.Security;
using System.Net.Sockets;
using System.Runtime.InteropServices;
@@ -9,6 +10,7 @@ using System.Text.Json;
using Microsoft.Extensions.Logging;
using NATS.NKeys;
using NATS.Server.Auth;
using NATS.Server.Auth.Jwt;
using NATS.Server.Configuration;
using NATS.Server.Events;
using NATS.Server.Gateways;
@@ -61,6 +63,7 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
/// via InternalsVisibleTo.
/// </summary>
internal RouteManager? RouteManager => _routeManager;
internal GatewayManager? GatewayManager => _gatewayManager;
private readonly GatewayManager? _gatewayManager;
private readonly LeafNodeManager? _leafNodeManager;
private readonly InternalClient? _jetStreamInternalClient;
@@ -90,9 +93,15 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
private readonly List<PosixSignalRegistration> _signalRegistrations = [];
private string? _portsFilePath;
private DateTime _configTime = DateTime.UtcNow;
private static readonly TimeSpan AcceptMinSleep = TimeSpan.FromMilliseconds(10);
private static readonly TimeSpan AcceptMaxSleep = TimeSpan.FromSeconds(1);
private static readonly TimeSpan AcceptMinSleep = NatsProtocol.AcceptMinSleep;
private static readonly TimeSpan AcceptMaxSleep = NatsProtocol.AcceptMaxSleep;
// Serializer options used when writing JetStream API responses and publish acks:
// property names are emitted in snake_case and null-valued properties are omitted.
private static readonly JsonSerializerOptions s_jetStreamJsonOptions = new()
{
PropertyNamingPolicy = JsonNamingPolicy.SnakeCaseLower,
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
};
public SubList SubList => _globalAccount.SubList;
public byte[] CachedInfoLine => _cachedInfoLine;
@@ -117,6 +126,152 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
public int JetStreamConsumers => _jetStreamConsumerManager?.ConsumerCount ?? 0;
public Action? ReOpenLogFile { get; set; }
public IEnumerable<NatsClient> GetClients() => _clients.Values;
/// <summary>
/// Returns the cluster name from the server options, or <c>null</c> when no
/// cluster options are configured (or the configured name itself is null).
/// </summary>
public string? ClusterName() => _options.Cluster?.Name;
/// <summary>
/// Returns the connected server ids from the route manager's topology snapshot,
/// or an empty list when there is no route manager.
/// </summary>
public IReadOnlyList<string> ActivePeers()
=> _routeManager?.BuildTopologySnapshot().ConnectedServerIds ?? [];
/// <summary>
/// Handles the profiling port option. No profiling endpoint is started here;
/// a configured port only results in a warning being logged.
/// </summary>
/// <returns>
/// <c>false</c> when no positive profiling port is configured; otherwise
/// <c>true</c>, after the warning has been logged.
/// </returns>
public bool StartProfiler()
{
    var profPort = _options.ProfPort;
    if (profPort > 0)
    {
        _logger.LogWarning("Profiling endpoint not yet supported (port: {ProfPort})", profPort);
        return true;
    }

    return false;
}
/// <summary>
/// Closes the client with the given id via <c>CloseClientById</c>, requesting a minimal flush.
/// </summary>
/// <param name="clientId">Id of the client connection to close.</param>
/// <returns><c>true</c> when a client with that id was found; otherwise <c>false</c>.</returns>
public bool DisconnectClientByID(ulong clientId)
=> CloseClientById(clientId, minimalFlush: true);
/// <summary>
/// Closes the client with the given id via <c>CloseClientById</c> without requesting a minimal flush.
/// NOTE(review): "LDM" presumably stands for lame-duck mode — confirm against the Go reference.
/// </summary>
/// <param name="clientId">Id of the client connection to close.</param>
/// <returns><c>true</c> when a client with that id was found; otherwise <c>false</c>.</returns>
public bool LDMClientByID(ulong clientId)
=> CloseClientById(clientId, minimalFlush: false);
/// <summary>
/// Builds a <c>Ports</c> report listing the endpoints of every listener category
/// (client, monitoring, cluster, profile, websocket, leaf nodes).
/// </summary>
/// <remarks>
/// For cluster and leaf nodes the running manager's listen endpoint is used when the
/// manager exists; otherwise the configured host/port is used. Entries with a blank
/// host/endpoint or a non-positive port are left out by <c>AddEndpoint</c>.
/// </remarks>
/// <returns>A freshly populated <c>Ports</c> instance.</returns>
public Ports PortsInfo()
{
    var report = new Ports();

    // Client and monitoring listeners come straight from the options.
    AddEndpoint(report.Nats, _options.Host, _options.Port);
    AddEndpoint(report.Monitoring, _options.MonitorHost, _options.MonitorPort);

    // Cluster: live route manager first, configuration as fallback.
    if (_routeManager is not null)
    {
        AddEndpoint(report.Cluster, _routeManager.ListenEndpoint);
    }
    else if (_options.Cluster is not null)
    {
        AddEndpoint(report.Cluster, _options.Cluster.Host, _options.Cluster.Port);
    }

    AddEndpoint(report.Profile, _options.Host, _options.ProfPort);

    var webSocket = _options.WebSocket;
    if (webSocket.Port >= 0)
    {
        AddEndpoint(report.WebSocket, webSocket.Host, webSocket.Port);
    }

    // Leaf nodes: live manager first, configuration as fallback.
    if (_leafNodeManager is not null)
    {
        AddEndpoint(report.LeafNodes, _leafNodeManager.ListenEndpoint);
    }
    else if (_options.LeafNode is not null)
    {
        AddEndpoint(report.LeafNodes, _options.LeafNode.Host, _options.LeafNode.Port);
    }

    return report;
}
/// <summary>
/// Returns the client connect URLs this server advertises.
/// </summary>
/// <remarks>
/// When a client advertise address is configured it is the only URL returned
/// (given the <c>nats</c> scheme if it has none). Otherwise one URL is produced per
/// host returned by <c>GetNonLocalIPsIfHostIsIPAny</c> for the configured host.
/// IPv6 literals are wrapped in brackets so the port separator stays unambiguous.
/// </remarks>
/// <returns>A list of <c>nats://host:port</c> URLs.</returns>
public IReadOnlyList<string> GetConnectURLs()
{
    if (!string.IsNullOrWhiteSpace(_options.ClientAdvertise))
        return [NormalizeAdvertiseUrl(_options.ClientAdvertise!, "nats")];

    var hosts = GetNonLocalIPsIfHostIsIPAny(_options.Host);
    var result = new List<string>(hosts.Count);
    foreach (var host in hosts)
    {
        // A ':' inside the host means an IPv6 literal; without brackets
        // "nats://2001:db8::1:4222" cannot be split into host and port.
        var urlHost = host.Contains(':') && !host.StartsWith('[')
            ? $"[{host}]"
            : host;
        result.Add($"nats://{urlHost}:{_options.Port}");
    }

    return result;
}
/// <summary>
/// Refreshes the connect URLs in the server INFO, rebuilds the cached INFO line,
/// and queues that line to every client that has already sent CONNECT.
/// </summary>
public void UpdateServerINFOAndSendINFOToClients()
{
    _serverInfo.ConnectUrls = [.. GetConnectURLs()];
    BuildCachedInfo();

    // Clients that have not completed CONNECT are skipped.
    foreach (var connected in _clients.Values.Where(static c => c.ConnectReceived))
    {
        connected.QueueOutbound(_cachedInfoLine);
    }
}
/// <summary>
/// Returns a single URL clients can use to reach this server.
/// </summary>
/// <remarks>
/// The configured client advertise address wins when present (the <c>nats</c> scheme is
/// added if it has none). Otherwise the configured host and port are used, with a
/// wildcard host replaced by <c>127.0.0.1</c>.
/// </remarks>
public string ClientURL()
{
    var advertise = _options.ClientAdvertise;
    if (string.IsNullOrWhiteSpace(advertise))
    {
        var listenHost = _options.Host;
        if (IsWildcardHost(listenHost))
        {
            listenHost = "127.0.0.1";
        }

        return $"nats://{listenHost}:{_options.Port}";
    }

    return NormalizeAdvertiseUrl(advertise!, "nats");
}
/// <summary>
/// Returns the websocket URL for this server, or <c>null</c> when the websocket
/// port is negative.
/// </summary>
/// <remarks>
/// The scheme is <c>ws</c> when <c>NoTls</c> is set and <c>wss</c> otherwise. A configured
/// advertise address wins (the scheme is added if it has none); otherwise the configured
/// websocket host and port are used, with a wildcard host replaced by <c>127.0.0.1</c>.
/// </remarks>
public string? WebsocketURL()
{
    if (_options.WebSocket.Port < 0)
    {
        return null;
    }

    // The scheme depends only on NoTls, so it is computed once for both branches.
    var scheme = _options.WebSocket.NoTls ? "ws" : "wss";

    if (string.IsNullOrWhiteSpace(_options.WebSocket.Advertise))
    {
        var host = _options.WebSocket.Host;
        if (IsWildcardHost(host))
        {
            host = "127.0.0.1";
        }

        return $"{scheme}://{host}:{_options.WebSocket.Port}";
    }

    return NormalizeAdvertiseUrl(_options.WebSocket.Advertise!, scheme);
}
/// <summary>Returns the route counter from the server stats, read atomically and narrowed to <c>int</c>.</summary>
public int NumRoutes() => (int)Interlocked.Read(ref _stats.Routes);
/// <summary>
/// Returns the sum of the route, gateway and leaf counters from the server stats.
/// Each counter is read atomically on its own; the three reads together are not one snapshot.
/// </summary>
public int NumRemotes()
=> (int)(Interlocked.Read(ref _stats.Routes) + Interlocked.Read(ref _stats.Gateways) + Interlocked.Read(ref _stats.Leafs));
/// <summary>Returns the leaf counter from the server stats, read atomically and narrowed to <c>int</c>.</summary>
public int NumLeafNodes() => (int)Interlocked.Read(ref _stats.Leafs);
/// <summary>Returns the gateway manager's outbound gateway count, or 0 when there is no gateway manager.</summary>
public int NumOutboundGateways() => _gatewayManager?.NumOutboundGateways() ?? 0;
/// <summary>Returns the gateway manager's inbound gateway count, or 0 when there is no gateway manager.</summary>
public int NumInboundGateways() => _gatewayManager?.NumInboundGateways() ?? 0;
/// <summary>Returns the total of <c>SubscriptionCount</c> over all accounts held by the server.</summary>
public int NumSubscriptions() => _accounts.Values.Sum(acc => acc.SubscriptionCount);
/// <summary>Returns <c>true</c> only when a JetStream service exists and reports that it is running.</summary>
public bool JetStreamEnabled() => _jetStreamService?.IsRunning ?? false;
/// <summary>
/// Returns a copy of the configured JetStream options, or <c>null</c> when JetStream
/// is not configured.
/// </summary>
/// <remarks>
/// Only the store directory, memory/file store limits, stream/consumer limits and the
/// domain are copied into the new instance.
/// </remarks>
public JetStreamOptions? JetStreamConfig()
    => _options.JetStream is { } configured
        ? new JetStreamOptions
        {
            StoreDir = configured.StoreDir,
            MaxMemoryStore = configured.MaxMemoryStore,
            MaxFileStore = configured.MaxFileStore,
            MaxStreams = configured.MaxStreams,
            MaxConsumers = configured.MaxConsumers,
            Domain = configured.Domain,
        }
        : null;
/// <summary>Returns the configured JetStream store directory, or an empty string when it is not set.</summary>
public string StoreDir() => _options.JetStream?.StoreDir ?? string.Empty;
/// <summary>Returns the UTC timestamp stored in <c>_configTime</c>.</summary>
public DateTime ConfigTime() => _configTime;
/// <summary>Returns the configured client host and port formatted as <c>host:port</c>.</summary>
public string Addr() => $"{_options.Host}:{_options.Port}";
/// <summary>Returns the configured monitoring <c>host:port</c>, or <c>null</c> when the monitor port is not positive.</summary>
public string? MonitorAddr()
=> _options.MonitorPort > 0
? $"{_options.MonitorHost}:{_options.MonitorPort}"
: null;
/// <summary>Returns the route manager's listen endpoint, or <c>null</c> when there is no route manager.</summary>
public string? ClusterAddr() => _routeManager?.ListenEndpoint;
/// <summary>Returns the gateway manager's listen endpoint, or <c>null</c> when there is no gateway manager.</summary>
public string? GatewayAddr() => _gatewayManager?.ListenEndpoint;
/// <summary>Returns the same value as <c>GatewayAddr</c>: the gateway manager's listen endpoint, or <c>null</c>.</summary>
public string? GetGatewayURL() => _gatewayManager?.ListenEndpoint;
/// <summary>Returns the configured gateway name, or <c>null</c> when no gateway options are configured.</summary>
public string? GetGatewayName() => _options.Gateway?.Name;
/// <summary>Returns the configured profiling <c>host:port</c> (using the client host), or <c>null</c> when the profiling port is not positive.</summary>
public string? ProfilerAddr()
=> _options.ProfPort > 0
? $"{_options.Host}:{_options.ProfPort}"
: null;
/// <summary>Returns the number of accounts whose <c>ClientCount</c> is greater than zero.</summary>
public int NumActiveAccounts() => _accounts.Values.Count(acc => acc.ClientCount > 0);
/// <summary>Returns the number of accounts currently held by the server.</summary>
public int NumLoadedAccounts() => _accounts.Count;
/// <summary>Returns everything currently held by the closed-clients tracker.</summary>
public IReadOnlyList<ClosedClient> GetClosedClients() => _closedClients.GetAll();
@@ -402,6 +557,8 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
_routeManager = new RouteManager(options.Cluster, _stats, _serverInfo.ServerId, ApplyRemoteSubscription,
ProcessRoutedMessage,
_loggerFactory.CreateLogger<RouteManager>());
_routeManager.OnRouteRemoved += RemoveRemoteSubscriptionsForRoute;
_routeManager.OnRouteAccountRemoved += RemoveRemoteSubscriptionsForRouteAccount;
}
if (options.Gateway != null)
@@ -485,6 +642,7 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
{
var (_, digest) = NatsConfParser.ParseFileWithDigest(options.ConfigFile);
_configDigest = digest;
_configTime = DateTime.UtcNow;
}
catch (Exception ex)
{
@@ -499,6 +657,79 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
_cachedInfoLine = Encoding.ASCII.GetBytes($"INFO {infoJson}\r\n");
}
/// <summary>
/// Ensures an advertise address carries a URL scheme.
/// </summary>
/// <param name="advertise">Advertise value, with or without a <c>scheme://</c> prefix.</param>
/// <param name="defaultScheme">Scheme to prepend when <paramref name="advertise"/> has none.</param>
/// <returns>
/// <paramref name="advertise"/> unchanged when it already contains <c>://</c>; otherwise
/// <c>defaultScheme://advertise</c>.
/// </returns>
private static string NormalizeAdvertiseUrl(string advertise, string defaultScheme)
    => advertise.Contains("://", StringComparison.Ordinal)
        ? advertise
        : $"{defaultScheme}://{advertise}";
/// <summary>
/// Reports whether <paramref name="host"/> is exactly the IPv4 (<c>0.0.0.0</c>) or
/// IPv6 (<c>::</c>) any-address literal.
/// </summary>
private static bool IsWildcardHost(string host)
    => host is "0.0.0.0" or "::";
/// <summary>
/// Expands a wildcard listen host into the concrete addresses of this machine.
/// </summary>
/// <param name="host">Configured listen host.</param>
/// <returns>
/// A single-element list containing <paramref name="host"/> when it is not a wildcard.
/// Otherwise the distinct IPv4/IPv6 unicast addresses of all interfaces that are up,
/// excluding loopback, IPv6 link-local and IPv6 multicast addresses, in ordinal order;
/// <c>127.0.0.1</c> alone when no such address is found.
/// </returns>
internal static IReadOnlyList<string> GetNonLocalIPsIfHostIsIPAny(string host)
{
    if (!IsWildcardHost(host))
    {
        return [host];
    }

    // A sorted set de-duplicates and orders ordinally in one structure.
    var candidates = new SortedSet<string>(StringComparer.Ordinal);

    foreach (var nic in NetworkInterface.GetAllNetworkInterfaces())
    {
        if (nic.OperationalStatus != OperationalStatus.Up)
        {
            continue;
        }

        IPInterfaceProperties? ipProps;
        try
        {
            ipProps = nic.GetIPProperties();
        }
        catch
        {
            // Best effort: an interface whose properties cannot be read is skipped.
            continue;
        }

        foreach (var unicast in ipProps.UnicastAddresses)
        {
            var ip = unicast.Address;
            var excluded = IPAddress.IsLoopback(ip) || ip.IsIPv6LinkLocal || ip.IsIPv6Multicast;
            var ipFamily = ip.AddressFamily is AddressFamily.InterNetwork or AddressFamily.InterNetworkV6;
            if (!excluded && ipFamily)
            {
                candidates.Add(ip.ToString());
            }
        }
    }

    if (candidates.Count == 0)
    {
        candidates.Add("127.0.0.1");
    }

    return [.. candidates];
}
/// <summary>
/// Looks up a client by id, marks it closed with the <c>ServerShutdown</c> reason and
/// starts its flush-and-close without waiting for it to finish.
/// </summary>
/// <param name="clientId">Id of the client to close.</param>
/// <param name="minimalFlush">Passed through to the client's <c>FlushAndCloseAsync</c>.</param>
/// <returns><c>true</c> when the client was found; <c>false</c> otherwise.</returns>
private bool CloseClientById(ulong clientId, bool minimalFlush)
{
    if (_clients.TryGetValue(clientId, out var target))
    {
        target.MarkClosed(ClientClosedReason.ServerShutdown);

        // Fire-and-forget: the returned task is intentionally discarded.
        _ = target.FlushAndCloseAsync(minimalFlush);
        return true;
    }

    return false;
}
/// <summary>
/// Appends <c>host:port</c> to <paramref name="targets"/> when the host is non-blank
/// and the port is positive; otherwise does nothing.
/// </summary>
/// <param name="targets">List that receives the formatted endpoint.</param>
/// <param name="host">Host name or address; blank or null values are ignored.</param>
/// <param name="port">Port number; zero and negative values are ignored.</param>
private static void AddEndpoint(List<string> targets, string? host, int port)
{
    var usable = port > 0 && !string.IsNullOrWhiteSpace(host);
    if (usable)
    {
        targets.Add($"{host}:{port}");
    }
}
/// <summary>
/// Appends an already-formatted endpoint to <paramref name="targets"/> unless it is
/// null, empty or whitespace.
/// </summary>
/// <param name="targets">List that receives the endpoint.</param>
/// <param name="endpoint">Endpoint string to append as-is.</param>
private static void AddEndpoint(List<string> targets, string? endpoint)
{
    if (string.IsNullOrWhiteSpace(endpoint))
    {
        return;
    }

    targets.Add(endpoint);
}
public async Task StartAsync(CancellationToken ct)
{
using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct, _quitCts.Token);
@@ -523,8 +754,7 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
_logger.LogInformation("Listening for client connections on {Host}:{Port}", _options.Host, _options.Port);
// Warn about stub features
if (_options.ProfPort > 0)
_logger.LogWarning("Profiling endpoint not yet supported (port: {ProfPort})", _options.ProfPort);
StartProfiler();
if (_options.MonitorPort > 0)
{
@@ -535,6 +765,11 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
WritePidFile();
WritePortsFile();
WsAuthConfig.Apply(_options.WebSocket);
var wsValidation = WebSocketOptionsValidator.Validate(_options);
if (!wsValidation.IsValid)
throw new InvalidOperationException($"Invalid websocket options: {string.Join("; ", wsValidation.Errors)}");
if (_options.WebSocket.Port >= 0)
{
_wsListener = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
@@ -728,6 +963,14 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
}
catch (Exception ex)
{
if (client is null)
{
var earlyReason = _options.HasTls
? ClientClosedReason.TlsHandshakeError
: ClientClosedReason.ReadError;
TrackEarlyClosedClient(socket, clientId, earlyReason);
}
_logger.LogDebug(ex, "Failed to accept client {ClientId}", clientId);
try { socket.Shutdown(SocketShutdown.Both); } catch { }
socket.Dispose();
@@ -887,6 +1130,18 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
account.SubList.ApplyRemoteSub(sub);
}
/// <summary>
/// Removes the remote subscriptions registered for the given route from every
/// account's subscription list.
/// </summary>
/// <param name="routeId">Identifier of the route whose remote subscriptions are removed.</param>
private void RemoveRemoteSubscriptionsForRoute(string routeId)
{
    foreach (var acc in _accounts.Values)
    {
        var subscriptions = acc.SubList;
        subscriptions.RemoveRemoteSubs(routeId);
    }
}
/// <summary>
/// Removes the remote subscriptions of one route for one account. Does nothing
/// when the account is unknown to this server.
/// </summary>
/// <param name="routeId">Identifier of the route whose remote subscriptions are removed.</param>
/// <param name="accountName">Name of the account to clean up.</param>
private void RemoveRemoteSubscriptionsForRouteAccount(string routeId, string accountName)
{
    if (!_accounts.TryGetValue(accountName, out var acc))
    {
        return;
    }

    acc.SubList.RemoveRemoteSubsForAccount(routeId, accountName);
}
private void ProcessRoutedMessage(RouteMessage message)
{
DeliverRemoteMessage(message.Account, message.Subject, message.ReplyTo, message.Payload);
@@ -942,19 +1197,42 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
&& subject.StartsWith("$JS.API", StringComparison.Ordinal)
&& _jetStreamApiRouter != null)
{
// Pull consumer MSG.NEXT requires special handling: deliver individual
// HMSG messages to the client's reply inbox instead of a single JSON blob.
// Go reference: consumer.go:4276 processNextMsgRequest
if (subject.StartsWith(JetStream.Api.JetStreamApiSubjects.ConsumerNext, StringComparison.Ordinal)
&& _jetStreamConsumerManager != null
&& _jetStreamStreamManager != null)
{
Interlocked.Increment(ref _stats.JetStreamApiTotal);
DeliverPullFetchMessages(subject, replyTo, payload, sender);
return;
}
var response = _jetStreamApiRouter.Route(subject, payload.Span);
Interlocked.Increment(ref _stats.JetStreamApiTotal);
if (response.Error != null)
Interlocked.Increment(ref _stats.JetStreamApiErrors);
var data = JsonSerializer.SerializeToUtf8Bytes(response);
var data = JsonSerializer.SerializeToUtf8Bytes(response.ToWireFormat(), s_jetStreamJsonOptions);
ProcessMessage(replyTo, null, default, data, sender);
return;
}
if (TryCaptureJetStreamPublish(subject, payload, out var pubAck))
{
sender.RecordJetStreamPubAck(pubAck);
// Send pub ack response to the reply subject (request-reply pattern).
// Go reference: server/jetstream.go — jsPubAckResponse sent to reply.
if (replyTo != null)
{
var ackData = JsonSerializer.SerializeToUtf8Bytes(pubAck, s_jetStreamJsonOptions);
ProcessMessage(replyTo, null, default, ackData, sender);
return;
}
}
// Apply subject transforms
if (_subjectTransforms.Length > 0)
{
@@ -1049,6 +1327,94 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
}
}
/// <summary>
/// Handles $JS.API.CONSUMER.MSG.NEXT by delivering individual HMSG messages
/// to the client's reply inbox. Go reference: consumer.go:4276 processNextMsgRequest.
/// </summary>
/// <param name="subject">Full request subject; the stream and consumer names follow the MSG.NEXT prefix.</param>
/// <param name="replyTo">Reply inbox of the requester; delivery goes to the sender's subscription matching it.</param>
/// <param name="payload">
/// Optional request body: a JSON object (<c>batch</c>, <c>no_wait</c>, <c>expires</c>),
/// a bare integer batch size, or empty. Anything else falls back to the defaults.
/// </param>
/// <param name="sender">Client that issued the pull request.</param>
private void DeliverPullFetchMessages(string subject, string replyTo, ReadOnlyMemory<byte> payload, NatsClient sender)
{
    var prefix = JetStream.Api.JetStreamApiSubjects.ConsumerNext;
    var remainder = subject[prefix.Length..];
    var split = remainder.Split('.', 2, StringSplitOptions.RemoveEmptyEntries);
    if (split.Length != 2)
    {
        // Subject does not carry both a stream and a consumer name.
        var notFoundHeader = System.Text.Encoding.UTF8.GetBytes("NATS/1.0 404 No Messages\r\n\r\n");
        ProcessMessage(replyTo, null, (ReadOnlyMemory<byte>)notFoundHeader, default, sender);
        return;
    }

    var (streamName, consumerName) = (split[0], split[1]);

    var (batch, expiresMs, noWait) = ParsePullFetchRequest(payload);

    // NOTE(review): this blocks the calling thread on an async fetch (sync-over-async).
    // Kept as-is because the method is synchronous; worth revisiting if FetchAsync can wait.
    var fetchResult = _jetStreamConsumerManager!.FetchAsync(
        streamName, consumerName, new JetStream.Consumers.PullFetchRequest { Batch = batch, NoWait = noWait, ExpiresMs = expiresMs },
        _jetStreamStreamManager!, default).GetAwaiter().GetResult();

    // Find the sender's inbox subscription so we can deliver directly.
    // Go reference: consumer.go deliverMsg — delivers directly to the client, bypassing pub/sub echo checks.
    var subList = sender.Account?.SubList ?? _globalAccount.SubList;
    var matchResult = subList.Match(replyTo);
    Subscription? inboxSub = null;
    foreach (var sub in matchResult.PlainSubs)
    {
        if (sub.Client == sender)
        {
            inboxSub = sub;
            break;
        }
    }

    // Without a matching inbox subscription there is nowhere to deliver.
    if (inboxSub == null)
        return;

    ReadOnlyMemory<byte> minHeaders = "NATS/1.0\r\n\r\n"u8.ToArray();
    int deliverySeq = 0;
    int numPending = fetchResult.Messages.Count;

    foreach (var msg in fetchResult.Messages)
    {
        deliverySeq++;
        numPending--;

        var tsNanos = new DateTimeOffset(msg.TimestampUtc).ToUnixTimeMilliseconds() * 1_000_000L;
        var ackReply = $"$JS.ACK.{streamName}.{consumerName}.1.{msg.Sequence}.{deliverySeq}.{tsNanos}.{numPending}";

        // Send with the ORIGINAL stream subject (not the inbox) so the NATS client
        // can distinguish data messages from control/status messages.
        // Go reference: consumer.go deliverMsg — uses original subject on wire, inbox SID.
        DeliverMessage(inboxSub, msg.Subject, ackReply, minHeaders, msg.Payload);
    }

    // Send terminal status to end the fetch
    ReadOnlyMemory<byte> statusHeader;
    if (fetchResult.Messages.Count == 0 || noWait)
        statusHeader = System.Text.Encoding.UTF8.GetBytes("NATS/1.0 404 No Messages\r\n\r\n");
    else
        statusHeader = System.Text.Encoding.UTF8.GetBytes("NATS/1.0 408 Request Timeout\r\n\r\n");
    DeliverMessage(inboxSub, replyTo, null, statusHeader, default);
}

/// <summary>
/// Parses the body of a pull (MSG.NEXT) request without ever throwing on client input.
/// </summary>
/// <param name="payload">Raw request body; may be empty.</param>
/// <returns>
/// The batch size (at least 1, default 1), the expiry in milliseconds (default 0) and the
/// no-wait flag (default false). Malformed or unexpected input yields the defaults.
/// </returns>
private (int Batch, int ExpiresMs, bool NoWait) ParsePullFetchRequest(ReadOnlyMemory<byte> payload)
{
    int batch = 1;
    int expiresMs = 0;
    bool noWait = false;

    if (payload.Length == 0)
        return (batch, expiresMs, noWait);

    try
    {
        using var doc = System.Text.Json.JsonDocument.Parse(payload);
        var root = doc.RootElement;

        // JsonElement.TryGetProperty / TryGetInt32 / TryGetInt64 throw InvalidOperationException
        // when the element has the wrong ValueKind, so every access is guarded by a kind check.
        if (root.ValueKind == System.Text.Json.JsonValueKind.Number)
        {
            // Body is a bare integer: treat it as the batch size.
            if (root.TryGetInt32(out var bareBatch))
                batch = Math.Max(bareBatch, 1);
        }
        else if (root.ValueKind == System.Text.Json.JsonValueKind.Object)
        {
            if (root.TryGetProperty("batch", out var batchEl)
                && batchEl.ValueKind == System.Text.Json.JsonValueKind.Number
                && batchEl.TryGetInt32(out var b))
                batch = Math.Max(b, 1);

            if (root.TryGetProperty("no_wait", out var nwEl) && nwEl.ValueKind == System.Text.Json.JsonValueKind.True)
                noWait = true;

            if (root.TryGetProperty("expires", out var expEl)
                && expEl.ValueKind == System.Text.Json.JsonValueKind.Number
                && expEl.TryGetInt64(out var expNs))
            {
                // "expires" is in nanoseconds; clamp so huge values saturate instead of wrapping.
                expiresMs = (int)Math.Clamp(expNs / 1_000_000, int.MinValue, int.MaxValue);
            }
        }
    }
    catch (System.Text.Json.JsonException ex)
    {
        _logger.LogDebug(ex, "Malformed JSON in pull request payload, using defaults");
    }

    return (batch, expiresMs, noWait);
}
private void DeliverMessage(Subscription sub, string subject, string? replyTo,
ReadOnlyMemory<byte> headers, ReadOnlyMemory<byte> payload)
{
@@ -1510,6 +1876,11 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
_clients.TryRemove(client.Id, out _);
_logger.LogDebug("Removed client {ClientId}", client.Id);
var (tlsPeerCertSubject, tlsPeerCertSubjectPkSha256, tlsPeerCertSha256) =
TlsPeerCertMapper.ToClosedFields(client.TlsState?.PeerCert);
var (jwt, issuerKey, tags) = ExtractJwtMetadata(client.ClientOpts?.JWT);
var proxyKey = ExtractProxyKey(client.ClientOpts?.Username);
// Snapshot for closed-connections tracking (ring buffer auto-overwrites oldest when full)
_closedClients.Add(new ClosedClient
{
@@ -1532,11 +1903,16 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
Rtt = client.Rtt,
TlsVersion = client.TlsState?.TlsVersion ?? "",
TlsCipherSuite = client.TlsState?.CipherSuite ?? "",
TlsPeerCertSubject = client.TlsState?.PeerCert?.Subject ?? "",
TlsPeerCertSubject = tlsPeerCertSubject,
TlsPeerCertSubjectPkSha256 = tlsPeerCertSubjectPkSha256,
TlsPeerCertSha256 = tlsPeerCertSha256,
MqttClient = "", // populated when MQTT transport is implemented
JwtIssuerKey = string.IsNullOrEmpty(client.ClientOpts?.JWT) ? "" : "present",
JwtTags = "",
Proxy = client.ClientOpts?.Username?.StartsWith("proxy:", StringComparison.Ordinal) == true ? "true" : "",
Stalls = 0,
Jwt = jwt,
IssuerKey = issuerKey,
NameTag = "",
Tags = tags,
ProxyKey = proxyKey,
});
var subList = client.Account?.SubList ?? _globalAccount.SubList;
@@ -1544,6 +1920,58 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
client.Account?.RemoveClient(client.Id);
}
/// <summary>
/// Records a connection that failed before a client object existed, so it still
/// shows up in closed-connection tracking.
/// </summary>
/// <remarks>
/// Start and stop are both set to the current UTC time. The remote address is filled in
/// on a best-effort basis: if it cannot be read from the socket, the entry is recorded
/// with an empty IP and port 0 rather than throwing out of the caller's error path.
/// </remarks>
/// <param name="socket">Accepted socket of the failed connection.</param>
/// <param name="clientId">Id that was allocated for the connection.</param>
/// <param name="reason">Why the connection was closed.</param>
private void TrackEarlyClosedClient(Socket socket, ulong clientId, ClientClosedReason reason)
{
    string ip = "";
    int port = 0;

    try
    {
        if (socket.RemoteEndPoint is IPEndPoint endpoint)
        {
            ip = endpoint.Address.ToString();
            port = endpoint.Port;
        }
    }
    catch (Exception ex) when (ex is SocketException or ObjectDisposedException)
    {
        // RemoteEndPoint can throw on a broken or disposed socket. This method runs from
        // an exception handler that still has to shut down and dispose the socket, so the
        // failure is swallowed and the address stays empty.
    }

    var now = DateTime.UtcNow;
    _closedClients.Add(new ClosedClient
    {
        Cid = clientId,
        Ip = ip,
        Port = port,
        Start = now,
        Stop = now,
        Reason = reason.ToReasonString(),
    });
}
/// <summary>
/// Derives the closed-connection JWT fields from a client's JWT string.
/// </summary>
/// <param name="jwt">JWT supplied by the client, if any.</param>
/// <returns>
/// Empty values when <paramref name="jwt"/> is null or blank. Otherwise the JWT itself,
/// plus the issuer and tags from the decoded user claims; issuer and tags stay empty
/// when decoding returns null or the claims lack them.
/// </returns>
private static (string Jwt, string IssuerKey, string[] Tags) ExtractJwtMetadata(string? jwt)
{
    if (string.IsNullOrWhiteSpace(jwt))
    {
        return ("", "", []);
    }

    var decoded = NatsJwt.DecodeUserClaims(jwt);
    if (decoded is null)
    {
        return (jwt, "", Array.Empty<string>());
    }

    var issuer = decoded.Issuer ?? "";
    var claimTags = decoded.Nats?.Tags ?? Array.Empty<string>();
    return (jwt, issuer, claimTags);
}
/// <summary>
/// Extracts the proxy key from a username of the form <c>proxy:&lt;key&gt;</c>.
/// </summary>
/// <param name="username">Username supplied by the client, if any.</param>
/// <returns>
/// The text after the <c>proxy:</c> prefix, or an empty string when the username is
/// null, blank, or does not start with that prefix (ordinal, case-sensitive).
/// </returns>
private static string ExtractProxyKey(string? username)
{
    const string marker = "proxy:";

    var isProxyUser = !string.IsNullOrWhiteSpace(username)
        && username.StartsWith(marker, StringComparison.Ordinal);

    return isProxyUser ? username![marker.Length..] : "";
}
private void WritePidFile()
{
if (string.IsNullOrEmpty(_options.PidFile)) return;
@@ -1670,6 +2098,7 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
// Apply changes to running options
ApplyConfigChanges(changes, newOpts);
_configDigest = digest;
_configTime = DateTime.UtcNow;
_logger.LogInformation("Config reloaded successfully ({Count} changes applied)", changes.Count);
}
catch (Exception ex)
@@ -1859,6 +2288,9 @@ public sealed class NatsServer : IMessageRouter, ISubListAccess, IDisposable
_options.SystemAccount = newOpts.SystemAccount;
}
/// <summary>
/// Returns a one-line diagnostic description of the server: id, name, configured
/// client address and current client count.
/// </summary>
public override string ToString()
=> $"NatsServer(ServerId={ServerId}, Name={ServerName}, Addr={Addr()}, Clients={ClientCount})";
public void Dispose()
{
if (!IsShuttingDown)