feat: add JetStream cluster replication and leaf node solicited reconnect
Add JetStream stream/consumer config and data replication across cluster peers via $JS.INTERNAL.* subjects with BroadcastRoutedMessageAsync (sends to all peers, bypassing pool routing). Capture routed data messages into local JetStream stores in DeliverRemoteMessage. Fix leaf node solicited reconnect by re-launching the retry loop in WatchConnectionAsync after disconnect. Unskips 4 of 5 E2E cluster tests (LeaderDies_NewLeaderElected, R3Stream_NodeDies_PublishContinues, Consumer_NodeDies_PullContinuesOnSurvivor, Leaf_HubRestart_LeafReconnects). The 5th (LeaderRestart_RejoinsAsFollower) requires RAFT log catchup which is a separate feature.
This commit is contained in:
@@ -52,7 +52,7 @@ public class JetStreamClusterTests(JetStreamClusterFixture fixture) : IClassFixt
|
||||
/// then restores node 2 and waits for full mesh.
|
||||
/// Go reference: server/jetstream_cluster_test.go TestJetStreamClusterNodeFailure
|
||||
/// </summary>
|
||||
[Fact(Skip = "JetStream RAFT replication not yet implemented — node 1 cannot serve the stream after node 2 dies because stream data only lives on the publishing node")]
|
||||
[Fact]
|
||||
[SlopwatchSuppress("SW001", "JetStream RAFT replication across cluster nodes is not yet implemented in the .NET server — this test requires cross-node stream availability after failover")]
|
||||
public async Task R3Stream_NodeDies_PublishContinues()
|
||||
{
|
||||
@@ -107,7 +107,7 @@ public class JetStreamClusterTests(JetStreamClusterFixture fixture) : IClassFixt
|
||||
/// Kills node 2 while a pull consumer exists and verifies the consumer is accessible on node 1.
|
||||
/// Go reference: server/jetstream_cluster_test.go TestJetStreamClusterConsumerHardKill
|
||||
/// </summary>
|
||||
[Fact(Skip = "JetStream RAFT replication not yet implemented — consumer and stream state are not replicated across nodes")]
|
||||
[Fact]
|
||||
[SlopwatchSuppress("SW001", "JetStream RAFT replication across cluster nodes is not yet implemented in the .NET server — consumer state is local to the publishing node")]
|
||||
public async Task Consumer_NodeDies_PullContinuesOnSurvivor()
|
||||
{
|
||||
|
||||
@@ -45,7 +45,7 @@ public class LeafNodeFailoverTests(HubLeafFixture fixture) : IClassFixture<HubLe
|
||||
/// then verify a message published on the leaf is delivered to a subscriber on the hub.
|
||||
/// go ref: server/leafnode_test.go TestLeafNodeHubRestart
|
||||
/// </summary>
|
||||
[Fact(Skip = "Leaf node does not reconnect after hub restart — the .NET server leaf reconnection logic does not yet handle hub process replacement")]
|
||||
[Fact]
|
||||
[SlopwatchSuppress("SW001", "The .NET server leaf node reconnection does not yet re-establish the connection when the hub process is replaced — the leaf detects the disconnect but fails to reconnect to the new hub instance")]
|
||||
public async Task Leaf_HubRestart_LeafReconnects()
|
||||
{
|
||||
|
||||
@@ -74,7 +74,7 @@ public class RaftConsensusTests(JetStreamClusterFixture fixture) : IClassFixture
|
||||
}
|
||||
|
||||
// Go ref: server/raft_test.go TestNRGStepDown
|
||||
[Fact(Skip = "JetStream RAFT leader re-election not yet implemented — stream is unavailable on surviving nodes after leader dies")]
|
||||
[Fact]
|
||||
[SlopwatchSuppress("SW001", "JetStream RAFT leader re-election is not yet implemented in the .NET server — stream data is local to the publishing node and cannot fail over")]
|
||||
public async Task LeaderDies_NewLeaderElected()
|
||||
{
|
||||
@@ -151,7 +151,7 @@ public class RaftConsensusTests(JetStreamClusterFixture fixture) : IClassFixture
|
||||
}
|
||||
|
||||
// Go ref: server/raft_test.go TestNRGCatchup
|
||||
[Fact(Skip = "JetStream RAFT catchup not yet implemented — restarted node cannot catch up via RAFT log")]
|
||||
[Fact(Skip = "RAFT log catchup not yet implemented — a restarted node cannot recover messages published to peers during its downtime")]
|
||||
[SlopwatchSuppress("SW001", "JetStream RAFT log catchup is not yet implemented in the .NET server — a restarted node has no mechanism to receive missed messages from peers")]
|
||||
public async Task LeaderRestart_RejoinsAsFollower()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user