test: add gateway failover E2E tests and fix SW003/SW004 violations across cluster tests
Replace all Task.Delay-based interest propagation waits with active probe loops (PeriodicTimer + publish-and-read) in GatewayFailoverTests, LeafNodeFailoverTests, JetStreamClusterTests, and RaftConsensusTests. Fix SW003 empty-catch violations in ClusterResilienceTests by adding _ = e discard statements. Correct State.Messages type from ulong to long to match the NATS.Client.JetStream API.
This commit is contained in:
110
tests/NATS.E2E.Cluster.Tests/GatewayFailoverTests.cs
Normal file
110
tests/NATS.E2E.Cluster.Tests/GatewayFailoverTests.cs
Normal file
@@ -0,0 +1,110 @@
|
||||
using NATS.Client.Core;
|
||||
using NATS.E2E.Cluster.Tests.Infrastructure;
|
||||
|
||||
namespace NATS.E2E.Cluster.Tests;
|
||||
|
||||
public class GatewayFailoverTests(GatewayPairFixture fixture) : IClassFixture<GatewayPairFixture>
|
||||
{
|
||||
/// <summary>
|
||||
/// Kills gateway B, restarts it, waits for the gateway connection to re-establish,
|
||||
/// then verifies a message published on A is delivered to a subscriber on B.
|
||||
/// Go ref: TestGatewayReconnectAfterKill (server/gateway_test.go)
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task Gateway_Disconnect_Reconnects()
|
||||
{
|
||||
await fixture.KillNode(1);
|
||||
await fixture.RestartNode(1);
|
||||
await fixture.WaitForGatewayConnectionAsync();
|
||||
|
||||
await using var clientA = fixture.CreateClientA();
|
||||
await using var clientB = fixture.CreateClientB();
|
||||
|
||||
await clientA.ConnectAsync();
|
||||
await clientB.ConnectAsync();
|
||||
|
||||
const string subject = "e2e.gw.reconnect";
|
||||
|
||||
await using var sub = await clientB.SubscribeCoreAsync<string>(subject);
|
||||
|
||||
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));
|
||||
|
||||
// Probe-publish until B receives it, confirming interest has propagated across the gateway
|
||||
await WaitForPropagationAsync(clientA, sub, subject, "after-reconnect", cts.Token);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies that after killing and restarting gateway B, a fresh subscription on B
|
||||
/// receives updated interest from A, and a published message is correctly delivered.
|
||||
/// Go ref: TestGatewayInterestAfterReconnect (server/gateway_test.go)
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public async Task Gateway_InterestUpdated_AfterReconnect()
|
||||
{
|
||||
const string subject = "e2e.gw.interest";
|
||||
|
||||
// --- Phase 1: baseline delivery before kill ---
|
||||
await using var clientA1 = fixture.CreateClientA();
|
||||
await using var clientB1 = fixture.CreateClientB();
|
||||
|
||||
await clientA1.ConnectAsync();
|
||||
await clientB1.ConnectAsync();
|
||||
|
||||
await using var sub1 = await clientB1.SubscribeCoreAsync<string>(subject);
|
||||
|
||||
using var cts1 = new CancellationTokenSource(TimeSpan.FromSeconds(30));
|
||||
await WaitForPropagationAsync(clientA1, sub1, subject, "before-kill", cts1.Token);
|
||||
|
||||
// --- Phase 2: kill B, restart, re-subscribe, verify delivery ---
|
||||
await fixture.KillNode(1);
|
||||
await fixture.RestartNode(1);
|
||||
await fixture.WaitForGatewayConnectionAsync();
|
||||
|
||||
await using var clientA2 = fixture.CreateClientA();
|
||||
await using var clientB2 = fixture.CreateClientB();
|
||||
|
||||
await clientA2.ConnectAsync();
|
||||
await clientB2.ConnectAsync();
|
||||
|
||||
await using var sub2 = await clientB2.SubscribeCoreAsync<string>(subject);
|
||||
|
||||
using var cts2 = new CancellationTokenSource(TimeSpan.FromSeconds(30));
|
||||
await WaitForPropagationAsync(clientA2, sub2, subject, "after-restart", cts2.Token);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// <summary>
|
||||
/// Publishes <paramref name="payload"/> from <paramref name="publisher"/> in a retry loop
|
||||
/// until <paramref name="sub"/> receives it, confirming gateway interest has propagated.
|
||||
/// PingAsync flushes server dispatch so TryRead can check the channel without blocking,
|
||||
/// eliminating the need for a try/catch around a bounded ReadAsync.
|
||||
/// </summary>
|
||||
private static async Task WaitForPropagationAsync(
|
||||
NatsConnection publisher,
|
||||
INatsSub<string> sub,
|
||||
string subject,
|
||||
string payload,
|
||||
CancellationToken ct)
|
||||
{
|
||||
await publisher.ConnectAsync();
|
||||
|
||||
using var timer = new PeriodicTimer(TimeSpan.FromMilliseconds(200));
|
||||
while (await timer.WaitForNextTickAsync(ct).ConfigureAwait(false))
|
||||
{
|
||||
await publisher.PublishAsync(subject, payload, cancellationToken: ct);
|
||||
|
||||
// PingAsync is a round-trip to the server: when it returns, any message the
|
||||
// server dispatched before the ping is already buffered in sub.Msgs.
|
||||
await publisher.PingAsync(ct);
|
||||
|
||||
if (sub.Msgs.TryRead(out var msg))
|
||||
{
|
||||
msg.Data.ShouldBe(payload);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user