Move 43 Raft consensus test files (8 root-level + 35 in Raft/ subfolder) from NATS.Server.Tests into a dedicated NATS.Server.Raft.Tests project. Update namespaces, add InternalsVisibleTo, and fix timing/exception handling issues in moved test files.
418 lines, 16 KiB, C#
using NATS.Server.Raft;
|
|
|
|
namespace NATS.Server.Raft.Tests.Raft;
|
|
|
|
/// <summary>
|
|
/// Tests for RAFT leadership transfer via TimeoutNow RPC (Gap 8.4).
|
|
/// The leader sends a TimeoutNow message to a target follower, which immediately
|
|
/// starts an election. The leader blocks proposals while the transfer is in flight.
|
|
/// Go reference: raft.go sendTimeoutNow / processTimeoutNow
|
|
/// </summary>
|
|
public class RaftLeadershipTransferTests
|
|
{
|
|
// -- Helpers --
|
|
|
|
/// <summary>
/// Creates <paramref name="size"/> nodes named <c>n1</c> through <c>n{size}</c> that all
/// share a single <see cref="InMemoryRaftTransport"/>. Each node is registered with the
/// transport and configured with the complete node array as its cluster.
/// </summary>
/// <param name="size">Number of nodes to create.</param>
/// <returns>The created nodes together with the transport they share.</returns>
private static (RaftNode[] nodes, InMemoryRaftTransport transport) CreateCluster(int size)
{
    var sharedTransport = new InMemoryRaftTransport();
    RaftNode[] members =
    [
        .. Enumerable.Range(1, size).Select(ordinal => new RaftNode($"n{ordinal}", sharedTransport)),
    ];

    Array.ForEach(members, member =>
    {
        sharedTransport.Register(member);
        member.ConfigureCluster(members);

        // Keep election timeouts tiny so the polling inside TransferLeadershipAsync
        // converges quickly in tests instead of waiting on realistic delays.
        member.ElectionTimeoutMinMs = 5;
        member.ElectionTimeoutMaxMs = 10;
    });

    return (members, sharedTransport);
}
|
|
|
|
/// <summary>
/// Runs an election on behalf of the first node: starts its election, then hands it
/// the vote response produced by each of the remaining nodes, in array order.
/// </summary>
/// <param name="nodes">All cluster members; the first element is the candidate.</param>
/// <returns>The first element of <paramref name="nodes"/>.</returns>
private static RaftNode ElectLeader(RaftNode[] nodes)
{
    var clusterSize = nodes.Length;
    var contender = nodes[0];
    contender.StartElection(clusterSize);

    for (var index = 1; index < clusterSize; index++)
    {
        var response = nodes[index].GrantVote(contender.Term, contender.Id);
        contender.ReceiveVote(response, clusterSize);
    }

    return contender;
}
|
|
|
|
// -- Wire format tests --
|
|
|
|
// Go reference: raft.go TimeoutNow wire encoding
// Encodes a TimeoutNow message, checks the encoded size, then decodes it and
// verifies both fields survive the trip.
[Fact]
public void TimeoutNowRpc_wire_format_roundtrip()
{
    const ulong term = 7UL;
    const string leaderId = "n1";

    var bytes = new RaftTimeoutNowWire(Term: term, LeaderId: leaderId).Encode();
    bytes.Length.ShouldBe(RaftTimeoutNowWire.MessageLen); // 16 bytes

    var roundTripped = RaftTimeoutNowWire.Decode(bytes);
    roundTripped.Term.ShouldBe(term);
    roundTripped.LeaderId.ShouldBe(leaderId);
}
|
|
|
|
// Second encode/decode sample with a different term and a longer leader id.
[Fact]
public void TimeoutNowRpc_wire_format_preserves_term_and_leader_id()
{
    var original = new RaftTimeoutNowWire(Term: 42UL, LeaderId: "node5");
    var bytes = original.Encode();

    var restored = RaftTimeoutNowWire.Decode(bytes);

    restored.Term.ShouldBe(42UL);
    restored.LeaderId.ShouldBe("node5");
}
|
|
|
|
// Decode must reject a buffer whose size is not the wire message length.
[Fact]
public void TimeoutNowRpc_decode_throws_on_wrong_length()
{
    // 10 bytes, whereas the roundtrip test expects Encode to produce MessageLen bytes.
    var wrongSized = new byte[10];

    Should.Throw<ArgumentException>(() => RaftTimeoutNowWire.Decode(wrongSized));
}
|
|
|
|
// Pins the wire size constant so an accidental format change is caught.
[Fact]
public void TimeoutNowRpc_message_len_is_16_bytes()
    => RaftTimeoutNowWire.MessageLen.ShouldBe(16);
|
|
|
|
// -- ReceiveTimeoutNow logic tests --
|
|
|
|
// Go reference: raft.go processTimeoutNow -- follower starts election immediately
[Fact]
public void ReceiveTimeoutNow_triggers_immediate_election_on_follower()
{
    // Second node of a fresh three-node cluster; it has not campaigned yet.
    var follower = CreateCluster(3).nodes[1];
    follower.Role.ShouldBe(RaftRole.Follower);

    follower.ReceiveTimeoutNow(term: 0);

    // The node must have left the follower role: it is a candidate, or already
    // a leader if its own vote was enough for quorum.
    follower.Role.ShouldBeOneOf(RaftRole.Candidate, RaftRole.Leader);
}
|
|
|
|
// A TimeoutNow carrying a higher term than the local one must pull the node forward.
[Fact]
public void ReceiveTimeoutNow_updates_term_when_sender_term_is_higher()
{
    var follower = new RaftNode("follower");
    follower.TermState.CurrentTerm = 3;

    follower.ReceiveTimeoutNow(term: 10);

    // Expected: the sender's term (10) is adopted first, then the election that
    // follows bumps it once more, giving 11.
    follower.TermState.CurrentTerm.ShouldBe(11);
}
|
|
|
|
// A TimeoutNow with a term that is not ahead of the node still starts a campaign.
[Fact]
public void ReceiveTimeoutNow_increments_term_and_starts_campaign()
{
    var node = new RaftNode("n1");
    node.TermState.CurrentTerm = 2;
    var expectedTerm = node.Term + 1;

    node.ReceiveTimeoutNow(term: 0);

    // Starting an election advances the term whether or not the node wins.
    node.Term.ShouldBe(expectedTerm);

    // No cluster was configured here, so the node's own vote may already be a
    // quorum; accept either campaigning role.
    node.Role.ShouldBeOneOf(RaftRole.Candidate, RaftRole.Leader);
}
|
|
|
|
// In a cluster whose only member is the node itself, the self-vote is a quorum.
[Fact]
public void ReceiveTimeoutNow_on_single_node_makes_it_leader()
{
    var solo = new RaftNode("solo");
    solo.ConfigureCluster([solo]);

    solo.ReceiveTimeoutNow(term: 0);

    solo.IsLeader.ShouldBeTrue();
}
|
|
|
|
// -- Proposal blocking during transfer --
|
|
|
|
// Go reference: raft.go -- leader rejects new entries while transfer is in progress.
// BlockingTimeoutNowTransport signals when SendTimeoutNowAsync is entered and then
// blocks, so the test can probe ProposeAsync while the transfer is still in flight
// without relying on timing.
[Fact]
public async Task TransferLeadership_leader_blocks_proposals_during_transfer()
{
    var blockingTransport = new BlockingTimeoutNowTransport();
    var node = new RaftNode("leader", blockingTransport);
    node.ConfigureCluster([node]);
    node.StartElection(1); // become leader
    node.IsLeader.ShouldBeTrue();

    using var cts = new CancellationTokenSource();
    var transferTask = node.TransferLeadershipAsync("n2", cts.Token);

    // Wait until SendTimeoutNowAsync is entered. Race the signal against the transfer
    // itself: if the transfer finishes or faults without ever reaching the transport,
    // the signal would never arrive and a plain await would hang the test run.
    var entered = blockingTransport.WaitUntilBlockingAsync();
    var firstCompleted = await Task.WhenAny(entered, transferTask);
    if (!ReferenceEquals(firstCompleted, entered))
    {
        // Rethrows the transfer's own failure, which is the most useful diagnostic.
        await transferTask;
    }

    firstCompleted.ShouldBeSameAs(
        entered,
        "TransferLeadershipAsync completed before SendTimeoutNowAsync was entered.");

    try
    {
        // ProposeAsync must throw while the transfer is in flight.
        var ex = await Should.ThrowAsync<InvalidOperationException>(
            () => node.ProposeAsync("cmd", CancellationToken.None).AsTask());
        ex.Message.ShouldContain("Leadership transfer in progress");
    }
    finally
    {
        // Always release the blocked transport call, even when the assertion above
        // fails, so the transfer does not stay parked after the test ends.
        await cts.CancelAsync();
    }

    // Await proper completion to avoid test resource leaks.
    await Should.ThrowAsync<OperationCanceledException>(() => transferTask);
}
|
|
|
|
// Go reference: raft.go -- only leader can initiate leadership transfer
[Fact]
public async Task TransferLeadership_only_leader_can_transfer()
{
    var follower = new RaftNode("follower", new InMemoryRaftTransport());
    follower.Role.ShouldBe(RaftRole.Follower);

    // A node that never won an election attempts the transfer.
    Func<Task> attempt = () => follower.TransferLeadershipAsync("n2", CancellationToken.None);

    var failure = await Should.ThrowAsync<InvalidOperationException>(attempt);
    failure.Message.ShouldContain("Only the leader");
}
|
|
|
|
// Go reference: raft.go -- TransferLeadershipAsync requires a configured transport
[Fact]
public async Task TransferLeadership_throws_when_no_transport_configured()
{
    // Constructed without a transport argument.
    var transportless = new RaftNode("leader");
    transportless.StartElection(1); // become leader (single node, quorum = 1)
    transportless.IsLeader.ShouldBeTrue();

    Func<Task> attempt = () => transportless.TransferLeadershipAsync("n2", CancellationToken.None);

    var failure = await Should.ThrowAsync<InvalidOperationException>(attempt);
    failure.Message.ShouldContain("No transport configured");
}
|
|
|
|
// Go reference: raft.go sendTimeoutNow -- target becomes leader after receiving TimeoutNow.
// VoteGrantingTransport delivers TimeoutNow and, inside the same call, feeds the target
// the other nodes' votes, so the election is settled before the transport call returns
// and the test needs no Task.Delay.
[Fact]
public async Task TransferLeadership_target_becomes_leader()
{
    var transport = new VoteGrantingTransport();
    var nodes = Enumerable.Range(1, 3)
        .Select(i => new RaftNode($"n{i}", transport))
        .ToArray();
    foreach (var node in nodes)
    {
        transport.Register(node);
        node.ConfigureCluster(nodes);
        node.ElectionTimeoutMinMs = 5;
        node.ElectionTimeoutMaxMs = 10;
    }

    // Reuse the shared election helper rather than repeating its steps inline.
    var leader = ElectLeader(nodes);
    leader.IsLeader.ShouldBeTrue();

    var target = nodes[1];
    var result = await leader.TransferLeadershipAsync(target.Id, CancellationToken.None);

    result.ShouldBeTrue();
    target.IsLeader.ShouldBeTrue();
}
|
|
|
|
// Go reference: raft.go sendTimeoutNow -- returns false when target doesn't respond.
// The target id "ghost" is never registered with the transport, so nothing can become
// leader in response and the transfer is expected to report failure.
[Fact]
public async Task TransferLeadership_timeout_on_unreachable_target()
{
    var transport = new InMemoryRaftTransport();
    var soleLeader = new RaftNode("leader", transport);
    soleLeader.ConfigureCluster([soleLeader]);
    transport.Register(soleLeader);
    soleLeader.StartElection(1);

    // Tiny election timeouts keep the wait for the transfer outcome short.
    soleLeader.ElectionTimeoutMinMs = 5;
    soleLeader.ElectionTimeoutMaxMs = 10;

    var transferred = await soleLeader.TransferLeadershipAsync("ghost", CancellationToken.None);

    transferred.ShouldBeFalse();
}
|
|
|
|
// -- Integration: flag lifecycle --
|
|
|
|
// After a successful transfer the old leader must no longer report a transfer in progress.
[Fact]
public async Task TransferLeadership_clears_transfer_flag_after_success()
{
    var transport = new VoteGrantingTransport();
    var nodes = Enumerable.Range(1, 3)
        .Select(i => new RaftNode($"n{i}", transport))
        .ToArray();
    foreach (var node in nodes)
    {
        transport.Register(node);
        node.ConfigureCluster(nodes);
        node.ElectionTimeoutMinMs = 5;
        node.ElectionTimeoutMaxMs = 10;
    }

    // Reuse the shared election helper rather than repeating its steps inline, and
    // verify the precondition so a failed election is reported as such.
    var leader = ElectLeader(nodes);
    leader.IsLeader.ShouldBeTrue();

    var target = nodes[1];
    var success = await leader.TransferLeadershipAsync(target.Id, CancellationToken.None);

    success.ShouldBeTrue();
    // After transfer completes the flag must be cleared.
    leader.TransferInProgress.ShouldBeFalse();
}
|
|
|
|
// After a transfer that fails to complete, the leader must no longer report a
// transfer in progress.
[Fact]
public async Task TransferLeadership_clears_transfer_flag_after_timeout()
{
    var transport = new InMemoryRaftTransport();
    var leader = new RaftNode("leader", transport);
    leader.ConfigureCluster([leader]);
    transport.Register(leader);
    leader.StartElection(1);
    leader.ElectionTimeoutMinMs = 5;
    leader.ElectionTimeoutMaxMs = 10;

    // "ghost" is not registered with the transport, so the transfer cannot succeed.
    var result = await leader.TransferLeadershipAsync("ghost", CancellationToken.None);

    // Prove the failure path was actually taken; otherwise the flag check below
    // would pass even if the transfer had unexpectedly succeeded.
    result.ShouldBeFalse();

    // Flag must be cleared regardless of outcome.
    leader.TransferInProgress.ShouldBeFalse();
}
|
|
}
|
|
|
|
/// <summary>
/// A transport whose <see cref="SendTimeoutNowAsync"/> blocks until the supplied
/// <see cref="CancellationToken"/> is cancelled. It exposes a one-shot latch so a test
/// can wait for the moment the call is entered. Every other RPC is a no-op that returns
/// an empty or negative result.
/// </summary>
/// <remarks>
/// The latch is a <see cref="TaskCompletionSource"/> rather than a semaphore: it has
/// nothing to dispose, and signalling it more than once is harmless.
/// </remarks>
file sealed class BlockingTimeoutNowTransport : IRaftTransport
{
    // Completed the first time SendTimeoutNowAsync is entered; stays completed afterwards.
    private readonly TaskCompletionSource _entered =
        new(TaskCreationOptions.RunContinuationsAsynchronously);

    /// <summary>
    /// Returns a task that completes once <see cref="SendTimeoutNowAsync"/> has been
    /// entered. NOTE(review): tests use this as the point at which the leader's transfer
    /// flag is already set; that ordering is a property of RaftNode -- confirm there.
    /// </summary>
    public Task WaitUntilBlockingAsync() => _entered.Task;

    /// <summary>No-op: reports no append results.</summary>
    public Task<IReadOnlyList<AppendResult>> AppendEntriesAsync(
        string leaderId, IReadOnlyList<string> followerIds, RaftLogEntry entry, CancellationToken ct)
        => Task.FromResult<IReadOnlyList<AppendResult>>([]);

    /// <summary>Always answers with a vote that is not granted.</summary>
    public Task<VoteResponse> RequestVoteAsync(
        string candidateId, string voterId, VoteRequest request, CancellationToken ct)
        => Task.FromResult(new VoteResponse { Granted = false });

    /// <summary>No-op.</summary>
    public Task InstallSnapshotAsync(
        string leaderId, string followerId, RaftSnapshot snapshot, CancellationToken ct)
        => Task.CompletedTask;

    /// <summary>
    /// Signals the latch, then waits until <paramref name="ct"/> is cancelled, at which
    /// point the returned task ends as cancelled.
    /// </summary>
    public async Task SendTimeoutNowAsync(string leaderId, string targetId, ulong term, CancellationToken ct)
    {
        // Tell the waiting test that the call has been entered. TrySetResult keeps
        // repeated calls safe.
        _entered.TrySetResult();

        // Block until the test cancels the token.
        var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
        await using var reg = ct.Register(() => tcs.TrySetCanceled(ct));
        await tcs.Task;
    }

    /// <summary>No-op: never invokes <paramref name="onAck"/>.</summary>
    public Task SendHeartbeatAsync(string leaderId, IReadOnlyList<string> followerIds, int term, Action<string> onAck, CancellationToken ct)
        => Task.CompletedTask;
}
|
|
|
|
/// <summary>
/// A transport that calls straight into registered <see cref="RaftNode"/> instances.
/// When it delivers a TimeoutNow RPC it also, within the same call, hands the target
/// the votes of the other registered nodes. The target's election is therefore settled
/// before the call returns, and tests need no Task.Delay waits.
/// </summary>
file sealed class VoteGrantingTransport : IRaftTransport
{
    // Registered nodes keyed by node id (ordinal comparison).
    private readonly Dictionary<string, RaftNode> _nodes = new(StringComparer.Ordinal);

    /// <summary>Adds <paramref name="node"/> under its id, replacing any previous entry.</summary>
    public void Register(RaftNode node)
    {
        _nodes[node.Id] = node;
    }

    /// <summary>
    /// Hands <paramref name="entry"/> to every registered follower and reports success
    /// for it; followers that are not registered are reported as failed.
    /// </summary>
    public Task<IReadOnlyList<AppendResult>> AppendEntriesAsync(
        string leaderId, IReadOnlyList<string> followerIds, RaftLogEntry entry, CancellationToken ct)
    {
        var outcomes = new List<AppendResult>(followerIds.Count);

        foreach (var id in followerIds)
        {
            var delivered = false;
            if (_nodes.TryGetValue(id, out var follower))
            {
                follower.ReceiveReplicatedEntry(entry);
                delivered = true;
            }

            outcomes.Add(new AppendResult { FollowerId = id, Success = delivered });
        }

        return Task.FromResult<IReadOnlyList<AppendResult>>(outcomes);
    }

    /// <summary>
    /// Asks the registered voter for its vote; an unregistered voter yields a response
    /// that is not granted.
    /// </summary>
    public Task<VoteResponse> RequestVoteAsync(
        string candidateId, string voterId, VoteRequest request, CancellationToken ct)
    {
        var response = _nodes.TryGetValue(voterId, out var voter)
            ? voter.GrantVote(request.Term, candidateId)
            : new VoteResponse { Granted = false };

        return Task.FromResult(response);
    }

    /// <summary>No-op.</summary>
    public Task InstallSnapshotAsync(
        string leaderId, string followerId, RaftSnapshot snapshot, CancellationToken ct)
    {
        return Task.CompletedTask;
    }

    /// <summary>
    /// Delivers TimeoutNow to the target, if it is registered. When the target is a
    /// candidate afterwards, it is handed votes from the other registered nodes until
    /// it reports being leader or no peers remain. An unregistered target is ignored.
    /// </summary>
    public Task SendTimeoutNowAsync(string leaderId, string targetId, ulong term, CancellationToken ct)
    {
        if (_nodes.TryGetValue(targetId, out var target))
        {
            // Trigger the immediate election on the target node.
            target.ReceiveTimeoutNow(term);

            if (target.Role == RaftRole.Candidate)
            {
                CollectPeerVotes(target, targetId);
            }
        }

        return Task.CompletedTask;
    }

    /// <summary>No-op: never invokes <paramref name="onAck"/>.</summary>
    public Task SendHeartbeatAsync(string leaderId, IReadOnlyList<string> followerIds, int term, Action<string> onAck, CancellationToken ct)
    {
        return Task.CompletedTask;
    }

    // Feeds the target one vote response per other registered node, stopping as soon
    // as the target reports that it is leader.
    private void CollectPeerVotes(RaftNode target, string targetId)
    {
        var clusterSize = _nodes.Count;
        var peers = _nodes
            .Where(entry => !string.Equals(entry.Key, targetId, StringComparison.Ordinal))
            .Select(entry => entry.Value);

        foreach (var peer in peers)
        {
            target.ReceiveVote(peer.GrantVote(target.Term, targetId), clusterSize);
            if (target.IsLeader)
            {
                return;
            }
        }
    }
}
|