Files
natsdotnet/tests/NATS.Server.Raft.Tests/Raft/RaftLeadershipTransferTests.cs
Joseph Doherty edf9ed770e refactor: extract NATS.Server.Raft.Tests project
Move 43 Raft consensus test files (8 root-level + 35 in Raft/ subfolder)
from NATS.Server.Tests into a dedicated NATS.Server.Raft.Tests project.
Update namespaces, add InternalsVisibleTo, and fix timing/exception
handling issues in moved test files.
2026-03-12 15:36:02 -04:00

418 lines
16 KiB
C#

using NATS.Server.Raft;
namespace NATS.Server.Raft.Tests.Raft;
/// <summary>
/// Tests for RAFT leadership transfer via TimeoutNow RPC (Gap 8.4).
/// The leader sends a TimeoutNow message to a target follower, which immediately
/// starts an election. The leader blocks proposals while the transfer is in flight.
/// Go reference: raft.go sendTimeoutNow / processTimeoutNow
/// </summary>
public class RaftLeadershipTransferTests
{
    // -- Helpers --

    // Short election timeouts so polling in TransferLeadershipAsync converges
    // quickly in tests without requiring long real delays.
    private const int ShortElectionTimeoutMinMs = 5;
    private const int ShortElectionTimeoutMaxMs = 10;

    // Applies the short test election timeouts to a single node.
    private static void UseShortElectionTimeouts(RaftNode node)
    {
        node.ElectionTimeoutMinMs = ShortElectionTimeoutMinMs;
        node.ElectionTimeoutMaxMs = ShortElectionTimeoutMaxMs;
    }

    // Builds nodes n1..n{size} on the given transport. Every node is registered via the
    // supplied callback, told about the full membership, and given short election timeouts.
    // The transport is taken as IRaftTransport plus a register callback because the
    // file-local test transports cannot appear in a member signature of this class.
    private static RaftNode[] CreateNodes(int size, IRaftTransport transport, Action<RaftNode> register)
    {
        var nodes = Enumerable.Range(1, size)
            .Select(i => new RaftNode($"n{i}", transport))
            .ToArray();

        foreach (var node in nodes)
        {
            register(node);
            node.ConfigureCluster(nodes);
            UseShortElectionTimeouts(node);
        }

        return nodes;
    }

    // Builds a cluster of the requested size wired to a fresh InMemoryRaftTransport.
    private static (RaftNode[] nodes, InMemoryRaftTransport transport) CreateCluster(int size)
    {
        var transport = new InMemoryRaftTransport();
        var nodes = CreateNodes(size, transport, node => transport.Register(node));
        return (nodes, transport);
    }

    // Starts an election on nodes[0] and feeds it the vote of every other node,
    // then returns nodes[0].
    private static RaftNode ElectLeader(RaftNode[] nodes)
    {
        var candidate = nodes[0];
        candidate.StartElection(nodes.Length);

        foreach (var voter in nodes.Skip(1))
        {
            candidate.ReceiveVote(voter.GrantVote(candidate.Term, candidate.Id), nodes.Length);
        }

        return candidate;
    }

    // Builds a single-node cluster on an InMemoryRaftTransport, runs an election with a
    // cluster size of 1, and applies short election timeouts so a transfer to an
    // unregistered target reaches its poll deadline quickly.
    private static RaftNode CreateSingleNodeLeader()
    {
        var transport = new InMemoryRaftTransport();
        var leader = new RaftNode("leader", transport);
        leader.ConfigureCluster([leader]);
        transport.Register(leader);
        leader.StartElection(1);
        UseShortElectionTimeouts(leader);
        return leader;
    }

    // -- Wire format tests --
    // Go reference: raft.go TimeoutNow wire encoding

    [Fact]
    public void TimeoutNowRpc_wire_format_roundtrip()
    {
        var wire = new RaftTimeoutNowWire(Term: 7UL, LeaderId: "n1");

        var encoded = wire.Encode();

        encoded.Length.ShouldBe(RaftTimeoutNowWire.MessageLen); // 16 bytes
        var decoded = RaftTimeoutNowWire.Decode(encoded);
        decoded.Term.ShouldBe(7UL);
        decoded.LeaderId.ShouldBe("n1");
    }

    [Fact]
    public void TimeoutNowRpc_wire_format_preserves_term_and_leader_id()
    {
        var wire = new RaftTimeoutNowWire(Term: 42UL, LeaderId: "node5");

        var decoded = RaftTimeoutNowWire.Decode(wire.Encode());

        decoded.Term.ShouldBe(42UL);
        decoded.LeaderId.ShouldBe("node5");
    }

    [Fact]
    public void TimeoutNowRpc_decode_throws_on_wrong_length()
    {
        // 10 bytes is not the fixed message length, so decoding must be rejected.
        Should.Throw<ArgumentException>(() =>
            RaftTimeoutNowWire.Decode(new byte[10]));
    }

    [Fact]
    public void TimeoutNowRpc_message_len_is_16_bytes()
    {
        RaftTimeoutNowWire.MessageLen.ShouldBe(16);
    }

    // -- ReceiveTimeoutNow logic tests --
    // Go reference: raft.go processTimeoutNow -- follower starts election immediately

    [Fact]
    public void ReceiveTimeoutNow_triggers_immediate_election_on_follower()
    {
        var (nodes, _) = CreateCluster(3);
        var follower = nodes[1]; // starts as follower
        follower.Role.ShouldBe(RaftRole.Follower);

        follower.ReceiveTimeoutNow(term: 0);

        // Node should now be a candidate (or leader if it self-voted quorum)
        follower.Role.ShouldBeOneOf(RaftRole.Candidate, RaftRole.Leader);
    }

    [Fact]
    public void ReceiveTimeoutNow_updates_term_when_sender_term_is_higher()
    {
        var node = new RaftNode("follower");
        node.TermState.CurrentTerm = 3;

        node.ReceiveTimeoutNow(term: 10);

        // ReceiveTimeoutNow sets term to 10, then StartElection increments to 11
        node.TermState.CurrentTerm.ShouldBe(11);
    }

    [Fact]
    public void ReceiveTimeoutNow_increments_term_and_starts_campaign()
    {
        var node = new RaftNode("n1");
        node.TermState.CurrentTerm = 2;
        var termBefore = node.Term;

        node.ReceiveTimeoutNow(term: 0);

        // StartElection increments the term regardless of whether the node wins.
        node.Term.ShouldBe(termBefore + 1);
        // With no cluster configured, quorum = 1 (self-vote), so the node becomes leader.
        node.Role.ShouldBeOneOf(RaftRole.Candidate, RaftRole.Leader);
    }

    [Fact]
    public void ReceiveTimeoutNow_on_single_node_makes_it_leader()
    {
        // Single-node cluster: quorum = 1, so self-vote is sufficient.
        var node = new RaftNode("solo");
        node.ConfigureCluster([node]);

        node.ReceiveTimeoutNow(term: 0);

        node.IsLeader.ShouldBeTrue();
    }

    // -- Proposal blocking during transfer --
    // Go reference: raft.go -- leader rejects new entries while transfer is in progress.
    // BlockingTimeoutNowTransport signals via SemaphoreSlim when SendTimeoutNowAsync is
    // entered, letting the test observe the _transferInProgress flag without timing deps.

    [Fact]
    public async Task TransferLeadership_leader_blocks_proposals_during_transfer()
    {
        var blockingTransport = new BlockingTimeoutNowTransport();
        var node = new RaftNode("leader", blockingTransport);
        node.ConfigureCluster([node]);
        node.StartElection(1); // become leader
        node.IsLeader.ShouldBeTrue();

        using var cts = new CancellationTokenSource();
        var transferTask = node.TransferLeadershipAsync("n2", cts.Token);

        // Wait until SendTimeoutNowAsync is entered -- transfer flag is guaranteed set.
        await blockingTransport.WaitUntilBlockingAsync();

        // ProposeAsync must throw because the transfer flag is set.
        var ex = await Should.ThrowAsync<InvalidOperationException>(
            () => node.ProposeAsync("cmd", CancellationToken.None).AsTask());
        ex.Message.ShouldContain("Leadership transfer in progress");

        // Cancel and await proper completion to avoid test resource leaks.
        await cts.CancelAsync();
        await Should.ThrowAsync<OperationCanceledException>(() => transferTask);
    }

    // Go reference: raft.go -- only leader can initiate leadership transfer
    [Fact]
    public async Task TransferLeadership_only_leader_can_transfer()
    {
        var transport = new InMemoryRaftTransport();
        var follower = new RaftNode("follower", transport);
        follower.Role.ShouldBe(RaftRole.Follower);

        var ex = await Should.ThrowAsync<InvalidOperationException>(
            () => follower.TransferLeadershipAsync("n2", CancellationToken.None));

        ex.Message.ShouldContain("Only the leader");
    }

    // Go reference: raft.go -- TransferLeadershipAsync requires a configured transport
    [Fact]
    public async Task TransferLeadership_throws_when_no_transport_configured()
    {
        // No transport injected.
        var node = new RaftNode("leader");
        node.StartElection(1); // become leader (single node, quorum = 1)
        node.IsLeader.ShouldBeTrue();

        var ex = await Should.ThrowAsync<InvalidOperationException>(
            () => node.TransferLeadershipAsync("n2", CancellationToken.None));

        ex.Message.ShouldContain("No transport configured");
    }

    // Go reference: raft.go sendTimeoutNow -- target becomes leader after receiving TimeoutNow.
    // VoteGrantingTransport delivers TimeoutNow and immediately grants votes so the target
    // is already leader before the polling loop runs -- no Task.Delay required.
    [Fact]
    public async Task TransferLeadership_target_becomes_leader()
    {
        var transport = new VoteGrantingTransport();
        var nodes = CreateNodes(3, transport, node => transport.Register(node));
        var leader = ElectLeader(nodes);
        leader.IsLeader.ShouldBeTrue();
        var target = nodes[1];

        // VoteGrantingTransport makes the target a leader synchronously during TimeoutNow
        // delivery, so the first poll iteration in TransferLeadershipAsync succeeds.
        var result = await leader.TransferLeadershipAsync(target.Id, CancellationToken.None);

        result.ShouldBeTrue();
        target.IsLeader.ShouldBeTrue();
    }

    // Go reference: raft.go sendTimeoutNow -- returns false when target doesn't respond.
    // "ghost" is not registered in the transport so TimeoutNow is a no-op and the
    // polling loop times out after 2x election timeout.
    [Fact]
    public async Task TransferLeadership_timeout_on_unreachable_target()
    {
        // Very short timeouts (applied by the helper) so the poll deadline is reached quickly.
        var leader = CreateSingleNodeLeader();

        // "ghost" is not registered -- TimeoutNow is a no-op; target never becomes leader.
        var result = await leader.TransferLeadershipAsync("ghost", CancellationToken.None);

        result.ShouldBeFalse();
    }

    // -- Integration: flag lifecycle --

    [Fact]
    public async Task TransferLeadership_clears_transfer_flag_after_success()
    {
        var transport = new VoteGrantingTransport();
        var nodes = CreateNodes(3, transport, node => transport.Register(node));
        var leader = ElectLeader(nodes);
        // Precondition: only a leader can start a transfer.
        leader.IsLeader.ShouldBeTrue();
        var target = nodes[1];

        var success = await leader.TransferLeadershipAsync(target.Id, CancellationToken.None);

        success.ShouldBeTrue();
        // After transfer completes the flag must be cleared.
        leader.TransferInProgress.ShouldBeFalse();
    }

    [Fact]
    public async Task TransferLeadership_clears_transfer_flag_after_timeout()
    {
        var leader = CreateSingleNodeLeader();

        // "ghost" is not registered -- transfer times out.
        var result = await leader.TransferLeadershipAsync("ghost", CancellationToken.None);

        // Confirm the timeout path was actually exercised before checking the flag.
        result.ShouldBeFalse();
        // Flag must be cleared regardless of outcome.
        leader.TransferInProgress.ShouldBeFalse();
    }
}
/// <summary>
/// A transport that blocks inside <see cref="SendTimeoutNowAsync"/> until the
/// provided <see cref="CancellationToken"/> is cancelled. Exposes a semaphore-backed
/// wait so the test can synchronize on when the leader transfer flag is set.
/// </summary>
file sealed class BlockingTimeoutNowTransport : IRaftTransport
{
    // Upper bound used when the caller does not supply one. It only matters on failure:
    // it turns "SendTimeoutNowAsync was never reached" into a test failure instead of a hang.
    private static readonly TimeSpan DefaultEntryTimeout = TimeSpan.FromSeconds(30);

    // Released once when SendTimeoutNowAsync is entered.
    private readonly SemaphoreSlim _entered = new(0, 1);

    /// <summary>
    /// Completes once <see cref="SendTimeoutNowAsync"/> has been entered and the
    /// leader's transfer flag is guaranteed to be set.
    /// </summary>
    /// <param name="timeout">
    /// Maximum time to wait; defaults to 30 seconds. Pass
    /// <see cref="Timeout.InfiniteTimeSpan"/> to wait without a bound.
    /// </param>
    /// <exception cref="TimeoutException">
    /// <see cref="SendTimeoutNowAsync"/> was not entered within the timeout.
    /// </exception>
    public async Task WaitUntilBlockingAsync(TimeSpan? timeout = null)
    {
        var limit = timeout ?? DefaultEntryTimeout;
        if (!await _entered.WaitAsync(limit))
        {
            throw new TimeoutException(
                $"SendTimeoutNowAsync was not entered within {limit}.");
        }
    }

    /// <summary>Test stub: replicates nothing and reports an empty result list.</summary>
    public Task<IReadOnlyList<AppendResult>> AppendEntriesAsync(
        string leaderId, IReadOnlyList<string> followerIds, RaftLogEntry entry, CancellationToken ct)
        => Task.FromResult<IReadOnlyList<AppendResult>>([]);

    /// <summary>Test stub: always answers with a denied vote.</summary>
    public Task<VoteResponse> RequestVoteAsync(
        string candidateId, string voterId, VoteRequest request, CancellationToken ct)
        => Task.FromResult(new VoteResponse { Granted = false });

    /// <summary>Test stub: does nothing.</summary>
    public Task InstallSnapshotAsync(
        string leaderId, string followerId, RaftSnapshot snapshot, CancellationToken ct)
        => Task.CompletedTask;

    /// <summary>
    /// Signals that the call has been entered, then stays pending until
    /// <paramref name="ct"/> is cancelled, at which point the returned task is cancelled.
    /// </summary>
    public async Task SendTimeoutNowAsync(string leaderId, string targetId, ulong term, CancellationToken ct)
    {
        // Signal that the transfer flag is set -- the test can now probe ProposeAsync.
        _entered.Release();

        // Block until the test cancels the token.
        var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
        await using var reg = ct.Register(() => tcs.TrySetCanceled(ct));
        await tcs.Task;
    }

    /// <summary>Test stub: does nothing and never invokes <paramref name="onAck"/>.</summary>
    public Task SendHeartbeatAsync(string leaderId, IReadOnlyList<string> followerIds, int term, Action<string> onAck, CancellationToken ct)
        => Task.CompletedTask;
}
/// <summary>
/// Test transport whose TimeoutNow delivery also hands the target the votes of the
/// other registered nodes, so the target can reach quorum synchronously. The target
/// is therefore already leader before TransferLeadershipAsync starts polling, and the
/// tests need no Task.Delay waits.
/// </summary>
file sealed class VoteGrantingTransport : IRaftTransport
{
    // Registered nodes keyed by node id (ordinal comparison).
    private readonly Dictionary<string, RaftNode> _nodes = new(StringComparer.Ordinal);

    /// <summary>Adds the node under its id, replacing any earlier entry with that id.</summary>
    public void Register(RaftNode node)
    {
        _nodes[node.Id] = node;
    }

    /// <summary>
    /// Hands the entry to every registered follower and returns one result per requested
    /// follower id, in request order; unregistered ids are reported as failures.
    /// </summary>
    public Task<IReadOnlyList<AppendResult>> AppendEntriesAsync(
        string leaderId, IReadOnlyList<string> followerIds, RaftLogEntry entry, CancellationToken ct)
    {
        var outcomes = new List<AppendResult>(followerIds.Count);

        foreach (var id in followerIds)
        {
            var delivered = false;
            if (_nodes.TryGetValue(id, out var follower))
            {
                follower.ReceiveReplicatedEntry(entry);
                delivered = true;
            }

            outcomes.Add(new AppendResult { FollowerId = id, Success = delivered });
        }

        return Task.FromResult<IReadOnlyList<AppendResult>>(outcomes);
    }

    /// <summary>
    /// Asks the registered voter to grant its vote; an unregistered voter yields a denial.
    /// </summary>
    public Task<VoteResponse> RequestVoteAsync(
        string candidateId, string voterId, VoteRequest request, CancellationToken ct)
    {
        var response = _nodes.TryGetValue(voterId, out var voter)
            ? voter.GrantVote(request.Term, candidateId)
            : new VoteResponse { Granted = false };

        return Task.FromResult(response);
    }

    /// <summary>Test stub: does nothing.</summary>
    public Task InstallSnapshotAsync(
        string leaderId, string followerId, RaftSnapshot snapshot, CancellationToken ct)
    {
        return Task.CompletedTask;
    }

    /// <summary>
    /// Delivers TimeoutNow to the target (triggering an immediate election), then feeds
    /// it votes from the other registered nodes until it reports itself leader or the
    /// peers run out. An unregistered target makes the call a no-op.
    /// </summary>
    public Task SendTimeoutNowAsync(string leaderId, string targetId, ulong term, CancellationToken ct)
    {
        if (_nodes.TryGetValue(targetId, out var target))
        {
            // Kick off the election on the target right away.
            target.ReceiveTimeoutNow(term);

            // Votes are only collected while the target is still a candidate.
            if (target.Role == RaftRole.Candidate)
            {
                var memberCount = _nodes.Count;

                foreach (var member in _nodes)
                {
                    // The target does not receive a vote from itself here.
                    if (!string.Equals(member.Key, targetId, StringComparison.Ordinal))
                    {
                        var ballot = member.Value.GrantVote(target.Term, targetId);
                        target.ReceiveVote(ballot, memberCount);

                        // Stop as soon as the target has taken over.
                        if (target.IsLeader)
                        {
                            break;
                        }
                    }
                }
            }
        }

        return Task.CompletedTask;
    }

    /// <summary>Test stub: does nothing and never invokes <paramref name="onAck"/>.</summary>
    public Task SendHeartbeatAsync(string leaderId, IReadOnlyList<string> followerIds, int term, Action<string> onAck, CancellationToken ct)
    {
        return Task.CompletedTask;
    }
}