using NATS.Server.Raft;

namespace NATS.Server.Tests.Raft;

/// <summary>
/// Tests for RAFT leadership transfer via TimeoutNow RPC (Gap 8.4).
/// The leader sends a TimeoutNow message to a target follower, which immediately
/// starts an election. The leader blocks proposals while the transfer is in flight.
/// Go reference: raft.go sendTimeoutNow / processTimeoutNow
/// </summary>
public class RaftLeadershipTransferTests
{
    // NOTE(review): generic type arguments were missing from the reviewed text. Transport
    // signatures and collection types were restored from usage; the exception types passed
    // to Should.Throw / Should.ThrowAsync are assumptions -- confirm against RaftNode and
    // RaftTimeoutNowWire.

    // -- Helpers --

    // Short election timeouts so polling in TransferLeadershipAsync converges quickly
    // in tests without requiring real async delays.
    private const int TestElectionTimeoutMinMs = 5;
    private const int TestElectionTimeoutMaxMs = 10;

    /// <summary>
    /// Builds <paramref name="size"/> nodes named n1..nN on <paramref name="transport"/>,
    /// then for each node: registers it, configures the full cluster and applies the
    /// short test election timeouts (in that order).
    /// </summary>
    /// <param name="size">Number of nodes to create.</param>
    /// <param name="transport">Transport handed to every node's constructor.</param>
    /// <param name="register">Callback that registers a node with the concrete transport.</param>
    /// <returns>The created nodes, in id order.</returns>
    private static RaftNode[] CreateNodes(int size, IRaftTransport transport, Action<RaftNode> register)
    {
        var nodes = Enumerable.Range(1, size)
            .Select(i => new RaftNode($"n{i}", transport))
            .ToArray();

        foreach (var node in nodes)
        {
            register(node);
            node.ConfigureCluster(nodes);
            node.ElectionTimeoutMinMs = TestElectionTimeoutMinMs;
            node.ElectionTimeoutMaxMs = TestElectionTimeoutMaxMs;
        }

        return nodes;
    }

    /// <summary>Creates a cluster of <paramref name="size"/> nodes on an in-memory transport.</summary>
    private static (RaftNode[] nodes, InMemoryRaftTransport transport) CreateCluster(int size)
    {
        var transport = new InMemoryRaftTransport();
        var nodes = CreateNodes(size, transport, node => transport.Register(node));
        return (nodes, transport);
    }

    /// <summary>
    /// Creates a cluster whose transport grants votes synchronously while delivering
    /// TimeoutNow. Only the nodes are returned: the transport is a file-local type and
    /// therefore cannot appear in this class's member signatures.
    /// </summary>
    private static RaftNode[] CreateVoteGrantingCluster(int size)
    {
        var transport = new VoteGrantingTransport();
        return CreateNodes(size, transport, node => transport.Register(node));
    }

    /// <summary>
    /// Creates a single-node cluster on an in-memory transport, elects the node and then
    /// applies the short test election timeouts.
    /// </summary>
    private static RaftNode CreateSoloLeader()
    {
        var transport = new InMemoryRaftTransport();
        var leader = new RaftNode("leader", transport);
        leader.ConfigureCluster([leader]);
        transport.Register(leader);
        leader.StartElection(1);

        // Very short timeouts so the poll deadline is reached quickly.
        leader.ElectionTimeoutMinMs = TestElectionTimeoutMinMs;
        leader.ElectionTimeoutMaxMs = TestElectionTimeoutMaxMs;
        return leader;
    }

    /// <summary>
    /// Starts an election on the first node and feeds it the vote of every other node.
    /// </summary>
    /// <returns>The first node, which was the candidate.</returns>
    private static RaftNode ElectLeader(RaftNode[] nodes)
    {
        var candidate = nodes[0];
        candidate.StartElection(nodes.Length);
        foreach (var voter in nodes.Skip(1))
            candidate.ReceiveVote(voter.GrantVote(candidate.Term, candidate.Id), nodes.Length);
        return candidate;
    }

    // -- Wire format tests --
    // Go reference: raft.go TimeoutNow wire encoding

    [Fact]
    public void TimeoutNowRpc_wire_format_roundtrip()
    {
        var wire = new RaftTimeoutNowWire(Term: 7UL, LeaderId: "n1");

        var encoded = wire.Encode();
        encoded.Length.ShouldBe(RaftTimeoutNowWire.MessageLen); // 16 bytes

        var decoded = RaftTimeoutNowWire.Decode(encoded);
        decoded.Term.ShouldBe(7UL);
        decoded.LeaderId.ShouldBe("n1");
    }

    [Fact]
    public void TimeoutNowRpc_wire_format_preserves_term_and_leader_id()
    {
        var wire = new RaftTimeoutNowWire(Term: 42UL, LeaderId: "node5");

        var decoded = RaftTimeoutNowWire.Decode(wire.Encode());

        decoded.Term.ShouldBe(42UL);
        decoded.LeaderId.ShouldBe("node5");
    }

    [Fact]
    public void TimeoutNowRpc_decode_throws_on_wrong_length()
    {
        // NOTE(review): exception type assumed (ArgumentException) -- confirm.
        Should.Throw<ArgumentException>(() =>
            RaftTimeoutNowWire.Decode(new byte[10]));
    }

    [Fact]
    public void TimeoutNowRpc_message_len_is_16_bytes()
    {
        RaftTimeoutNowWire.MessageLen.ShouldBe(16);
    }

    // -- ReceiveTimeoutNow logic tests --
    // Go reference: raft.go processTimeoutNow -- follower starts election immediately

    [Fact]
    public void ReceiveTimeoutNow_triggers_immediate_election_on_follower()
    {
        var (nodes, _) = CreateCluster(3);
        var follower = nodes[1]; // starts as follower
        follower.Role.ShouldBe(RaftRole.Follower);

        follower.ReceiveTimeoutNow(term: 0);

        // Node should now be a candidate (or leader if it self-voted quorum)
        follower.Role.ShouldBeOneOf(RaftRole.Candidate, RaftRole.Leader);
    }

    [Fact]
    public void ReceiveTimeoutNow_updates_term_when_sender_term_is_higher()
    {
        var node = new RaftNode("follower");
        node.TermState.CurrentTerm = 3;

        node.ReceiveTimeoutNow(term: 10);

        // ReceiveTimeoutNow sets term to 10, then StartElection increments to 11
        node.TermState.CurrentTerm.ShouldBe(11);
    }

    [Fact]
    public void ReceiveTimeoutNow_increments_term_and_starts_campaign()
    {
        var node = new RaftNode("n1");
        node.TermState.CurrentTerm = 2;
        var termBefore = node.Term;

        node.ReceiveTimeoutNow(term: 0);

        // StartElection increments the term regardless of whether the node wins.
        node.Term.ShouldBe(termBefore + 1);
        // No cluster is configured here; the node is expected to have started campaigning,
        // so either Candidate or Leader (if the self-vote already satisfied quorum) is accepted.
        node.Role.ShouldBeOneOf(RaftRole.Candidate, RaftRole.Leader);
    }

    [Fact]
    public void ReceiveTimeoutNow_on_single_node_makes_it_leader()
    {
        // Single-node cluster: quorum = 1, so self-vote is sufficient.
        var node = new RaftNode("solo");
        node.ConfigureCluster([node]);

        node.ReceiveTimeoutNow(term: 0);

        node.IsLeader.ShouldBeTrue();
    }

    // -- Proposal blocking during transfer --
    // Go reference: raft.go -- leader rejects new entries while transfer is in progress.
    // BlockingTimeoutNowTransport signals via SemaphoreSlim when SendTimeoutNowAsync is
    // entered, letting the test observe the _transferInProgress flag without timing deps.

    [Fact]
    public async Task TransferLeadership_leader_blocks_proposals_during_transfer()
    {
        var blockingTransport = new BlockingTimeoutNowTransport();
        var node = new RaftNode("leader", blockingTransport);
        node.ConfigureCluster([node]);
        node.StartElection(1); // become leader
        node.IsLeader.ShouldBeTrue();

        using var cts = new CancellationTokenSource();
        var transferTask = node.TransferLeadershipAsync("n2", cts.Token);

        // Wait until SendTimeoutNowAsync is entered -- transfer flag is guaranteed set.
        await blockingTransport.WaitUntilBlockingAsync();

        // ProposeAsync must throw because the transfer flag is set.
        // NOTE(review): exception type assumed (InvalidOperationException) -- confirm.
        var ex = await Should.ThrowAsync<InvalidOperationException>(
            () => node.ProposeAsync("cmd", CancellationToken.None).AsTask());
        ex.Message.ShouldContain("Leadership transfer in progress");

        // Cancel and await proper completion to avoid test resource leaks.
        // NOTE(review): exception type assumed (OperationCanceledException) -- confirm.
        await cts.CancelAsync();
        await Should.ThrowAsync<OperationCanceledException>(() => transferTask);
    }

    // Go reference: raft.go -- only leader can initiate leadership transfer
    [Fact]
    public async Task TransferLeadership_only_leader_can_transfer()
    {
        var transport = new InMemoryRaftTransport();
        var follower = new RaftNode("follower", transport);
        follower.Role.ShouldBe(RaftRole.Follower);

        // NOTE(review): exception type assumed (InvalidOperationException) -- confirm.
        var ex = await Should.ThrowAsync<InvalidOperationException>(
            () => follower.TransferLeadershipAsync("n2", CancellationToken.None));
        ex.Message.ShouldContain("Only the leader");
    }

    // Go reference: raft.go -- TransferLeadershipAsync requires a configured transport
    [Fact]
    public async Task TransferLeadership_throws_when_no_transport_configured()
    {
        // No transport injected.
        var node = new RaftNode("leader");
        node.StartElection(1); // become leader (single node, quorum = 1)
        node.IsLeader.ShouldBeTrue();

        // NOTE(review): exception type assumed (InvalidOperationException) -- confirm.
        var ex = await Should.ThrowAsync<InvalidOperationException>(
            () => node.TransferLeadershipAsync("n2", CancellationToken.None));
        ex.Message.ShouldContain("No transport configured");
    }

    // Go reference: raft.go sendTimeoutNow -- target becomes leader after receiving TimeoutNow.
    // VoteGrantingTransport delivers TimeoutNow and immediately grants votes so the target
    // is already leader before the polling loop runs -- no Task.Delay required.
    [Fact]
    public async Task TransferLeadership_target_becomes_leader()
    {
        var nodes = CreateVoteGrantingCluster(3);
        var leader = ElectLeader(nodes);
        leader.IsLeader.ShouldBeTrue();
        var target = nodes[1];

        // VoteGrantingTransport makes the target a leader synchronously during TimeoutNow
        // delivery, so the first poll iteration in TransferLeadershipAsync succeeds.
        var result = await leader.TransferLeadershipAsync(target.Id, CancellationToken.None);

        result.ShouldBeTrue();
        target.IsLeader.ShouldBeTrue();
    }

    // Go reference: raft.go sendTimeoutNow -- returns false when target doesn't respond.
    // "ghost" is not registered in the transport so TimeoutNow is a no-op and the
    // polling loop times out after 2x election timeout.
    [Fact]
    public async Task TransferLeadership_timeout_on_unreachable_target()
    {
        var leader = CreateSoloLeader();

        // "ghost" is not registered -- TimeoutNow is a no-op; target never becomes leader.
        var result = await leader.TransferLeadershipAsync("ghost", CancellationToken.None);

        result.ShouldBeFalse();
    }

    // -- Integration: flag lifecycle --

    [Fact]
    public async Task TransferLeadership_clears_transfer_flag_after_success()
    {
        var nodes = CreateVoteGrantingCluster(3);
        var leader = ElectLeader(nodes);
        var target = nodes[1];

        var success = await leader.TransferLeadershipAsync(target.Id, CancellationToken.None);

        success.ShouldBeTrue();
        // After transfer completes the flag must be cleared.
        leader.TransferInProgress.ShouldBeFalse();
    }

    [Fact]
    public async Task TransferLeadership_clears_transfer_flag_after_timeout()
    {
        var leader = CreateSoloLeader();

        // "ghost" is not registered -- transfer times out.
        await leader.TransferLeadershipAsync("ghost", CancellationToken.None);

        // Flag must be cleared regardless of outcome.
        leader.TransferInProgress.ShouldBeFalse();
    }
}

/// <summary>
/// A transport that blocks inside <see cref="SendTimeoutNowAsync"/> until the
/// provided <see cref="CancellationToken"/> is cancelled. Exposes a semaphore
/// so the test can synchronize on when the leader transfer flag is set.
/// </summary>
file sealed class BlockingTimeoutNowTransport : IRaftTransport
{
    // Upper bound on how long a test waits for SendTimeoutNowAsync to be entered. Without
    // it, a transfer that fails before reaching the transport would hang the test run
    // instead of failing it.
    private static readonly TimeSpan EnterTimeout = TimeSpan.FromSeconds(5);

    private readonly SemaphoreSlim _entered = new(0, 1);

    /// <summary>
    /// Returns a task that completes once <see cref="SendTimeoutNowAsync"/> has been
    /// entered and the leader's transfer flag is guaranteed to be set.
    /// </summary>
    /// <exception cref="TimeoutException">
    /// <see cref="SendTimeoutNowAsync"/> was not entered within the wait bound.
    /// </exception>
    public async Task WaitUntilBlockingAsync()
    {
        if (!await _entered.WaitAsync(EnterTimeout))
            throw new TimeoutException(
                $"SendTimeoutNowAsync was not entered within {EnterTimeout.TotalSeconds} seconds.");
    }

    /// <summary>Reports no append results; this transport never replicates entries.</summary>
    public Task<IReadOnlyList<AppendResult>> AppendEntriesAsync(
        string leaderId,
        IReadOnlyList<string> followerIds,
        RaftLogEntry entry,
        CancellationToken ct)
        => Task.FromResult<IReadOnlyList<AppendResult>>([]);

    /// <summary>Always answers with a denied vote.</summary>
    public Task<VoteResponse> RequestVoteAsync(
        string candidateId,
        string voterId,
        VoteRequest request,
        CancellationToken ct)
        => Task.FromResult(new VoteResponse { Granted = false });

    /// <summary>No-op.</summary>
    public Task InstallSnapshotAsync(
        string leaderId,
        string followerId,
        RaftSnapshot snapshot,
        CancellationToken ct)
        => Task.CompletedTask;

    /// <summary>
    /// Signals that the call has been entered, then stays pending until
    /// <paramref name="ct"/> is cancelled, at which point the returned task is cancelled.
    /// </summary>
    public async Task SendTimeoutNowAsync(string leaderId, string targetId, ulong term, CancellationToken ct)
    {
        // Signal that the transfer flag is set -- the test can now probe ProposeAsync.
        _entered.Release();

        // Block until the test cancels the token.
        var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
        await using var reg = ct.Register(() => tcs.TrySetCanceled(ct));
        await tcs.Task;
    }

    /// <summary>No-op; <paramref name="onAck"/> is never invoked.</summary>
    public Task SendHeartbeatAsync(
        string leaderId,
        IReadOnlyList<string> followerIds,
        int term,
        Action<string> onAck,
        CancellationToken ct)
        => Task.CompletedTask;
}

///
/// A transport that, when delivering a TimeoutNow RPC, also immediately grants
/// votes to the target candidate so it reaches quorum synchronously. This makes
/// the target become leader before TransferLeadershipAsync starts polling, removing
/// any need for Task.Delay waits in the test.
///
/// <remarks>
/// Nodes are reachable only after <see cref="Register"/>. RPCs addressed to an
/// unregistered id report failure (AppendEntries, RequestVote) or are dropped
/// silently (TimeoutNow).
/// </remarks>
file sealed class VoteGrantingTransport : IRaftTransport
{
    // NOTE(review): generic type arguments were missing from the reviewed text and have
    // been restored from usage -- confirm they match IRaftTransport.
    private readonly Dictionary<string, RaftNode> _nodes = new(StringComparer.Ordinal);

    /// <summary>Makes <paramref name="node"/> addressable by its id, replacing any earlier entry.</summary>
    public void Register(RaftNode node) => _nodes[node.Id] = node;

    /// <summary>
    /// Hands <paramref name="entry"/> to every registered follower and reports success
    /// for it; followers that are not registered are reported as failed.
    /// </summary>
    /// <returns>One result per id in <paramref name="followerIds"/>, in the same order.</returns>
    public Task<IReadOnlyList<AppendResult>> AppendEntriesAsync(
        string leaderId,
        IReadOnlyList<string> followerIds,
        RaftLogEntry entry,
        CancellationToken ct)
    {
        var results = new List<AppendResult>(followerIds.Count);
        foreach (var followerId in followerIds)
        {
            var delivered = _nodes.TryGetValue(followerId, out var node);
            if (delivered)
                node!.ReceiveReplicatedEntry(entry);

            results.Add(new AppendResult { FollowerId = followerId, Success = delivered });
        }

        return Task.FromResult<IReadOnlyList<AppendResult>>(results);
    }

    /// <summary>
    /// Asks the registered voter to vote for <paramref name="candidateId"/> at the
    /// request's term; an unregistered voter yields a denied vote.
    /// </summary>
    public Task<VoteResponse> RequestVoteAsync(
        string candidateId,
        string voterId,
        VoteRequest request,
        CancellationToken ct)
    {
        var response = _nodes.TryGetValue(voterId, out var node)
            ? node.GrantVote(request.Term, candidateId)
            : new VoteResponse { Granted = false };

        return Task.FromResult(response);
    }

    /// <summary>No-op.</summary>
    public Task InstallSnapshotAsync(
        string leaderId,
        string followerId,
        RaftSnapshot snapshot,
        CancellationToken ct)
        => Task.CompletedTask;

    /// <summary>
    /// Delivers TimeoutNow to the target (triggering an immediate election), then
    /// grants votes from every other peer so the target reaches quorum synchronously.
    /// This ensures the target is already leader before TransferLeadershipAsync polls,
    /// removing any timing dependency between delivery and vote propagation.
    /// </summary>
    public Task SendTimeoutNowAsync(string leaderId, string targetId, ulong term, CancellationToken ct)
    {
        if (!_nodes.TryGetValue(targetId, out var target))
            return Task.CompletedTask;

        // Trigger immediate election on the target node.
        target.ReceiveTimeoutNow(term);

        // Nothing to add unless the target is still campaigning after delivery.
        if (target.Role != RaftRole.Candidate)
            return Task.CompletedTask;

        // Grant peer votes so the target reaches quorum immediately. The number of
        // registered nodes is used as the cluster size.
        var clusterSize = _nodes.Count;
        foreach (var (peerId, peer) in _nodes)
        {
            if (string.Equals(peerId, targetId, StringComparison.Ordinal))
                continue;

            var vote = peer.GrantVote(target.Term, targetId);
            target.ReceiveVote(vote, clusterSize);

            // Stop as soon as quorum is reached; further votes are unnecessary.
            if (target.IsLeader)
                break;
        }

        return Task.CompletedTask;
    }

    /// <summary>No-op; <paramref name="onAck"/> is never invoked.</summary>
    public Task SendHeartbeatAsync(
        string leaderId,
        IReadOnlyList<string> followerIds,
        int term,
        Action<string> onAck,
        CancellationToken ct)
        => Task.CompletedTask;
}