feat(raft): add commit queue, election timers, and peer health tracking (B1+B2+B3)
- CommitQueue<T>: channel-based queue for committed entries awaiting state machine application
- RaftPeerState: tracks replication and health state (nextIndex, matchIndex, lastContact)
- RaftNode: CommitIndex/ProcessedIndex tracking, election timer with randomized 150-300ms interval, peer state integration with heartbeat and replication updates
- 52 new tests across RaftApplyQueueTests, RaftElectionTimerTests, RaftHealthTests
This commit is contained in:
263
tests/NATS.Server.Tests/Raft/RaftElectionTimerTests.cs
Normal file
263
tests/NATS.Server.Tests/Raft/RaftElectionTimerTests.cs
Normal file
@@ -0,0 +1,263 @@
|
||||
using NATS.Server.Raft;
|
||||
|
||||
namespace NATS.Server.Tests.Raft;
|
||||
|
||||
/// <summary>
|
||||
/// Tests for election timeout management and campaign triggering in RaftNode.
|
||||
/// Go reference: raft.go:1400-1450 (resetElectionTimeout), raft.go:1500-1550 (campaign logic).
|
||||
/// </summary>
|
||||
public class RaftElectionTimerTests : IDisposable
|
||||
{
|
||||
private readonly List<RaftNode> _nodesToDispose = [];
|
||||
|
||||
/// <summary>
/// Test teardown: disposes every node that was registered in <c>_nodesToDispose</c>.
/// </summary>
public void Dispose()
{
    _nodesToDispose.ForEach(tracked => tracked.Dispose());
}
|
||||
|
||||
/// <summary>
/// Creates a <see cref="RaftNode"/> with the given id and records it in
/// <c>_nodesToDispose</c> so that <see cref="Dispose"/> disposes it after the test.
/// </summary>
/// <param name="id">Identifier passed to the <see cref="RaftNode"/> constructor.</param>
/// <returns>The newly created, tracked node.</returns>
private RaftNode CreateTrackedNode(string id)
{
    RaftNode created = new(id);
    _nodesToDispose.Add(created);
    return created;
}
|
||||
|
||||
/// <summary>
/// Creates <paramref name="size"/> tracked nodes named <c>n1</c>..<c>n{size}</c> and calls
/// <c>ConfigureCluster</c> on each of them with the full node array.
/// </summary>
/// <param name="size">Number of nodes to create.</param>
/// <returns>The created nodes, in creation order.</returns>
private RaftNode[] CreateTrackedCluster(int size)
{
    var members = new List<RaftNode>();
    foreach (var ordinal in Enumerable.Range(1, size))
    {
        members.Add(CreateTrackedNode($"n{ordinal}"));
    }

    var cluster = members.ToArray();
    foreach (var member in cluster)
    {
        member.ConfigureCluster(cluster);
    }

    return cluster;
}
|
||||
|
||||
/// <summary>
/// Verifies that a node whose election timeout is reset every 30ms (with a 50-80ms
/// timeout window) is still a follower after five resets.
/// </summary>
[Fact]
public void ResetElectionTimeout_prevents_election_while_receiving_heartbeats()
{
    // Short timeout window so the test runs quickly.
    var follower = CreateTrackedCluster(3)[0];
    follower.ElectionTimeoutMinMs = 50;
    follower.ElectionTimeoutMaxMs = 80;
    follower.StartElectionTimer();

    // Each reset lands 30ms after the previous one, i.e. before the 50ms minimum timeout.
    var remainingResets = 5;
    while (remainingResets-- > 0)
    {
        Thread.Sleep(30);
        follower.ResetElectionTimeout();
    }

    // The repeated resets are expected to have kept the node from campaigning.
    follower.Role.ShouldBe(RaftRole.Follower);
    follower.StopElectionTimer();
}
|
||||
|
||||
/// <summary>
/// Verifies that <c>CampaignImmediately</c> moves a follower at term 0 to candidate at
/// term 1 with a vote for itself, without the election timer ever being started.
/// </summary>
[Fact]
public void CampaignImmediately_triggers_election_without_timer()
{
    var campaigner = CreateTrackedCluster(3)[0];

    // Preconditions: the node has not taken part in any election yet.
    campaigner.Role.ShouldBe(RaftRole.Follower);
    campaigner.Term.ShouldBe(0);

    campaigner.CampaignImmediately();

    // An election was started: new role, bumped term, self-vote recorded.
    campaigner.Role.ShouldBe(RaftRole.Candidate);
    campaigner.Term.ShouldBe(1);
    campaigner.TermState.VotedFor.ShouldBe(campaigner.Id);
}
|
||||
|
||||
/// <summary>
/// Verifies that a node whose only registered member is itself is leader right after
/// <c>CampaignImmediately</c>.
/// </summary>
[Fact]
public void CampaignImmediately_single_node_becomes_leader()
{
    const string soloId = "solo";
    var solo = CreateTrackedNode(soloId);
    solo.AddMember(soloId);

    solo.CampaignImmediately();

    solo.IsLeader.ShouldBeTrue();
    solo.Role.ShouldBe(RaftRole.Leader);
}
|
||||
|
||||
/// <summary>
/// Verifies that a follower with a 30-50ms election timeout has become a candidate, with
/// a term above zero and a vote for itself, 200ms after its election timer was started.
/// </summary>
[Fact]
public async Task Expired_timer_triggers_campaign_when_follower()
{
    var follower = CreateTrackedCluster(3)[0];

    // Tiny timeout window keeps the wait below short.
    follower.ElectionTimeoutMinMs = 30;
    follower.ElectionTimeoutMaxMs = 50;
    follower.Role.ShouldBe(RaftRole.Follower);

    follower.StartElectionTimer();

    // 200ms is well past the 50ms maximum timeout, so the timer has had time to fire.
    await Task.Delay(200);

    // Expect the timer callback to have started an election.
    follower.Role.ShouldBe(RaftRole.Candidate);
    follower.Term.ShouldBeGreaterThan(0);
    follower.TermState.VotedFor.ShouldBe(follower.Id);

    follower.StopElectionTimer();
}
|
||||
|
||||
/// <summary>
/// Verifies that a node which is already leader is still leader after its election timer
/// has been running for 200ms with a 30-50ms timeout window.
/// </summary>
[Fact]
public async Task Timer_does_not_trigger_campaign_when_leader()
{
    var nodes = CreateTrackedCluster(3);
    var node = nodes[0];

    // Make this node the leader first: start an election and hand it the votes
    // granted by the other two nodes.
    node.StartElection(nodes.Length);
    foreach (var voter in nodes.Skip(1))
    {
        node.ReceiveVote(voter.GrantVote(node.Term, node.Id), nodes.Length);
    }

    node.IsLeader.ShouldBeTrue();

    // Use very short timeouts
    node.ElectionTimeoutMinMs = 30;
    node.ElectionTimeoutMaxMs = 50;
    node.StartElectionTimer();

    // Wait well past the maximum timeout so the timer has had time to fire
    await Task.Delay(200);

    // The key assertion is that the node did not transition away from Leader
    // (in particular, not to Candidate).
    node.IsLeader.ShouldBeTrue();
    node.Role.ShouldBe(RaftRole.Leader);

    node.StopElectionTimer();
}
|
||||
|
||||
/// <summary>
/// Verifies that the election timer does not start another campaign on a node that is
/// already a candidate: after 200ms with a 30-50ms timeout window the node must still be
/// a candidate in the same term it campaigned in.
/// </summary>
[Fact]
public async Task Timer_does_not_trigger_campaign_when_candidate()
{
    var node = CreateTrackedNode("n1");
    node.AddMember("n1");
    node.AddMember("n2");
    node.AddMember("n3");

    // Start an election manually (becomes Candidate but not Leader since no quorum)
    node.StartElection(clusterSize: 3);
    node.Role.ShouldBe(RaftRole.Candidate);
    var termAfterElection = node.Term;

    // Use very short timeouts
    node.ElectionTimeoutMinMs = 30;
    node.ElectionTimeoutMaxMs = 50;
    node.StartElectionTimer();

    // Wait well past the maximum timeout so the timer has had time to fire
    await Task.Delay(200);

    // Timer should not trigger additional campaigns when already candidate
    // (the callback only triggers for Follower state). A repeat campaign would leave
    // the role as Candidate, so the unchanged term is what actually proves that no
    // further election was started.
    node.Role.ShouldBe(RaftRole.Candidate);
    node.Term.ShouldBe(termAfterElection);

    node.StopElectionTimer();
}
|
||||
|
||||
/// <summary>
/// Verifies that a new node reports a 150-300ms election timeout range and that both
/// bounds read back the values assigned to them.
/// </summary>
[Fact]
public void Election_timeout_range_is_configurable()
{
    var subject = CreateTrackedNode("n1");

    // Values reported by a freshly constructed node.
    subject.ElectionTimeoutMinMs.ShouldBe(150);
    subject.ElectionTimeoutMaxMs.ShouldBe(300);

    // Assigned values must be returned by the getters.
    subject.ElectionTimeoutMinMs = 500;
    subject.ElectionTimeoutMaxMs = 1000;

    subject.ElectionTimeoutMinMs.ShouldBe(500);
    subject.ElectionTimeoutMaxMs.ShouldBe(1000);
}
|
||||
|
||||
/// <summary>
/// Verifies that <c>StopElectionTimer</c> does not throw on a node whose election timer
/// was never started.
/// </summary>
[Fact]
public void StopElectionTimer_is_safe_when_no_timer_started()
{
    var idleNode = CreateTrackedNode("n1");

    // The test passes as long as this call completes without an exception.
    idleNode.StopElectionTimer();
}
|
||||
|
||||
/// <summary>
/// Verifies that calling <c>StopElectionTimer</c> twice in a row after starting the timer
/// does not throw.
/// </summary>
[Fact]
public void StopElectionTimer_can_be_called_multiple_times()
{
    var subject = CreateTrackedNode("n1");
    subject.StartElectionTimer();

    // First call stops the running timer; the second one must not throw.
    for (var call = 0; call < 2; call++)
    {
        subject.StopElectionTimer();
    }
}
|
||||
|
||||
/// <summary>
/// Verifies that a node receiving a term-1 heartbeat every 30ms (with a 50-80ms election
/// timeout window) is still a follower after eight heartbeats.
/// </summary>
[Fact]
public void ReceiveHeartbeat_resets_election_timeout()
{
    var follower = CreateTrackedCluster(3)[0];
    follower.ElectionTimeoutMinMs = 50;
    follower.ElectionTimeoutMaxMs = 80;
    follower.StartElectionTimer();

    // Regular heartbeats, each arriving before the 50ms minimum timeout can elapse.
    var heartbeatsLeft = 8;
    while (heartbeatsLeft-- > 0)
    {
        Thread.Sleep(30);
        follower.ReceiveHeartbeat(term: 1);
    }

    // The heartbeats are expected to have kept the node from campaigning.
    follower.Role.ShouldBe(RaftRole.Follower);
    follower.StopElectionTimer();
}
|
||||
|
||||
/// <summary>
/// Verifies that a node stays a follower while it receives heartbeats every 20ms (with a
/// 40-60ms election timeout window) and has become a candidate 200ms after the last one.
/// </summary>
[Fact]
public async Task Timer_fires_after_heartbeats_stop()
{
    var nodes = CreateTrackedCluster(3);
    var node = nodes[0];

    node.ElectionTimeoutMinMs = 40;
    node.ElectionTimeoutMaxMs = 60;
    node.StartElectionTimer();

    // Send a few heartbeats. The method is async, so wait with Task.Delay rather than
    // blocking the thread with Thread.Sleep.
    for (int i = 0; i < 3; i++)
    {
        await Task.Delay(20);
        node.ReceiveHeartbeat(term: 1);
    }

    node.Role.ShouldBe(RaftRole.Follower);

    // Stop sending heartbeats and wait for timer to fire
    await Task.Delay(200);

    // Should have started an election
    node.Role.ShouldBe(RaftRole.Candidate);
    node.StopElectionTimer();
}
|
||||
|
||||
/// <summary>
/// Verifies that a node with a started election timer can be disposed, and disposed a
/// second time, without throwing.
/// </summary>
[Fact]
public void Dispose_stops_election_timer()
{
    // Created directly (not via CreateTrackedNode) because this test disposes it itself.
    var disposable = new RaftNode("n1")
    {
        ElectionTimeoutMinMs = 30,
        ElectionTimeoutMaxMs = 50,
    };
    disposable.StartElectionTimer();

    // First dispose happens while the timer is running.
    disposable.Dispose();

    // A repeated dispose must be safe as well.
    disposable.Dispose();
}
|
||||
}
|
||||
Reference in New Issue
Block a user