- CommitQueue<T>: channel-based queue for committed entries awaiting state machine application - RaftPeerState: tracks replication and health state (nextIndex, matchIndex, lastContact) - RaftNode: CommitIndex/ProcessedIndex tracking, election timer with randomized 150-300ms interval, peer state integration with heartbeat and replication updates - 52 new tests across RaftApplyQueueTests, RaftElectionTimerTests, RaftHealthTests
343 lines
10 KiB
C#
343 lines
10 KiB
C#
using NATS.Server.Raft;
|
|
|
|
namespace NATS.Server.Tests.Raft;
|
|
|
|
/// <summary>
|
|
/// Tests for RaftPeerState health classification and peer tracking in RaftNode.
|
|
/// Go reference: raft.go peer tracking (nextIndex, matchIndex, last contact, isCurrent).
|
|
/// </summary>
|
|
public class RaftHealthTests
|
|
{
|
|
// -- Helpers --
|
|
|
|
/// <summary>
/// Builds <paramref name="size"/> nodes named "n1", "n2", ... on one shared in-memory transport.
/// Each node is registered with the transport and then configured with the full member array.
/// </summary>
/// <param name="size">Number of nodes to create.</param>
/// <returns>The created nodes together with the transport they share.</returns>
private static (RaftNode[] nodes, InMemoryRaftTransport transport) CreateCluster(int size)
{
    var sharedTransport = new InMemoryRaftTransport();

    RaftNode[] members =
        (from ordinal in Enumerable.Range(1, size)
         select new RaftNode($"n{ordinal}", sharedTransport)).ToArray();

    // Register and configure in the same pass, one node at a time.
    foreach (var member in members)
    {
        sharedTransport.Register(member);
        member.ConfigureCluster(members);
    }

    return (members, sharedTransport);
}
|
|
|
|
/// <summary>
/// Starts an election on the first node and feeds it the vote response of every other node.
/// </summary>
/// <param name="nodes">All cluster members; the first one becomes the candidate.</param>
/// <returns>The candidate node (the tests treat it as the elected leader).</returns>
private static RaftNode ElectLeader(RaftNode[] nodes)
{
    var leader = nodes[0];
    leader.StartElection(nodes.Length);

    for (var i = 1; i < nodes.Length; i++)
    {
        var ballot = nodes[i].GrantVote(leader.Term, leader.Id);
        leader.ReceiveVote(ballot, nodes.Length);
    }

    return leader;
}
|
|
|
|
// -- RaftPeerState unit tests --
|
|
|
|
/// <summary>
/// A peer state created with only its id set reports NextIndex 1, MatchIndex 0 and Active true.
/// </summary>
[Fact]
public void PeerState_defaults_are_correct()
{
    var freshState = new RaftPeerState { PeerId = "n2" };

    freshState.PeerId.ShouldBe("n2");
    freshState.NextIndex.ShouldBe(1);
    freshState.MatchIndex.ShouldBe(0);
    freshState.Active.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// A peer contacted just now is current with respect to a five second timeout.
/// </summary>
[Fact]
public void IsCurrent_returns_true_when_within_timeout()
{
    var recentPeer = new RaftPeerState { PeerId = "n2" };
    recentPeer.LastContact = DateTime.UtcNow;

    var isCurrent = recentPeer.IsCurrent(TimeSpan.FromSeconds(5));

    isCurrent.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// A peer last contacted ten seconds ago is not current with respect to a five second timeout.
/// </summary>
[Fact]
public void IsCurrent_returns_false_when_stale()
{
    var stalePeer = new RaftPeerState { PeerId = "n2" };
    stalePeer.LastContact = DateTime.UtcNow.AddSeconds(-10);

    var isCurrent = stalePeer.IsCurrent(TimeSpan.FromSeconds(5));

    isCurrent.ShouldBeFalse();
}
|
|
|
|
/// <summary>
/// An active peer contacted just now is healthy with respect to a five second timeout.
/// </summary>
[Fact]
public void IsHealthy_returns_true_for_active_recent_peer()
{
    var activePeer = new RaftPeerState { PeerId = "n2", Active = true };
    activePeer.LastContact = DateTime.UtcNow;

    var isHealthy = activePeer.IsHealthy(TimeSpan.FromSeconds(5));

    isHealthy.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// A peer marked inactive is unhealthy even when it was contacted just now.
/// </summary>
[Fact]
public void IsHealthy_returns_false_for_inactive_peer()
{
    var inactivePeer = new RaftPeerState { PeerId = "n2", Active = false };
    inactivePeer.LastContact = DateTime.UtcNow;

    var isHealthy = inactivePeer.IsHealthy(TimeSpan.FromSeconds(5));

    isHealthy.ShouldBeFalse();
}
|
|
|
|
/// <summary>
/// An active peer last contacted ten seconds ago is unhealthy with respect to a five second timeout.
/// </summary>
[Fact]
public void IsHealthy_returns_false_for_stale_active_peer()
{
    var stalePeer = new RaftPeerState { PeerId = "n2", Active = true };
    stalePeer.LastContact = DateTime.UtcNow.AddSeconds(-10);

    var isHealthy = stalePeer.IsHealthy(TimeSpan.FromSeconds(5));

    isHealthy.ShouldBeFalse();
}
|
|
|
|
// -- Peer state initialization via ConfigureCluster --
|
|
|
|
/// <summary>
/// In a three node cluster, node "n1" tracks exactly "n2" and "n3" as peers and does not track itself.
/// </summary>
[Fact]
public void ConfigureCluster_initializes_peer_states()
{
    var self = CreateCluster(3).nodes[0];

    var tracked = self.GetPeerStates();

    // Two peers expected: the local node is not counted.
    tracked.Count.ShouldBe(2);

    tracked.ContainsKey("n2").ShouldBeTrue();
    tracked.ContainsKey("n3").ShouldBeTrue();

    // The node must not list itself as a peer.
    tracked.ContainsKey("n1").ShouldBeFalse();
}
|
|
|
|
/// <summary>
/// Every peer state created by cluster configuration starts with NextIndex 1, MatchIndex 0
/// and Active true.
/// </summary>
[Fact]
public void ConfigureCluster_sets_initial_peer_state_values()
{
    var (nodes, _) = CreateCluster(3);
    var peerStates = nodes[0].GetPeerStates();

    // Guard against a vacuous pass: with no peer states the loop below would assert nothing.
    peerStates.Count.ShouldBe(2);

    foreach (var (_, state) in peerStates)
    {
        state.NextIndex.ShouldBe(1);
        state.MatchIndex.ShouldBe(0);
        state.Active.ShouldBeTrue();
    }
}
|
|
|
|
/// <summary>
/// In a five node cluster the first node tracks four peers.
/// </summary>
[Fact]
public void ConfigureCluster_five_node_has_four_peers()
{
    var firstNode = CreateCluster(5).nodes[0];

    var peerCount = firstNode.GetPeerStates().Count;

    peerCount.ShouldBe(4);
}
|
|
|
|
// -- LastContact updates on heartbeat --
|
|
|
|
/// <summary>
/// A heartbeat attributed to the known peer "n2" moves that peer's LastContact forward.
/// </summary>
[Fact]
public void LastContact_updates_on_heartbeat_from_known_peer()
{
    var receiver = CreateCluster(3).nodes[0];
    var tracked = receiver.GetPeerStates();

    // Arrange: push n2's contact time five minutes into the past.
    var staleContact = DateTime.UtcNow.AddMinutes(-5);
    tracked["n2"].LastContact = staleContact;

    // Act: a heartbeat arrives from n2.
    receiver.ReceiveHeartbeat(term: 1, fromPeerId: "n2");

    // Assert: the recorded contact time is newer than the stale value.
    tracked["n2"].LastContact.ShouldBeGreaterThan(staleContact);
}
|
|
|
|
/// <summary>
/// A heartbeat attributed to a peer id outside the cluster must not throw, must not add
/// a peer state for that id, and must leave the contact times of the known peers untouched.
/// </summary>
[Fact]
public void LastContact_not_updated_for_unknown_peer()
{
    var (nodes, _) = CreateCluster(3);
    var node = nodes[0];

    // Pin every known peer to a recognisable stale timestamp so any update is detectable.
    var staleContact = DateTime.UtcNow.AddMinutes(-5);
    foreach (var (_, state) in node.GetPeerStates())
    {
        state.LastContact = staleContact;
    }

    // Heartbeat from unknown peer should not crash
    node.ReceiveHeartbeat(term: 1, fromPeerId: "unknown-node");

    // No state is created for the unknown sender.
    var peerStates = node.GetPeerStates();
    peerStates.ContainsKey("unknown-node").ShouldBeFalse();
    peerStates.Count.ShouldBe(2);

    // Existing peers should be unchanged
    foreach (var (_, state) in peerStates)
    {
        state.LastContact.ShouldBe(staleContact);
    }
}
|
|
|
|
/// <summary>
/// A heartbeat received without a sender id leaves the LastContact of peer "n2" unchanged.
/// </summary>
[Fact]
public void LastContact_not_updated_when_fromPeerId_null()
{
    var receiver = CreateCluster(3).nodes[0];

    // Arrange: give n2 a known contact time five minutes in the past.
    var pinnedContact = DateTime.UtcNow.AddMinutes(-5);
    receiver.GetPeerStates()["n2"].LastContact = pinnedContact;

    // Act: heartbeat with the sender id omitted.
    receiver.ReceiveHeartbeat(term: 1);

    // Assert: with no sender named, n2's contact time stays exactly as pinned.
    var contactAfter = receiver.GetPeerStates()["n2"].LastContact;
    contactAfter.ShouldBe(pinnedContact);
}
|
|
|
|
// -- IsCurrent on RaftNode --
|
|
|
|
/// <summary>
/// The node returned by the election helper reports itself current for a one second timeout.
/// </summary>
[Fact]
public void Leader_is_always_current()
{
    var leader = ElectLeader(CreateCluster(3).nodes);

    var isCurrent = leader.IsCurrent(TimeSpan.FromSeconds(1));

    isCurrent.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// A non-elected node in a freshly configured cluster reports itself current for a five second timeout.
/// </summary>
[Fact]
public void Follower_is_current_when_peer_recently_contacted()
{
    var follower = CreateCluster(3).nodes[1];

    // Peer states are initialized with current time by ConfigureCluster
    var isCurrent = follower.IsCurrent(TimeSpan.FromSeconds(5));

    isCurrent.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// A non-elected node whose peers were all last contacted ten minutes ago is not current
/// for a five second timeout.
/// </summary>
[Fact]
public void Follower_is_not_current_when_all_peers_stale()
{
    var follower = CreateCluster(3).nodes[1];

    // Age every tracked peer well beyond the timeout.
    foreach (var entry in follower.GetPeerStates())
    {
        entry.Value.LastContact = DateTime.UtcNow.AddMinutes(-10);
    }

    var isCurrent = follower.IsCurrent(TimeSpan.FromSeconds(5));

    isCurrent.ShouldBeFalse();
}
|
|
|
|
// -- IsHealthy on RaftNode --
|
|
|
|
/// <summary>
/// The elected node of a freshly configured cluster reports itself healthy for a five second timeout.
/// </summary>
[Fact]
public void Leader_is_healthy_when_majority_peers_responsive()
{
    var leader = ElectLeader(CreateCluster(3).nodes);

    // No contact times are altered here, so all peers count as recently contacted.
    var isHealthy = leader.IsHealthy(TimeSpan.FromSeconds(5));

    isHealthy.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// The elected node is unhealthy for a five second timeout once every peer's last contact
/// is ten minutes old.
/// </summary>
[Fact]
public void Leader_is_unhealthy_when_majority_peers_unresponsive()
{
    var leader = ElectLeader(CreateCluster(3).nodes);

    // Age every tracked peer well beyond the timeout.
    foreach (var entry in leader.GetPeerStates())
    {
        entry.Value.LastContact = DateTime.UtcNow.AddMinutes(-10);
    }

    var isHealthy = leader.IsHealthy(TimeSpan.FromSeconds(5));

    isHealthy.ShouldBeFalse();
}
|
|
|
|
/// <summary>
/// A non-elected node in a freshly configured cluster reports itself healthy for a five second timeout.
/// </summary>
[Fact]
public void Follower_is_healthy_when_leader_peer_responsive()
{
    var follower = CreateCluster(3).nodes[1];

    // At least one peer (simulating leader) is recent
    var isHealthy = follower.IsHealthy(TimeSpan.FromSeconds(5));

    isHealthy.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// A non-elected node is unhealthy for a five second timeout once every peer's last contact
/// is ten minutes old.
/// </summary>
[Fact]
public void Follower_is_unhealthy_when_no_peers_responsive()
{
    var follower = CreateCluster(3).nodes[1];

    // Age every tracked peer well beyond the timeout.
    foreach (var entry in follower.GetPeerStates())
    {
        entry.Value.LastContact = DateTime.UtcNow.AddMinutes(-10);
    }

    var isHealthy = follower.IsHealthy(TimeSpan.FromSeconds(5));

    isHealthy.ShouldBeFalse();
}
|
|
|
|
// -- MatchIndex / NextIndex tracking during replication --
|
|
|
|
/// <summary>
/// After one proposal on the elected node, every peer state has MatchIndex equal to the
/// returned index and NextIndex one past it.
/// </summary>
[Fact]
public async Task MatchIndex_and_NextIndex_update_during_replication()
{
    var leader = ElectLeader(CreateCluster(3).nodes);

    var proposedIndex = await leader.ProposeAsync("cmd-1", default);

    // Each tracked peer should reflect the proposed entry.
    foreach (var entry in leader.GetPeerStates())
    {
        var peer = entry.Value;
        peer.MatchIndex.ShouldBe(proposedIndex);
        peer.NextIndex.ShouldBe(proposedIndex + 1);
    }
}
|
|
|
|
/// <summary>
/// Three consecutive proposals return strictly increasing indices, and afterwards every
/// peer state has MatchIndex equal to the last index and NextIndex one past it.
/// </summary>
[Fact]
public async Task MatchIndex_advances_monotonically_with_proposals()
{
    var (nodes, _) = CreateCluster(3);
    var leader = ElectLeader(nodes);

    var index1 = await leader.ProposeAsync("cmd-1", default);
    var index2 = await leader.ProposeAsync("cmd-2", default);
    var index3 = await leader.ProposeAsync("cmd-3", default);

    // The "monotonically" part of the contract: each proposal lands at a higher index.
    index2.ShouldBeGreaterThan(index1);
    index3.ShouldBeGreaterThan(index2);

    // Peers end up matched to the most recent proposal.
    var peerStates = leader.GetPeerStates();
    foreach (var (_, state) in peerStates)
    {
        state.MatchIndex.ShouldBe(index3);
        state.NextIndex.ShouldBe(index3 + 1);
    }
}
|
|
|
|
/// <summary>
/// After a proposal on the elected node, every peer's LastContact is newer than two seconds
/// ago, even though it was set five minutes into the past beforehand.
/// </summary>
[Fact]
public async Task LastContact_updates_on_successful_replication()
{
    var leader = ElectLeader(CreateCluster(3).nodes);

    // Arrange: age every peer's contact time by five minutes.
    foreach (var entry in leader.GetPeerStates())
    {
        entry.Value.LastContact = DateTime.UtcNow.AddMinutes(-5);
    }

    // Act
    await leader.ProposeAsync("cmd-1", default);

    // Assert: each peer's contact time was refreshed to within the last two seconds.
    foreach (var entry in leader.GetPeerStates())
    {
        entry.Value.LastContact.ShouldBeGreaterThan(DateTime.UtcNow.AddSeconds(-2));
    }
}
|
|
|
|
/// <summary>
/// A node constructed with only an id, and never configured with a cluster, tracks no peers.
/// </summary>
[Fact]
public void Peer_states_empty_before_cluster_configuration()
{
    var unconfigured = new RaftNode("n1");

    var peerCount = unconfigured.GetPeerStates().Count;

    peerCount.ShouldBe(0);
}
|
|
|
|
/// <summary>
/// Reconfiguring node "n1" with a different member set ("m1".."m5") replaces its peer states:
/// the old peers "n2" and "n3" disappear and the five new members are tracked.
/// </summary>
[Fact]
public void ConfigureCluster_clears_previous_peer_states()
{
    var (nodes, transport) = CreateCluster(3);
    var node = nodes[0];
    node.GetPeerStates().Count.ShouldBe(2);

    // Reconfigure with 5 nodes
    var moreNodes = Enumerable.Range(1, 5)
        .Select(i => new RaftNode($"m{i}", transport))
        .ToArray();
    foreach (var n in moreNodes)
    {
        transport.Register(n);
    }

    node.ConfigureCluster(moreNodes);

    // The node's ID is "n1" while the new members are "m1"-"m5", so none of them is
    // excluded as "self": all 5 are peers.
    var peerStates = node.GetPeerStates();
    peerStates.Count.ShouldBe(5);

    // The peers from the original 3-node configuration must be gone.
    peerStates.ContainsKey("n2").ShouldBeFalse();
    peerStates.ContainsKey("n3").ShouldBeFalse();
}
|
|
}
|