Files
natsdotnet/tests/NATS.Server.Raft.Tests/Raft/RaftElectionTimerTests.cs
Joseph Doherty 88a82ee860 docs: add XML doc comments to server types and fix flaky test timings
Add XML doc comments to public properties across EventTypes, Connz, Varz,
NatsOptions, StreamConfig, IStreamStore, FileStore, MqttListener,
MqttSessionStore, MessageTraceContext, and JetStreamApiResponse. Fix flaky
tests by increasing timing margins (ResponseTracker expiry 1ms→50ms,
sleep 50ms→200ms) and document known flaky test patterns in tests.md.
2026-03-13 18:47:48 -04:00

271 lines
8.2 KiB
C#

using NATS.Server;
using NATS.Server.Raft;
using NATS.Server.TestUtilities;
namespace NATS.Server.Raft.Tests.Raft;
/// <summary>
/// Tests for election timeout management and campaign triggering in RaftNode.
/// Go reference: raft.go:1400-1450 (resetElectionTimeout), raft.go:1500-1550 (campaign logic).
/// </summary>
public class RaftElectionTimerTests : IDisposable
{
private readonly List<RaftNode> _nodesToDispose = [];
/// <summary>
/// Disposes every node recorded in <c>_nodesToDispose</c>.
/// </summary>
public void Dispose()
{
    _nodesToDispose.ForEach(tracked => tracked.Dispose());
}
/// <summary>
/// Creates a <see cref="RaftNode"/> with the given id and records it in
/// <c>_nodesToDispose</c> so that <see cref="Dispose"/> releases it.
/// </summary>
/// <param name="id">Identifier passed to the <see cref="RaftNode"/> constructor.</param>
/// <returns>The newly created node.</returns>
private RaftNode CreateTrackedNode(string id)
{
    RaftNode tracked = new(id);
    _nodesToDispose.Add(tracked);
    return tracked;
}
/// <summary>
/// Creates <paramref name="size"/> tracked nodes named <c>n1</c> .. <c>n{size}</c> and
/// calls <c>ConfigureCluster</c> on each of them with the full node array.
/// </summary>
/// <param name="size">Number of nodes to create.</param>
/// <returns>The created nodes, in creation order.</returns>
private RaftNode[] CreateTrackedCluster(int size)
{
    RaftNode[] cluster =
    [
        .. Enumerable.Range(1, size).Select(ordinal => CreateTrackedNode($"n{ordinal}")),
    ];

    // Every member is configured with the complete membership, itself included.
    for (var index = 0; index < cluster.Length; index++)
    {
        cluster[index].ConfigureCluster(cluster);
    }

    return cluster;
}
/// <summary>
/// Verifies that a node whose election timeout is reset every 30 ms, with a
/// 50-80 ms timeout range, is still a follower after five resets.
/// </summary>
[Fact]
[SlopwatchSuppress("SW004", "Testing election timer reset requires real delays to verify timer does not fire prematurely")]
public async Task ResetElectionTimeout_prevents_election_while_receiving_heartbeats()
{
    // Short timeout range so the whole test takes only a fraction of a second.
    var follower = CreateTrackedCluster(3)[0];
    follower.ElectionTimeoutMinMs = 50;
    follower.ElectionTimeoutMaxMs = 80;
    follower.StartElectionTimer();

    // Five resets, each issued 30 ms after the previous one (below the 50 ms minimum).
    var resetsRemaining = 5;
    while (resetsRemaining-- > 0)
    {
        await Task.Delay(30);
        follower.ResetElectionTimeout();
    }

    // The repeated resets are expected to have kept the node from campaigning.
    follower.Role.ShouldBe(RaftRole.Follower);
    follower.StopElectionTimer();
}
/// <summary>
/// Verifies that <c>CampaignImmediately</c> moves a follower at term 0 to
/// candidate at term 1 with a vote recorded for itself, without any timer running.
/// </summary>
[Fact]
public void CampaignImmediately_triggers_election_without_timer()
{
    var contender = CreateTrackedCluster(3)[0];

    // Preconditions: a fresh cluster member is a follower at term 0.
    contender.Role.ShouldBe(RaftRole.Follower);
    contender.Term.ShouldBe(0);

    contender.CampaignImmediately();

    // Postconditions: an election is in progress and the node voted for itself.
    contender.Role.ShouldBe(RaftRole.Candidate);
    contender.Term.ShouldBe(1);
    contender.TermState.VotedFor.ShouldBe(contender.Id);
}
/// <summary>
/// Verifies that a node whose only registered member is itself reports
/// leadership right after <c>CampaignImmediately</c>.
/// </summary>
[Fact]
public void CampaignImmediately_single_node_becomes_leader()
{
    var loneNode = CreateTrackedNode("solo");
    loneNode.AddMember("solo");

    loneNode.CampaignImmediately();

    loneNode.IsLeader.ShouldBeTrue();
    loneNode.Role.ShouldBe(RaftRole.Leader);
}
/// <summary>
/// Verifies that a follower whose election timer expires starts a campaign:
/// it becomes a candidate, its term is greater than zero and it has voted for itself.
/// </summary>
[Fact]
public async Task Expired_timer_triggers_campaign_when_follower()
{
    var nodes = CreateTrackedCluster(3);
    var node = nodes[0];

    // Use very short timeouts so the timer fires quickly.
    node.ElectionTimeoutMinMs = 30;
    node.ElectionTimeoutMaxMs = 50;
    node.Role.ShouldBe(RaftRole.Follower);

    node.StartElectionTimer();

    // Poll for the outcome instead of sleeping for a fixed interval: a fixed sleep
    // fails when the timer callback is delayed on a loaded machine, and otherwise
    // wastes time. The predicate covers all three observed values so the assertions
    // below cannot run while the campaign is only partially applied.
    await PollHelper.WaitOrThrowAsync(
        () => node.Role == RaftRole.Candidate
              && node.Term > 0
              && node.TermState.VotedFor == node.Id,
        "election timer expiry",
        timeoutMs: 5000);

    // The timer callback should have triggered an election.
    node.Role.ShouldBe(RaftRole.Candidate);
    node.Term.ShouldBeGreaterThan(0);
    node.TermState.VotedFor.ShouldBe(node.Id);
    node.StopElectionTimer();
}
/// <summary>
/// Verifies that an expiring election timer does not move a leader out of the
/// leader role.
/// </summary>
[Fact]
[SlopwatchSuppress("SW004", "Testing that leaders ignore election timer requires waiting for timer expiry to confirm no state transition")]
public async Task Timer_does_not_trigger_campaign_when_leader()
{
    var nodes = CreateTrackedCluster(3);
    var node = nodes[0];

    // Make this node the leader first: start an election and feed it the
    // vote responses produced by the other two nodes.
    node.StartElection(nodes.Length);
    foreach (var voter in nodes.Skip(1))
    {
        node.ReceiveVote(voter.GrantVote(node.Term, node.Id), nodes.Length);
    }

    node.IsLeader.ShouldBeTrue();

    // Use very short timeouts so the timer has ample time to expire below.
    node.ElectionTimeoutMinMs = 30;
    node.ElectionTimeoutMaxMs = 50;
    node.StartElectionTimer();

    // A real delay is required here: the test observes the absence of a transition.
    await Task.Delay(200);

    // The key assertion is that the node did not transition away from Leader.
    // The term is deliberately not asserted.
    node.IsLeader.ShouldBeTrue();
    node.Role.ShouldBe(RaftRole.Leader);
    node.StopElectionTimer();
}
/// <summary>
/// Verifies that an expiring election timer does not start another campaign on a
/// node that is already a candidate: the term recorded after the manual election
/// must be unchanged once the timer has had time to fire.
/// </summary>
[Fact]
[SlopwatchSuppress("SW004", "Testing that candidates ignore election timer requires waiting for timer expiry to confirm no state transition")]
public async Task Timer_does_not_trigger_campaign_when_candidate()
{
    var node = CreateTrackedNode("n1");
    node.AddMember("n1");
    node.AddMember("n2");
    node.AddMember("n3");

    // Start an election manually (becomes Candidate but not Leader since no quorum).
    node.StartElection(clusterSize: 3);
    node.Role.ShouldBe(RaftRole.Candidate);
    var termAfterElection = node.Term;

    // Use very short timeouts so the timer has ample time to expire below.
    node.ElectionTimeoutMinMs = 30;
    node.ElectionTimeoutMaxMs = 50;
    node.StartElectionTimer();

    // A real delay is required here: the test observes the absence of a transition.
    await Task.Delay(200);

    // Timer should not trigger additional campaigns when already candidate
    // (the callback only triggers for Follower state), so the term captured
    // before the timer started must still be current.
    node.Role.ShouldNotBe(RaftRole.Follower);
    node.Term.ShouldBe(termAfterElection);
    node.StopElectionTimer();
}
/// <summary>
/// Verifies the initial election timeout range (150-300 ms) and that both bounds
/// can be overwritten and read back.
/// </summary>
[Fact]
public void Election_timeout_range_is_configurable()
{
    var configurable = CreateTrackedNode("n1");

    // Values reported by a freshly constructed node.
    configurable.ElectionTimeoutMinMs.ShouldBe(150);
    configurable.ElectionTimeoutMaxMs.ShouldBe(300);

    // Overwrite both bounds (minimum first, then maximum) and read them back.
    (configurable.ElectionTimeoutMinMs, configurable.ElectionTimeoutMaxMs) = (500, 1000);

    configurable.ElectionTimeoutMinMs.ShouldBe(500);
    configurable.ElectionTimeoutMaxMs.ShouldBe(1000);
}
/// <summary>
/// Verifies that <c>StopElectionTimer</c> can be called on a node whose timer was
/// never started. The test passes when no exception is thrown.
/// </summary>
[Fact]
public void StopElectionTimer_is_safe_when_no_timer_started()
{
    // No StartElectionTimer call precedes this stop.
    CreateTrackedNode("n1").StopElectionTimer();
}
/// <summary>
/// Verifies that stopping the election timer twice in a row does not throw.
/// </summary>
[Fact]
public void StopElectionTimer_can_be_called_multiple_times()
{
    var timerOwner = CreateTrackedNode("n1");
    timerOwner.StartElectionTimer();

    // The first iteration stops the running timer; the second must not throw.
    for (var attempt = 0; attempt < 2; attempt++)
    {
        timerOwner.StopElectionTimer();
    }
}
/// <summary>
/// Verifies that a node receiving a heartbeat every 30 ms, with a 50-80 ms
/// election timeout range, is still a follower after eight heartbeats.
/// </summary>
[Fact]
[SlopwatchSuppress("SW004", "Testing heartbeat-driven timer reset requires real delays to simulate periodic heartbeat arrival")]
public async Task ReceiveHeartbeat_resets_election_timeout()
{
    var follower = CreateTrackedCluster(3)[0];
    follower.ElectionTimeoutMinMs = 50;
    follower.ElectionTimeoutMaxMs = 80;
    follower.StartElectionTimer();

    // Eight term-1 heartbeats, spaced 30 ms apart (below the 50 ms minimum timeout).
    for (var beat = 1; beat <= 8; beat++)
    {
        await Task.Delay(30);
        follower.ReceiveHeartbeat(term: 1);
    }

    // The regular heartbeats are expected to have kept the node from campaigning.
    follower.Role.ShouldBe(RaftRole.Follower);
    follower.StopElectionTimer();
}
/// <summary>
/// Verifies that a node stays a follower while heartbeats arrive and becomes a
/// candidate once they stop, using a 40-60 ms election timeout range.
/// </summary>
[Fact]
public async Task Timer_fires_after_heartbeats_stop()
{
    var follower = CreateTrackedCluster(3)[0];
    follower.ElectionTimeoutMinMs = 40;
    follower.ElectionTimeoutMaxMs = 60;
    follower.StartElectionTimer();

    // Phase 1: three term-1 heartbeats, 20 ms apart.
    var heartbeatsLeft = 3;
    while (heartbeatsLeft > 0)
    {
        await Task.Delay(20);
        follower.ReceiveHeartbeat(term: 1);
        heartbeatsLeft--;
    }

    follower.Role.ShouldBe(RaftRole.Follower);

    // Phase 2: no more heartbeats; poll (up to 5 s) until the node reports Candidate.
    await PollHelper.WaitOrThrowAsync(
        () => follower.Role == RaftRole.Candidate,
        "election timeout",
        timeoutMs: 5000);

    follower.Role.ShouldBe(RaftRole.Candidate);
    follower.StopElectionTimer();
}
/// <summary>
/// Verifies that disposing a node with a started election timer does not throw,
/// and that a second dispose is also safe.
/// </summary>
[Fact]
public void Dispose_stops_election_timer()
{
    // Created directly (not via the tracking helper) because this test disposes it itself.
    var disposable = new RaftNode("n1")
    {
        ElectionTimeoutMinMs = 30,
        ElectionTimeoutMaxMs = 50,
    };
    disposable.StartElectionTimer();

    // First dispose runs while the timer is started; the second checks repeat safety.
    disposable.Dispose();
    disposable.Dispose();
}
}