Add RandomizedElectionTimeout() method to RaftNode returning TimeSpan in [ElectionTimeoutMinMs, ElectionTimeoutMaxMs) using TotalMilliseconds (not .Milliseconds component) to prevent synchronized elections after partitions. Make Random injectable for deterministic testing. Fix SendHeartbeatAsync stub in NatsRaftTransport and test-local transport implementations to satisfy the IRaftTransport interface added in Gap 8.7.
259 lines
10 KiB
C#
259 lines
10 KiB
C#
namespace NATS.Server.Raft;
|
|
|
|
/// <summary>
|
|
/// Routes RAFT RPCs over internal NATS subjects using the $NRG.* subject space.
|
|
///
|
|
/// In Go, RAFT nodes communicate by publishing binary-encoded messages to
|
|
/// subjects produced by <see cref="RaftSubjects"/>. Each group has dedicated
|
|
/// subjects for votes, append-entries, proposals, and remove-peer operations,
|
|
/// with ephemeral reply inboxes for responses.
|
|
///
|
|
/// This transport encodes outbound RPCs using <see cref="RaftWireFormat"/> types
|
|
/// and delegates the actual publish to a caller-supplied action so that the
|
|
/// transport itself has no dependency on the full NatsServer.
|
|
///
|
|
/// Go reference: golang/nats-server/server/raft.go:2192-2230 (subject setup),
|
|
/// 2854-2970 (send helpers: sendVoteRequest, sendAppendEntry, etc.)
|
|
/// </summary>
|
|
public sealed class NatsRaftTransport : IRaftTransport
|
|
{
|
|
private readonly InternalClient _client;
|
|
private readonly string _groupId;
|
|
|
|
/// <summary>
|
|
/// Delegate invoked to publish a binary payload to a NATS subject with an
|
|
/// optional reply subject. Maps to Go's <c>n.sendq</c> / <c>sendInternalMsg</c>
|
|
/// pattern.
|
|
/// Go: server/raft.go:2854 — n.sendq.push(...)
|
|
/// </summary>
|
|
private readonly Action<string, string?, ReadOnlyMemory<byte>> _publish;
|
|
|
|
/// <summary>
|
|
/// Initializes the transport for the given RAFT group.
|
|
/// </summary>
|
|
/// <param name="client">
|
|
/// The internal client that represents this node's identity within the
|
|
/// NATS subject namespace. Used to derive account scope.
|
|
/// </param>
|
|
/// <param name="groupId">
|
|
/// The RAFT group name. Appended to all $NRG.* subjects.
|
|
/// Go: server/raft.go:2210 — n.vsubj = fmt.Sprintf(raftVoteSubj, n.group)
|
|
/// </param>
|
|
/// <param name="publish">
|
|
/// Callback that publishes a message. Signature: (subject, replyTo, payload).
|
|
/// Callers typically wire this to the server's internal send path.
|
|
/// </param>
|
|
public NatsRaftTransport(
|
|
InternalClient client,
|
|
string groupId,
|
|
Action<string, string?, ReadOnlyMemory<byte>> publish)
|
|
{
|
|
ArgumentNullException.ThrowIfNull(client);
|
|
ArgumentException.ThrowIfNullOrEmpty(groupId);
|
|
ArgumentNullException.ThrowIfNull(publish);
|
|
|
|
_client = client;
|
|
_groupId = groupId;
|
|
_publish = publish;
|
|
}
|
|
|
|
/// <summary>The RAFT group ID this transport is scoped to.</summary>
|
|
public string GroupId => _groupId;
|
|
|
|
/// <summary>The internal client associated with this transport.</summary>
|
|
public InternalClient Client => _client;
|
|
|
|
/// <summary>
|
|
/// Sends an AppendEntry to each follower and collects results.
|
|
///
|
|
/// Encodes the entry using <see cref="RaftAppendEntryWire"/> and publishes to
|
|
/// <c>$NRG.AE.{group}</c> with a reply inbox at <c>$NRG.R.{replyId}</c>.
|
|
/// In a full clustered implementation responses would be awaited via
|
|
/// subscription; here the transport records one attempt per follower.
|
|
///
|
|
/// Go: server/raft.go:2854-2916 (sendAppendEntry / sendAppendEntryLocked)
|
|
/// </summary>
|
|
public Task<IReadOnlyList<AppendResult>> AppendEntriesAsync(
|
|
string leaderId,
|
|
IReadOnlyList<string> followerIds,
|
|
RaftLogEntry entry,
|
|
CancellationToken ct)
|
|
{
|
|
var appendSubject = RaftSubjects.AppendEntry(_groupId);
|
|
var replySubject = RaftSubjects.Reply(Guid.NewGuid().ToString("N")[..8]);
|
|
|
|
// Build wire message. Entries carry the command bytes encoded as Normal type.
|
|
var entryBytes = System.Text.Encoding.UTF8.GetBytes(entry.Command ?? string.Empty);
|
|
var wire = new RaftAppendEntryWire(
|
|
LeaderId: leaderId,
|
|
Term: (ulong)entry.Term,
|
|
Commit: 0,
|
|
PrevTerm: 0,
|
|
PrevIndex: (ulong)(entry.Index - 1),
|
|
Entries: [new RaftEntryWire(RaftEntryType.Normal, entryBytes)],
|
|
LeaderTerm: (ulong)entry.Term);
|
|
|
|
var payload = wire.Encode();
|
|
_publish(appendSubject, replySubject, payload);
|
|
|
|
// Build results — one entry per follower indicating the publish was dispatched.
|
|
// Full result tracking (awaiting replies on replySubject) would be layered
|
|
// above the transport; this matches Go's fire-and-collect pattern where
|
|
// responses arrive asynchronously on the reply subject.
|
|
var results = new List<AppendResult>(followerIds.Count);
|
|
foreach (var followerId in followerIds)
|
|
results.Add(new AppendResult { FollowerId = followerId, Success = true });
|
|
|
|
return Task.FromResult<IReadOnlyList<AppendResult>>(results);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Sends a VoteRequest to a single voter and returns a <see cref="VoteResponse"/>.
|
|
///
|
|
/// Encodes the request using <see cref="RaftVoteRequestWire"/> and publishes to
|
|
/// <c>$NRG.V.{group}</c> with a reply inbox at <c>$NRG.R.{replyId}</c>.
|
|
///
|
|
/// Go: server/raft.go:3594-3630 (requestVote / sendVoteRequest)
|
|
/// </summary>
|
|
public Task<VoteResponse> RequestVoteAsync(
|
|
string candidateId,
|
|
string voterId,
|
|
VoteRequest request,
|
|
CancellationToken ct)
|
|
{
|
|
var voteSubject = RaftSubjects.Vote(_groupId);
|
|
var replySubject = RaftSubjects.Reply(Guid.NewGuid().ToString("N")[..8]);
|
|
|
|
var wire = new RaftVoteRequestWire(
|
|
Term: (ulong)request.Term,
|
|
LastTerm: 0,
|
|
LastIndex: 0,
|
|
CandidateId: string.IsNullOrEmpty(request.CandidateId) ? candidateId : request.CandidateId);
|
|
|
|
var payload = wire.Encode();
|
|
_publish(voteSubject, replySubject, payload);
|
|
|
|
// A full async round-trip would subscribe to replySubject and await
|
|
// a RaftVoteResponseWire reply. The transport layer records the dispatch;
|
|
// callers compose the awaiting layer on top (matches Go's vote channel).
|
|
return Task.FromResult(new VoteResponse { Granted = false });
|
|
}
|
|
|
|
/// <summary>
|
|
/// Sends a snapshot to a follower for installation.
|
|
///
|
|
/// Publishes snapshot data to a catchup reply subject
|
|
/// <c>$NRG.CR.{id}</c>. In Go, snapshot transfer happens over a dedicated
|
|
/// catchup inbox negotiated out-of-band.
|
|
///
|
|
/// Go: server/raft.go:3247 (buildSnapshotAppendEntry),
|
|
/// raft.go:2168 — raftCatchupReply = "$NRG.CR.%s"
|
|
/// </summary>
|
|
public Task InstallSnapshotAsync(
|
|
string leaderId,
|
|
string followerId,
|
|
RaftSnapshot snapshot,
|
|
CancellationToken ct)
|
|
{
|
|
var catchupSubject = RaftSubjects.CatchupReply(Guid.NewGuid().ToString("N")[..8]);
|
|
|
|
// Encode snapshot as an AppendEntry carrying an OldSnapshot entry.
|
|
var wire = new RaftAppendEntryWire(
|
|
LeaderId: leaderId,
|
|
Term: (ulong)snapshot.LastIncludedTerm,
|
|
Commit: (ulong)snapshot.LastIncludedIndex,
|
|
PrevTerm: 0,
|
|
PrevIndex: (ulong)(snapshot.LastIncludedIndex - 1),
|
|
Entries: [new RaftEntryWire(RaftEntryType.OldSnapshot, snapshot.Data)]);
|
|
|
|
var payload = wire.Encode();
|
|
_publish(catchupSubject, null, payload);
|
|
|
|
return Task.CompletedTask;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Forwards a proposal to the current leader.
|
|
///
|
|
/// Publishes raw entry bytes to <c>$NRG.P.{group}</c>.
|
|
///
|
|
/// Go: server/raft.go:949 — ForwardProposal → n.sendq.push to n.psubj
|
|
/// </summary>
|
|
public void ForwardProposal(ReadOnlyMemory<byte> entry)
|
|
{
|
|
var proposalSubject = RaftSubjects.Proposal(_groupId);
|
|
_publish(proposalSubject, null, entry);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Sends a remove-peer proposal to the group leader.
|
|
///
|
|
/// Publishes to <c>$NRG.RP.{group}</c>.
|
|
///
|
|
/// Go: server/raft.go:986 — ProposeRemovePeer → n.sendq.push to n.rpsubj
|
|
/// </summary>
|
|
public void ProposeRemovePeer(string peer)
|
|
{
|
|
var removePeerSubject = RaftSubjects.RemovePeer(_groupId);
|
|
var payload = System.Text.Encoding.UTF8.GetBytes(peer);
|
|
_publish(removePeerSubject, null, payload);
|
|
}
|
|
|
|
/// <summary>
|
|
/// Sends a TimeoutNow RPC to the target follower, asking it to immediately
|
|
/// start an election to facilitate leadership transfer.
|
|
///
|
|
/// Publishes a <see cref="RaftTimeoutNowWire"/>-encoded payload to
|
|
/// <c>$NRG.TN.{group}</c>. The target node's message handler decodes
|
|
/// it and calls <see cref="RaftNode.ReceiveTimeoutNow"/>.
|
|
///
|
|
/// Go reference: raft.go sendTimeoutNow
|
|
/// </summary>
|
|
public Task SendTimeoutNowAsync(string leaderId, string targetId, ulong term, CancellationToken ct)
|
|
{
|
|
_ = targetId;
|
|
var subject = RaftSubjects.TimeoutNow(_groupId);
|
|
var wire = new RaftTimeoutNowWire(Term: term, LeaderId: leaderId);
|
|
_publish(subject, null, wire.Encode());
|
|
return Task.CompletedTask;
|
|
}
|
|
|
|
/// <summary>
|
|
/// Sends heartbeat RPCs to all listed followers over NATS to confirm quorum for
|
|
/// linearizable reads. Publishes an empty AppendEntry to each follower's heartbeat
|
|
/// subject and invokes <paramref name="onAck"/> for each that is considered reachable
|
|
/// (fire-and-forget in this transport; ACK is optimistic).
|
|
///
|
|
/// Go reference: raft.go — leader sends empty AppendEntries to confirm quorum for reads.
|
|
/// </summary>
|
|
public Task SendHeartbeatAsync(
|
|
string leaderId,
|
|
IReadOnlyList<string> followerIds,
|
|
int term,
|
|
Action<string> onAck,
|
|
CancellationToken ct)
|
|
{
|
|
var appendSubject = RaftSubjects.AppendEntry(_groupId);
|
|
|
|
foreach (var followerId in followerIds)
|
|
{
|
|
// Encode a heartbeat as an empty AppendEntry (no log entries).
|
|
var wire = new RaftAppendEntryWire(
|
|
LeaderId: leaderId,
|
|
Term: (ulong)term,
|
|
Commit: 0,
|
|
PrevTerm: 0,
|
|
PrevIndex: 0,
|
|
Entries: [],
|
|
LeaderTerm: (ulong)term);
|
|
|
|
_publish(appendSubject, null, wire.Encode());
|
|
|
|
// Optimistically acknowledge — a full implementation would await replies.
|
|
onAck(followerId);
|
|
}
|
|
|
|
return Task.CompletedTask;
|
|
}
|
|
}
|