Files
natsdotnet/src/NATS.Server/Raft/RaftWireFormat.cs
Joseph Doherty 824e0b3607 feat(raft): add membership proposals, snapshot checkpoints, and log compaction (B4+B5+B6)
- ProposeAddPeerAsync/ProposeRemovePeerAsync: single-change-at-a-time membership
  changes through RAFT consensus (Go ref: raft.go:961-1019)
- RaftLog.Compact: removes entries up to given index for log compaction
- CreateSnapshotCheckpointAsync: creates snapshot and compacts log in one operation
- DrainAndReplaySnapshotAsync: drains commit queue, installs snapshot, resets indices
- Pre-vote protocol skipped (Go NATS doesn't implement it either)
- 23 new tests in RaftMembershipAndSnapshotTests
2026-02-24 17:08:59 -05:00

518 lines
19 KiB
C#

using System.Buffers.Binary;
using System.Text;
namespace NATS.Server.Raft;
// Binary wire format types matching Go's raft.go encoding exactly.
// Go reference: golang/nats-server/server/raft.go
//
// All integers are little-endian. ID fields are exactly 8 bytes, zero-padded
// if shorter (or truncated if longer), matching Go's idLen = 8 constant.
// Go: server/raft.go:2756 — const idLen = 8
/// <summary>
/// Byte sizes of the fixed-width parts of the RAFT wire format.
/// Go: server/raft.go:2756-2757
/// </summary>
internal static class RaftWireConstants
{
    /// <summary>
    /// Width of every peer/leader/candidate ID slot on the wire.
    /// Go: server/raft.go:2756 — const idLen = 8
    /// </summary>
    public const int IdLen = 8;

    /// <summary>
    /// Exact size of a VoteRequest: three uint64 fields followed by one ID slot.
    /// Go: server/raft.go:4558 — const voteRequestLen = 24 + idLen = 32
    /// </summary>
    public const int VoteRequestLen = (3 * sizeof(ulong)) + IdLen; // 32

    /// <summary>
    /// Exact size of a VoteResponse: one uint64, one 8-byte ID slot and one flags byte.
    /// Go: server/raft.go:4737 — const voteResponseLen = 8 + 8 + 1 = 17
    /// </summary>
    public const int VoteResponseLen = sizeof(ulong) + IdLen + sizeof(byte); // 17

    /// <summary>
    /// Size of the AppendEntry header (no entries): one ID slot, four uint64 fields
    /// and the uint16 entry count. Shorter messages cannot be decoded.
    /// Go: server/raft.go:2660 — const appendEntryBaseLen = idLen + 4*8 + 2 = 42
    /// </summary>
    public const int AppendEntryBaseLen = IdLen + (4 * sizeof(ulong)) + sizeof(ushort); // 42

    /// <summary>
    /// Exact size of an AppendEntryResponse: two uint64 fields, one 8-byte ID slot
    /// and one success byte.
    /// Go: server/raft.go:2757 — const appendEntryResponseLen = 24 + 1 = 25
    /// </summary>
    public const int AppendEntryResponseLen = (2 * sizeof(ulong)) + IdLen + sizeof(byte); // 25
}
/// <summary>
/// Type tag stored in the single type byte of each encoded log entry.
/// The numeric values are fixed because they travel on the wire; they mirror
/// Go's EntryType constants.
/// Go: server/raft.go:2607-2618
/// </summary>
public enum RaftEntryType : byte
{
    Normal         = 0,
    OldSnapshot    = 1,
    PeerState      = 2,
    AddPeer        = 3,
    RemovePeer     = 4,
    LeaderTransfer = 5,
    Snapshot       = 6,
}
/// <summary>
/// One RAFT log entry as carried inside the body of an AppendEntry message.
/// Inline wire layout:
///   [4] size   uint32 LE — equals 1 + len(Data)
///   [1] type   byte
///   [*] data   raw bytes
/// Go: server/raft.go:2641-2644 (Entry struct), 2699-2704 (encode loop)
/// </summary>
/// <remarks>
/// The compiler-generated record equality compares <c>Data</c> by array
/// reference, not by content.
/// </remarks>
/// <param name="Type">Entry type tag written as the single type byte.</param>
/// <param name="Data">Raw payload bytes that follow the type byte.</param>
public readonly record struct RaftEntryWire(
    RaftEntryType Type,
    byte[] Data);
/// <summary>
/// RAFT VoteRequest in its binary wire form.
/// Fixed 32-byte layout (little-endian):
///   [0..7]   term        uint64
///   [8..15]  lastTerm    uint64
///   [16..23] lastIndex   uint64
///   [24..31] candidateId 8-byte ASCII, zero-padded
/// Go: server/raft.go:4549-4583 (voteRequest struct, encode, decodeVoteRequest)
/// </summary>
/// <param name="Term">Value of the term field (bytes 0..7).</param>
/// <param name="LastTerm">Value of the lastTerm field (bytes 8..15).</param>
/// <param name="LastIndex">Value of the lastIndex field (bytes 16..23).</param>
/// <param name="CandidateId">Candidate ID stored in the trailing 8-byte ID slot.</param>
public readonly record struct RaftVoteRequestWire(
    ulong Term,
    ulong LastTerm,
    ulong LastIndex,
    string CandidateId)
{
    /// <summary>
    /// Serialises this VoteRequest into a freshly allocated 32-byte buffer.
    /// Go: server/raft.go:4560-4568 — voteRequest.encode()
    /// </summary>
    /// <returns>The encoded message.</returns>
    public byte[] Encode()
    {
        var encoded = new byte[RaftWireConstants.VoteRequestLen];
        Span<byte> cursor = encoded;

        BinaryPrimitives.WriteUInt64LittleEndian(cursor, Term);
        BinaryPrimitives.WriteUInt64LittleEndian(cursor.Slice(8), LastTerm);
        BinaryPrimitives.WriteUInt64LittleEndian(cursor.Slice(16), LastIndex);
        RaftWireHelpers.WriteId(cursor.Slice(24), CandidateId);

        return encoded;
    }

    /// <summary>
    /// Parses a VoteRequest from <paramref name="msg"/>.
    /// Go: server/raft.go:4571-4583 — decodeVoteRequest()
    /// </summary>
    /// <param name="msg">The raw message; must be exactly 32 bytes long.</param>
    /// <returns>The decoded request.</returns>
    /// <exception cref="ArgumentException">The span is not exactly 32 bytes long.</exception>
    public static RaftVoteRequestWire Decode(ReadOnlySpan<byte> msg)
    {
        if (msg.Length == RaftWireConstants.VoteRequestLen)
        {
            var term = BinaryPrimitives.ReadUInt64LittleEndian(msg);
            var lastTerm = BinaryPrimitives.ReadUInt64LittleEndian(msg.Slice(8));
            var lastIndex = BinaryPrimitives.ReadUInt64LittleEndian(msg.Slice(16));
            var candidateId = RaftWireHelpers.ReadId(msg.Slice(24));
            return new RaftVoteRequestWire(term, lastTerm, lastIndex, candidateId);
        }

        throw new ArgumentException(
            $"VoteRequest requires exactly {RaftWireConstants.VoteRequestLen} bytes, got {msg.Length}.",
            nameof(msg));
    }
}
/// <summary>
/// RAFT VoteResponse in its binary wire form.
/// Fixed 17-byte layout (little-endian):
///   [0..7]  term   uint64
///   [8..15] peer   8-byte ASCII, zero-padded
///   [16]    flags  bit 0 = granted, bit 1 = empty-log marker
/// Go: server/raft.go:4729-4762 (voteResponse struct, encode, decodeVoteResponse)
/// </summary>
/// <param name="Term">Value of the term field (bytes 0..7).</param>
/// <param name="PeerId">Peer ID stored in the 8-byte ID slot at bytes 8..15.</param>
/// <param name="Granted">Mapped to bit 0 of the flags byte.</param>
/// <param name="Empty">Mapped to bit 1 of the flags byte (empty-log marker).</param>
public readonly record struct RaftVoteResponseWire(
    ulong Term,
    string PeerId,
    bool Granted,
    bool Empty = false)
{
    // Bit positions inside the flags byte at offset 16.
    private const int GrantedBit = 1;
    private const int EmptyBit = 2;

    /// <summary>
    /// Serialises this VoteResponse into a freshly allocated 17-byte buffer.
    /// Go: server/raft.go:4739-4751 — voteResponse.encode()
    /// </summary>
    /// <returns>The encoded message.</returns>
    public byte[] Encode()
    {
        var encoded = new byte[RaftWireConstants.VoteResponseLen];
        var span = encoded.AsSpan();

        BinaryPrimitives.WriteUInt64LittleEndian(span, Term);
        RaftWireHelpers.WriteId(span.Slice(8), PeerId);
        encoded[16] = (byte)((Granted ? GrantedBit : 0) | (Empty ? EmptyBit : 0));

        return encoded;
    }

    /// <summary>
    /// Parses a VoteResponse from <paramref name="msg"/>.
    /// Go: server/raft.go:4753-4762 — decodeVoteResponse()
    /// </summary>
    /// <param name="msg">The raw message; must be exactly 17 bytes long.</param>
    /// <returns>The decoded response.</returns>
    /// <exception cref="ArgumentException">The span is not exactly 17 bytes long.</exception>
    public static RaftVoteResponseWire Decode(ReadOnlySpan<byte> msg)
    {
        if (msg.Length == RaftWireConstants.VoteResponseLen)
        {
            var term = BinaryPrimitives.ReadUInt64LittleEndian(msg);
            var peerId = RaftWireHelpers.ReadId(msg.Slice(8));
            int flags = msg[16];
            var granted = (flags & GrantedBit) != 0;
            var empty = (flags & EmptyBit) != 0;
            return new RaftVoteResponseWire(term, peerId, granted, empty);
        }

        throw new ArgumentException(
            $"VoteResponse requires exactly {RaftWireConstants.VoteResponseLen} bytes, got {msg.Length}.",
            nameof(msg));
    }
}
/// <summary>
/// Binary wire encoding of a RAFT AppendEntry message (variable length).
/// Layout (little-endian):
///   [0..7]   leaderId    8-byte ASCII, zero-padded
///   [8..15]  term        uint64
///   [16..23] commit      uint64
///   [24..31] pterm       uint64
///   [32..39] pindex      uint64
///   [40..41] entryCount  uint16
///   [42+]    entries     each: [4:size uint32][1:type][data...]
///                        where size = 1 + len(data)
///   [tail]   leaderTerm  uvarint (appended after entries; old nodes ignore it)
/// Go: server/raft.go:2557-2569 (appendEntry struct), 2662-2746 (encode/decode)
/// </summary>
/// <param name="LeaderId">Leader ID stored in the leading 8-byte ID slot.</param>
/// <param name="Term">Value of the term field.</param>
/// <param name="Commit">Value of the commit field.</param>
/// <param name="PrevTerm">Value of the pterm field.</param>
/// <param name="PrevIndex">Value of the pindex field.</param>
/// <param name="Entries">Entries carried by the message, in wire order.</param>
/// <param name="LeaderTerm">Value of the trailing leaderTerm uvarint.</param>
public readonly record struct RaftAppendEntryWire(
    string LeaderId,
    ulong Term,
    ulong Commit,
    ulong PrevTerm,
    ulong PrevIndex,
    IReadOnlyList<RaftEntryWire> Entries,
    ulong LeaderTerm = 0)
{
    // Width of the uint32 size prefix that precedes every entry.
    private const int EntrySizePrefixLen = sizeof(uint);

    // Smallest possible encoded entry: size prefix plus the type byte (no data).
    private const int MinEntryLen = EntrySizePrefixLen + 1;

    // Scratch size for the leaderTerm uvarint; WriteUvarint emits at most 10 bytes.
    private const int MaxUvarintLen = 10;

    /// <summary>
    /// Encodes this AppendEntry to a byte array.
    /// A null <c>Entries</c> list (as in a default-initialised struct) is encoded as
    /// zero entries, and an entry whose <c>Data</c> is null is encoded with no data.
    /// Go: server/raft.go:2662-2711 — appendEntry.encode()
    /// </summary>
    /// <returns>The encoded message.</returns>
    /// <exception cref="ArgumentException">More than 65535 entries are present.</exception>
    public byte[] Encode()
    {
        // default(RaftAppendEntryWire) leaves Entries null; treat that as an empty batch
        // rather than failing with a NullReferenceException.
        IReadOnlyList<RaftEntryWire> entries = Entries ?? Array.Empty<RaftEntryWire>();
        if (entries.Count > ushort.MaxValue)
        {
            throw new ArgumentException($"Too many entries: {entries.Count} exceeds uint16 max.", nameof(Entries));
        }

        // Calculate total entry data size.
        // Go: server/raft.go:2670-2678 — elen += ulen + 1 + 4
        var elen = 0;
        foreach (var e in entries)
        {
            elen += EntrySizePrefixLen + 1 + (e.Data?.Length ?? 0);
        }

        // Encode leaderTerm as uvarint up front so its length is known before allocating.
        // Go: server/raft.go:2681-2682 — binary.PutUvarint(_lterm[:], ae.lterm)
        Span<byte> ltermBuf = stackalloc byte[MaxUvarintLen];
        var ltermLen = RaftWireHelpers.WriteUvarint(ltermBuf, LeaderTerm);

        var buf = new byte[RaftWireConstants.AppendEntryBaseLen + elen + ltermLen];
        var span = buf.AsSpan();

        // Go: server/raft.go:2693-2698 — copy leader and write fixed fields
        RaftWireHelpers.WriteId(span, LeaderId);
        BinaryPrimitives.WriteUInt64LittleEndian(span[8..], Term);
        BinaryPrimitives.WriteUInt64LittleEndian(span[16..], Commit);
        BinaryPrimitives.WriteUInt64LittleEndian(span[24..], PrevTerm);
        BinaryPrimitives.WriteUInt64LittleEndian(span[32..], PrevIndex);
        BinaryPrimitives.WriteUInt16LittleEndian(span[40..], (ushort)entries.Count);

        // Go: server/raft.go:2699-2705 — encode each entry
        var pos = RaftWireConstants.AppendEntryBaseLen;
        foreach (var e in entries)
        {
            // A null array converts to an empty span, so a default entry encodes as size 1.
            ReadOnlySpan<byte> data = e.Data;

            // size = 1 (type) + len(data)
            // Go: server/raft.go:2702 — le.AppendUint32(buf, uint32(1+len(e.Data)))
            BinaryPrimitives.WriteUInt32LittleEndian(span[pos..], (uint)(1 + data.Length));
            pos += EntrySizePrefixLen;
            buf[pos++] = (byte)e.Type;
            data.CopyTo(span[pos..]);
            pos += data.Length;
        }

        // Append leaderTerm uvarint.
        // Go: server/raft.go:2709 — buf = append(buf, lterm...)
        ltermBuf[..ltermLen].CopyTo(span[pos..]);
        return buf;
    }

    /// <summary>
    /// Decodes an AppendEntry from a span.
    /// Go: server/raft.go:2714-2746 — decodeAppendEntry()
    /// </summary>
    /// <param name="msg">The raw message bytes.</param>
    /// <returns>The decoded message. <c>LeaderTerm</c> is 0 when no valid uvarint follows the entries.</returns>
    /// <exception cref="ArgumentException">
    /// The buffer is shorter than the 42-byte header, ends before a declared entry's
    /// size prefix, or an entry's declared size is zero or exceeds the remaining bytes.
    /// </exception>
    public static RaftAppendEntryWire Decode(ReadOnlySpan<byte> msg)
    {
        if (msg.Length < RaftWireConstants.AppendEntryBaseLen)
        {
            throw new ArgumentException(
                $"AppendEntry requires at least {RaftWireConstants.AppendEntryBaseLen} bytes, got {msg.Length}.",
                nameof(msg));
        }

        // Go: server/raft.go:2721 — ae := newAppendEntry(string(msg[:idLen]), ...)
        var leaderId = RaftWireHelpers.ReadId(msg);
        var term = BinaryPrimitives.ReadUInt64LittleEndian(msg[8..]);
        var commit = BinaryPrimitives.ReadUInt64LittleEndian(msg[16..]);
        var pterm = BinaryPrimitives.ReadUInt64LittleEndian(msg[24..]);
        var pindex = BinaryPrimitives.ReadUInt64LittleEndian(msg[32..]);

        // Go: server/raft.go:2725 — ne, ri := int(le.Uint16(msg[40:])), uint64(42)
        var entryCount = BinaryPrimitives.ReadUInt16LittleEndian(msg[40..]);
        var ri = RaftWireConstants.AppendEntryBaseLen;

        // The count comes from the peer. Cap the pre-allocation by how many minimal
        // entries the buffer could physically hold so a tiny message cannot force a
        // large allocation.
        var maxPossibleEntries = (msg.Length - ri) / MinEntryLen;
        var entries = new List<RaftEntryWire>(Math.Min(entryCount, maxPossibleEntries));

        // Go: server/raft.go:2726-2737 — decode entries loop
        for (var i = 0; i < entryCount; i++)
        {
            // The whole 4-byte size prefix must be present before it is read.
            if (msg.Length - ri < EntrySizePrefixLen)
            {
                throw new ArgumentException("AppendEntry buffer truncated while reading entries.", nameof(msg));
            }

            var size = BinaryPrimitives.ReadUInt32LittleEndian(msg[ri..]);
            ri += EntrySizePrefixLen;

            // Compare as unsigned against the bytes that remain: adding the size to the
            // offset could wrap a signed int and slip past the bounds check.
            if (size == 0 || size > (uint)(msg.Length - ri))
            {
                throw new ArgumentException("AppendEntry entry size is out of bounds.", nameof(msg));
            }

            var ml = (int)size; // safe: size <= remaining bytes <= int.MaxValue
            var entryType = (RaftEntryType)msg[ri];
            var data = msg.Slice(ri + 1, ml - 1).ToArray();
            entries.Add(new RaftEntryWire(entryType, data));
            ri += ml;
        }

        // Decode optional leaderTerm uvarint from tail bytes; keep 0 unless a value
        // was actually decoded.
        // Go: server/raft.go:2739-2743 — if lterm, n := binary.Uvarint(msg[ri:]); n > 0 ...
        ulong lterm = 0;
        if (ri < msg.Length && RaftWireHelpers.ReadUvarint(msg[ri..], out var decodedLterm) > 0)
        {
            lterm = decodedLterm;
        }

        return new RaftAppendEntryWire(
            LeaderId: leaderId,
            Term: term,
            Commit: commit,
            PrevTerm: pterm,
            PrevIndex: pindex,
            Entries: entries,
            LeaderTerm: lterm);
    }
}
/// <summary>
/// RAFT AppendEntryResponse in its binary wire form.
/// Fixed 25-byte layout (little-endian):
///   [0..7]   term    uint64
///   [8..15]  index   uint64
///   [16..23] peerId  8-byte ASCII, zero-padded
///   [24]     success 0 or 1
/// Go: server/raft.go:2760-2817 (appendEntryResponse struct, encode, decodeAppendEntryResponse)
/// </summary>
/// <param name="Term">Value of the term field (bytes 0..7).</param>
/// <param name="Index">Value of the index field (bytes 8..15).</param>
/// <param name="PeerId">Peer ID stored in the 8-byte ID slot at bytes 16..23.</param>
/// <param name="Success">Written as 1 (true) or 0 (false) in the final byte.</param>
public readonly record struct RaftAppendEntryResponseWire(
    ulong Term,
    ulong Index,
    string PeerId,
    bool Success)
{
    /// <summary>
    /// Serialises this AppendEntryResponse into a freshly allocated 25-byte buffer.
    /// Go: server/raft.go:2777-2794 — appendEntryResponse.encode()
    /// </summary>
    /// <returns>The encoded message.</returns>
    public byte[] Encode()
    {
        var encoded = new byte[RaftWireConstants.AppendEntryResponseLen];
        Span<byte> span = encoded;

        BinaryPrimitives.WriteUInt64LittleEndian(span, Term);
        BinaryPrimitives.WriteUInt64LittleEndian(span.Slice(8), Index);
        RaftWireHelpers.WriteId(span.Slice(16), PeerId);

        // The buffer starts zeroed, so only the "true" case needs a write.
        if (Success)
        {
            encoded[24] = 1;
        }

        return encoded;
    }

    /// <summary>
    /// Parses an AppendEntryResponse from <paramref name="msg"/>.
    /// Go: server/raft.go:2799-2817 — decodeAppendEntryResponse()
    /// </summary>
    /// <param name="msg">The raw message; must be exactly 25 bytes long.</param>
    /// <returns>The decoded response; <c>Success</c> is true only when byte 24 equals 1.</returns>
    /// <exception cref="ArgumentException">The span is not exactly 25 bytes long.</exception>
    public static RaftAppendEntryResponseWire Decode(ReadOnlySpan<byte> msg)
    {
        if (msg.Length == RaftWireConstants.AppendEntryResponseLen)
        {
            var term = BinaryPrimitives.ReadUInt64LittleEndian(msg);
            var index = BinaryPrimitives.ReadUInt64LittleEndian(msg.Slice(8));
            var peerId = RaftWireHelpers.ReadId(msg.Slice(16));
            // Go: server/raft.go:2815 — ar.success = msg[24] == 1
            var success = msg[24] == 1;
            return new RaftAppendEntryResponseWire(term, index, peerId, success);
        }

        throw new ArgumentException(
            $"AppendEntryResponse requires exactly {RaftWireConstants.AppendEntryResponseLen} bytes, got {msg.Length}.",
            nameof(msg));
    }
}
/// <summary>
/// RAFT Pre-Vote request in its binary wire form.
/// Uses the same fixed 32-byte little-endian layout as VoteRequest:
///   [0..7]   term        uint64
///   [8..15]  lastTerm    uint64
///   [16..23] lastIndex   uint64
///   [24..31] candidateId 8-byte ASCII, zero-padded
/// A pre-vote round is intended to test whether a candidate would win an election
/// without incrementing the term, so the cluster is not disrupted.
/// Go reference: raft.go:1600-1700 (pre-vote logic)
/// NOTE(review): confirm that the Go server really implements pre-vote and shares
/// this encoding; nothing in this file establishes it.
/// </summary>
/// <param name="Term">Value of the term field (bytes 0..7).</param>
/// <param name="LastTerm">Value of the lastTerm field (bytes 8..15).</param>
/// <param name="LastIndex">Value of the lastIndex field (bytes 16..23).</param>
/// <param name="CandidateId">Candidate ID stored in the trailing 8-byte ID slot.</param>
public readonly record struct RaftPreVoteRequestWire(
    ulong Term,
    ulong LastTerm,
    ulong LastIndex,
    string CandidateId)
{
    /// <summary>
    /// Serialises this PreVoteRequest into a freshly allocated 32-byte buffer
    /// laid out exactly like a VoteRequest.
    /// </summary>
    /// <returns>The encoded message.</returns>
    public byte[] Encode()
    {
        var encoded = new byte[RaftWireConstants.VoteRequestLen];
        Span<byte> cursor = encoded;

        BinaryPrimitives.WriteUInt64LittleEndian(cursor, Term);
        BinaryPrimitives.WriteUInt64LittleEndian(cursor.Slice(8), LastTerm);
        BinaryPrimitives.WriteUInt64LittleEndian(cursor.Slice(16), LastIndex);
        RaftWireHelpers.WriteId(cursor.Slice(24), CandidateId);

        return encoded;
    }

    /// <summary>
    /// Parses a PreVoteRequest from <paramref name="msg"/>.
    /// </summary>
    /// <param name="msg">The raw message; must be exactly 32 bytes long.</param>
    /// <returns>The decoded request.</returns>
    /// <exception cref="ArgumentException">The span is not exactly 32 bytes long.</exception>
    public static RaftPreVoteRequestWire Decode(ReadOnlySpan<byte> msg)
    {
        if (msg.Length == RaftWireConstants.VoteRequestLen)
        {
            var term = BinaryPrimitives.ReadUInt64LittleEndian(msg);
            var lastTerm = BinaryPrimitives.ReadUInt64LittleEndian(msg.Slice(8));
            var lastIndex = BinaryPrimitives.ReadUInt64LittleEndian(msg.Slice(16));
            var candidateId = RaftWireHelpers.ReadId(msg.Slice(24));
            return new RaftPreVoteRequestWire(term, lastTerm, lastIndex, candidateId);
        }

        throw new ArgumentException(
            $"PreVoteRequest requires exactly {RaftWireConstants.VoteRequestLen} bytes, got {msg.Length}.",
            nameof(msg));
    }
}
/// <summary>
/// RAFT Pre-Vote response in its binary wire form.
/// Uses the 17-byte VoteResponse layout; only bit 0 (granted) of the flags byte is
/// ever written, so the empty-log bit is always clear:
///   [0..7]  term   uint64
///   [8..15] peer   8-byte ASCII, zero-padded
///   [16]    flags  bit 0 = granted
/// Go reference: raft.go:1600-1700 (pre-vote logic)
/// NOTE(review): confirm that the Go server really implements pre-vote; nothing in
/// this file establishes it.
/// </summary>
/// <param name="Term">Value of the term field (bytes 0..7).</param>
/// <param name="PeerId">Peer ID stored in the 8-byte ID slot at bytes 8..15.</param>
/// <param name="Granted">Mapped to bit 0 of the flags byte.</param>
public readonly record struct RaftPreVoteResponseWire(
    ulong Term,
    string PeerId,
    bool Granted)
{
    /// <summary>
    /// Serialises this PreVoteResponse into a freshly allocated 17-byte buffer.
    /// The flags byte is 1 when granted and 0 otherwise.
    /// </summary>
    /// <returns>The encoded message.</returns>
    public byte[] Encode()
    {
        var encoded = new byte[RaftWireConstants.VoteResponseLen];
        var span = encoded.AsSpan();

        BinaryPrimitives.WriteUInt64LittleEndian(span, Term);
        RaftWireHelpers.WriteId(span.Slice(8), PeerId);

        // The buffer starts zeroed, so only the granted case needs a write.
        if (Granted)
        {
            encoded[16] = 1;
        }

        return encoded;
    }

    /// <summary>
    /// Parses a PreVoteResponse from <paramref name="msg"/>. Only bit 0 of the flags
    /// byte is inspected; any other bits are ignored.
    /// </summary>
    /// <param name="msg">The raw message; must be exactly 17 bytes long.</param>
    /// <returns>The decoded response.</returns>
    /// <exception cref="ArgumentException">The span is not exactly 17 bytes long.</exception>
    public static RaftPreVoteResponseWire Decode(ReadOnlySpan<byte> msg)
    {
        if (msg.Length == RaftWireConstants.VoteResponseLen)
        {
            var term = BinaryPrimitives.ReadUInt64LittleEndian(msg);
            var peerId = RaftWireHelpers.ReadId(msg.Slice(8));
            var granted = (msg[16] & 0x01) == 0x01;
            return new RaftPreVoteResponseWire(term, peerId, granted);
        }

        throw new ArgumentException(
            $"PreVoteResponse requires exactly {RaftWireConstants.VoteResponseLen} bytes, got {msg.Length}.",
            nameof(msg));
    }
}
/// <summary>
/// Shared encoding helpers for all RAFT wire format types: fixed-width ID slots
/// and uvarint (base-128, least-significant group first) integers.
/// </summary>
internal static class RaftWireHelpers
{
    /// <summary>
    /// Writes a peer/leader ID to an 8-byte span. IDs shorter than 8 bytes are
    /// zero-padded; IDs longer than 8 bytes are silently truncated (matching Go's
    /// copy(buf[:idLen], id) semantics).
    /// Go: server/raft.go:2693 — copy(buf[:idLen], ae.leader)
    /// </summary>
    /// <param name="dest">Destination; its first 8 bytes are overwritten. Must be at least 8 bytes long.</param>
    /// <param name="id">The ID to write, converted with <see cref="Encoding.ASCII"/>.</param>
    public static void WriteId(Span<byte> dest, string id)
    {
        // Zero-fill the 8-byte slot first so shorter IDs end up zero-padded.
        var slot = dest[..RaftWireConstants.IdLen];
        slot.Clear();

        var bytes = Encoding.ASCII.GetBytes(id);
        var copyLen = Math.Min(bytes.Length, RaftWireConstants.IdLen);
        bytes.AsSpan(0, copyLen).CopyTo(slot);
    }

    /// <summary>
    /// Reads a peer/leader ID from an 8-byte span, trimming trailing null bytes so
    /// that zero-padded IDs decode back to their original string.
    /// Go: server/raft.go:4581 — string(copyBytes(msg[24:24+idLen]))
    /// </summary>
    /// <param name="src">Source; only its first 8 bytes are read. Must be at least 8 bytes long.</param>
    /// <returns>The ID without trailing zero padding; empty when the slot is all zeros.</returns>
    public static string ReadId(ReadOnlySpan<byte> src)
    {
        var idBytes = src[..RaftWireConstants.IdLen];
        var len = idBytes.Length;
        while (len > 0 && idBytes[len - 1] == 0)
        {
            len--;
        }

        return Encoding.ASCII.GetString(idBytes[..len]);
    }

    /// <summary>
    /// Writes a uint64 as a uvarint into <paramref name="buf"/> and returns the
    /// number of bytes written (1-10).
    /// Go: server/raft.go:2682 — binary.PutUvarint(_lterm[:], ae.lterm)
    /// </summary>
    /// <param name="buf">Destination; 10 bytes are enough for any value.</param>
    /// <param name="value">The value to encode.</param>
    /// <returns>The number of bytes written.</returns>
    public static int WriteUvarint(Span<byte> buf, ulong value)
    {
        var pos = 0;
        while (value > 0x7F)
        {
            // Emit the low 7 bits with the continuation bit set.
            buf[pos++] = (byte)((value & 0x7F) | 0x80);
            value >>= 7;
        }

        buf[pos++] = (byte)value;
        return pos;
    }

    /// <summary>
    /// Reads a uvarint from <paramref name="buf"/> into <paramref name="value"/>
    /// and returns the number of bytes consumed. Returns 0 (with
    /// <paramref name="value"/> set to 0) when the input is empty, ends before a
    /// terminating byte, or encodes a number that does not fit in 64 bits.
    /// Go: server/raft.go:2740 — binary.Uvarint(msg[ri:])
    /// </summary>
    /// <param name="buf">Source bytes; at most the first 10 are examined.</param>
    /// <param name="value">Receives the decoded value, or 0 on failure.</param>
    /// <returns>Bytes consumed (1-10), or 0 on failure.</returns>
    public static int ReadUvarint(ReadOnlySpan<byte> buf, out ulong value)
    {
        const int MaxVarintLen64 = 10;

        value = 0;
        ulong acc = 0;
        var shift = 0;
        var limit = Math.Min(buf.Length, MaxVarintLen64);

        for (var i = 0; i < limit; i++)
        {
            var b = buf[i];
            if (b < 0x80)
            {
                // The 10th byte sits at bit 63, so it may only be 0 or 1. Anything
                // larger would be shifted out of the ulong and yield a wrong value,
                // so report overflow instead.
                if (i == MaxVarintLen64 - 1 && b > 1)
                {
                    return 0;
                }

                value = acc | ((ulong)b << shift);
                return i + 1;
            }

            acc |= (ulong)(b & 0x7F) << shift;
            shift += 7;
        }

        // Empty input, no terminating byte within the buffer, or more than 10 bytes.
        return 0;
    }
}