feat(raft): add binary WAL and VotedFor persistence

Implements a binary write-ahead log (RaftWal) for durable Raft entry
storage, replacing in-memory-only semantics. The WAL uses a magic header
("NWAL" + version), length-prefixed records with per-record CRC32
integrity checking, and CompactAsync with atomic temp-file rename.
Load() tolerates truncated or corrupt tail records for crash safety.

Also fixes RaftNode to persist and reload TermState.VotedFor via a
meta.json file alongside term.txt, ensuring vote durability across
restarts. Falls back gracefully to legacy term.txt when meta.json is
absent.

6 new tests in RaftWalTests: persist/recover, compact, truncation
tolerance, VotedFor round-trip, empty WAL, and CRC corruption.
All 458 Raft tests pass.
This commit is contained in:
Joseph Doherty
2026-02-25 01:31:23 -05:00
parent 3ab683489e
commit c9ac4b9918
3 changed files with 441 additions and 3 deletions

View File

@@ -0,0 +1,147 @@
using NATS.Server.Raft;
// Go reference: server/raft.go (WAL binary format, compaction, CRC integrity)
namespace NATS.Server.Tests.Raft;
/// <summary>
/// Tests for <see cref="RaftWal"/> append/recover, compaction and damaged-tail handling,
/// plus <see cref="RaftNode"/> persistence of the current term and vote.
/// </summary>
/// <remarks>
/// Every test instance works inside its own freshly created temporary directory,
/// which is deleted again in <see cref="Dispose"/>.
/// </remarks>
public class RaftWalTests : IDisposable
{
    // Per-instance scratch directory; unique so parallel test runs cannot collide.
    private readonly string _root;

    /// <summary>Creates the unique temporary directory used by the tests.</summary>
    public RaftWalTests()
    {
        _root = Path.Combine(Path.GetTempPath(), $"nats-wal-{Guid.NewGuid():N}");
        Directory.CreateDirectory(_root);
    }

    /// <summary>Removes the temporary directory and everything written into it.</summary>
    public void Dispose()
    {
        if (Directory.Exists(_root))
        {
            Directory.Delete(_root, recursive: true);
        }
    }

    // Go reference: server/raft.go WAL append + recover
    /// <summary>Entries appended and synced to a WAL are returned by a subsequent load.</summary>
    [Fact]
    public async Task Wal_persists_and_recovers_entries()
    {
        var walPath = Path.Combine(_root, "raft.wal");

        // Write entries; the inner scope disposes the writer before the file is re-read.
        {
            using var wal = new RaftWal(walPath);
            await wal.AppendAsync(new RaftLogEntry(1, 1, "cmd-1"));
            await wal.AppendAsync(new RaftLogEntry(2, 1, "cmd-2"));
            await wal.AppendAsync(new RaftLogEntry(3, 2, "cmd-3"));
            await wal.SyncAsync();
        }

        // Recover
        using var recovered = RaftWal.Load(walPath);
        var entries = recovered.Entries.ToList();

        entries.Count.ShouldBe(3);
        entries[0].Index.ShouldBe(1);
        entries[0].Term.ShouldBe(1);
        entries[0].Command.ShouldBe("cmd-1");
        entries[2].Index.ShouldBe(3);
        entries[2].Term.ShouldBe(2);
    }

    // Go reference: server/raft.go compactLog
    /// <summary>Compacting up to index 5 leaves only entries 6..10 in the reloaded WAL.</summary>
    [Fact]
    public async Task Wal_compact_removes_old_entries()
    {
        var walPath = Path.Combine(_root, "compact.wal");
        using var wal = new RaftWal(walPath);
        for (int i = 1; i <= 10; i++)
        {
            await wal.AppendAsync(new RaftLogEntry(i, 1, $"cmd-{i}"));
        }

        await wal.SyncAsync();
        await wal.CompactAsync(5); // remove entries 1-5

        using var recovered = RaftWal.Load(walPath);
        // Materialise once so the assertions do not re-enumerate the WAL's sequence.
        var entries = recovered.Entries.ToList();

        entries.Count.ShouldBe(5);
        entries[0].Index.ShouldBe(6);
    }

    // Go reference: server/raft.go WAL crash-truncation tolerance
    /// <summary>
    /// A WAL whose final record lost its last bytes still loads, yielding only the
    /// intact record that precedes the damaged one.
    /// </summary>
    [Fact]
    public async Task Wal_handles_truncated_file()
    {
        var walPath = Path.Combine(_root, "truncated.wal");
        {
            using var wal = new RaftWal(walPath);
            await wal.AppendAsync(new RaftLogEntry(1, 1, "good-entry"));
            await wal.AppendAsync(new RaftLogEntry(2, 1, "will-be-truncated"));
            await wal.SyncAsync();
        }

        // Truncate last few bytes to simulate crash
        using (var fs = File.OpenWrite(walPath))
        {
            fs.SetLength(fs.Length - 3);
        }

        using var recovered = RaftWal.Load(walPath);
        var entries = recovered.Entries.ToList();

        // Exactly one: the second record is incomplete and must not be surfaced.
        // (A ">= 1" check would also pass if the damaged record were returned.)
        entries.Count.ShouldBe(1);
        entries[0].Command.ShouldBe("good-entry");
    }

    // Go reference: server/raft.go storeMeta (term + votedFor persistence)
    /// <summary>
    /// Term and VotedFor written by one node instance are visible to a new instance
    /// created over the same persist directory after it loads its persisted state.
    /// </summary>
    [Fact]
    public async Task RaftNode_persists_term_and_vote()
    {
        var dir = Path.Combine(_root, "node-persist");
        Directory.CreateDirectory(dir);
        {
            using var node = new RaftNode("n1", persistDirectory: dir);
            node.TermState.CurrentTerm = 5;
            node.TermState.VotedFor = "n2";
            await node.PersistAsync(default);
        }

        using var recovered = new RaftNode("n1", persistDirectory: dir);
        await recovered.LoadPersistedStateAsync(default);

        recovered.Term.ShouldBe(5);
        recovered.TermState.VotedFor.ShouldBe("n2");
    }

    // Go reference: server/raft.go WAL empty file edge case
    /// <summary>A WAL that was created and synced without any appends loads zero entries.</summary>
    [Fact]
    public async Task Wal_empty_file_loads_no_entries()
    {
        var walPath = Path.Combine(_root, "empty.wal");
        {
            using var wal = new RaftWal(walPath);
            await wal.SyncAsync();
        }

        using var recovered = RaftWal.Load(walPath);

        recovered.Entries.Count().ShouldBe(0);
    }

    // Go reference: server/raft.go WAL CRC integrity check
    /// <summary>
    /// Flipping a byte inside the last record makes the loader reject that record
    /// while still returning the intact record before it.
    /// </summary>
    [Fact]
    public async Task Wal_crc_validates_record_integrity()
    {
        var walPath = Path.Combine(_root, "crc.wal");
        {
            using var wal = new RaftWal(walPath);
            await wal.AppendAsync(new RaftLogEntry(1, 1, "valid"));
            await wal.AppendAsync(new RaftLogEntry(2, 1, "also-valid"));
            await wal.SyncAsync();
        }

        // Corrupt one byte in the tail of the file (inside the second record)
        var bytes = File.ReadAllBytes(walPath);
        bytes[^5] ^= 0xFF;
        File.WriteAllBytes(walPath, bytes);

        // Load must stop at the corrupt second record. Asserting an exact count is what
        // actually exercises the integrity check: with ">= 1" the test would still pass
        // if the corrupted record were accepted.
        using var recovered = RaftWal.Load(walPath);
        var entries = recovered.Entries.ToList();

        entries.Count.ShouldBe(1);
        entries[0].Command.ShouldBe("valid");
    }
}