// Reference: golang/nats-server/server/filestore.go:5783-5842
// Tests for Task 4: Crash Recovery Enhancement — FlushAllPending and WriteStreamState.
// Go parity:
//   TestFileStoreSyncIntervals                          → FlushAllPending_flushes_active_block
//   TestFileStoreWriteFullStateBasics                   → FlushAllPending_writes_stream_state_file
//   TestFileStoreTtlWheelExpiry (recovery variant)      → Recovery_rebuilds_ttl_and_expires_old
//   TestFileStoreBitRot (block tail truncation variant) → Recovery_handles_truncated_block
using System.Text;
using System.Text.Json;
using NATS.Server.JetStream.Storage;
using Shouldly;
using Xunit;

namespace NATS.Server.Tests.JetStream.Storage;

/// <summary>
/// Tests for <see cref="FileStore.FlushAllPending"/> and stream state checkpoint writes.
/// Verifies that buffered block data is flushed to disk, that an atomic stream.state
/// checkpoint file is written, that TTL recovery works across a restart, and that
/// recovery is graceful when the tail of a block file has been truncated (simulating
/// a crash mid-write).
/// Reference: golang/nats-server/server/filestore.go:5783 (flushPendingWritesUnlocked).
/// </summary>
public sealed class FileStoreCrashRecoveryTests : IDisposable
{
    // Per-test-class scratch root; every test gets its own sub-directory under it.
    private readonly string _root;

    public FileStoreCrashRecoveryTests()
    {
        _root = Path.Combine(Path.GetTempPath(), $"nats-js-crash-{Guid.NewGuid():N}");
        Directory.CreateDirectory(_root);
    }

    public void Dispose()
    {
        // Best-effort cleanup of the scratch tree; tests create all files under _root.
        if (Directory.Exists(_root))
            Directory.Delete(_root, recursive: true);
    }

    /// <summary>
    /// Creates a <see cref="FileStore"/> rooted at a fresh sub-directory of the
    /// scratch root, applying <paramref name="opts"/> if supplied.
    /// </summary>
    private FileStore CreateStore(string subDir, FileStoreOptions? opts = null)
    {
        var dir = Path.Combine(_root, subDir);
        Directory.CreateDirectory(dir);
        var o = opts ?? new FileStoreOptions();
        o.Directory = dir;
        return new FileStore(o);
    }

    /// <summary>Resolves the on-disk directory used by <see cref="CreateStore"/> for a sub-directory name.</summary>
    private string StoreDir(string subDir) => Path.Combine(_root, subDir);

    // -------------------------------------------------------------------------
    // FlushAllPending — block flush
    // -------------------------------------------------------------------------

    // Go: TestFileStoreSyncIntervals (filestore_test.go) — verifies that pending writes
    // are flushed to disk and the .blk file is non-empty after FlushAllPending.
    [Fact]
    public void FlushAllPending_flushes_active_block()
    {
        // Arrange: write a message to the store.
        const string sub = "flush-block";
        var dir = StoreDir(sub);
        using var store = CreateStore(sub);
        store.StoreMsg("events.a", null, "payload-for-flush"u8.ToArray(), 0L);

        // Act: flush all pending writes.
        store.FlushAllPending();

        // Assert: at least one .blk file exists and it is non-empty, proving the
        // active block was flushed to disk.
        var blkFiles = Directory.GetFiles(dir, "*.blk");
        blkFiles.Length.ShouldBeGreaterThanOrEqualTo(1);
        blkFiles.All(f => new FileInfo(f).Length > 0).ShouldBeTrue(
            "every .blk file should contain at least the record bytes after a flush");
    }

    // -------------------------------------------------------------------------
    // FlushAllPending — stream.state checkpoint
    // -------------------------------------------------------------------------

    // Go: TestFileStoreWriteFullStateBasics (filestore_test.go:5461) — verifies that
    // WriteStreamState creates a valid, atomic stream.state checkpoint file.
    [Fact]
    public void FlushAllPending_writes_stream_state_file()
    {
        // Arrange: store several messages across subjects.
        const string sub = "state-file";
        var dir = StoreDir(sub);
        using var store = CreateStore(sub);
        store.StoreMsg("orders.new", null, "order-1"u8.ToArray(), 0L);
        store.StoreMsg("orders.new", null, "order-2"u8.ToArray(), 0L);
        store.StoreMsg("events.a", null, "event-1"u8.ToArray(), 0L);

        // Act: flush — this should write stream.state atomically.
        store.FlushAllPending();

        // Assert: stream.state exists and no leftover .tmp file.
        var statePath = Path.Combine(dir, "stream.state");
        var tmpPath = statePath + ".tmp";
        File.Exists(statePath).ShouldBeTrue("stream.state checkpoint must exist after FlushAllPending");
        File.Exists(tmpPath).ShouldBeFalse("stream.state.tmp must be renamed away after atomic write");

        // Assert: the file is valid JSON with the expected fields.
        var json = File.ReadAllText(statePath);
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;
        root.TryGetProperty("FirstSeq", out var firstSeq).ShouldBeTrue("stream.state must contain FirstSeq");
        root.TryGetProperty("LastSeq", out var lastSeq).ShouldBeTrue("stream.state must contain LastSeq");
        root.TryGetProperty("Messages", out var messages).ShouldBeTrue("stream.state must contain Messages");
        firstSeq.GetUInt64().ShouldBe(1UL);
        lastSeq.GetUInt64().ShouldBe(3UL);
        messages.GetUInt64().ShouldBe(3UL);
    }

    // Go: FlushAllPending is idempotent — calling it twice must not throw and must
    // overwrite the previous state file with the latest state.
    [Fact]
    public void FlushAllPending_is_idempotent()
    {
        const string sub = "flush-idempotent";
        var dir = StoreDir(sub);
        using var store = CreateStore(sub);
        store.StoreMsg("foo", null, "msg-1"u8.ToArray(), 0L);
        store.FlushAllPending();
        store.StoreMsg("foo", null, "msg-2"u8.ToArray(), 0L);
        store.FlushAllPending();

        // The state file should reflect the second flush (2 messages, seq 1..2).
        var statePath = Path.Combine(dir, "stream.state");
        File.Exists(statePath).ShouldBeTrue();
        using var doc = JsonDocument.Parse(File.ReadAllText(statePath));
        doc.RootElement.GetProperty("Messages").GetUInt64().ShouldBe(2UL);
        doc.RootElement.GetProperty("LastSeq").GetUInt64().ShouldBe(2UL);
    }

    // -------------------------------------------------------------------------
    // TTL wheel rebuild across restart
    // -------------------------------------------------------------------------

    // Go: TestFileStoreRecoveryReregistersTtls (filestore_test.go) — verifies that
    // messages whose timestamps pre-date the MaxAgeMs cutoff are pruned during recovery.
    // No Task.Delay is needed: messages are written directly to a MsgBlock with a
    // timestamp 1 hour in the past, so they are already expired when FileStore opens.
    [Fact]
    public async Task Recovery_rebuilds_ttl_and_expires_old()
    {
        // Arrange: build a block file with messages backdated 1 hour so they are
        // already past the MaxAgeMs cutoff at the moment FileStore opens for recovery.
        const string sub = "ttl-recovery";
        var dir = StoreDir(sub);
        Directory.CreateDirectory(dir);
        var oneHourAgoNs = new DateTimeOffset(DateTime.UtcNow.AddHours(-1))
            .ToUnixTimeMilliseconds() * 1_000_000L;

        using (var block = MsgBlock.Create(0, dir, maxBytes: 64 * 1024, firstSequence: 1))
        {
            block.WriteAt(1, "expire.me", ReadOnlyMemory<byte>.Empty, "short-lived"u8.ToArray(), oneHourAgoNs);
            block.WriteAt(2, "expire.me", ReadOnlyMemory<byte>.Empty, "short-lived-2"u8.ToArray(), oneHourAgoNs);
            block.Flush();
        }

        // Act: open FileStore with a 60-second MaxAgeMs — messages timestamped 1 hour
        // ago are already expired, so PruneExpired during RecoverBlocks removes them.
        await using var recovered = CreateStore(sub, new FileStoreOptions { MaxAgeMs = 60_000 });

        // Assert: expired messages must be gone after recovery + prune.
        var stateAfter = await recovered.GetStateAsync(default);
        stateAfter.Messages.ShouldBe(0UL, "expired messages must be pruned during recovery");
        stateAfter.FirstSeq.ShouldBe(0UL, "first sequence must be 0 when the store is empty after full expiry");
        stateAfter.LastSeq.ShouldBe(0UL, "last sequence must be 0 when the store is empty after full expiry");
    }

    // -------------------------------------------------------------------------
    // Truncated block recovery
    // -------------------------------------------------------------------------

    // Go: TestFileStoreErrPartialLoad (filestore_test.go) — verifies that recovery
    // handles a block whose tail has been truncated (simulating a crash mid-write).
    // The earlier records in the block must still be recoverable; the truncated tail
    // must be silently skipped.
    [Fact]
    public async Task Recovery_handles_truncated_block()
    {
        // Arrange: write a few messages and flush so the .blk file has valid data.
        const string sub = "truncated-block";
        var dir = StoreDir(sub);
        await using (var store = CreateStore(sub, new FileStoreOptions { BlockSizeBytes = 4096 }))
        {
            for (var i = 0; i < 5; i++)
                store.StoreMsg("events", null, Encoding.UTF8.GetBytes($"msg-{i}"), 0L);
            // Flush to ensure data is on disk before we close.
            store.FlushAllPending();
        }

        // Simulate a crash mid-write by truncating the .blk file by a few bytes at
        // the tail. This leaves all but the last record in a valid state.
        var blkFile = Directory.GetFiles(dir, "*.blk").OrderBy(f => f).First();
        var originalLength = new FileInfo(blkFile).Length;
        originalLength.ShouldBeGreaterThan(4, "block file must have content before truncation");

        // Remove the last 4 bytes — simulates a torn write at the file tail.
        using (var fs = new FileStream(blkFile, FileMode.Open, FileAccess.Write))
        {
            var newLength = Math.Max(0, originalLength - 4);
            fs.SetLength(newLength);
        }

        // Act: re-open — recovery must not throw and must load what it can.
        // The key assertion is that recovery does not throw; it may lose the last
        // partial record but must preserve earlier complete records.
        Exception? thrown = null;
        FileStore? recovered = null;
        try
        {
            recovered = CreateStore(sub, new FileStoreOptions { BlockSizeBytes = 4096 });
            var state = await recovered.GetStateAsync(default);
            // At least some messages must survive: only the tail was truncated, so the
            // earlier complete records must still be recoverable. (A ">= 0" check on a
            // ulong would be vacuously true and assert nothing.)
            state.Messages.ShouldBeGreaterThanOrEqualTo(1UL,
                "recovery from a truncated block must not throw and must expose the surviving messages");
        }
        catch (Exception ex)
        {
            thrown = ex;
        }
        finally
        {
            recovered?.Dispose();
        }

        // InvalidDataException is reserved for integrity failures (wrong encryption key);
        // a tail truncation must be silently skipped during recovery.
        thrown.ShouldBeNull("recovery from a truncated block must not propagate exceptions");
    }
}