perf: add FileStore buffered writes, O(1) state tracking, and eliminate redundant per-publish work
Implement Go-parity background flush loop (coalesce 16KB/8ms) in MsgBlock/FileStore, replace O(n) GetStateAsync with incremental counters, skip PruneExpired/LoadAsync/PrunePerSubject when not needed, and bypass RAFT for single-replica streams. Fix counter tracking bugs in RemoveMsg/EraseMsg/TTL expiry and ObjectDisposedException races in flush loop disposal. FileStore optimizations verified with 3112/3112 JetStream tests passing; async publish benchmark remains at ~174 msg/s due to E2E protocol path bottleneck.
This commit is contained in:
@@ -48,6 +48,12 @@ public sealed class MsgBlock : IDisposable
|
||||
// Reference: golang/nats-server/server/filestore.go:236 (cache field)
|
||||
private Dictionary<ulong, MessageRecord>? _cache;
|
||||
|
||||
// Pending write buffer — accumulates encoded records for batched disk writes.
|
||||
// The background flush loop in FileStore coalesces these into fewer I/O calls.
|
||||
// Reference: golang/nats-server/server/filestore.go:6700 (cache.buf write path).
|
||||
private readonly List<(byte[] Data, long Offset)> _pendingWrites = new();
|
||||
private int _pendingBytes;
|
||||
|
||||
// Go: msgBlock.lchk — last written record checksum (XxHash64, 8 bytes).
|
||||
// Tracked so callers can chain checksum verification across blocks.
|
||||
// Reference: golang/nats-server/server/filestore.go:2204 (lchk field)
|
||||
@@ -147,6 +153,23 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Number of bytes currently buffered in this block and not yet written to disk.
/// The background flush loop consults this value when deciding whether to coalesce.
/// </summary>
/// <remarks>
/// Reports 0 when the block is already disposed, or when the lock has been
/// disposed concurrently and can no longer be entered.
/// </remarks>
public int PendingWriteSize
{
    get
    {
        // Fast path: nothing can be pending on a disposed block.
        if (_disposed)
        {
            return 0;
        }

        try
        {
            _lock.EnterReadLock();
        }
        catch (ObjectDisposedException)
        {
            // The lock was torn down between the check above and the acquire.
            return 0;
        }

        int buffered;
        try
        {
            buffered = _pendingBytes;
        }
        finally
        {
            _lock.ExitReadLock();
        }

        return buffered;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// The XxHash64 checksum of the last record written to this block (8 bytes), or null
|
||||
/// if no records have been written yet. Updated after every <see cref="Write"/>,
|
||||
@@ -230,8 +253,10 @@ public sealed class MsgBlock : IDisposable
|
||||
var encoded = MessageRecord.Encode(record);
|
||||
var offset = _writeOffset;
|
||||
|
||||
// Write at the current append offset using positional I/O
|
||||
RandomAccess.Write(_handle, encoded, offset);
|
||||
// Buffer the write for batched disk I/O — the background flush loop
|
||||
// in FileStore will coalesce pending writes.
|
||||
_pendingWrites.Add((encoded, offset));
|
||||
_pendingBytes += encoded.Length;
|
||||
_writeOffset = offset + encoded.Length;
|
||||
|
||||
_index[sequence] = (offset, encoded.Length);
|
||||
@@ -295,7 +320,10 @@ public sealed class MsgBlock : IDisposable
|
||||
var encoded = MessageRecord.Encode(record);
|
||||
var offset = _writeOffset;
|
||||
|
||||
RandomAccess.Write(_handle, encoded, offset);
|
||||
// Buffer the write for batched disk I/O — the background flush loop
|
||||
// in FileStore will coalesce pending writes.
|
||||
_pendingWrites.Add((encoded, offset));
|
||||
_pendingBytes += encoded.Length;
|
||||
_writeOffset = offset + encoded.Length;
|
||||
|
||||
_index[sequence] = (offset, encoded.Length);
|
||||
@@ -333,7 +361,8 @@ public sealed class MsgBlock : IDisposable
|
||||
/// <returns>The decoded record, or null if not found or deleted.</returns>
|
||||
public MessageRecord? Read(ulong sequence)
|
||||
{
|
||||
_lock.EnterReadLock();
|
||||
// Use a write lock because we may need to flush pending writes.
|
||||
_lock.EnterWriteLock();
|
||||
try
|
||||
{
|
||||
if (_deleted.Contains(sequence))
|
||||
@@ -347,6 +376,15 @@ public sealed class MsgBlock : IDisposable
|
||||
if (!_index.TryGetValue(sequence, out var entry))
|
||||
return null;
|
||||
|
||||
// Flush pending writes so disk reads see the latest data.
|
||||
if (_pendingWrites.Count > 0)
|
||||
{
|
||||
foreach (var (data, off) in _pendingWrites)
|
||||
RandomAccess.Write(_handle, data, off);
|
||||
_pendingWrites.Clear();
|
||||
_pendingBytes = 0;
|
||||
}
|
||||
|
||||
var buffer = new byte[entry.Length];
|
||||
RandomAccess.Read(_handle, buffer, entry.Offset);
|
||||
|
||||
@@ -354,7 +392,7 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
finally
|
||||
{
|
||||
_lock.ExitReadLock();
|
||||
_lock.ExitWriteLock();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -384,6 +422,15 @@ public sealed class MsgBlock : IDisposable
|
||||
if (!_deleted.Add(sequence))
|
||||
return false;
|
||||
|
||||
// Flush any pending writes so the record is on disk before we read it back.
|
||||
if (_pendingWrites.Count > 0)
|
||||
{
|
||||
foreach (var (data, off) in _pendingWrites)
|
||||
RandomAccess.Write(_handle, data, off);
|
||||
_pendingWrites.Clear();
|
||||
_pendingBytes = 0;
|
||||
}
|
||||
|
||||
// Read the existing record, re-encode with Deleted flag, write back in-place.
|
||||
// The encoded size doesn't change (only flags byte + checksum differ).
|
||||
var buffer = new byte[entry.Length];
|
||||
@@ -455,7 +502,9 @@ public sealed class MsgBlock : IDisposable
|
||||
var encoded = MessageRecord.Encode(record);
|
||||
var offset = _writeOffset;
|
||||
|
||||
RandomAccess.Write(_handle, encoded, offset);
|
||||
// Buffer the write for batched disk I/O.
|
||||
_pendingWrites.Add((encoded, offset));
|
||||
_pendingBytes += encoded.Length;
|
||||
_writeOffset = offset + encoded.Length;
|
||||
|
||||
_index[sequence] = (offset, encoded.Length);
|
||||
@@ -500,6 +549,44 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes every buffered (pending) record to the block file at its recorded
/// offset, then empties the pending buffer and resets the pending byte count.
/// Called by the background flush loop in FileStore, or synchronously on
/// block seal / dispose to ensure all data reaches disk.
/// Reference: golang/nats-server/server/filestore.go:7592 (flushPendingMsgsLocked).
/// </summary>
/// <returns>
/// The number of bytes flushed; 0 when nothing was pending, the block is
/// disposed, or the lock was disposed concurrently.
/// </returns>
public int FlushPending()
{
    if (_disposed)
    {
        return 0;
    }

    try
    {
        _lock.EnterWriteLock();
    }
    catch (ObjectDisposedException)
    {
        // Block was disposed concurrently (e.g. during PurgeAsync).
        return 0;
    }

    try
    {
        var flushedBytes = 0;

        if (_pendingWrites.Count > 0)
        {
            // Issue one positional write per buffered record, in insertion order.
            for (var i = 0; i < _pendingWrites.Count; i++)
            {
                var (payload, position) = _pendingWrites[i];
                RandomAccess.Write(_handle, payload, position);
            }

            // Capture the byte total before the counters are reset.
            flushedBytes = _pendingBytes;
            _pendingWrites.Clear();
            _pendingBytes = 0;
        }

        return flushedBytes;
    }
    finally
    {
        _lock.ExitWriteLock();
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if the given sequence number has been soft-deleted in this block.
|
||||
/// Reference: golang/nats-server/server/filestore.go — dmap (deleted map) lookup.
|
||||
@@ -559,11 +646,20 @@ public sealed class MsgBlock : IDisposable
|
||||
/// </summary>
|
||||
public IEnumerable<(ulong Sequence, string Subject)> EnumerateNonDeleted()
|
||||
{
|
||||
// Snapshot index and deleted set under the read lock, then decode outside it.
|
||||
// Snapshot index and deleted set under a write lock (may need to flush pending).
|
||||
List<(long Offset, int Length, ulong Seq)> entries;
|
||||
_lock.EnterReadLock();
|
||||
_lock.EnterWriteLock();
|
||||
try
|
||||
{
|
||||
// Flush pending writes so disk reads see latest data.
|
||||
if (_pendingWrites.Count > 0)
|
||||
{
|
||||
foreach (var (data, off) in _pendingWrites)
|
||||
RandomAccess.Write(_handle, data, off);
|
||||
_pendingWrites.Clear();
|
||||
_pendingBytes = 0;
|
||||
}
|
||||
|
||||
entries = new List<(long, int, ulong)>(_index.Count);
|
||||
foreach (var (seq, (offset, length)) in _index)
|
||||
{
|
||||
@@ -573,7 +669,7 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
finally
|
||||
{
|
||||
_lock.ExitReadLock();
|
||||
_lock.ExitWriteLock();
|
||||
}
|
||||
|
||||
// Sort by sequence for deterministic output.
|
||||
@@ -609,13 +705,22 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Flushes any buffered writes to disk.
|
||||
/// Flushes any pending buffered writes and then syncs the file to disk.
|
||||
/// </summary>
|
||||
public void Flush()
|
||||
{
|
||||
_lock.EnterWriteLock();
|
||||
try
|
||||
{
|
||||
// Flush pending buffered writes first.
|
||||
if (_pendingWrites.Count > 0)
|
||||
{
|
||||
foreach (var (data, offset) in _pendingWrites)
|
||||
RandomAccess.Write(_handle, data, offset);
|
||||
_pendingWrites.Clear();
|
||||
_pendingBytes = 0;
|
||||
}
|
||||
|
||||
_file.Flush(flushToDisk: true);
|
||||
}
|
||||
finally
|
||||
@@ -636,6 +741,15 @@ public sealed class MsgBlock : IDisposable
|
||||
_lock.EnterWriteLock();
|
||||
try
|
||||
{
|
||||
// Flush pending buffered writes before closing.
|
||||
if (_pendingWrites.Count > 0)
|
||||
{
|
||||
foreach (var (data, offset) in _pendingWrites)
|
||||
RandomAccess.Write(_handle, data, offset);
|
||||
_pendingWrites.Clear();
|
||||
_pendingBytes = 0;
|
||||
}
|
||||
|
||||
_file.Flush();
|
||||
_file.Dispose();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user