feat(storage): add write cache and TTL scheduling (Go parity)
Add MsgBlock write cache (mirrors Go's msgBlock.cache) to serve reads for recently-written records without disk I/O; cleared on block seal via RotateBlock. Add HashWheel-based TTL expiry in FileStore (ExpireFromWheel / RegisterTtl), replacing the O(n) linear scan on every append with an O(expired) wheel scan. Implement StoreMsg sync method with per-message TTL override support. Add 10 tests covering cache hits/eviction, wheel expiry, retention, StoreMsg seq/ts, per-msg TTL, and recovery re-registration.
This commit is contained in:
@@ -3,6 +3,7 @@ using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using NATS.Server.JetStream.Models;
|
||||
using NATS.Server.Internal.TimeHashWheel;
|
||||
|
||||
// Storage.StreamState is in this namespace. Use an alias for the API-layer type
|
||||
// (now named ApiStreamState in the Models namespace) to keep method signatures clear.
|
||||
@@ -37,6 +38,11 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
private readonly bool _useS2; // true -> S2Codec (FSV2 compression path)
|
||||
private readonly bool _useAead; // true -> AeadEncryptor (FSV2 encryption path)
|
||||
|
||||
// Go: filestore.go — per-stream time hash wheel for efficient TTL expiration.
|
||||
// Created lazily only when MaxAgeMs > 0. Entries are (seq, expires_ns) pairs.
|
||||
// Reference: golang/nats-server/server/filestore.go:290 (fss/ttl fields).
|
||||
private HashWheel? _ttlWheel;
|
||||
|
||||
public int BlockCount => _blocks.Count;
|
||||
public bool UsedIndexManifestOnStartup { get; private set; }
|
||||
|
||||
@@ -59,7 +65,9 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
|
||||
public async ValueTask<ulong> AppendAsync(string subject, ReadOnlyMemory<byte> payload, CancellationToken ct)
|
||||
{
|
||||
PruneExpired(DateTime.UtcNow);
|
||||
// Go: check and remove expired messages before each append.
|
||||
// Reference: golang/nats-server/server/filestore.go — storeMsg, expire check.
|
||||
ExpireFromWheel();
|
||||
|
||||
_last++;
|
||||
var now = DateTime.UtcNow;
|
||||
@@ -74,6 +82,10 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
};
|
||||
_messages[_last] = stored;
|
||||
|
||||
// Go: register new message in TTL wheel when MaxAgeMs is configured.
|
||||
// Reference: golang/nats-server/server/filestore.go:6820 (storeMsg TTL schedule).
|
||||
RegisterTtl(_last, timestamp, _options.MaxAgeMs > 0 ? (long)_options.MaxAgeMs * 1_000_000L : 0);
|
||||
|
||||
// Write to MsgBlock. The payload stored in the block is the transformed
|
||||
// (compressed/encrypted) payload, not the plaintext.
|
||||
EnsureActiveBlock();
|
||||
@@ -233,6 +245,68 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
// Reference: golang/nats-server/server/filestore.go
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/// <summary>
/// Stores a single message synchronously and reports where it landed.
/// <para>
/// Header bytes, when present, are placed in front of the body and the combined
/// buffer is what gets kept in memory and (after <c>TransformForPersist</c>) written
/// to the active block. The block record itself is written with an empty header
/// section.
/// </para>
/// Reference: golang/nats-server/server/filestore.go:6790 (storeMsg).
/// </summary>
/// <param name="subject">Subject the message is stored under.</param>
/// <param name="hdr">Optional header bytes; <c>null</c> or empty means "no headers".</param>
/// <param name="msg">Message body. When there are no headers this array is stored as-is (not copied).</param>
/// <param name="ttl">
/// Per-message time-to-live in nanoseconds. A value greater than zero takes
/// priority over the stream-level <c>MaxAgeMs</c>; otherwise <c>MaxAgeMs</c>
/// (converted to nanoseconds) is used, and zero means no expiry is scheduled.
/// </param>
/// <returns>
/// The assigned sequence number and the store timestamp expressed in Unix
/// nanoseconds (derived from a millisecond reading, so it has millisecond resolution).
/// </returns>
public (ulong Seq, long Ts) StoreMsg(string subject, byte[]? hdr, byte[] msg, long ttl)
{
    const long NanosPerMilli = 1_000_000L;

    // Drop anything whose TTL has already elapsed before taking a new sequence,
    // mirroring what AppendAsync does.
    ExpireFromWheel();

    _last++;
    var now = DateTime.UtcNow;
    var timestamp = new DateTimeOffset(now).ToUnixTimeMilliseconds() * NanosPerMilli;

    // Build the stored buffer: headers first, then the body.
    byte[] combined;
    if (hdr is null || hdr.Length == 0)
    {
        combined = msg;
    }
    else
    {
        combined = new byte[hdr.Length + msg.Length];
        Buffer.BlockCopy(hdr, 0, combined, 0, hdr.Length);
        Buffer.BlockCopy(msg, 0, combined, hdr.Length, msg.Length);
    }

    // The in-memory copy keeps the untransformed bytes; only the block sees the
    // transformed payload.
    var persistedPayload = TransformForPersist(combined.AsSpan());
    _messages[_last] = new StoredMessage
    {
        Sequence = _last,
        Subject = subject,
        Payload = combined,
        TimestampUtc = now,
    };

    // Pick the TTL that applies to this message: explicit override, else stream max age.
    long effectiveTtlNs;
    if (ttl > 0)
    {
        effectiveTtlNs = ttl;
    }
    else if (_options.MaxAgeMs > 0)
    {
        effectiveTtlNs = (long)_options.MaxAgeMs * NanosPerMilli;
    }
    else
    {
        effectiveTtlNs = 0L;
    }

    RegisterTtl(_last, timestamp, effectiveTtlNs);

    EnsureActiveBlock();
    try
    {
        _activeBlock!.WriteAt(_last, subject, ReadOnlyMemory<byte>.Empty, persistedPayload, timestamp);
    }
    catch (InvalidOperationException)
    {
        // NOTE(review): presumably thrown when the active block cannot accept the
        // record (e.g. already sealed) — confirm against MsgBlock.WriteAt.
        // Either way, start a fresh block and retry exactly once.
        RotateBlock();
        _activeBlock!.WriteAt(_last, subject, ReadOnlyMemory<byte>.Empty, persistedPayload, timestamp);
    }

    // A write may have sealed the block; roll over so the next store has room.
    if (_activeBlock!.IsSealed)
    {
        RotateBlock();
    }

    return (_last, timestamp);
}
|
||||
|
||||
/// <summary>
|
||||
/// Removes all messages from the store and returns the count purged.
|
||||
/// Reference: golang/nats-server/server/filestore.go — purge / purgeMsgs.
|
||||
@@ -562,9 +636,15 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
/// Creates a new active block. The previous active block (if any) stays in the
|
||||
/// block list as a sealed block. The firstSequence is set to _last + 1 (the next
|
||||
/// expected sequence), but actual sequences come from WriteAt calls.
|
||||
/// When rotating, the previously active block's write cache is cleared to free memory.
|
||||
/// Reference: golang/nats-server/server/filestore.go — clearCache called on block seal.
|
||||
/// </summary>
|
||||
private void RotateBlock()
|
||||
{
|
||||
// Clear the write cache on the outgoing active block — it is now sealed.
|
||||
// This frees memory; future reads on sealed blocks go to disk.
|
||||
_activeBlock?.ClearCache();
|
||||
|
||||
var firstSeq = _last + 1;
|
||||
var block = MsgBlock.Create(_nextBlockId, _options.Directory, _options.BlockSizeBytes, firstSeq);
|
||||
_blocks.Add(block);
|
||||
@@ -740,6 +820,14 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
_messages[message.Sequence] = message;
|
||||
if (message.Sequence > _last)
|
||||
_last = message.Sequence;
|
||||
|
||||
// Go: re-register unexpired TTLs in the wheel after recovery.
|
||||
// Reference: golang/nats-server/server/filestore.go — recoverMsgs, TTL re-registration.
|
||||
if (_options.MaxAgeMs > 0)
|
||||
{
|
||||
var msgTs = new DateTimeOffset(message.TimestampUtc).ToUnixTimeMilliseconds() * 1_000_000L;
|
||||
RegisterTtl(message.Sequence, msgTs, (long)_options.MaxAgeMs * 1_000_000L);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -831,7 +919,73 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
// Expiry
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private void PruneExpired(DateTime nowUtc)
|
||||
/// <summary>
/// Schedules a message for expiry in the TTL wheel, creating the wheel on first use.
/// <para>
/// The wheel's <see cref="HashWheel.ExpireTasks"/> is documented as working in
/// Stopwatch-relative nanoseconds, whereas <paramref name="timestampNs"/> is a
/// Unix-epoch value. To bridge the two clocks, the message's age is measured on the
/// wall clock, subtracted from the TTL, and the remaining lifetime is added to the
/// current Stopwatch position. This keeps the original deadline
/// (<c>timestampNs + ttlNs</c>) for messages re-registered after recovery instead of
/// granting them a fresh full TTL on every restart.
/// </para>
/// Reference: golang/nats-server/server/filestore.go:6820 — storeMsg TTL scheduling.
/// </summary>
/// <param name="seq">Sequence number of the message to schedule.</param>
/// <param name="timestampNs">
/// Store time of the message in Unix-epoch nanoseconds. A non-positive value is
/// treated as "unknown" and the message is considered to have been stored just now.
/// </param>
/// <param name="ttlNs">Lifetime in nanoseconds; zero or negative makes this call a no-op.</param>
private void RegisterTtl(ulong seq, long timestampNs, long ttlNs)
{
    if (ttlNs <= 0)
        return;

    _ttlWheel ??= new HashWheel();

    // How long the message has already lived, measured on the wall clock.
    // A timestamp in the future (clock adjustment) counts as age zero so the
    // message never lives longer than one full TTL from now.
    var ageNs = 0L;
    if (timestampNs > 0)
    {
        var nowUnixNs = (DateTime.UtcNow - DateTime.UnixEpoch).Ticks * 100L; // 1 tick = 100 ns
        ageNs = Math.Max(0L, nowUnixNs - timestampNs);
    }

    // Already-overdue messages get a remaining lifetime of zero, i.e. they are
    // due as soon as the wheel is next scanned.
    var remainingNs = Math.Max(0L, ttlNs - ageNs);

    // Same Stopwatch-to-nanoseconds conversion as before, so the value stays in the
    // time domain ExpireTasks compares against.
    var nowStopwatchNs = (long)((double)System.Diagnostics.Stopwatch.GetTimestamp()
                                / System.Diagnostics.Stopwatch.Frequency * 1_000_000_000);

    // Saturate rather than wrap: an enormous TTL must not overflow into a
    // negative (immediately-expired) deadline.
    var expiresNs = remainingNs > long.MaxValue - nowStopwatchNs
        ? long.MaxValue
        : nowStopwatchNs + remainingNs;

    _ttlWheel.Add(seq, expiresNs);
}
|
||||
|
||||
/// <summary>
/// Drains every due entry from the TTL wheel and removes the corresponding
/// messages from the store.
/// <para>
/// Each expired sequence is dropped from the in-memory message map and handed to
/// <c>DeleteInBlock</c>. The last-sequence counter is not touched here, so sequence
/// numbers keep increasing even after messages expire.
/// </para>
/// When no wheel exists yet, the call is forwarded to <c>PruneExpiredLinear</c> instead.
/// Reference: golang/nats-server/server/filestore.go — expireMsgs using thw.ExpireTasks.
/// </summary>
private void ExpireFromWheel()
{
    var wheel = _ttlWheel;
    if (wheel is null)
    {
        // Nothing has been scheduled so far; defer to the linear scan.
        PruneExpiredLinear(DateTime.UtcNow);
        return;
    }

    // Collect first, delete afterwards, so the store is not mutated from inside
    // the wheel's callback.
    List<ulong> dueSequences = new();
    wheel.ExpireTasks((sequence, _) =>
    {
        dueSequences.Add(sequence);
        return true; // true => the wheel forgets this entry
    });

    // Deliberately no RewriteBlocks here: per the original author's note that
    // would reset the last sequence and break sequence continuity.
    // Reference: golang/nats-server/server/filestore.go:expireMsgs — dmap-based removal.
    foreach (var sequence in dueSequences)
    {
        _messages.Remove(sequence);
        DeleteInBlock(sequence);
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// O(n) fallback expiry scan used during recovery (before the wheel is warm)
|
||||
/// or when MaxAgeMs is set but no messages have been appended yet.
|
||||
/// </summary>
|
||||
private void PruneExpiredLinear(DateTime nowUtc)
|
||||
{
|
||||
if (_options.MaxAgeMs <= 0)
|
||||
return;
|
||||
@@ -851,6 +1005,9 @@ public sealed class FileStore : IStreamStore, IAsyncDisposable
|
||||
RewriteBlocks();
|
||||
}
|
||||
|
||||
// Legacy entry point retained under its original name (reportedly for the
// recovery path); it does nothing but forward to the linear scan.
private void PruneExpired(DateTime nowUtc)
{
    PruneExpiredLinear(nowUtc);
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Payload transform: compress + encrypt on write; reverse on read.
|
||||
//
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
// Go block load: filestore.go:8140-8260 (loadMsgs / msgFromBufEx)
|
||||
// Go deletion: filestore.go dmap (avl.SequenceSet) for soft-deletes
|
||||
// Go sealing: filestore.go rbytes check — block rolls when rbytes >= maxBytes
|
||||
// Go write cache: filestore.go msgBlock.cache — recently-written records kept in
|
||||
// memory to avoid disk reads on the hot path (cache field, clearCache method).
|
||||
//
|
||||
// MsgBlock is the unit of storage in the file store. Messages are appended
|
||||
// sequentially as binary records (using MessageRecord). Blocks are sealed
|
||||
@@ -33,6 +35,11 @@ public sealed class MsgBlock : IDisposable
|
||||
private ulong _totalWritten; // Total records written (including later-deleted)
|
||||
private bool _disposed;
|
||||
|
||||
// Go: msgBlock.cache — in-memory write cache for recently-written records.
|
||||
// Only the active (last) block maintains a cache; sealed blocks use disk reads.
|
||||
// Reference: golang/nats-server/server/filestore.go:236 (cache field)
|
||||
private Dictionary<ulong, MessageRecord>? _cache;
|
||||
|
||||
private MsgBlock(FileStream file, int blockId, long maxBytes, ulong firstSequence)
|
||||
{
|
||||
_file = file;
|
||||
@@ -113,6 +120,20 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether a write cache object currently exists for this block.
/// The check is made while holding the block's read lock. Intended for tests that
/// need to observe cache presence without seeing its contents.
/// </summary>
public bool HasCache
{
    get
    {
        bool present;

        _lock.EnterReadLock();
        try
        {
            present = _cache is { };
        }
        finally
        {
            _lock.ExitReadLock();
        }

        return present;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new empty block file.
|
||||
/// </summary>
|
||||
@@ -150,6 +171,8 @@ public sealed class MsgBlock : IDisposable
|
||||
|
||||
/// <summary>
|
||||
/// Appends a message to the block with an auto-assigned sequence number.
|
||||
/// Populates the write cache so subsequent reads can bypass disk.
|
||||
/// Reference: golang/nats-server/server/filestore.go:6700 (writeMsgRecord).
|
||||
/// </summary>
|
||||
/// <param name="subject">NATS subject.</param>
|
||||
/// <param name="headers">Optional message headers.</param>
|
||||
@@ -184,6 +207,11 @@ public sealed class MsgBlock : IDisposable
|
||||
|
||||
_index[sequence] = (offset, encoded.Length);
|
||||
|
||||
// Go: cache recently-written record to avoid disk reads on hot path.
|
||||
// Reference: golang/nats-server/server/filestore.go:6730 (cache population).
|
||||
_cache ??= new Dictionary<ulong, MessageRecord>();
|
||||
_cache[sequence] = record;
|
||||
|
||||
if (_totalWritten == 0)
|
||||
_firstSequence = sequence;
|
||||
|
||||
@@ -203,6 +231,8 @@ public sealed class MsgBlock : IDisposable
|
||||
/// Appends a message to the block with an explicit sequence number and timestamp.
|
||||
/// Used by FileStore when rewriting blocks from the in-memory cache where
|
||||
/// sequences may have gaps (from prior removals).
|
||||
/// Populates the write cache so subsequent reads can bypass disk.
|
||||
/// Reference: golang/nats-server/server/filestore.go:6700 (writeMsgRecord).
|
||||
/// </summary>
|
||||
/// <param name="sequence">Explicit sequence number to assign.</param>
|
||||
/// <param name="subject">NATS subject.</param>
|
||||
@@ -236,6 +266,11 @@ public sealed class MsgBlock : IDisposable
|
||||
|
||||
_index[sequence] = (offset, encoded.Length);
|
||||
|
||||
// Go: cache recently-written record to avoid disk reads on hot path.
|
||||
// Reference: golang/nats-server/server/filestore.go:6730 (cache population).
|
||||
_cache ??= new Dictionary<ulong, MessageRecord>();
|
||||
_cache[sequence] = record;
|
||||
|
||||
if (_totalWritten == 0)
|
||||
_firstSequence = sequence;
|
||||
|
||||
@@ -250,9 +285,10 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads a message by sequence number. Uses positional I/O
|
||||
/// (<see cref="RandomAccess.Read"/>) so concurrent readers don't
|
||||
/// interfere with each other or the writer's append position.
|
||||
/// Reads a message by sequence number.
|
||||
/// Checks the write cache first to avoid disk I/O for recently-written messages.
|
||||
/// Falls back to positional disk read if the record is not cached.
|
||||
/// Reference: golang/nats-server/server/filestore.go:8140 (loadMsgs / msgFromBufEx).
|
||||
/// </summary>
|
||||
/// <param name="sequence">The sequence number to read.</param>
|
||||
/// <returns>The decoded record, or null if not found or deleted.</returns>
|
||||
@@ -264,6 +300,11 @@ public sealed class MsgBlock : IDisposable
|
||||
if (_deleted.Contains(sequence))
|
||||
return null;
|
||||
|
||||
// Go: check cache first (msgBlock.cache lookup).
|
||||
// Reference: golang/nats-server/server/filestore.go:8155 (cache hit path).
|
||||
if (_cache is not null && _cache.TryGetValue(sequence, out var cached))
|
||||
return cached;
|
||||
|
||||
if (!_index.TryGetValue(sequence, out var entry))
|
||||
return null;
|
||||
|
||||
@@ -281,6 +322,7 @@ public sealed class MsgBlock : IDisposable
|
||||
/// <summary>
|
||||
/// Soft-deletes a message by sequence number. Re-encodes the record on disk
|
||||
/// with the deleted flag set (and updated checksum) so the deletion survives recovery.
|
||||
/// Also evicts the sequence from the write cache.
|
||||
/// </summary>
|
||||
/// <param name="sequence">The sequence number to delete.</param>
|
||||
/// <returns>True if the message was deleted; false if already deleted or not found.</returns>
|
||||
@@ -314,6 +356,9 @@ public sealed class MsgBlock : IDisposable
|
||||
var encoded = MessageRecord.Encode(deletedRecord);
|
||||
RandomAccess.Write(_handle, encoded, entry.Offset);
|
||||
|
||||
// Evict from write cache — the record is now deleted.
|
||||
_cache?.Remove(sequence);
|
||||
|
||||
return true;
|
||||
}
|
||||
finally
|
||||
@@ -322,6 +367,24 @@ public sealed class MsgBlock : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Drops the write cache so its memory can be reclaimed. The cache reference is
/// reset under the block's write lock; reads that previously hit the cache will
/// take the non-cached path afterwards. FileStore calls this on the outgoing block
/// when it rotates to a new one.
/// Reference: golang/nats-server/server/filestore.go — clearCache method on msgBlock.
/// </summary>
public void ClearCache()
{
    _lock.EnterWriteLock();
    try { _cache = null; }
    finally { _lock.ExitWriteLock(); }
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if the given sequence number has been soft-deleted in this block.
|
||||
@@ -377,6 +440,25 @@ public sealed class MsgBlock : IDisposable
|
||||
|
||||
foreach (var (offset, length, seq) in entries)
|
||||
{
|
||||
// Check the write cache first to avoid disk I/O.
|
||||
_lock.EnterReadLock();
|
||||
MessageRecord? cached = null;
|
||||
try
|
||||
{
|
||||
_cache?.TryGetValue(seq, out cached);
|
||||
}
|
||||
finally
|
||||
{
|
||||
_lock.ExitReadLock();
|
||||
}
|
||||
|
||||
if (cached is not null)
|
||||
{
|
||||
if (!cached.Deleted)
|
||||
yield return (cached.Sequence, cached.Subject);
|
||||
continue;
|
||||
}
|
||||
|
||||
var buffer = new byte[length];
|
||||
RandomAccess.Read(_handle, buffer, offset);
|
||||
var record = MessageRecord.Decode(buffer);
|
||||
@@ -464,6 +546,8 @@ public sealed class MsgBlock : IDisposable
|
||||
|
||||
_totalWritten = count;
|
||||
_writeOffset = offset;
|
||||
// Note: recovered blocks do not populate the write cache — reads go to disk.
|
||||
// The cache is only populated during active writes on the hot path.
|
||||
}
|
||||
|
||||
private static string BlockFilePath(string directoryPath, int blockId)
|
||||
|
||||
Reference in New Issue
Block a user