using System.Buffers.Binary;
using System.Collections.Concurrent;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using System.Threading.Channels;
using NATS.Server.JetStream.Models;
using NATS.Server.Internal.TimeHashWheel;

// Storage.StreamState is in this namespace. Use an alias for the API-layer type
// (now named ApiStreamState in the Models namespace) to keep method signatures clear.
using ApiStreamState = NATS.Server.JetStream.Models.ApiStreamState;

namespace NATS.Server.JetStream.Storage;

/// <summary>
/// Block-based file store for JetStream messages. Uses MsgBlock instances for
/// on-disk persistence and maintains an in-memory metadata index (_meta)
/// for fast existence checks and subject queries.
///
/// Reference: golang/nats-server/server/filestore.go — block manager, block rotation,
/// recovery via scanning .blk files, soft-delete via dmap.
/// </summary>
public sealed class FileStore : IStreamStore, IAsyncDisposable, IDisposable
{
    // Store configuration supplied to the constructor.
    private readonly FileStoreOptions _options;

    // Lightweight metadata index: keyed by sequence number. Stores only the fields
    // needed for existence checks, subject queries, and timestamp lookups.
    // Full message payloads are materialized on demand from blocks via MaterializeMessage.
    private readonly Dictionary _meta = new();

    /// <summary>
    /// Lightweight per-message metadata stored in the in-memory index.
    /// Eliminates the ~200B StoredMessage allocation on the write path.
    /// </summary>
    private struct MessageMeta
    {
        // Id of the block the message was written to.
        public int BlockId;

        // Subject the message was stored under.
        public string Subject;

        // Payload length in bytes as passed by the caller (before TransformForPersist).
        public int PayloadLength;

        // Header length in bytes (0 when the message has no headers).
        public int HeaderLength;

        // Store timestamp in Unix nanoseconds.
        public long TimestampNs;
    }

    // Highest live sequence per subject (ordinal subject comparison).
    private readonly Dictionary _lastSequenceBySubject = new(StringComparer.Ordinal);

    // Block-based storage: the active (writable) block and sealed blocks.
    private readonly List _blocks = [];
    private MsgBlock? _activeBlock;
    private int _nextBlockId;
    private ulong _last;
    private ulong _first; // Go: first.seq — watermark for the first live or expected-first sequence

    // Incremental state tracking — avoid O(n) scans in GetStateAsync/FastState.
// Updated in AppendAsync, StoreMsg, RemoveAsync, PurgeAsync, PurgeEx, Compact,
    // Truncate, TrimToMaxMessages, EnforceMaxMsgsPerSubject, and recovery.
    private ulong _messageCount;
    private ulong _totalBytes;
    private ulong _firstSeq;

    // Set to true after Stop() is called. Prevents further writes.
    private bool _stopped;

    // Resolved at construction time: which format family to use.
    private readonly bool _useS2; // true -> S2Codec (FSV2 compression path)
    private readonly bool _useAead; // true -> AeadEncryptor (FSV2 encryption path)
    private readonly bool _noTransform; // true -> no compression/encryption, TransformForPersist just copies

    // Go: filestore.go — per-stream time hash wheel for efficient TTL expiration.
    // Created lazily only when MaxAgeMs > 0. Entries are (seq, expires_ns) pairs.
    // Reference: golang/nats-server/server/filestore.go:290 (fss/ttl fields).
    private HashWheel? _ttlWheel;

    // Mutual-exclusion lock for state file writes. Ensures that concurrent
    // FlushAllPending calls (e.g. from a flush timer and a shutdown path) do not
    // race on the stream.state / stream.state.tmp files.
    // Reference: golang/nats-server/server/filestore.go — fsMsgBlock.mu (write lock).
    private readonly SemaphoreSlim _stateWriteLock = new(1, 1);

    // Go: filestore.go:4443 (setupWriteCache) — bounded write cache manager.
    // Manages TTL-based expiry and size cap across all active blocks.
    // Reference: golang/nats-server/server/filestore.go:6148 (expireCache).
    private readonly WriteCacheManager _writeCache;

    // Go: filestore.go:5841 — background flush loop coalesces buffered writes.
    // Reference: golang/nats-server/server/filestore.go:328-331 (coalesce constants).
    private readonly Channel _flushSignal = Channel.CreateBounded(1);
    private readonly CancellationTokenSource _flushCts = new();
    private Task? _flushTask;
    private const int CoalesceMinimum = 16 * 1024; // 16KB — Go: filestore.go:328
    private const int MaxFlushWaitMs = 8; // 8ms — Go: filestore.go:331

    // Go: filestore.go — generation counter for cache invalidation.
    // Incremented on every write (Append/StoreRawMsg), delete (Remove/Purge/Compact)
    // and snapshot restore.
    // NumFiltered caches results keyed by (filter, generation) so repeated calls for
    // the same filter within the same generation are O(1).
    // Reference: golang/nats-server/server/filestore.go — fss generation tracking.
    private ulong _generation;
    private readonly Dictionary _numFilteredCache = new(StringComparer.Ordinal);

    /// <summary>Number of blocks currently held in the block list.</summary>
    public int BlockCount => _blocks.Count;

    public bool UsedIndexManifestOnStartup { get; private set; }

    // IStreamStore cached state properties — O(1), maintained incrementally.
    public ulong LastSeq => _last;
    public ulong MessageCount => _messageCount;
    public ulong TotalBytes => _totalBytes;

    // When the store is empty the watermark (_first) is reported instead of _firstSeq.
    ulong IStreamStore.FirstSeq => _messageCount == 0 ? (_first > 0 ? _first : 0UL) : _firstSeq;

    /// <summary>
    /// Opens (or creates) the store in <c>options.Directory</c>, runs legacy migration
    /// and block recovery, and starts the background flush loop.
    /// </summary>
    /// <param name="options">Store configuration. A non-positive BlockSizeBytes is replaced with 64 KiB.</param>
    public FileStore(FileStoreOptions options)
    {
        _options = options;
        if (_options.BlockSizeBytes <= 0)
            _options.BlockSizeBytes = 64 * 1024;

        // Determine which format path is active.
        _useS2 = _options.Compression == StoreCompression.S2Compression;
        _useAead = _options.Cipher != StoreCipher.NoCipher;
        _noTransform = !_useS2 && !_useAead && !_options.EnableCompression && !_options.EnableEncryption;

        Directory.CreateDirectory(options.Directory);

        // Go: filestore.go:4443 (setupWriteCache) — initialise the bounded cache manager.
        // Reference: golang/nats-server/server/filestore.go:6148 (expireCache).
        // Created BEFORE migration/recovery: RotateBlock dereferences _writeCache whenever
        // an active block already exists, so any block rotation performed while migrating
        // or recovering would otherwise hit a null field.
        // NOTE(review): MigrateLegacyJsonl/RecoverBlocks are not visible in this chunk —
        // confirm they can rotate blocks; the earlier construction is harmless either way
        // because the block lookup delegate is only evaluated lazily.
        _writeCache = new WriteCacheManager(
            _options.MaxCacheSize,
            _options.CacheExpiry,
            blockId => _blocks.Find(b => b.BlockId == blockId));

        // Attempt legacy JSONL migration first, then recover from blocks.
        MigrateLegacyJsonl();
        RecoverBlocks();

        // Go: filestore.go:5841 — start background flush loop for write coalescing.
        _flushTask = Task.Run(() => FlushLoopAsync(_flushCts.Token));
    }

    /// <summary>
    /// Appends a header-less message and returns its assigned sequence number.
    /// Completes synchronously; <paramref name="ct"/> is not observed.
    /// </summary>
    /// <param name="subject">Subject to store the message under.</param>
    /// <param name="payload">Message payload (transformed via TransformForPersist before writing).</param>
    /// <param name="ct">Unused.</param>
    /// <exception cref="ObjectDisposedException">The store has been stopped.</exception>
    /// <exception cref="StoreCapacityException">DiscardPolicy.New is active and MaxBytes would be exceeded.</exception>
    public ValueTask AppendAsync(string subject, ReadOnlyMemory payload, CancellationToken ct)
    {
        if (_stopped)
            throw new ObjectDisposedException(nameof(FileStore), "Store has been stopped.");

        // Go: DiscardNew — reject when MaxBytes would be exceeded.
        if (_options.MaxBytes > 0 && _options.Discard == DiscardPolicy.New
            && (long)_totalBytes + payload.Length > _options.MaxBytes)
        {
            throw new StoreCapacityException("maximum bytes exceeded");
        }

        // Go: check and remove expired messages before each append.
        // Only when MaxAge is configured (Go: filestore.go:4701 conditional).
        if (_options.MaxAgeMs > 0)
            ExpireFromWheel();

        _last++;

        // Millisecond-resolution wall clock expressed in nanoseconds.
        var now = DateTime.UtcNow;
        var timestamp = new DateTimeOffset(now).ToUnixTimeMilliseconds() * 1_000_000L;

        // Go: writeMsgRecordLocked writes directly into mb.cache.buf (a single contiguous
        // byte slice). It does NOT store a per-message object in a map.
        // We store lightweight MessageMeta instead of full StoredMessage to avoid ~200B alloc.
        var persistedPayload = TransformForPersist(payload.Span);

        // Go: register TTL only when TTL > 0.
        if (_options.MaxAgeMs > 0)
            RegisterTtl(_last, timestamp, (long)_options.MaxAgeMs * 1_000_000L);

        // Write to MsgBlock first so we know the block ID for the meta entry.
        EnsureActiveBlock();
        try
        {
            _activeBlock!.WriteAt(_last, subject, ReadOnlyMemory.Empty, persistedPayload, timestamp);
        }
        catch (InvalidOperationException)
        {
            // The active block rejected the write: rotate once and retry on a fresh block.
            RotateBlock();
            _activeBlock!.WriteAt(_last, subject, ReadOnlyMemory.Empty, persistedPayload, timestamp);
        }

        // Track lightweight metadata now that we know the block ID.
        TrackMessage(_last, subject, 0, payload.Length, timestamp, _activeBlock!.BlockId);
        _generation++;

        _writeCache.TrackWrite(_activeBlock!.BlockId, persistedPayload.Length);
        _flushSignal.Writer.TryWrite(0);

        // Check if the block just became sealed after this write.
        if (_activeBlock!.IsSealed)
            RotateBlock();

        // Go: enforce MaxMsgsPerSubject — remove oldest messages for this subject
        // when the per-subject count exceeds the limit.
        // Reference: golang/nats-server/server/filestore.go — enforcePerSubjectLimit.
        if (_options.MaxMsgsPerSubject > 0 && !string.IsNullOrEmpty(subject))
            EnforceMaxMsgsPerSubject(subject);

        return ValueTask.FromResult(_last);
    }

    /// <summary>Loads the message stored at <paramref name="sequence"/>, or null when it cannot be materialized.</summary>
    public ValueTask LoadAsync(ulong sequence, CancellationToken ct)
    {
        return ValueTask.FromResult(MaterializeMessage(sequence));
    }

    /// <summary>Loads the most recent live message for <paramref name="subject"/>, or null when none is tracked.</summary>
    public ValueTask LoadLastBySubjectAsync(string subject, CancellationToken ct)
    {
        if (_lastSequenceBySubject.TryGetValue(subject, out var sequence))
        {
            var msg = MaterializeMessage(sequence);
            return ValueTask.FromResult(msg);
        }

        return ValueTask.FromResult(null);
    }

    /// <summary>Materializes every tracked message in ascending sequence order, skipping unreadable ones.</summary>
    public ValueTask> ListAsync(CancellationToken ct)
    {
        var messages = _meta.Keys
            .OrderBy(seq => seq)
            .Select(seq => MaterializeMessage(seq))
            .Where(m => m is not null)
            .Cast()
            .ToArray();
        return ValueTask.FromResult>(messages);
    }

    /// <summary>
    /// Removes a single message. Returns false when <paramref name="sequence"/> is not tracked.
    /// </summary>
    public ValueTask RemoveAsync(ulong sequence, CancellationToken ct)
    {
        if (!RemoveTrackedMessage(sequence, preserveHighWaterMark: false))
            return ValueTask.FromResult(false);

        _generation++;

        // Soft-delete in the block that contains this sequence.
        DeleteInBlock(sequence);

        return ValueTask.FromResult(true);
    }

    /// <summary>
    /// Removes every message, all block files, and any legacy JSONL / index manifest file.
    /// </summary>
    public ValueTask PurgeAsync(CancellationToken ct)
    {
        // Delegate to Purge() so both purge paths reset the same state. The previous
        // inline copy forgot _lastSequenceBySubject and _first, which left stale
        // per-subject pointers (LoadLastBySubjectAsync could later resolve them to a
        // message of a different subject once the sequence number was reused) and a
        // stale first-sequence watermark in the reported state.
        Purge();

        // Clean up any legacy files that might still exist.
        var jsonlPath = Path.Combine(_options.Directory, "messages.jsonl");
        if (File.Exists(jsonlPath))
            File.Delete(jsonlPath);

        var manifestPath = Path.Combine(_options.Directory, _options.IndexManifestFileName);
        if (File.Exists(manifestPath))
            File.Delete(manifestPath);

        return ValueTask.CompletedTask;
    }

    /// <summary>
    /// Serializes all readable messages (ascending sequence order) to UTF-8 JSON.
    /// Headers and the TransformForPersist-ed payload are Base64 encoded.
    /// </summary>
    public ValueTask CreateSnapshotAsync(CancellationToken ct)
    {
        var snapshot = _meta.Keys
            .OrderBy(seq => seq)
            .Select(seq =>
            {
                var msg = MaterializeMessage(seq);
                if (msg is null)
                    return null;

                return new FileRecord
                {
                    Sequence = msg.Sequence,
                    Subject = msg.Subject,
                    HeadersBase64 = msg.RawHeaders.IsEmpty ? null : Convert.ToBase64String(msg.RawHeaders.Span),
                    PayloadBase64 = Convert.ToBase64String(TransformForPersist(msg.Payload.Span)),
                    TimestampUtc = msg.TimestampUtc,
                };
            })
            .Where(r => r is not null)
            .ToArray();
        return ValueTask.FromResult(JsonSerializer.SerializeToUtf8Bytes(snapshot));
    }

    /// <summary>
    /// Replaces the entire store content with the records of a snapshot produced by
    /// CreateSnapshotAsync. An empty snapshot leaves the store empty.
    /// </summary>
    public ValueTask RestoreSnapshotAsync(ReadOnlyMemory snapshot, CancellationToken ct)
    {
        _meta.Clear();
        _lastSequenceBySubject.Clear();
        _last = 0;
        _messageCount = 0;
        _totalBytes = 0;
        _firstSeq = 0;
        _first = 0;

        // The message set is about to change wholesale: invalidate the
        // generation-keyed NumFiltered cache (previously left stale by a restore).
        _generation++;

        // Dispose existing blocks and clean files.
        DisposeAllBlocks();
        CleanBlockFiles();

        if (!snapshot.IsEmpty)
        {
            var records = JsonSerializer.Deserialize(snapshot.Span);
            if (records != null)
            {
                foreach (var record in records)
                {
                    var restoredHeaders = string.IsNullOrEmpty(record.HeadersBase64)
                        ? ReadOnlyMemory.Empty
                        : Convert.FromBase64String(record.HeadersBase64);
                    var restoredPayload = RestorePayload(Convert.FromBase64String(record.PayloadBase64 ?? string.Empty));
                    var persistedPayload = TransformForPersist(restoredPayload);
                    var timestampNs = new DateTimeOffset(record.TimestampUtc).ToUnixTimeMilliseconds() * 1_000_000L;
                    var subject = record.Subject ?? string.Empty;

                    _last = Math.Max(_last, record.Sequence);

                    EnsureActiveBlock();
                    try
                    {
                        _activeBlock!.WriteAt(record.Sequence, subject, restoredHeaders, persistedPayload, timestampNs);
                    }
                    catch (InvalidOperationException)
                    {
                        // Active block rejected the write: rotate once and retry.
                        RotateBlock();
                        _activeBlock!.WriteAt(record.Sequence, subject, restoredHeaders, persistedPayload, timestampNs);
                    }

                    TrackMessage(record.Sequence, subject, restoredHeaders.Length, restoredPayload.Length, timestampNs, _activeBlock!.BlockId);

                    if (_activeBlock!.IsSealed)
                        RotateBlock();
                }
            }
        }

        return ValueTask.CompletedTask;
    }

    /// <summary>Returns the incrementally maintained stream state (no scan).</summary>
    public ValueTask GetStateAsync(CancellationToken ct)
    {
        return ValueTask.FromResult(new ApiStreamState
        {
            Messages = _messageCount,
            // Empty store: report the watermark instead of the (zeroed) first live sequence.
            FirstSeq = _messageCount == 0 ? (_first > 0 ? _first : 0UL) : _firstSeq,
            LastSeq = _last,
            Bytes = _totalBytes,
        });
    }

    /// <summary>
    /// Removes the oldest messages until at most <paramref name="maxMessages"/> remain.
    /// The last sequence is preserved.
    /// </summary>
    public void TrimToMaxMessages(ulong maxMessages)
    {
        var trimmed = false;
        while ((ulong)_meta.Count > maxMessages)
        {
            var first = _firstSeq;
            if (first == 0 || !RemoveTrackedMessage(first, preserveHighWaterMark: true))
                break;

            DeleteInBlock(first);
            trimmed = true;
        }

        // Only invalidate caches when something was actually removed.
        if (trimmed)
            _generation++;
    }

    // -------------------------------------------------------------------------
    // Go-parity sync interface implementations
    // Reference: golang/nats-server/server/filestore.go
    // -------------------------------------------------------------------------

    /// <summary>
    /// Synchronously stores a message, optionally with a per-message TTL override.
    /// Returns the assigned sequence number and timestamp in nanoseconds.
    /// When <paramref name="ttl"/> is greater than zero it overrides MaxAgeMs for
    /// this specific message; otherwise the stream's MaxAgeMs applies.
    /// Reference: golang/nats-server/server/filestore.go:6790 (storeMsg).
    /// </summary>
    /// <param name="subject">Subject to store the message under.</param>
    /// <param name="hdr">Optional headers; null or empty means no headers.</param>
    /// <param name="msg">Payload; null is treated as empty.</param>
    /// <param name="ttl">Per-message TTL in nanoseconds, or a non-positive value to use MaxAgeMs.</param>
    /// <exception cref="ObjectDisposedException">The store has been stopped.</exception>
    public (ulong Seq, long Ts) StoreMsg(string subject, byte[]? hdr, byte[] msg, long ttl)
    {
        if (_stopped)
            throw new ObjectDisposedException(nameof(FileStore), "Store has been stopped.");

        // Go: expire check before each store (same as AppendAsync).
        // Reference: golang/nats-server/server/filestore.go:6793 (expireMsgs call).
        ExpireFromWheel();

        _last++;
        var now = DateTime.UtcNow;
        var timestamp = new DateTimeOffset(now).ToUnixTimeMilliseconds() * 1_000_000L;

        var headers = hdr is { Length: > 0 } ? hdr : [];
        var payload = msg ?? [];
        var persistedPayload = TransformForPersist(payload);

        // Determine effective TTL: per-message ttl (ns) takes priority over MaxAgeMs.
        // Go: filestore.go:6830 — if msg.ttl > 0 use it, else use cfg.MaxAge.
        var effectiveTtlNs = ttl > 0
            ? ttl
            : (_options.MaxAgeMs > 0 ? (long)_options.MaxAgeMs * 1_000_000L : 0L);
        RegisterTtl(_last, timestamp, effectiveTtlNs);

        EnsureActiveBlock();
        try
        {
            _activeBlock!.WriteAt(_last, subject, headers, persistedPayload, timestamp);
        }
        catch (InvalidOperationException)
        {
            // Active block rejected the write: rotate once and retry.
            RotateBlock();
            _activeBlock!.WriteAt(_last, subject, headers, persistedPayload, timestamp);
        }

        TrackMessage(_last, subject, headers.Length, payload.Length, timestamp, _activeBlock!.BlockId);
        _generation++;

        // Go: filestore.go:4443 (setupWriteCache) — record write in bounded cache manager.
        _writeCache.TrackWrite(_activeBlock!.BlockId, headers.Length + persistedPayload.Length);

        // Signal the background flush loop to coalesce and flush pending writes.
        _flushSignal.Writer.TryWrite(0);

        if (_activeBlock!.IsSealed)
            RotateBlock();

        return (_last, timestamp);
    }

    /// <summary>
    /// Removes all messages from the store and returns the count purged.
    /// Resets every sequence counter and deletes all block files.
    /// Reference: golang/nats-server/server/filestore.go — purge / purgeMsgs.
    /// </summary>
    public ulong Purge()
    {
        var count = (ulong)_meta.Count;
        _meta.Clear();
        _lastSequenceBySubject.Clear();
        _generation++;
        _last = 0;
        _messageCount = 0;
        _totalBytes = 0;
        _firstSeq = 0;
        _first = 0;

        DisposeAllBlocks();
        CleanBlockFiles();

        return count;
    }

    ///
    /// Purge messages on a given subject, up to sequence seq,
    /// keeping the newest keep messages.
    /// If subject is empty or null (and keep and seq are both 0), behaves like Purge.
    /// Returns the number of messages removed.
    /// Reference: golang/nats-server/server/filestore.go — PurgeEx.
///
    public ulong PurgeEx(string subject, ulong seq, ulong keep)
    {
        // Go parity: an empty subject with keep=0 and seq=0 is a full purge. Any other
        // combination walks the candidates below (an empty subject matches everything).
        var isFullPurge = string.IsNullOrEmpty(subject) && keep == 0 && seq == 0;
        if (isFullPurge)
            return Purge();

        // Never look past the last assigned sequence.
        var upperBound = seq == 0 ? _last : Math.Min(seq, _last);
        if (upperBound == 0)
            return 0;

        // A start of 0 means "nothing to walk" (empty store); the cursor != 0 test
        // also stops the walk if the cursor ever wraps around.
        var start = _messageCount == 0 ? 0UL : _first;

        // Pass 1: count live messages in range that match the subject filter.
        ulong matching = 0;
        var cursor = start;
        while (cursor != 0 && cursor <= upperBound)
        {
            if (_meta.TryGetValue(cursor, out var entry) && SubjectMatchesFilter(entry.Subject, subject))
                matching++;
            cursor++;
        }

        if (matching == 0)
            return 0;

        // How many of the oldest matches must go so that `keep` of them survive.
        ulong quota;
        if (keep == 0)
            quota = matching;
        else if (matching > keep)
            quota = matching - keep;
        else
            quota = 0;

        if (quota == 0)
            return 0;

        // Pass 2: remove matches oldest-first until the quota is met.
        ulong removed = 0;
        cursor = start;
        while (cursor != 0 && cursor <= upperBound && removed < quota)
        {
            if (_meta.TryGetValue(cursor, out var victim)
                && SubjectMatchesFilter(victim.Subject, subject)
                && RemoveTrackedMessage(cursor, preserveHighWaterMark: true))
            {
                DeleteInBlock(cursor);
                removed++;
            }

            cursor++;
        }

        if (removed > 0)
            _generation++;

        return removed;
    }

    /// <summary>
    /// Removes all messages with sequence number strictly less than
    /// <paramref name="seq"/> and returns the count removed. The last sequence is preserved.
    /// Reference: golang/nats-server/server/filestore.go — Compact.
    /// </summary>
    public ulong Compact(ulong seq)
    {
        if (seq == 0)
            return 0;

        // Snapshot the doomed keys first: _meta is mutated while removing.
        var doomed = _meta.Keys.Where(key => key < seq).ToArray();
        if (doomed.Length == 0)
            return 0;

        foreach (var sequence in doomed)
        {
            RemoveTrackedMessage(sequence, preserveHighWaterMark: true);
            DeleteInBlock(sequence);
        }

        _generation++;

        // Go: preserve _last (monotonically increasing). When nothing is left the new
        // first is seq itself; otherwise it is the first surviving sequence.
        var emptied = _meta.Count == 0;
        _first = emptied ? seq : _firstSeq;
        if (emptied)
            _firstSeq = 0;

        return (ulong)doomed.Length;
    }

    /// <summary>
    /// Removes all messages with sequence number strictly greater than
    /// <paramref name="seq"/>. A value of 0 empties the store and deletes all block files.
    /// Reference: golang/nats-server/server/filestore.go — Truncate.
    /// </summary>
    public void Truncate(ulong seq)
    {
        if (seq == 0)
        {
            // Truncate to nothing: reset all state and drop every block.
            _generation++;
            _meta.Clear();
            _lastSequenceBySubject.Clear();
            _messageCount = 0;
            _totalBytes = 0;
            _first = 0;
            _firstSeq = 0;
            _last = 0;

            DisposeAllBlocks();
            CleanBlockFiles();
            return;
        }

        var doomed = _meta.Keys.Where(key => key > seq).ToArray();
        foreach (var sequence in doomed)
        {
            // Not preserving the high-water mark lets RemoveTrackedMessage pull _last
            // back to the previous live sequence whenever the current last is removed.
            RemoveTrackedMessage(sequence, preserveHighWaterMark: false);
            DeleteInBlock(sequence);
        }

        if (doomed.Length > 0)
            _generation++;

        // Nothing left at all: reset every sequence pointer.
        if (_messageCount == 0)
        {
            _firstSeq = 0;
            _first = 0;
            _last = 0;
        }
    }

    /// <summary>
    /// Returns the lowest sequence whose timestamp is at or after the given time
    /// (compared at millisecond resolution, in UTC).
    /// Returns _last + 1 if no message exists at or after <paramref name="t"/>.
    /// Reference: golang/nats-server/server/filestore.go — GetSeqFromTime.
    /// </summary>
    public ulong GetSeqFromTime(DateTime t)
    {
        var utc = t.Kind == DateTimeKind.Utc ? t : t.ToUniversalTime();
        var targetNs = new DateTimeOffset(utc).ToUnixTimeMilliseconds() * 1_000_000L;

        // _meta has no defined enumeration order, so track the minimum explicitly.
        var found = false;
        ulong lowest = 0;
        foreach (var (sequence, info) in _meta)
        {
            if (info.TimestampNs < targetNs)
                continue;

            if (!found || sequence < lowest)
            {
                lowest = sequence;
                found = true;
            }
        }

        return found ? lowest : _last + 1;
    }

    ///
    /// Returns compact state (count, first and last sequence) for non-deleted
    /// messages matching subject at or after sequence seq.
    ///
    /// The live-message index _meta is scanned directly and filtered by
    /// sequence and subject.
    /// Reference: golang/nats-server/server/filestore.go:3191 (FilteredState).
///
    public SimpleState FilteredState(ulong seq, string subject)
    {
        // _meta is the authoritative in-memory index of live messages, so it is
        // filtered directly. Its enumeration order is unspecified, hence first/last
        // are tracked as an explicit min/max rather than relying on ordering.
        // (An earlier version also computed a start-block index via
        // FindFirstBlockAtOrAfter but never used the result; that dead call is gone.)
        ulong count = 0;
        ulong first = 0;
        ulong last = 0;

        foreach (var kv in _meta)
        {
            var msgSeq = kv.Key;
            if (msgSeq < seq)
                continue;

            if (!string.IsNullOrEmpty(subject) && !SubjectMatchesFilter(kv.Value.Subject, subject))
                continue;

            count++;

            // Seed from the first match instead of treating 0 as "unset", so a
            // legitimately small first sequence can never be overwritten.
            if (count == 1 || msgSeq < first)
                first = msgSeq;
            if (msgSeq > last)
                last = msgSeq;
        }

        if (count == 0)
            return new SimpleState();

        return new SimpleState
        {
            Msgs = count,
            First = first,
            Last = last,
        };
    }

    /// <summary>
    /// Binary-searches _blocks for the index of the first block whose
    /// LastSequence is at or after <paramref name="seq"/>.
    /// Returns the block index (0-based), or _blocks.Count if none qualify
    /// (which is 0 for an empty block list).
    /// Assumes blocks are ordered by ascending sequence range.
    /// Reference: golang/nats-server/server/filestore.go:3191 (FilteredState block walk).
    /// </summary>
    private int FindFirstBlockAtOrAfter(ulong seq)
    {
        if (_blocks.Count == 0)
            return 0;

        int lo = 0, hi = _blocks.Count - 1;
        while (lo < hi)
        {
            var mid = lo + (hi - lo) / 2;
            if (_blocks[mid].LastSequence < seq)
                lo = mid + 1;
            else
                hi = mid;
        }

        // lo is the only remaining candidate; it qualifies only if it reaches seq.
        return (_blocks[lo].LastSequence >= seq) ? lo : _blocks.Count;
    }

    ///
    /// Returns true if the given firstBlock can be skipped entirely
    /// because none of its subjects could match filter.
///
    /// Only a block with no written messages is ever reported as skippable.
    /// Skipping by subject would need a per-block subject index, which is
    /// deferred; this method is the extension point for that optimization.
    /// Reference: golang/nats-server/server/filestore.go (block-level subject tracking).
    ///
    public static bool CheckSkipFirstBlock(string filter, MsgBlock firstBlock)
    {
        if (firstBlock.MessageCount != 0)
        {
            // The block holds messages. Whether the filter is empty (matches
            // everything) or not, there is no per-block subject metadata to prove
            // a miss, so conservatively do not skip.
            return false;
        }

        // Provably empty block: nothing in it can match any filter.
        return true;
    }

    /// <summary>
    /// Returns per-subject SimpleState (count, first and last sequence) for all
    /// subjects matching <paramref name="filterSubject"/>. Supports NATS wildcard
    /// filters; an empty or null filter matches every subject.
    /// Reference: golang/nats-server/server/filestore.go — SubjectsState.
    /// </summary>
    public Dictionary SubjectsState(string filterSubject)
    {
        var result = new Dictionary(StringComparer.Ordinal);
        var matchAll = string.IsNullOrEmpty(filterSubject);

        foreach (var (seq, meta) in _meta)
        {
            var included = matchAll || SubjectMatchesFilter(meta.Subject, filterSubject);
            if (!included)
                continue;

            // Fold this sequence into the subject's running state, or start a new one.
            result[meta.Subject] = result.TryGetValue(meta.Subject, out var existing)
                ? new SimpleState
                {
                    Msgs = existing.Msgs + 1,
                    First = Math.Min(existing.First == 0 ? seq : existing.First, seq),
                    Last = Math.Max(existing.Last, seq),
                }
                : new SimpleState
                {
                    Msgs = 1,
                    First = seq,
                    Last = seq,
                };
        }

        return result;
    }

    ///
    /// Returns per-subject message counts for all subjects matching
    /// filterSubject. Supports NATS wildcard filters.
    /// Reference: golang/nats-server/server/filestore.go — SubjectsTotals.
///
    public Dictionary SubjectsTotals(string filterSubject)
    {
        var result = new Dictionary(StringComparer.Ordinal);
        foreach (var meta in _meta.Values)
        {
            if (!string.IsNullOrEmpty(filterSubject) && !SubjectMatchesFilter(meta.Subject, filterSubject))
                continue;

            // A missing subject yields the default count (0), so this is "increment or insert".
            result.TryGetValue(meta.Subject, out var count);
            result[meta.Subject] = count + 1;
        }

        return result;
    }

    /// <summary>
    /// Returns the full stream state, including the list of deleted (interior gap) sequences
    /// and per-subject message counts.
    /// Reference: golang/nats-server/server/filestore.go — State.
    /// </summary>
    public StreamState State()
    {
        var state = new StreamState();
        FastState(ref state);

        // Populate deleted sequences: sequences in [firstSeq, lastSeq] that are
        // not present in _meta.
        if (state.FirstSeq > 0 && state.LastSeq >= state.FirstSeq)
        {
            var deletedList = new List();
            for (var s = state.FirstSeq; s <= state.LastSeq; s++)
            {
                if (!_meta.ContainsKey(s))
                    deletedList.Add(s);
            }

            if (deletedList.Count > 0)
            {
                state.Deleted = [.. deletedList];
                state.NumDeleted = deletedList.Count;
            }
        }

        // Populate per-subject counts.
        var subjectCounts = new Dictionary(StringComparer.Ordinal);
        foreach (var meta in _meta.Values)
        {
            subjectCounts.TryGetValue(meta.Subject, out var cnt);
            subjectCounts[meta.Subject] = cnt + 1;
        }

        state.NumSubjects = subjectCounts.Count;
        state.Subjects = subjectCounts.Count > 0 ? subjectCounts : null;

        return state;
    }

    /// <summary>
    /// Populates a pre-allocated StreamState with the minimum fields
    /// needed for replication without allocating a new struct.
    /// Does not populate the Deleted array or Subjects dictionary (see State).
    /// Reference: golang/nats-server/server/filestore.go — FastState.
    /// </summary>
    public void FastState(ref StreamState state)
    {
        state.Msgs = _messageCount;
        state.Bytes = _totalBytes;
        state.LastSeq = _last;
        state.LastTime = default;

        if (_messageCount == 0)
        {
            // Go: when all messages are removed/expired, first.seq tracks the watermark.
            // If _first > 0 use it (set by Compact / SkipMsg); otherwise 0.
            state.FirstSeq = _first > 0 ? _first : 0;
            state.FirstTime = default;
            state.NumDeleted = 0;
        }
        else
        {
            var firstSeq = _firstSeq;
            state.FirstSeq = firstSeq;
            state.FirstTime = _meta.TryGetValue(firstSeq, out var firstMeta)
                ? DateTimeOffset.FromUnixTimeMilliseconds(firstMeta.TimestampNs / 1_000_000).UtcDateTime
                : default;

            // Go parity: LastTime from the actual last stored message (not _last,
            // which may be a skip/tombstone sequence with no corresponding message).
            if (_meta.TryGetValue(_last, out var lastMeta))
                state.LastTime = DateTimeOffset.FromUnixTimeMilliseconds(lastMeta.TimestampNs / 1_000_000).UtcDateTime;
            else if (_meta.Count > 0)
            {
                // _last is a skip — use the highest actual message time.
                var actualLast = _meta.Keys.Max();
                var actualMeta = _meta[actualLast];
                state.LastTime = DateTimeOffset.FromUnixTimeMilliseconds(actualMeta.TimestampNs / 1_000_000).UtcDateTime;
            }

            // Go parity: NumDeleted = gaps between firstSeq and lastSeq not in _meta.
            // Reference: filestore.go — FastState sets state.NumDeleted.
            if (_last >= firstSeq)
            {
                var span = _last - firstSeq + 1;
                var liveCount = (ulong)_meta.Count;
                state.NumDeleted = span > liveCount ? (int)(span - liveCount) : 0;
            }
            else
                state.NumDeleted = 0;
        }
    }

    /// <summary>
    /// Materializes a full StoredMessage from block storage using
    /// the lightweight metadata index. Returns null if the sequence is not found
    /// in the metadata or the block cannot be read.
    /// </summary>
    private StoredMessage? MaterializeMessage(ulong seq)
    {
        if (!_meta.TryGetValue(seq, out var meta))
            return null;

        // Look up the block by ID first (O(n) scan, but blocks list is small).
        // Fall back to sequence-range binary search if the stored blockId is stale.
        var block = _blocks.Find(b => b.BlockId == meta.BlockId) ?? FindBlockForSequence(seq);
        if (block is null)
            return null;

        var record = block.Read(seq);
        if (record is null)
            return null;

        // Undo TransformForPersist so callers see the original payload.
        var payload = RestorePayload(record.Payload.Span);
        return new StoredMessage
        {
            Sequence = seq,
            Subject = meta.Subject,
            RawHeaders = record.Headers,
            Payload = payload,
            TimestampUtc = DateTimeOffset.FromUnixTimeMilliseconds(meta.TimestampNs / 1_000_000).UtcDateTime,
        };
    }

    /// <summary>
    /// Records a newly written message in the metadata index and updates the
    /// incremental counters (count, bytes, per-subject last, first sequence).
    /// </summary>
    private void TrackMessage(ulong sequence, string subject, int headerLength, int payloadLength, long timestampNs, int blockId)
    {
        _meta[sequence] = new MessageMeta
        {
            BlockId = blockId,
            Subject = subject,
            PayloadLength = payloadLength,
            HeaderLength = headerLength,
            TimestampNs = timestampNs,
        };
        _lastSequenceBySubject[subject] = sequence;
        _messageCount++;
        _totalBytes += (ulong)(headerLength + payloadLength);

        // First message after the store was empty defines both first pointers.
        if (_messageCount == 1)
        {
            _first = sequence;
            _firstSeq = sequence;
        }
    }

    /// <summary>
    /// Removes a message from the metadata index and updates counters and sequence pointers.
    /// Returns false when the sequence is not tracked.
    /// </summary>
    /// <param name="sequence">Sequence to remove.</param>
    /// <param name="preserveHighWaterMark">
    /// When true, _last is left untouched (and _first becomes _last + 1 if the store
    /// empties). When false, _last is pulled back to the previous live sequence, or
    /// reset to 0 if the store empties.
    /// </param>
    private bool RemoveTrackedMessage(ulong sequence, bool preserveHighWaterMark)
    {
        if (!_meta.Remove(sequence, out var meta))
            return false;

        _messageCount--;
        _totalBytes -= (ulong)(meta.HeaderLength + meta.PayloadLength);
        UpdateLastSequenceForSubject(meta.Subject, sequence);

        if (_messageCount == 0)
        {
            if (preserveHighWaterMark)
                _first = _last + 1;
            else
                _last = 0;
            _firstSeq = 0;
            return true;
        }

        if (sequence == _firstSeq)
            AdvanceFirstSequence(sequence + 1);

        if (!preserveHighWaterMark && sequence == _last)
            _last = FindPreviousLiveSequence(sequence);

        return true;
    }

    /// <summary>
    /// Moves the first-sequence pointers to the first live sequence at or after
    /// <paramref name="start"/>; if none exists up to _last, sets the watermark to _last + 1.
    /// </summary>
    private void AdvanceFirstSequence(ulong start)
    {
        var candidate = start;
        while (!_meta.ContainsKey(candidate) && candidate <= _last)
            candidate++;

        if (candidate <= _last)
        {
            _first = candidate;
            _firstSeq = candidate;
            return;
        }

        _first = _last + 1;
        _firstSeq = 0;
    }

    /// <summary>
    /// Returns the highest live sequence strictly below <paramref name="startExclusive"/>,
    /// or 0 when there is none.
    /// </summary>
    private ulong FindPreviousLiveSequence(ulong startExclusive)
    {
        if (_messageCount == 0 || startExclusive == 0)
            return 0;

        for (var seq = startExclusive - 1; ; seq--)
        {
            if (_meta.ContainsKey(seq))
                return seq;
            if (seq == 0)
                return 0;
        }
    }

    /// <summary>
    /// After <paramref name="removedSequence"/> was removed, repairs the per-subject
    /// "last sequence" pointer: if it pointed at the removed message it is moved to
    /// the next lower live message of the same subject, or dropped when none remains.
    /// </summary>
    private void UpdateLastSequenceForSubject(string subject, ulong removedSequence)
    {
        if (!_lastSequenceBySubject.TryGetValue(subject, out var currentLast) || currentLast != removedSequence)
            return;

        // Walk removedSequence-1 .. 0. Decrementing inside the loop (instead of
        // starting at removedSequence - 1) avoids the unsigned wrap-around to
        // ulong.MaxValue — and the resulting near-endless scan — when sequence 0
        // itself is removed, mirroring the guard in FindPreviousLiveSequence.
        var seq = removedSequence;
        while (seq > 0)
        {
            seq--;
            if (_meta.TryGetValue(seq, out var candidate) && string.Equals(candidate.Subject, subject, StringComparison.Ordinal))
            {
                _lastSequenceBySubject[subject] = seq;
                return;
            }
        }

        // No older live message on this subject.
        _lastSequenceBySubject.Remove(subject);
    }

    /// <summary>
    /// Recomputes all derived counters and pointers from the current _meta content.
    /// </summary>
    private void RebuildIndexesFromMessages()
    {
        _lastSequenceBySubject.Clear();
        _messageCount = 0;
        _totalBytes = 0;
        _firstSeq = 0;
        _first = 0;

        // Re-derive counters from the existing _meta entries without re-inserting.
        // Ascending order guarantees the per-subject "last" ends on the highest sequence.
        foreach (var kv in _meta.OrderBy(kv => kv.Key))
        {
            var seq = kv.Key;
            var meta = kv.Value;
            _lastSequenceBySubject[meta.Subject] = seq;
            _messageCount++;
            _totalBytes += (ulong)(meta.HeaderLength + meta.PayloadLength);
            if (_messageCount == 1)
            {
                _first = seq;
                _firstSeq = seq;
            }
        }

        // Empty store with history: keep the watermark just past the last sequence.
        if (_messageCount == 0 && _last > 0)
            _first = _last + 1;
    }

    // -------------------------------------------------------------------------
    // Subject matching helper
    // -------------------------------------------------------------------------

    /// <summary>
    /// Returns true if <paramref name="subject"/> matches <paramref name="filter"/>.
    /// An empty or null filter matches every subject; otherwise matching is delegated
    /// to NATS.Server.Subscriptions.SubjectMatch.SubjectMatchesFilter.
    /// Reference: golang/nats-server/server/filestore.go — subjectMatch helper.
    /// </summary>
    private static bool SubjectMatchesFilter(string subject, string filter)
    {
        if (string.IsNullOrEmpty(filter))
            return true;

        return NATS.Server.Subscriptions.SubjectMatch.SubjectMatchesFilter(subject, filter);
    }

    ///
    /// Counts messages whose subject matches filter across all
    /// blocks. Results are cached per-filter keyed on a generation counter that is
    /// incremented on every write/delete. Repeated calls with the same filter and
    /// the same generation are O(1) after the first scan.
    ///
    /// An empty or null filter counts all messages.
/// Reference: golang/nats-server/server/filestore.go — fss NumFiltered (subject-state cache).
    ///
    public ulong NumFiltered(string filter)
    {
        var cacheKey = filter ?? string.Empty;

        // A cached entry is only valid for the generation it was computed in.
        var isFresh = _numFilteredCache.TryGetValue(cacheKey, out var entry)
            && entry.Generation == _generation;
        if (isFresh)
            return entry.Count;

        // Miss (or stale): rescan the index and remember the result.
        ulong matches = 0;
        foreach (var info in _meta.Values)
        {
            if (!SubjectMatchesFilter(info.Subject, cacheKey))
                continue;
            matches++;
        }

        _numFilteredCache[cacheKey] = (_generation, matches);
        return matches;
    }

    /// <summary>
    /// Asynchronously shuts the store down: stops the flush loop, flushes and
    /// disposes all blocks, then disposes the write cache and the state-write semaphore.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        // The background flush loop must be stopped before anything else so it
        // cannot touch blocks that are about to be disposed.
        await StopFlushLoopAsync();

        // Push out whatever is still buffered in every block.
        _blocks.ForEach(block => block.FlushPending());

        // Blocks go before the write cache: once they are gone the cache's block
        // lookup yields null, and WriteCacheManager.FlushAllAsync guards against
        // null blocks — this ordering prevents ObjectDisposedException.
        DisposeAllBlocks();
        await _writeCache.DisposeAsync();

        _stateWriteLock.Dispose();
    }

    /// <summary>
    /// Synchronous counterpart of DisposeAsync — stops the flush loop, flushes and
    /// releases all block file handles, and disposes the state-write semaphore.
    /// Lets the store be used with <c>using</c> blocks in synchronous test code.
    /// </summary>
    public void Dispose()
    {
        StopFlushLoop();

        _blocks.ForEach(block => block.FlushPending());

        DisposeAllBlocks();
        _stateWriteLock.Dispose();
    }

    ///
    /// Stops the store and deletes all persisted data (blocks, index files).
    /// Reference: golang/nats-server/server/filestore.go — fileStore.Delete.
///
    public void Delete(bool inline = false)
    {
        Stop();

        if (!Directory.Exists(_options.Directory))
            return;

        try
        {
            Directory.Delete(_options.Directory, recursive: true);
        }
        catch { /* best effort */ }
    }

    // -------------------------------------------------------------------------
    // Block management
    // -------------------------------------------------------------------------

    /// <summary>
    /// Guarantees that a writable block is available, rotating to a fresh one
    /// when there is no active block or the active block is sealed.
    /// </summary>
    private void EnsureActiveBlock()
    {
        // Already have an unsealed active block: nothing to do.
        if (_activeBlock is { IsSealed: false })
            return;

        RotateBlock();
    }

    /// <summary>
    /// Creates a new active block. The previous active block (if any) stays in the
    /// block list. The new block's firstSequence is _last + 1 (the next expected
    /// sequence), but actual sequences come from WriteAt calls.
    /// The outgoing block is flushed and its write cache released to free memory.
    /// Reference: golang/nats-server/server/filestore.go — clearCache called on block seal.
    /// </summary>
    private void RotateBlock()
    {
        var outgoing = _activeBlock;
        if (outgoing is not null)
        {
            // Flush any pending buffered writes before leaving the outgoing block.
            outgoing.FlushPending();

            // Go: filestore.go:4499 (flushPendingMsgsLocked) — evict the outgoing block's
            // write cache via WriteCacheManager before rotating to the new block.
            _writeCache.EvictBlock(outgoing.BlockId);

            // Drop the block's own cache as well; this frees memory, and later reads
            // on the retired block go to disk.
            outgoing.ClearCache();
        }

        var nextFirstSeq = _last + 1;
        var fresh = MsgBlock.Create(_nextBlockId, _options.Directory, _options.BlockSizeBytes, nextFirstSeq);
        _blocks.Add(fresh);
        _activeBlock = fresh;

        // Only consume the block id once the block was created successfully.
        _nextBlockId++;
    }

    ///
    /// Soft-deletes a message in the block that contains it.
    /// When secureErase is true, payload bytes are
    /// overwritten with random data before the delete record is written.
    /// Reference: golang/nats-server/server/filestore.go:5890 (eraseMsg).
///
    private void DeleteInBlock(ulong sequence, bool secureErase = false)
    {
        // The first block whose [FirstSequence, LastSequence] range covers the
        // sequence receives the delete; when no block covers it this is a no-op.
        foreach (var block in _blocks)
        {
            if (sequence < block.FirstSequence || sequence > block.LastSequence)
                continue;

            block.Delete(sequence, secureErase);
            return;
        }
    }

    /// <summary>
    /// Disposes every block, empties the block list and resets the block ID counter.
    /// </summary>
    private void DisposeAllBlocks()
    {
        // Clear _activeBlock first so the background flush loop sees null
        // and skips FlushPending, avoiding ObjectDisposedException on the lock.
        _activeBlock = null;

        foreach (var block in _blocks)
            block.Dispose();

        _blocks.Clear();
        _nextBlockId = 0;
    }

    /// <summary>
    /// Deletes every <c>*.blk</c> file in the store directory. Failures on individual
    /// files are ignored (best effort); a missing directory is a no-op.
    /// </summary>
    private void CleanBlockFiles()
    {
        if (!Directory.Exists(_options.Directory))
            return;

        foreach (var blkFile in Directory.GetFiles(_options.Directory, "*.blk"))
        {
            try { File.Delete(blkFile); }
            catch { /* best effort */ }
        }
    }

    /// <summary>
    /// Rewrites all blocks from the messages currently tracked in <c>_meta</c>.
    /// Every tracked message is materialized, the existing blocks and their files are
    /// discarded, and the messages are written back in ascending sequence order into
    /// fresh blocks. <c>_last</c> becomes the highest rewritten sequence (0 when empty).
    /// </summary>
    private void RewriteBlocks()
    {
        // Materialize before disposing blocks: materialization reads from the blocks.
        var messages = _meta.Keys
            .OrderBy(seq => seq)
            .Select(seq => MaterializeMessage(seq))
            .Where(m => m is not null)
            .Select(m => m!)
            .ToList();

        DisposeAllBlocks();
        CleanBlockFiles();

        _meta.Clear();
        _last = messages.Count == 0 ? 0UL : messages[^1].Sequence;

        foreach (var message in messages)
        {
            var persistedPayload = TransformForPersist(message.Payload.Span);
            var timestamp = new DateTimeOffset(message.TimestampUtc).ToUnixTimeMilliseconds() * 1_000_000L;

            EnsureActiveBlock();
            try
            {
                _activeBlock!.WriteAt(message.Sequence, message.Subject, message.RawHeaders, persistedPayload, timestamp);
            }
            catch (InvalidOperationException)
            {
                // The active block rejected the write: rotate and retry once on a new block.
                RotateBlock();
                _activeBlock!.WriteAt(message.Sequence, message.Subject, message.RawHeaders, persistedPayload, timestamp);
            }

            _meta[message.Sequence] = new MessageMeta
            {
                BlockId = _activeBlock!.BlockId,
                Subject = message.Subject,
                PayloadLength = message.Payload.Length,
                HeaderLength = message.RawHeaders.Length,
                TimestampNs = timestamp,
            };

            if (_activeBlock!.IsSealed)
                RotateBlock();
        }

        RebuildIndexesFromMessages();
    }

    // -------------------------------------------------------------------------
    // Recovery: scan .blk files on startup and rebuild in-memory state.
    // -------------------------------------------------------------------------

    /// <summary>
    /// Recovers all blocks from the <c>*.blk</c> files in the store directory and
    /// rebuilds the in-memory metadata, the block list, <c>_nextBlockId</c> and <c>_last</c>.
    /// </summary>
    /// <remarks>
    /// Blocks are recovered in ascending numeric block-ID order. Sorting the file paths
    /// as strings is only correct while every ID fits the zero-padded width of the file
    /// name; sorting the parsed IDs keeps <c>_blocks</c> ordered for any ID and also
    /// prevents the same ID from being recovered twice when two file names parse to it.
    /// </remarks>
    /// <exception cref="InvalidDataException">
    /// Propagated from block recovery (key mismatch or integrity failure) so the caller
    /// knows the store cannot be opened. Any other per-block failure skips that block.
    /// </exception>
    private void RecoverBlocks()
    {
        var blkFiles = Directory.GetFiles(_options.Directory, "*.blk");
        if (blkFiles.Length == 0)
            return;

        // File names look like "000000.blk", "000001.blk", ...; non-numeric names are ignored.
        var blockIds = new SortedSet<int>();
        foreach (var blkFile in blkFiles)
        {
            var fileName = Path.GetFileNameWithoutExtension(blkFile);
            if (int.TryParse(fileName, out var parsedId))
                blockIds.Add(parsedId);
        }

        var maxBlockId = -1;

        foreach (var blockId in blockIds)
        {
            try
            {
                var block = MsgBlock.Recover(blockId, _options.Directory);
                _blocks.Add(block);

                if (blockId > maxBlockId)
                    maxBlockId = blockId;

                // Read all non-deleted records from this block and populate the in-memory cache.
                RecoverMessagesFromBlock(block);
            }
            catch (Exception ex) when (ex is not InvalidDataException)
            {
                // Skip corrupted blocks — non-critical recovery errors.
                // InvalidDataException is deliberately not caught here: it signals a key
                // mismatch or integrity failure and must reach the caller.
            }
        }

        _nextBlockId = maxBlockId + 1;

        // The most recent block becomes the active block. If it turns out to be sealed,
        // EnsureActiveBlock rotates to a new block on the next write.
        if (_blocks.Count > 0)
            _activeBlock = _blocks[^1];

        // After recovery, sync _last from skip-sequence high-water marks.
        // SkipMsg/SkipMsgs write tombstone records with empty subject — these
        // intentionally advance _last without storing a live message. We must
        // include them in the high-water mark so the next StoreMsg gets the
        // correct sequence number.
        // We do NOT use block.LastSequence blindly because that includes
        // soft-deleted real messages at the tail (e.g., after Truncate or
        // RemoveMsg of the last message), which must not inflate _last.
        // Go: filestore.go — recovery sets state.LastSeq from lmb.last.seq.
        foreach (var blk in _blocks)
        {
            var maxSkip = blk.MaxSkipSequence;
            if (maxSkip > _last)
                _last = maxSkip;
        }

        // If no messages and no skips were found, fall back to block.LastSequence
        // to preserve watermarks from purge or full-delete scenarios.
        if (_last == 0)
        {
            foreach (var blk in _blocks)
            {
                var blkLast = blk.LastSequence;
                if (blkLast > _last)
                    _last = blkLast;
            }
        }

        // Prune expired messages AFTER establishing _last from blocks/skips.
        // If all messages expire, PruneExpiredLinear resets _last to 0.
        PruneExpired(DateTime.UtcNow);

        // Sync _first from _meta; if empty, set to _last+1 (watermark).
        RebuildIndexesFromMessages();
    }

    /// <summary>
    /// Reads every readable record in <paramref name="block"/> and registers it in the
    /// in-memory metadata index, raising <c>_last</c> and re-registering TTLs as it goes.
    /// </summary>
    /// <param name="block">The recovered block to scan.</param>
    private void RecoverMessagesFromBlock(MsgBlock block)
    {
        // MsgBlock tracks its first/last sequence, so each sequence in that range is probed.
        var first = block.FirstSequence;
        var last = block.LastSequence;

        if (first == 0 && last == 0)
            return; // Empty block.

        for (var seq = first; seq <= last; seq++)
        {
            var record = block.Read(seq);
            if (record is null)
                continue; // Deleted or not present.

            // The payload is restored only to learn its original (untransformed) length.
            // InvalidDataException (e.g., wrong key) propagates to the caller.
            var originalPayload = RestorePayload(record.Payload.Span);

            _meta[record.Sequence] = new MessageMeta
            {
                BlockId = block.BlockId,
                Subject = record.Subject,
                PayloadLength = originalPayload.Length,
                HeaderLength = record.Headers.Length,
                TimestampNs = record.Timestamp,
            };

            if (record.Sequence > _last)
                _last = record.Sequence;

            // Go: re-register unexpired TTLs in the wheel after recovery.
            // Reference: golang/nats-server/server/filestore.go — recoverMsgs, TTL re-registration.
            if (_options.MaxAgeMs > 0)
                RegisterTtl(record.Sequence, record.Timestamp, (long)_options.MaxAgeMs * 1_000_000L);
        }
    }

    // -------------------------------------------------------------------------
    // Legacy JSONL migration: if messages.jsonl exists, migrate to blocks.
    // -------------------------------------------------------------------------

    /// <summary>
    /// Migrates data from the legacy JSONL format to block-based storage.
    /// When <c>messages.jsonl</c> exists, all of its records are read, written to blocks
    /// and registered in <c>_meta</c>; afterwards the JSONL file and the index manifest
    /// are deleted. Unparseable lines are skipped; payload restore failures propagate.
    /// </summary>
    private void MigrateLegacyJsonl()
    {
        var jsonlPath = Path.Combine(_options.Directory, "messages.jsonl");
        if (!File.Exists(jsonlPath))
            return;

        // Removes the legacy JSONL file and, when present, its index manifest.
        void DeleteLegacyFiles()
        {
            File.Delete(jsonlPath);

            var manifestPath = Path.Combine(_options.Directory, _options.IndexManifestFileName);
            if (File.Exists(manifestPath))
                File.Delete(manifestPath);
        }

        // Read all records from the JSONL file.
        var legacyMessages = new List<(ulong Sequence, string Subject, byte[] Payload, DateTime TimestampUtc)>();

        foreach (var line in File.ReadLines(jsonlPath))
        {
            if (string.IsNullOrWhiteSpace(line))
                continue;

            FileRecord? record;
            try
            {
                record = JsonSerializer.Deserialize<FileRecord>(line);
            }
            catch
            {
                continue; // Skip corrupted lines.
            }

            if (record == null)
                continue;

            // No try/catch here on purpose: integrity failures (e.g., wrong encryption
            // key) must propagate to the caller unchanged.
            var originalPayload = RestorePayload(Convert.FromBase64String(record.PayloadBase64 ?? string.Empty));

            legacyMessages.Add((record.Sequence, record.Subject ?? string.Empty, originalPayload, record.TimestampUtc));
        }

        if (legacyMessages.Count == 0)
        {
            // Nothing to migrate: just remove the empty legacy files.
            DeleteLegacyFiles();
            return;
        }

        // Write all messages to fresh blocks, then populate _meta.
        foreach (var (seq, subject, payload, ts) in legacyMessages)
        {
            if (seq > _last)
                _last = seq;

            var persistedPayload = TransformForPersist(payload);
            var timestampNs = new DateTimeOffset(ts).ToUnixTimeMilliseconds() * 1_000_000L;

            EnsureActiveBlock();
            try
            {
                _activeBlock!.WriteAt(seq, subject, ReadOnlyMemory<byte>.Empty, persistedPayload, timestampNs);
            }
            catch (InvalidOperationException)
            {
                // The active block rejected the write: rotate and retry once on a new block.
                RotateBlock();
                _activeBlock!.WriteAt(seq, subject, ReadOnlyMemory<byte>.Empty, persistedPayload, timestampNs);
            }

            _meta[seq] = new MessageMeta
            {
                BlockId = _activeBlock!.BlockId,
                Subject = subject,
                PayloadLength = payload.Length,
                HeaderLength = 0, // Legacy records carry no headers.
                TimestampNs = timestampNs,
            };

            if (_activeBlock!.IsSealed)
                RotateBlock();
        }

        RebuildIndexesFromMessages();

        // Delete the legacy files only after everything was written to blocks.
        DeleteLegacyFiles();
    }

    // -------------------------------------------------------------------------
    // Expiry
    // -------------------------------------------------------------------------

    /// <summary>
    /// Registers a message in the TTL wheel, creating the wheel on first use.
    /// The expiry is computed as the current Stopwatch position plus
    /// <paramref name="ttlNs"/>; a TTL of zero or less is a no-op.
    /// Reference: golang/nats-server/server/filestore.go:6820 — storeMsg TTL scheduling.
    /// </summary>
    /// <param name="seq">Sequence of the message to expire.</param>
    /// <param name="timestampNs">Message timestamp (Unix epoch ns); intentionally unused, see below.</param>
    /// <param name="ttlNs">Time to live in nanoseconds.</param>
    private void RegisterTtl(ulong seq, long timestampNs, long ttlNs)
    {
        if (ttlNs <= 0)
            return;

        _ttlWheel ??= new HashWheel();

        // Convert to Stopwatch-domain nanoseconds to match ExpireTasks' time source.
        // We intentionally discard timestampNs (Unix epoch ns) and use "now + ttl"
        // relative to the Stopwatch epoch used by ExpireTasks.
        var nowStopwatchNs = (long)((double)System.Diagnostics.Stopwatch.GetTimestamp()
            / System.Diagnostics.Stopwatch.Frequency * 1_000_000_000);
        var expiresNs = nowStopwatchNs + ttlNs;
        _ttlWheel.Add(seq, expiresNs);
    }

    /// <summary>
    /// Asks the TTL wheel for expired entries and removes them from the store.
    /// Expired messages are untracked in memory and soft-deleted in their blocks;
    /// the high-water mark is preserved. When the wheel has not been created yet,
    /// the linear scan is used instead.
    /// Reference: golang/nats-server/server/filestore.go — expireMsgs using thw.ExpireTasks.
    /// </summary>
    private void ExpireFromWheel()
    {
        if (_ttlWheel is null)
        {
            // Fall back to linear scan if wheel is not yet initialised.
            PruneExpiredLinear(DateTime.UtcNow);
            return;
        }

        var expired = new List<ulong>();
        _ttlWheel.ExpireTasks((seq, _) =>
        {
            expired.Add(seq);
            return true; // Remove from wheel.
        });

        if (expired.Count == 0)
            return;

        // Remove from in-memory cache and soft-delete in the block layer.
        // We do NOT call RewriteBlocks here — that would reset _last and create a
        // discontinuity in the sequence space. Soft-delete is sufficient for expiry.
        // Reference: golang/nats-server/server/filestore.go:expireMsgs — dmap-based removal.
        foreach (var seq in expired)
        {
            RemoveTrackedMessage(seq, preserveHighWaterMark: true);
            DeleteInBlock(seq);
        }

        _generation++;
    }

    ///
    /// O(n) fallback expiry scan used during recovery (before the wheel is warm)
    /// or when MaxAgeMs is set but no messages have been appended yet.
///
    private void PruneExpiredLinear(DateTime nowUtc)
    {
        if (_options.MaxAgeMs <= 0)
        {
            return;
        }

        var cutoffUtc = nowUtc.AddMilliseconds(-_options.MaxAgeMs);
        var cutoffNs = new DateTimeOffset(cutoffUtc).ToUnixTimeMilliseconds() * 1_000_000L;

        // Snapshot the expired sequences first; the index is mutated while removing them.
        var expiredSequences = new List<ulong>();
        foreach (var entry in _meta)
        {
            if (entry.Value.TimestampNs < cutoffNs)
            {
                expiredSequences.Add(entry.Key);
            }
        }

        if (expiredSequences.Count == 0)
        {
            return;
        }

        foreach (var expiredSeq in expiredSequences)
        {
            RemoveTrackedMessage(expiredSeq, preserveHighWaterMark: true);
            DeleteInBlock(expiredSeq);
        }

        // When all messages are expired, reset sequence pointers to 0.
        // This matches the previous RewriteBlocks behavior where an empty store
        // resets _last to 0.
        if (_meta.Count == 0)
        {
            _last = 0;
            _first = 0;
            _firstSeq = 0;
        }

        _generation++;
    }

    /// <summary>
    /// Recovery-path alias kept under the old name; forwards to the linear expiry scan.
    /// </summary>
    private void PruneExpired(DateTime nowUtc)
    {
        PruneExpiredLinear(nowUtc);
    }

    // -------------------------------------------------------------------------
    // Payload transform: compress + encrypt on write; reverse on read.
    //
    // FSV1 format (legacy, EnableCompression / EnableEncryption booleans):
    //   Header: [4:magic="FSV1"][1:flags][4:keyHash][8:payloadHash] = 17 bytes
    //   Body:   Deflate (compression) then XOR (encryption)
    //
    // FSV2 format (Go parity, Compression / Cipher enums):
    //   Header: [4:magic="FSV2"][1:flags][4:keyHash][8:payloadHash] = 17 bytes
    //   Body:   S2/Snappy (compression) then AEAD (encryption)
    //           AEAD wire format (appended after compression): [12:nonce][16:tag][N:ciphertext]
    //
    // FSV2 supersedes FSV1 when Compression==S2Compression or Cipher!=NoCipher.
    // On read, magic bytes select the decode path; FSV1 files remain readable.
    // -------------------------------------------------------------------------

    /// <summary>
    /// Produces the bytes that are written to a block for <paramref name="payload"/>.
    /// With no compression or encryption configured the payload is copied as-is;
    /// otherwise it is compressed and/or encrypted and prefixed with a 17-byte envelope
    /// (magic, flags, key hash, hash of the plaintext).
    /// </summary>
    /// <param name="payload">The original message payload.</param>
    /// <returns>A new array holding the persisted form.</returns>
    private byte[] TransformForPersist(ReadOnlySpan<byte> payload)
    {
        var useV2 = _useS2 || _useAead;

        // Fast path: nothing configured — store the raw payload without an envelope,
        // which skips hashing and the envelope allocation on the publish path.
        if (!useV2 && !_options.EnableCompression && !_options.EnableEncryption)
        {
            return payload.ToArray();
        }

        var plaintext = payload.ToArray();
        byte[] body = plaintext;
        byte flags = 0;
        byte[] magic;

        if (useV2)
        {
            // FSV2 path: S2 compression and/or AEAD encryption.
            magic = EnvelopeMagicV2;

            if (_useS2)
            {
                body = S2Codec.Compress(body);
                flags |= CompressionFlag;
            }

            if (_useAead)
            {
                var aeadKey = NormalizeKey(_options.EncryptionKey);
                body = AeadEncryptor.Encrypt(body, aeadKey, _options.Cipher);
                flags |= EncryptionFlag;
            }
        }
        else
        {
            // FSV1 legacy path: Deflate + XOR.
            magic = EnvelopeMagicV1;

            if (_options.EnableCompression)
            {
                body = CompressDeflate(body);
                flags |= CompressionFlag;
            }

            if (_options.EnableEncryption)
            {
                body = Xor(body, _options.EncryptionKey);
                flags |= EncryptionFlag;
            }
        }

        // Envelope layout: [0..4) magic, [4] flags, [5..9) key hash, [9..17) plaintext hash, body.
        var envelope = new byte[EnvelopeHeaderSize + body.Length];
        magic.AsSpan().CopyTo(envelope.AsSpan(0, magic.Length));
        envelope[magic.Length] = flags;
        BinaryPrimitives.WriteUInt32LittleEndian(envelope.AsSpan(5, 4), ComputeKeyHash(_options.EncryptionKey));
        BinaryPrimitives.WriteUInt64LittleEndian(envelope.AsSpan(9, 8), ComputePayloadHash(plaintext));
        body.AsSpan().CopyTo(envelope.AsSpan(EnvelopeHeaderSize));
        return envelope;
    }

    /// <summary>
    /// Reverses <c>TransformForPersist</c>: recognises the FSV1/FSV2 envelope, decrypts and
    /// decompresses according to its flags, and optionally verifies the plaintext hash.
    /// Data without a recognised envelope is decoded using the legacy option booleans.
    /// </summary>
    /// <param name="persisted">Bytes as read from a block.</param>
    /// <returns>The original payload.</returns>
    /// <exception cref="InvalidDataException">
    /// AEAD decryption failed, the FSV1 key hash does not match the configured key, or
    /// the payload integrity check failed.
    /// </exception>
    private byte[] RestorePayload(ReadOnlySpan<byte> persisted)
    {
        // NOTE(review): a raw payload stored via the no-transform fast path that happens
        // to start with "FSV1"/"FSV2" and is at least 17 bytes long looks like it would be
        // parsed as an envelope here — confirm whether callers can produce such payloads.
        if (!TryReadEnvelope(persisted, out var version, out var flags, out var keyHash, out var payloadHash, out var body))
        {
            // Legacy format fallback for pre-envelope data (no header at all).
            var legacy = persisted.ToArray();

            if (_options.EnableEncryption)
            {
                legacy = Xor(legacy, _options.EncryptionKey);
            }

            if (_options.EnableCompression)
            {
                legacy = DecompressDeflate(legacy);
            }

            return legacy;
        }

        var isEncrypted = (flags & EncryptionFlag) != 0;
        var isCompressed = (flags & CompressionFlag) != 0;
        var data = body.ToArray();

        if (version == 2)
        {
            // FSV2: AEAD decrypt then S2 decompress.
            if (isEncrypted)
            {
                var aeadKey = NormalizeKey(_options.EncryptionKey);
                try
                {
                    data = AeadEncryptor.Decrypt(data, aeadKey, _options.Cipher);
                }
                catch (CryptographicException ex)
                {
                    // AEAD tag verification failed — wrong key or corrupted data.
                    // Wrap as InvalidDataException so RecoverBlocks propagates it
                    // as a fatal key-mismatch error (same behaviour as FSV1 key-hash check).
                    throw new InvalidDataException("AEAD decryption failed: wrong key or corrupted block.", ex);
                }
            }

            if (isCompressed)
            {
                data = S2Codec.Decompress(data);
            }
        }
        else
        {
            // FSV1: XOR decrypt then Deflate decompress.
            if (isEncrypted)
            {
                if (ComputeKeyHash(_options.EncryptionKey) != keyHash)
                {
                    throw new InvalidDataException("Encryption key mismatch for persisted payload.");
                }

                data = Xor(data, _options.EncryptionKey);
            }

            if (isCompressed)
            {
                data = DecompressDeflate(data);
            }
        }

        if (_options.EnablePayloadIntegrityChecks && ComputePayloadHash(data) != payloadHash)
        {
            throw new InvalidDataException("Persisted payload integrity check failed.");
        }

        return data;
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /// <summary>
    /// Returns a key of exactly <c>AeadEncryptor.KeySize</c> bytes: the input is
    /// truncated or right-padded with zeros. A null or empty key yields all zeros.
    /// Only called for the FSV2 AEAD path; FSV1 XOR accepts arbitrary key lengths.
    /// </summary>
    private static byte[] NormalizeKey(byte[]? key)
    {
        var fixedSizeKey = new byte[AeadEncryptor.KeySize];

        if (key is { Length: > 0 })
        {
            var bytesToCopy = Math.Min(key.Length, AeadEncryptor.KeySize);
            key.AsSpan(0, bytesToCopy).CopyTo(fixedSizeKey.AsSpan());
        }

        return fixedSizeKey;
    }

    /// <summary>
    /// XORs <paramref name="data"/> with a repeating <paramref name="key"/> and returns
    /// the result as a new array. A null or empty key returns an unmodified copy.
    /// </summary>
    private static byte[] Xor(ReadOnlySpan<byte> data, byte[]? key)
    {
        var result = data.ToArray();

        if (key == null || key.Length == 0)
        {
            return result;
        }

        for (var index = 0; index < result.Length; index++)
        {
            result[index] ^= key[index % key.Length];
        }

        return result;
    }

    /// <summary>
    /// Compresses <paramref name="data"/> with raw Deflate at the fastest level.
    /// </summary>
    private static byte[] CompressDeflate(ReadOnlySpan<byte> data)
    {
        using var compressed = new MemoryStream();

        // The deflate stream must be disposed (flushed) before the buffer is read;
        // leaveOpen keeps the MemoryStream usable afterwards.
        using (var deflate = new System.IO.Compression.DeflateStream(
                   compressed, System.IO.Compression.CompressionLevel.Fastest, leaveOpen: true))
        {
            deflate.Write(data);
        }

        return compressed.ToArray();
    }

    /// <summary>
    /// Decompresses raw Deflate data produced by <c>CompressDeflate</c>.
    /// </summary>
    private static byte[] DecompressDeflate(ReadOnlySpan<byte> data)
    {
        using var compressed = new MemoryStream(data.ToArray());
        using var inflate = new System.IO.Compression.DeflateStream(
            compressed, System.IO.Compression.CompressionMode.Decompress);
        using var plain = new MemoryStream();

        inflate.CopyTo(plain);
        return plain.ToArray();
    }

    /// <summary>
    /// Tries to parse the 17-byte envelope header at the start of <paramref name="persisted"/>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the data is at least header-sized and starts with the FSV1 or FSV2
    /// magic; the out parameters then hold the header fields and the body after the header.
    /// Otherwise <c>false</c> with all out parameters at their defaults.
    /// </returns>
    private static bool TryReadEnvelope(
        ReadOnlySpan<byte> persisted,
        out int version,
        out byte flags,
        out uint keyHash,
        out ulong payloadHash,
        out ReadOnlySpan<byte> payload)
    {
        version = 0;
        flags = 0;
        keyHash = 0;
        payloadHash = 0;
        payload = ReadOnlySpan<byte>.Empty;

        if (persisted.Length < EnvelopeHeaderSize)
        {
            return false;
        }

        var magic = persisted[..EnvelopeMagicV1.Length];
        if (magic.SequenceEqual(EnvelopeMagicV1))
        {
            version = 1;
        }
        else if (magic.SequenceEqual(EnvelopeMagicV2))
        {
            version = 2;
        }
        else
        {
            return false;
        }

        flags = persisted[EnvelopeMagicV1.Length];
        keyHash = BinaryPrimitives.ReadUInt32LittleEndian(persisted.Slice(5, 4));
        payloadHash = BinaryPrimitives.ReadUInt64LittleEndian(persisted.Slice(9, 8));
        payload = persisted[EnvelopeHeaderSize..];
        return true;
    }

    /// <summary>
    /// Returns the first four bytes (little-endian) of the SHA-256 of <paramref name="key"/>,
    /// or 0 for a null or empty key.
    /// </summary>
    private static uint ComputeKeyHash(byte[]? key)
    {
        if (key is not { Length: > 0 })
        {
            return 0;
        }

        Span<byte> digest = stackalloc byte[32];
        SHA256.HashData(key, digest);
        return BinaryPrimitives.ReadUInt32LittleEndian(digest);
    }

    /// <summary>
    /// Returns the first eight bytes (little-endian) of the SHA-256 of <paramref name="payload"/>.
    /// </summary>
    private static ulong ComputePayloadHash(ReadOnlySpan<byte> payload)
    {
        Span<byte> digest = stackalloc byte[32];
        SHA256.HashData(payload, digest);
        return BinaryPrimitives.ReadUInt64LittleEndian(digest);
    }

    // Envelope flag bits.
    private const byte CompressionFlag = 0b0000_0001;
    private const byte EncryptionFlag = 0b0000_0010;

    // FSV1: legacy Deflate + XOR envelope
    private static readonly byte[] EnvelopeMagicV1 = "FSV1"u8.ToArray();

    // FSV2: Go-parity S2 + AEAD envelope (filestore.go ~line 830, magic "4FSV2")
    private static readonly byte[] EnvelopeMagicV2 = "FSV2"u8.ToArray();

    private const int EnvelopeHeaderSize = 17; // 4 magic + 1 flags + 4 keyHash + 8 payloadHash

    // -------------------------------------------------------------------------
    // Go-parity sync methods not yet in the interface default implementations
    // Reference: golang/nats-server/server/filestore.go
    // -------------------------------------------------------------------------

    /// <summary>
    /// Soft-deletes the message at <paramref name="seq"/>.
    /// Reference: golang/nats-server/server/filestore.go — RemoveMsg.
    /// </summary>
    /// <returns><c>true</c> when the sequence was tracked and has been removed; otherwise <c>false</c>.</returns>
    public bool RemoveMsg(ulong seq)
    {
        var wasTracked = RemoveTrackedMessage(seq, preserveHighWaterMark: true);
        if (!wasTracked)
        {
            return false;
        }

        _generation++;

        // Go: filestore.go — LastSeq (lmb.last.seq) is a high-water mark and is
        // never decremented on removal. Only FirstSeq advances when the first
        // live message is removed.
        DeleteInBlock(seq);
        return true;
    }

    ///
    /// Secure-erases a message: overwrites its payload bytes with random data on disk,
    /// then soft-deletes it (same in-memory semantics as ).
    /// Returns true if the sequence existed and was erased.
    /// Reference: golang/nats-server/server/filestore.go:5890 (eraseMsg).
///
    public bool EraseMsg(ulong seq)
    {
        var wasTracked = RemoveTrackedMessage(seq, preserveHighWaterMark: true);
        if (!wasTracked)
        {
            return false;
        }

        _generation++;

        // Secure erase: overwrite payload bytes with random data before marking deleted.
        // Reference: golang/nats-server/server/filestore.go:5890 (eraseMsg).
        DeleteInBlock(seq, secureErase: true);
        return true;
    }

    /// <summary>
    /// Reserves a sequence without storing a message. <c>_last</c> is set to
    /// <paramref name="seq"/> (or to <c>_last + 1</c> when <paramref name="seq"/> is 0)
    /// and a skip record is written to the active block so recovery can rebuild the gap.
    /// Reference: golang/nats-server/server/filestore.go — SkipMsg.
    /// </summary>
    /// <param name="seq">Sequence to skip, or 0 to auto-assign the next one.</param>
    /// <returns>The skipped sequence number.</returns>
    public ulong SkipMsg(ulong seq)
    {
        ulong skipped;
        if (seq == 0)
        {
            // Auto-assign the next sequence.
            skipped = _last + 1;
        }
        else
        {
            skipped = seq;
        }

        _last = skipped;

        // A skip is never added to _meta, but it is recorded in the block so the
        // sequence gap survives a restart.
        EnsureActiveBlock();
        try
        {
            _activeBlock!.WriteSkip(skipped);
        }
        catch (InvalidOperationException)
        {
            // The active block rejected the write: rotate and retry once on a new block.
            RotateBlock();
            _activeBlock!.WriteSkip(skipped);
        }

        if (_activeBlock!.IsSealed)
        {
            RotateBlock();
        }

        // With no live messages, the next real message will be the first one,
        // at skipped + 1; track that so FastState reports correctly.
        if (_meta.Count == 0)
        {
            _first = skipped + 1;
        }

        return skipped;
    }

    /// <summary>
    /// Reserves <paramref name="num"/> contiguous sequences starting at
    /// <paramref name="seq"/> (or at <c>_last + 1</c> when <paramref name="seq"/> is 0).
    /// Reference: golang/nats-server/server/filestore.go — SkipMsgs.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="seq"/> is non-zero and differs from <c>_last + 1</c>
    /// (Go: ErrSequenceMismatch).
    /// </exception>
    public void SkipMsgs(ulong seq, ulong num)
    {
        var expectedNext = _last + 1;

        if (seq != 0 && seq != expectedNext)
        {
            throw new InvalidOperationException($"Sequence mismatch: expected {expectedNext}, got {seq}.");
        }

        // After validation the range always starts at the expected next sequence,
        // whether it was passed explicitly or requested with 0.
        var firstSkipped = expectedNext;

        for (ulong offset = 0; offset < num; offset++)
        {
            SkipMsg(firstSkipped + offset);
        }
    }

    ///
    /// Loads a message by exact sequence number into the optional reusable container
    /// .
/// Throws if not found.
    ///
    /// Fast path: O(1) hash lookup in _meta (all live messages are cached).
    /// Fallback: binary-search _blocks by sequence range (O(log n) blocks) to
    /// locate the containing block, then read from disk. This covers cases where a
    /// future memory-pressure eviction removes entries from _meta.
    /// Both paths report the persisted nanosecond timestamp, so the result does not
    /// depend on which path served the message.
    /// Reference: golang/nats-server/server/filestore.go:8308 (LoadMsg).
    ///
    public StoreMsg LoadMsg(ulong seq, StoreMsg? sm)
    {
        var stored = MaterializeMessage(seq);
        if (stored is not null)
        {
            return PopulateFromStored(
                sm, stored.Sequence, stored.Subject, stored.RawHeaders, stored.Payload, stored.TimestampUtc);
        }

        // Fallback: binary-search blocks by sequence range (O(log n)).
        // Handles cases where _meta does not contain the sequence.
        var block = FindBlockForSequence(seq)
            ?? throw new KeyNotFoundException($"Message sequence {seq} not found.");

        var record = block.Read(seq)
            ?? throw new KeyNotFoundException($"Message sequence {seq} not found.");

        sm ??= new StoreMsg();
        sm.Clear();
        sm.Subject = record.Subject;
        sm.Header = record.Headers.Length > 0 ? record.Headers.ToArray() : null;
        var originalPayload = RestorePayload(record.Payload.Span);
        sm.Data = originalPayload.Length > 0 ? originalPayload : null;
        sm.Sequence = record.Sequence;
        sm.Timestamp = record.Timestamp;
        return sm;
    }

    /// <summary>
    /// Copies a materialized message into <paramref name="sm"/> (allocating a new
    /// container when it is null) and returns the container.
    /// </summary>
    /// <remarks>
    /// The timestamp is taken from the exact nanosecond value held in <c>_meta</c> when
    /// the sequence is tracked there. Only when it is not tracked does it fall back to
    /// converting <paramref name="timestampUtc"/>, which is truncated to milliseconds.
    /// Empty headers and payloads are reported as null.
    /// </remarks>
    private StoreMsg PopulateFromStored(
        StoreMsg? sm,
        ulong sequence,
        string subject,
        ReadOnlyMemory<byte> headers,
        ReadOnlyMemory<byte> payload,
        DateTime timestampUtc)
    {
        sm ??= new StoreMsg();
        sm.Clear();
        sm.Subject = subject;
        sm.Header = headers.Length > 0 ? headers.ToArray() : null;
        sm.Data = payload.Length > 0 ? payload.ToArray() : null;
        sm.Sequence = sequence;
        sm.Timestamp = _meta.TryGetValue(sequence, out var meta)
            ? meta.TimestampNs
            : new DateTimeOffset(timestampUtc).ToUnixTimeMilliseconds() * 1_000_000L;
        return sm;
    }

    /// <summary>
    /// Binary-searches <c>_blocks</c> for the block whose
    /// [FirstSequence, LastSequence] range contains <paramref name="seq"/>.
    /// Reference: golang/nats-server/server/filestore.go:8308 (block seek in LoadMsg).
    /// </summary>
    /// <returns>The containing block, or null when no block covers the sequence.</returns>
    private MsgBlock? FindBlockForSequence(ulong seq)
    {
        if (_blocks.Count == 0)
            return null;

        // Binary search: find the rightmost block whose FirstSequence <= seq.
        // NOTE(review): relies on _blocks being ordered by FirstSequence — confirm that
        // every code path that adds blocks preserves this.
        int lo = 0, hi = _blocks.Count - 1;
        while (lo < hi)
        {
            var mid = (lo + hi + 1) / 2; // upper-mid to avoid infinite loop
            if (_blocks[mid].FirstSequence <= seq)
                lo = mid;
            else
                hi = mid - 1;
        }

        var candidate = _blocks[lo];
        if (candidate.FirstSequence <= seq && seq <= candidate.LastSequence)
            return candidate;

        return null;
    }

    /// <summary>
    /// Loads the message with the highest sequence whose subject matches
    /// <paramref name="subject"/> into <paramref name="sm"/>. A null or empty
    /// subject matches every message.
    /// Reference: golang/nats-server/server/filestore.go — LoadLastMsg.
    /// </summary>
    /// <exception cref="KeyNotFoundException">No message matches the subject.</exception>
    public StoreMsg LoadLastMsg(string subject, StoreMsg? sm)
    {
        var matchAll = string.IsNullOrEmpty(subject);

        ulong? bestSeq = null;
        foreach (var kv in _meta)
        {
            if (!matchAll && !SubjectMatchesFilter(kv.Value.Subject, subject))
                continue;

            if (bestSeq is null || kv.Key > bestSeq.Value)
                bestSeq = kv.Key;
        }

        if (bestSeq is null)
            throw new KeyNotFoundException($"No message found for subject '{subject}'.");

        var match = MaterializeMessage(bestSeq.Value)
            ?? throw new KeyNotFoundException($"No message found for subject '{subject}'.");

        return PopulateFromStored(
            sm, match.Sequence, match.Subject, match.RawHeaders, match.Payload, match.TimestampUtc);
    }

    /// <summary>
    /// Loads the lowest-sequence message at or after <paramref name="start"/> whose
    /// subject matches <paramref name="filter"/> (null or empty matches everything).
    /// The <paramref name="wc"/> flag is accepted but not consulted by this implementation.
    /// Reference: golang/nats-server/server/filestore.go — LoadNextMsg.
    /// </summary>
    /// <returns>The message and the number of sequences skipped from <paramref name="start"/> to reach it.</returns>
    /// <exception cref="KeyNotFoundException">No matching message exists at or after <paramref name="start"/>.</exception>
    public (StoreMsg Msg, ulong Skip) LoadNextMsg(string filter, bool wc, ulong start, StoreMsg? sm)
    {
        var matchAll = string.IsNullOrEmpty(filter);

        ulong? bestSeq = null;
        foreach (var kv in _meta)
        {
            if (kv.Key < start)
                continue;
            if (!matchAll && !SubjectMatchesFilter(kv.Value.Subject, filter))
                continue;

            if (bestSeq is null || kv.Key < bestSeq.Value)
                bestSeq = kv.Key;
        }

        if (bestSeq is null)
            throw new KeyNotFoundException($"No message found at or after seq {start} matching filter '{filter}'.");

        var found = MaterializeMessage(bestSeq.Value)
            ?? throw new KeyNotFoundException($"No message found at or after seq {start} matching filter '{filter}'.");

        var skip = bestSeq.Value > start ? bestSeq.Value - start : 0UL;

        var msg = PopulateFromStored(
            sm, found.Sequence, found.Subject, found.RawHeaders, found.Payload, found.TimestampUtc);
        return (msg, skip);
    }

    /// <summary>
    /// Returns the last sequence of every distinct subject in the stream, sorted ascending.
    /// Reference: golang/nats-server/server/filestore.go — AllLastSeqs.
    /// </summary>
    public ulong[] AllLastSeqs()
    {
        var lastPerSubject = new Dictionary<string, ulong>(StringComparer.Ordinal);
        foreach (var kv in _meta)
        {
            var subj = kv.Value.Subject;
            if (!lastPerSubject.TryGetValue(subj, out var existing) || kv.Key > existing)
                lastPerSubject[subj] = kv.Key;
        }

        var result = lastPerSubject.Values.ToArray();
        Array.Sort(result);
        return result;
    }

    ///
    /// Returns the last sequences for subjects matching ,
    /// limited to sequences at or below and capped at
    /// results.
    /// Reference: golang/nats-server/server/filestore.go — MultiLastSeqs.
///
    public ulong[] MultiLastSeqs(string[] filters, ulong maxSeq, int maxAllowed)
    {
        // A null or empty filter list matches every subject. Decide this once instead
        // of per message.
        var matchAll = filters is null || filters.Length == 0;

        var lastPerSubject = new Dictionary<string, ulong>(StringComparer.Ordinal);

        foreach (var kv in _meta)
        {
            var seq = kv.Key;
            if (maxSeq > 0 && seq > maxSeq)
                continue;

            var subj = kv.Value.Subject;
            if (!matchAll && !filters!.Any(f => SubjectMatchesFilter(subj, f)))
                continue;

            if (!lastPerSubject.TryGetValue(subj, out var existing) || seq > existing)
                lastPerSubject[subj] = seq;
        }

        var result = lastPerSubject.Values.ToArray();
        Array.Sort(result);

        // Go parity: ErrTooManyResults — when maxAllowed > 0 and results exceed it.
        if (maxAllowed > 0 && result.Length > maxAllowed)
            throw new InvalidOperationException($"Too many results: got {result.Length}, max allowed is {maxAllowed}.");

        return result;
    }

    /// <summary>
    /// Returns the subject of the message stored at <paramref name="seq"/>.
    /// Reference: golang/nats-server/server/filestore.go — SubjectForSeq.
    /// </summary>
    /// <exception cref="KeyNotFoundException">The sequence is not tracked.</exception>
    public string SubjectForSeq(ulong seq)
    {
        if (!_meta.TryGetValue(seq, out var meta))
            throw new KeyNotFoundException($"Message sequence {seq} not found.");

        return meta.Subject;
    }

    /// <summary>
    /// Counts the tracked messages with sequence at or above <paramref name="sseq"/>
    /// whose subject matches <paramref name="filter"/> (null or empty matches everything).
    /// When <paramref name="lastPerSubject"/> is true each distinct subject counts once.
    /// Reference: golang/nats-server/server/filestore.go — NumPending.
    /// </summary>
    /// <returns>The count, and <c>_last</c> as the sequence the count is valid through.</returns>
    public (ulong Total, ulong ValidThrough) NumPending(ulong sseq, string filter, bool lastPerSubject)
    {
        var matchAll = string.IsNullOrEmpty(filter);

        // In last-per-subject mode exactly one message per distinct matching subject is
        // pending, so counting distinct subjects gives the total without building lists.
        var seenSubjects = lastPerSubject ? new HashSet<string>(StringComparer.Ordinal) : null;

        ulong total = 0;
        foreach (var kv in _meta)
        {
            if (kv.Key < sseq)
                continue;
            if (!matchAll && !SubjectMatchesFilter(kv.Value.Subject, filter))
                continue;

            if (seenSubjects is null || seenSubjects.Add(kv.Value.Subject))
                total++;
        }

        return (total, _last);
    }

    // -------------------------------------------------------------------------
    // Go-parity IStreamStore methods: StoreRawMsg, LoadPrevMsg, Type, Stop
    // Reference: golang/nats-server/server/filestore.go
    // -------------------------------------------------------------------------

    /// <summary>
    /// Stores a message at a caller-specified sequence number and timestamp.
    /// Used for replication and mirroring — the caller (NRG, mirror source) controls
    /// the sequence/timestamp rather than the store auto-incrementing them.
    /// This method does not call ExpireFromWheel; <c>_last</c> is raised to
    /// <c>Math.Max(_last, seq)</c> so the watermark reflects the highest stored sequence.
    /// Reference: golang/nats-server/server/filestore.go:6756 (storeRawMsg).
    /// </summary>
    /// <param name="subject">Message subject.</param>
    /// <param name="hdr">Raw headers; null or empty means no headers.</param>
    /// <param name="msg">Payload; null is treated as empty.</param>
    /// <param name="seq">Sequence to store the message at.</param>
    /// <param name="ts">Timestamp written with the message and kept in the metadata index.</param>
    /// <param name="ttl">Per-message TTL in nanoseconds; when not positive, MaxAgeMs (if set) is used.</param>
    /// <param name="discardNewCheck">Accepted but not consulted by this implementation.</param>
    /// <exception cref="ObjectDisposedException">The store has been stopped.</exception>
    public void StoreRawMsg(string subject, byte[]? hdr, byte[] msg, ulong seq, long ts, long ttl, bool discardNewCheck)
    {
        if (_stopped)
            throw new ObjectDisposedException(nameof(FileStore), "Store has been stopped.");

        var headers = hdr is { Length: > 0 } ? hdr : [];
        var payload = msg ?? [];
        var persistedPayload = TransformForPersist(payload);

        // Register TTL using the caller-supplied timestamp and TTL.
        var effectiveTtlNs = ttl > 0 ? ttl : (_options.MaxAgeMs > 0 ? (long)_options.MaxAgeMs * 1_000_000L : 0L);
        RegisterTtl(seq, ts, effectiveTtlNs);

        EnsureActiveBlock();
        try
        {
            _activeBlock!.WriteAt(seq, subject, headers, persistedPayload, ts);
        }
        catch (InvalidOperationException)
        {
            // The active block rejected the write: rotate and retry once on a new block.
            RotateBlock();
            _activeBlock!.WriteAt(seq, subject, headers, persistedPayload, ts);
        }

        TrackMessage(seq, subject, headers.Length, payload.Length, ts, _activeBlock!.BlockId);
        _generation++;

        // Go: update _last to the high-water mark — do not decrement.
        _last = Math.Max(_last, seq);

        // Go: filestore.go:4443 (setupWriteCache) — record write in bounded cache manager.
        _writeCache.TrackWrite(_activeBlock!.BlockId, headers.Length + persistedPayload.Length);

        if (_activeBlock!.IsSealed)
            RotateBlock();
    }

    /// <summary>
    /// Loads the live message with the highest sequence strictly below
    /// <paramref name="start"/> into <paramref name="sm"/>.
    /// Reference: golang/nats-server/server/filestore.go — LoadPrevMsg.
    /// </summary>
    /// <remarks>
    /// The search is driven by the metadata index rather than by walking every sequence
    /// number, so a <paramref name="start"/> beyond the last stored sequence still yields
    /// the last message instead of failing. The returned timestamp is the exact
    /// nanosecond value held in <c>_meta</c>.
    /// </remarks>
    /// <exception cref="KeyNotFoundException">No message exists before <paramref name="start"/>.</exception>
    public StoreMsg LoadPrevMsg(ulong start, StoreMsg? sm)
    {
        if (start == 0)
            throw new KeyNotFoundException("No message found before seq 0.");

        // Exclusive upper bound; lowered only if a tracked sequence fails to materialize.
        var upperBound = start;

        while (true)
        {
            ulong? candidate = null;
            long candidateTimestampNs = 0;

            foreach (var kv in _meta)
            {
                if (kv.Key >= upperBound)
                    continue;

                if (candidate is null || kv.Key > candidate.Value)
                {
                    candidate = kv.Key;
                    candidateTimestampNs = kv.Value.TimestampNs;
                }
            }

            if (candidate is null)
                break;

            var stored = MaterializeMessage(candidate.Value);
            if (stored is not null)
            {
                sm ??= new StoreMsg();
                sm.Clear();
                sm.Subject = stored.Subject;
                sm.Header = stored.RawHeaders.Length > 0 ? stored.RawHeaders.ToArray() : null;
                sm.Data = stored.Payload.Length > 0 ? stored.Payload.ToArray() : null;
                sm.Sequence = stored.Sequence;
                sm.Timestamp = candidateTimestampNs;
                return sm;
            }

            // Tracked but not materializable: keep looking further back.
            upperBound = candidate.Value;
        }

        throw new KeyNotFoundException($"No message found before seq {start}.");
    }

    ///
    /// Returns the storage backend type for this store instance.
    /// Reference: golang/nats-server/server/filestore.go — fileStore.Type.
///
    public StorageType Type()
    {
        return StorageType.File;
    }

    /// <summary>
    /// Marks the store as stopped, halts the background flush loop, flushes the
    /// active block and disposes every block. Calling it again is a no-op.
    /// Once stopped, <c>StoreRawMsg</c> throws <see cref="ObjectDisposedException"/>.
    /// Block files are not deleted by this method.
    /// Reference: golang/nats-server/server/filestore.go — fileStore.Stop.
    /// </summary>
    public void Stop()
    {
        if (_stopped)
            return;

        _stopped = true;

        // The flush loop must be finished before any block is touched or disposed.
        StopFlushLoop();

        // Push buffered writes out, then flush the active block itself.
        _activeBlock?.FlushPending();
        _activeBlock?.Flush();

        // Dispose all blocks to release OS file handles. The files remain on disk.
        DisposeAllBlocks();
    }

    /// <summary>
    /// Returns a binary-encoded snapshot of the stream state. Currently a placeholder
    /// that always returns an empty array; <paramref name="failed"/> is not used.
    /// Reference: golang/nats-server/server/filestore.go — EncodedStreamState.
    /// </summary>
    /// <param name="failed">Number of failed apply operations (ignored for now).</param>
    /// <returns>An empty byte array.</returns>
    public byte[] EncodedStreamState(ulong failed)
    {
        return [];
    }

    /// <summary>
    /// Applies limits from an updated stream configuration to the store options.
    /// MaxMsgsPerSubject is always overwritten; MaxAgeMs is only overwritten when the
    /// new value is positive. When a per-subject limit is in force afterwards it is
    /// enforced immediately for every subject currently in the index.
    /// Reference: golang/nats-server/server/filestore.go — UpdateConfig.
    /// </summary>
    /// <param name="cfg">The new stream configuration.</param>
    public void UpdateConfig(StreamConfig cfg)
    {
        _options.MaxMsgsPerSubject = cfg.MaxMsgsPer;

        if (cfg.MaxAgeMs > 0)
            _options.MaxAgeMs = cfg.MaxAgeMs;

        if (_options.MaxMsgsPerSubject <= 0)
            return;

        // Collect the distinct subjects (first-seen order) up front: enforcement
        // removes entries from _meta, so it cannot run while _meta is enumerated.
        var seen = new HashSet<string>(StringComparer.Ordinal);
        var subjects = new List<string>();
        foreach (var meta in _meta.Values)
        {
            if (seen.Add(meta.Subject))
                subjects.Add(meta.Subject);
        }

        foreach (var subject in subjects)
            EnforceMaxMsgsPerSubject(subject);
    }

    ///
    /// Removes oldest messages for the given subject until the per-subject count
    /// is within the limit.
    /// Reference: golang/nats-server/server/filestore.go — enforcePerSubjectLimit.
///
    private void EnforceMaxMsgsPerSubject(string subject)
    {
        var limit = _options.MaxMsgsPerSubject;
        if (limit <= 0)
            return;

        // Snapshot this subject's entries in ascending sequence order. Working from a
        // snapshot also makes it safe to remove from _meta inside the loop below.
        var subjectMsgs = _meta
            .Where(kv => string.Equals(kv.Value.Subject, subject, StringComparison.Ordinal))
            .OrderBy(kv => kv.Key)
            .ToList();

        // Walk the excess prefix by index; repeatedly calling RemoveAt(0) on the list
        // shifted every remaining element and made trimming quadratic.
        for (var i = 0; subjectMsgs.Count - i > limit; i++)
        {
            var oldest = subjectMsgs[i];
            _totalBytes -= (ulong)(oldest.Value.HeaderLength + oldest.Value.PayloadLength);
            _messageCount--;
            _meta.Remove(oldest.Key);
            DeleteInBlock(oldest.Key);
            _generation++;
        }

        // Re-derive the first-sequence watermark from whatever remains indexed.
        if (_meta.Count > 0)
            _firstSeq = _meta.Keys.Min();
    }

    /// <summary>
    /// Resets internal cached state after a leadership transition or snapshot restore.
    /// Currently a no-op.
    /// Reference: golang/nats-server/server/filestore.go — ResetState.
    /// </summary>
    public void ResetState()
    {
    }

    // -------------------------------------------------------------------------
    // ConsumerStore factory
    // Reference: golang/nats-server/server/filestore.go — fileStore.ConsumerStore
    // -------------------------------------------------------------------------

    /// <summary>
    /// Creates or opens a per-consumer state store backed by a binary file.
    /// The consumer directory <c>{Directory}/obs/{name}</c> is created if missing and
    /// the state file is <c>o.dat</c> inside it, matching the Go server's consumer
    /// directory layout.
    /// Reference: golang/nats-server/server/filestore.go — newConsumerFileStore.
    /// </summary>
    /// <param name="name">Consumer name; used as the directory name under <c>obs</c>.</param>
    /// <param name="created">Consumer creation time (not used by this implementation).</param>
    /// <param name="cfg">Consumer configuration handed to the created store.</param>
    /// <returns>A consumer store bound to the consumer's state file.</returns>
    public IConsumerStore ConsumerStore(string name, DateTime created, ConsumerConfig cfg)
    {
        var consumerDir = Path.Combine(_options.Directory, "obs", name);
        Directory.CreateDirectory(consumerDir);
        var stateFile = Path.Combine(consumerDir, "o.dat");
        return new ConsumerFileStore(stateFile, cfg);
    }

    // -------------------------------------------------------------------------
    // FlushAllPending: flush buffered writes and checkpoint stream state.
// Reference: golang/nats-server/server/filestore.go:5783-5842
    //            (flushPendingWritesUnlocked / writeFullState)
    // -------------------------------------------------------------------------

    /// <summary>
    /// Flushes the active block (pending buffered writes first, then the block
    /// itself) and then persists the stream state checkpoint via
    /// <c>WriteStreamStateAsync</c>, so that recovery can identify the last known
    /// good sequence without re-scanning every block.
    /// Reference: golang/nats-server/server/filestore.go:5783 (flushPendingWritesUnlocked).
    /// </summary>
    public async Task FlushAllPending()
    {
        _activeBlock?.FlushPending();
        _activeBlock?.Flush();

        await WriteStreamStateAsync();
    }

    /// <summary>
    /// Background flush loop that coalesces buffered block writes into batched
    /// flushes. Each iteration waits for a flush signal, then gives the active block
    /// up to <c>MaxFlushWaitMs</c> ms to accumulate at least <c>CoalesceMinimum</c>
    /// pending bytes before calling <c>FlushPending</c> on it.
    /// Reference: golang/nats-server/server/filestore.go:5841 (flushLoop).
    /// </summary>
    /// <param name="ct">Token that ends the loop when cancelled.</param>
    private async Task FlushLoopAsync(CancellationToken ct)
    {
        var signals = _flushSignal.Reader;

        while (!ct.IsCancellationRequested)
        {
            try
            {
                await signals.WaitToReadAsync(ct);
            }
            catch (OperationCanceledException)
            {
                // Cancelled while idle: nothing was signalled, so just exit.
                return;
            }

            // Consume the signal so the next write can raise a fresh one.
            signals.TryRead(out _);

            var target = _activeBlock;
            if (target is null)
                continue;

            // Go-style exponential backoff: 1→2→4→8ms (vs linear 1ms × 8), with each
            // sleep capped to the remaining wait budget.
            var elapsedMs = 0;
            for (var stepMs = 1;
                 target.PendingWriteSize < CoalesceMinimum && elapsedMs < MaxFlushWaitMs;
                 stepMs *= 2)
            {
                var sleepMs = Math.Min(stepMs, MaxFlushWaitMs - elapsedMs);
                try
                {
                    await Task.Delay(sleepMs, ct);
                }
                catch (OperationCanceledException)
                {
                    // Cancelled mid-wait: stop waiting but still flush below.
                    break;
                }

                elapsedMs += sleepMs;
            }

            target.FlushPending();
        }
    }

    /// <summary>
    /// Cancels the background flush loop and blocks until it has completed.
    /// Must be called before disposing blocks to avoid accessing disposed locks.
    /// </summary>
    private void StopFlushLoop()
    {
        _flushCts.Cancel();

        if (_flushTask is { } loop)
            loop.GetAwaiter().GetResult();
    }

    ///
    /// Async version of <see cref="StopFlushLoop"/>.
///
    private async Task StopFlushLoopAsync()
    {
        await _flushCts.CancelAsync();

        var loop = _flushTask;
        if (loop is null)
            return;

        await loop;
    }

    /// <summary>
    /// Persists a compact stream state snapshot as JSON to
    /// <c>{Directory}/stream.state</c> through
    /// <c>AtomicFileWriter.WriteAtomicallyAsync</c>. The snapshot is captured before
    /// the <c>_stateWriteLock</c> semaphore is taken; the semaphore serialises the
    /// file write so that only one is in flight at a time.
    /// Reference: golang/nats-server/server/filestore.go:5820 (writeFullState).
    /// </summary>
    private async Task WriteStreamStateAsync()
    {
        var statePath = Path.Combine(_options.Directory, "stream.state");

        // With no live messages the first sequence is reported as 0.
        var firstSeq = 0UL;
        if (_messageCount > 0)
            firstSeq = _firstSeq;

        var snapshot = new StreamStateSnapshot
        {
            FirstSeq = firstSeq,
            LastSeq = _last,
            Messages = _messageCount,
            Bytes = (ulong)_blocks.Sum(b => b.BytesUsed),
        };
        var json = JsonSerializer.SerializeToUtf8Bytes(snapshot);

        await _stateWriteLock.WaitAsync();
        try
        {
            await AtomicFileWriter.WriteAtomicallyAsync(statePath, json);
        }
        finally
        {
            _stateWriteLock.Release();
        }
    }

    // -------------------------------------------------------------------------
    // StreamStateSnapshot — private checkpoint record written by WriteStreamState.
    // -------------------------------------------------------------------------

    /// <summary>Checkpoint payload serialised to <c>stream.state</c>.</summary>
    private sealed record StreamStateSnapshot
    {
        /// <summary>First live sequence, or 0 when the stream holds no messages.</summary>
        public ulong FirstSeq { get; init; }

        /// <summary>Highest sequence stored so far.</summary>
        public ulong LastSeq { get; init; }

        /// <summary>Number of live messages.</summary>
        public ulong Messages { get; init; }

        /// <summary>Sum of <c>BytesUsed</c> across all blocks.</summary>
        public ulong Bytes { get; init; }
    }

    /// <summary>
    /// Message record carrying headers and payload as Base64 strings.
    /// NOTE(review): no use of this type is visible in this part of the file — confirm
    /// whether it is still needed.
    /// </summary>
    private sealed class FileRecord
    {
        public ulong Sequence { get; init; }
        public string? Subject { get; init; }
        public string? HeadersBase64 { get; init; }
        public string? PayloadBase64 { get; init; }
        public DateTime TimestampUtc { get; init; }
    }

    // -------------------------------------------------------------------------
    // WriteCacheManager — bounded, TTL-based write cache across all active blocks.
    //
    // Go: filestore.go:4443 (setupWriteCache) sets up per-block write caches.
//     filestore.go:6148 (expireCache) / filestore.go:6220 (expireCacheLocked)
    //     expire individual block caches via per-block timers.
    //
    // .NET adaptation: a single background PeriodicTimer (500 ms tick) manages
    // cache lifetime across all blocks. Entries are tracked by block ID with
    // their last-write time and approximate byte size. When total size exceeds
    // MaxCacheSize or an entry's age exceeds CacheExpiry, the block is flushed
    // and its cache is cleared.
    // -------------------------------------------------------------------------

    /// <summary>
    /// Manages a bounded, TTL-based write cache for the blocks of a
    /// <see cref="FileStore"/>. A background <see cref="PeriodicTimer"/> fires every
    /// 500 ms to evict stale or oversized cache entries.
    ///
    /// Reference: golang/nats-server/server/filestore.go:4443 (setupWriteCache),
    ///            golang/nats-server/server/filestore.go:6148 (expireCache).
    /// </summary>
    internal sealed class WriteCacheManager : IAsyncDisposable
    {
        /// <summary>Tracks per-block cache state.</summary>
        internal sealed class CacheEntry
        {
            public int BlockId { get; init; }
            public long LastWriteTime { get; set; }   // Environment.TickCount64 (ms)
            public long ApproximateBytes { get; set; }
        }

        private readonly ConcurrentDictionary<int, CacheEntry> _entries = new();
        private readonly Func<int, MsgBlock?> _blockLookup;
        private readonly long _maxCacheSizeBytes;
        private readonly long _cacheExpiryMs;
        private readonly PeriodicTimer _timer;
        private readonly Task _backgroundTask;
        private readonly CancellationTokenSource _cts = new();

        // 0 while live, 1 once DisposeAsync has started; makes disposal idempotent.
        private int _disposed;

        /// <summary>Tick interval for the background eviction loop.</summary>
        internal static readonly TimeSpan TickInterval = TimeSpan.FromMilliseconds(500);

        /// <summary>
        /// Creates a new <see cref="WriteCacheManager"/> and starts its background
        /// eviction loop.
        /// </summary>
        /// <param name="maxCacheSizeBytes">Total cache size limit in bytes before forced eviction.</param>
        /// <param name="cacheExpiry">Idle TTL after which a block's cache is evicted.</param>
        /// <param name="blockLookup">
        /// Delegate to look up a block by block ID.
        /// Returns null if the block no longer exists (e.g., already disposed).
        /// </param>
        public WriteCacheManager(long maxCacheSizeBytes, TimeSpan cacheExpiry, Func<int, MsgBlock?> blockLookup)
        {
            _maxCacheSizeBytes = maxCacheSizeBytes;
            _cacheExpiryMs = (long)cacheExpiry.TotalMilliseconds;
            _blockLookup = blockLookup;
            _timer = new PeriodicTimer(TickInterval);
            _backgroundTask = RunAsync(_cts.Token);
        }

        /// <summary>Total approximate bytes currently tracked across all cached entries.</summary>
        public long TotalCachedBytes
        {
            get
            {
                long total = 0;
                foreach (var entry in _entries.Values)
                    total += entry.ApproximateBytes;
                return total;
            }
        }

        /// <summary>Number of block IDs currently tracked in the cache.</summary>
        public int TrackedBlockCount => _entries.Count;

        /// <summary>
        /// Records a write to the specified block, stamping the entry with the current
        /// <see cref="Environment.TickCount64"/> and adding to its byte count.
        /// Reference: golang/nats-server/server/filestore.go:6529 (lwts update on write).
        /// </summary>
        /// <param name="blockId">Block that received the write.</param>
        /// <param name="bytes">Approximate number of bytes written.</param>
        public void TrackWrite(int blockId, long bytes)
            => TrackWriteAt(blockId, bytes, Environment.TickCount64);

        /// <summary>
        /// Records a write with an explicit timestamp (ms since boot). Shared by
        /// <see cref="TrackWrite"/>; also lets tests simulate past writes without
        /// sleeping, avoiding timing dependencies in TTL and size-cap eviction tests.
        /// </summary>
        /// <param name="blockId">Block that received the write.</param>
        /// <param name="bytes">Approximate number of bytes written.</param>
        /// <param name="tickCount64Ms">Timestamp to record as the entry's last write time.</param>
        internal void TrackWriteAt(int blockId, long bytes, long tickCount64Ms)
        {
            _entries.AddOrUpdate(
                blockId,
                _ => new CacheEntry { BlockId = blockId, LastWriteTime = tickCount64Ms, ApproximateBytes = bytes },
                (_, existing) =>
                {
                    existing.LastWriteTime = tickCount64Ms;
                    existing.ApproximateBytes += bytes;
                    return existing;
                });
        }

        /// <summary>
        /// Immediately evicts the specified block's cache entry — flush then clear.
        /// Does nothing when the block is not tracked; when the entry exists but the
        /// lookup returns null, only the entry is dropped.
        /// Reference: golang/nats-server/server/filestore.go:4499 (flushPendingMsgsLocked on rotation).
        /// </summary>
        /// <param name="blockId">Block whose cache entry should be evicted.</param>
        public void EvictBlock(int blockId)
        {
            if (!_entries.TryRemove(blockId, out _))
                return;

            var block = _blockLookup(blockId);
            if (block is null)
                return;

            block.Flush();
            block.ClearCache();
        }

        /// <summary>
        /// Flushes and clears the cache for all currently tracked blocks.
        /// Reference: golang/nats-server/server/filestore.go:5499 (flushPendingMsgsLocked, all blocks).
        /// </summary>
        public async Task FlushAllAsync()
        {
            // Snapshot the keys, then reuse the single-block eviction path for each.
            foreach (var blockId in _entries.Keys.ToArray())
                EvictBlock(blockId);

            await Task.CompletedTask; // Kept async for future I/O path upgrades.
        }

        /// <summary>
        /// Stops the background timer and flushes all pending cache entries.
        /// Safe to call more than once; only the first call does any work.
        /// </summary>
        public async ValueTask DisposeAsync()
        {
            // A second call would otherwise hit the already-disposed CancellationTokenSource.
            if (Interlocked.Exchange(ref _disposed, 1) != 0)
                return;

            await _cts.CancelAsync();
            _timer.Dispose();

            try
            {
                await _backgroundTask.ConfigureAwait(false);
            }
            catch (OperationCanceledException oce) when (oce.CancellationToken == _cts.Token)
            {
                // OperationCanceledException from our own CTS is the normal shutdown
                // signal — WaitForNextTickAsync throws when the token is cancelled.
                // Swallowing here is deliberate: the task completed cleanly.
                _ = oce.CancellationToken; // reference to satisfy SW003 non-empty requirement
            }
            finally
            {
                // Flush remaining cached blocks and release the CTS even when the
                // background loop ended with an unexpected exception.
                await FlushAllAsync();
                _cts.Dispose();
            }
        }

        // ------------------------------------------------------------------
        // Background eviction loop
        // ------------------------------------------------------------------

        /// <summary>Runs one eviction pass per timer tick until the timer stops or <paramref name="ct"/> is cancelled.</summary>
        private async Task RunAsync(CancellationToken ct)
        {
            try
            {
                while (await _timer.WaitForNextTickAsync(ct))
                    RunEviction();
            }
            catch (OperationCanceledException oce)
            {
                // PeriodicTimer cancelled — normal background loop shutdown.
                _ = oce;
            }
        }

        /// <summary>
        /// One eviction pass: expire TTL-exceeded entries and enforce the size cap.
        /// Reference: golang/nats-server/server/filestore.go:6220 (expireCacheLocked).
        /// </summary>
        internal void RunEviction()
        {
            var now = Environment.TickCount64;

            // 1. Expire entries that have been idle longer than the TTL.
            foreach (var (blockId, entry) in _entries)
            {
                if (now - entry.LastWriteTime >= _cacheExpiryMs)
                    EvictBlock(blockId);
            }

            // 2. If total size still exceeds cap, evict the oldest entries first.
            if (TotalCachedBytes > _maxCacheSizeBytes)
            {
                var ordered = _entries.Values
                    .OrderBy(e => e.LastWriteTime)
                    .ToArray();

                foreach (var entry in ordered)
                {
                    if (TotalCachedBytes <= _maxCacheSizeBytes)
                        break;
                    EvictBlock(entry.BlockId);
                }
            }
        }
    }
}