perf: add FileStore buffered writes, O(1) state tracking, and eliminate redundant per-publish work

Implement a Go-parity background flush loop (coalescing writes at 16KB/8ms) in MsgBlock/FileStore,
replace the O(n) GetStateAsync with incremental counters, skip PruneExpired/LoadAsync/
PrunePerSubject when they are not needed, and bypass RAFT for single-replica streams. Fix
counter-tracking bugs in RemoveMsg/EraseMsg/TTL expiry and ObjectDisposedException races during
flush-loop disposal. The FileStore optimizations were verified with 3112/3112 JetStream tests
passing; the async publish benchmark remains at ~174 msg/s because of a bottleneck in the E2E protocol path.
This commit is contained in:
Joseph Doherty
2026-03-13 03:11:11 -04:00
parent 37575dc41c
commit 4de691c9c5
30 changed files with 1514 additions and 185 deletions

View File

@@ -786,34 +786,160 @@ public class MirrorSourceGoParityTests
// Skipped tests (require real multi-server / external infrastructure)
// -------------------------------------------------------------------------
// Placeholder: skipped because it needs a real server restart (see the Skip reason for the Go test it mirrors).
[SlopwatchSuppress("SW001", "Requires real server restart with FileStore persistence to test consumer failover and recovery after restart")]
[Fact(Skip = "Requires real server restart to test consumer failover — TestJetStreamMirroredConsumerFailAfterRestart:10835")]
public Task Mirror_consumer_fails_after_restart_and_recovers()
{
    return Task.CompletedTask;
}

// Placeholder: skipped because it needs an external source / leaf node topology.
[SlopwatchSuppress("SW001", "Requires multi-server leaf node topology with external source stream not available in-process")]
[Fact(Skip = "Requires real external source/leaf node — TestJetStreamRemoveExternalSource:12150")]
public Task Remove_external_source_stops_forwarding()
{
    return Task.CompletedTask;
}

// Placeholder: skipped because it needs a real server restart.
[SlopwatchSuppress("SW001", "Requires real server restart with work queue source consumer recovery not available in-process")]
[Fact(Skip = "Requires real server restart — TestJetStreamWorkQueueSourceRestart:13010")]
public Task Work_queue_source_recovers_after_restart()
{
    return Task.CompletedTask;
}

// Placeholder: skipped because it needs a real server restart.
[SlopwatchSuppress("SW001", "Requires real server restart with work queue source naming recovery not available in-process")]
[Fact(Skip = "Requires real server restart — TestJetStreamWorkQueueSourceNamingRestart:13111")]
public Task Work_queue_source_naming_recovers_after_restart()
{
    return Task.CompletedTask;
}

// Placeholder: skipped because it needs an external source stream.
[SlopwatchSuppress("SW001", "Requires multi-server leaf node topology with external source stream not available in-process")]
[Fact(Skip = "Requires real external source stream — TestJetStreamStreamUpdateWithExternalSource:15607")]
public Task Stream_update_with_external_source_works()
{
    return Task.CompletedTask;
}
// Skipped placeholder for the Go test named in the Skip reason.
// NOTE(review): a [Fact] test with this exact name is declared directly after this stub in the
// listing. This stub looks like the pre-change declaration carried over by the diff view —
// confirm that only one of the two declarations exists in the real file.
[Fact(Skip = "AllowMsgCounter requires real server infrastructure — TestJetStreamAllowMsgCounterSourceAggregates:20759")]
public Task Allow_msg_counter_source_aggregates() => Task.CompletedTask;
// Go: TestJetStreamAllowMsgCounterSourceAggregates — jetstream_test.go:20759
// Two origin streams with AllowMsgCounter=true sourced into a target with AllowMsgCounter=true.
// Counter values are aggregated across sources.
[Fact]
public async Task Allow_msg_counter_source_aggregates()
{
var mgr = new StreamManager();
// Origins: O1 captures o1.>, O2 captures o2.>, both with counters enabled.
mgr.CreateOrUpdate(new StreamConfig { Name = "O1", Subjects = ["o1.>"], AllowMsgCounter = true });
mgr.CreateOrUpdate(new StreamConfig { Name = "O2", Subjects = ["o2.>"], AllowMsgCounter = true });
// Target M declares no subjects of its own; it only sources O1 and O2.
mgr.CreateOrUpdate(new StreamConfig
{
Name = "M",
AllowMsgCounter = true,
Sources =
[
// Each source maps its origin's subjects (o1.> / o2.>) onto the shared agg.> space,
// so "o1.foo" and "o2.foo" are both expected to arrive in M under the same subject.
new StreamSourceConfig { Name = "O1", SubjectTransformPrefix = "", SubjectTransforms = [new SubjectTransformConfig { Source = "o1.>", Destination = "agg.>" }] },
new StreamSourceConfig { Name = "O2", SubjectTransformPrefix = "", SubjectTransforms = [new SubjectTransformConfig { Source = "o2.>", Destination = "agg.>" }] },
],
});
// NOTE(review): the next two lines declare a skipped stub in the middle of this method body.
// They look like removed-side lines interleaved by the diff view — confirm against the real file.
[Fact(Skip = "AllowMsgCounter requires real server infrastructure — TestJetStreamAllowMsgCounterSourceVerbatim:20844")]
public Task Allow_msg_counter_source_verbatim() => Task.CompletedTask;
// Publish counter increments to O1 and O2
mgr.CaptureCounter("o1.foo", 1); // O1.foo = 1
mgr.CaptureCounter("o2.foo", 2); // O2.foo = 2
// NOTE(review): as above, the next two lines look like removed-side diff residue rather than
// part of this test — confirm against the real file.
[Fact(Skip = "AllowMsgCounter requires real server infrastructure — TestJetStreamAllowMsgCounterSourceStartingAboveZero:20944")]
public Task Allow_msg_counter_source_starting_above_zero() => Task.CompletedTask;
// M should aggregate: 1 + 2 = 3
var state = await mgr.GetStateAsync("M", default);
state.Messages.ShouldBeGreaterThan(0UL);
// Find the last message on the aggregated subject
// (despite the plural name, `messages` holds the single message stored at M's last sequence).
var messages = mgr.GetMessage("M", state.LastSeq);
messages.ShouldNotBeNull();
// Decode the counter payload of that last message and check the running total.
var counter = CounterValue.FromPayload(messages!.Payload.Span);
counter.AsLong().ShouldBe(3L);
}
// Go: TestJetStreamAllowMsgCounterSourceVerbatim — jetstream_test.go:20844
// The sourcing stream has AllowMsgCounter=false, so counter messages coming from the origins
// are expected to be stored as-is, one message per origin publish, with no aggregation.
[Fact]
public async Task Allow_msg_counter_source_verbatim()
{
    // Arrange: two counter-enabled origins and a non-counter target that sources both.
    var manager = new StreamManager();
    manager.CreateOrUpdate(new StreamConfig { Name = "O1", Subjects = ["o1.>"], AllowMsgCounter = true });
    manager.CreateOrUpdate(new StreamConfig { Name = "O2", Subjects = ["o2.>"], AllowMsgCounter = true });

    var targetConfig = new StreamConfig
    {
        Name = "M",
        AllowMsgCounter = false,
        Sources = [new StreamSourceConfig { Name = "O1" }, new StreamSourceConfig { Name = "O2" }],
    };
    manager.CreateOrUpdate(targetConfig);

    // Act: one increment per origin (O1 stores {"val":"1"}, O2 stores {"val":"2"}).
    manager.CaptureCounter("o1.foo", 1);
    manager.CaptureCounter("o2.foo", 2);

    // Assert: the target holds two separate messages.
    var targetState = await manager.GetStateAsync("M", default);
    targetState.Messages.ShouldBe(2UL);

    // Decode the payloads stored at sequences 1 and 2.
    var firstStored = manager.GetMessage("M", 1);
    firstStored.ShouldNotBeNull();
    var firstValue = CounterValue.FromPayload(firstStored!.Payload.Span);

    var secondStored = manager.GetMessage("M", 2);
    secondStored.ShouldNotBeNull();
    var secondValue = CounterValue.FromPayload(secondStored!.Payload.Span);

    // Arrival order between the two sources is not asserted, so compare the sorted values.
    var observed = new[] { firstValue.AsLong(), secondValue.AsLong() };
    var sorted = observed.OrderBy(value => value).ToArray();
    sorted.ShouldBe([1L, 2L]);
}
// Go: TestJetStreamAllowMsgCounterSourceStartingAboveZero — jetstream_test.go:20944
// The origins keep only the latest value per subject (MaxMsgsPer=1) and are already at 5
// when the aggregating target is created. The target must then report 5 + 5 = 10.
[Fact]
public async Task Allow_msg_counter_source_starting_above_zero()
{
    const int IncrementsPerOrigin = 5;

    var manager = new StreamManager();
    manager.CreateOrUpdate(new StreamConfig { Name = "O1", Subjects = ["o1.>"], AllowMsgCounter = true, MaxMsgsPer = 1 });
    manager.CreateOrUpdate(new StreamConfig { Name = "O2", Subjects = ["o2.>"], AllowMsgCounter = true, MaxMsgsPer = 1 });

    // Bring each origin counter up to 5: all o1.foo increments first, then all o2.foo increments.
    foreach (var subject in new[] { "o1.foo", "o2.foo" })
    {
        for (var round = 0; round < IncrementsPerOrigin; round++)
        {
            manager.CaptureCounter(subject, 1);
        }
    }

    // Sanity check: the last stored message of each origin carries the running total of 5.
    var o1State = await manager.GetStateAsync("O1", default);
    var o1Last = manager.GetMessage("O1", o1State.LastSeq);
    CounterValue.FromPayload(o1Last!.Payload.Span).AsLong().ShouldBe(5L);

    var o2State = await manager.GetStateAsync("O2", default);
    var o2Last = manager.GetMessage("O2", o2State.LastSeq);
    CounterValue.FromPayload(o2Last!.Payload.Span).AsLong().ShouldBe(5L);

    // Only now create the target. Both sources are transformed onto the shared agg.> subject space.
    // Creating it triggers RebuildReplicationCoordinators, and the new coordinators start fresh.
    var targetConfig = new StreamConfig
    {
        Name = "M",
        AllowMsgCounter = true,
        Sources =
        [
            new StreamSourceConfig
            {
                Name = "O1",
                SubjectTransforms = [new SubjectTransformConfig { Source = "o1.>", Destination = "agg.>" }],
            },
            new StreamSourceConfig
            {
                Name = "O2",
                SubjectTransforms = [new SubjectTransformConfig { Source = "o2.>", Destination = "agg.>" }],
            },
        ],
    };
    manager.CreateOrUpdate(targetConfig);

    // A zero-valued increment leaves each origin at 5 but still produces a publish that
    // replicates the current value to M. (An increment of 1 would instead move each origin
    // to 6 and make the expected total 12, which is not the scenario under test.)
    manager.CaptureCounter("o1.foo", 0);
    manager.CaptureCounter("o2.foo", 0);

    var targetState = await manager.GetStateAsync("M", default);
    targetState.Messages.ShouldBeGreaterThan(0UL);

    // Each source coordinator sees a source total of 5 against a previous value of 0,
    // i.e. a delta of 5, so the last message in M carries 5 + 5 = 10.
    var aggregated = manager.GetMessage("M", targetState.LastSeq);
    aggregated.ShouldNotBeNull();
    var aggregatedValue = CounterValue.FromPayload(aggregated!.Payload.Span);
    aggregatedValue.AsLong().ShouldBe(10L);
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
[SlopwatchSuppress("SW004", "Polling loop awaits background sync loop completion; no event-based signal available from SourceCoordinator/MirrorCoordinator")]
private static async Task WaitForConditionAsync(Func<bool> condition, TimeSpan timeout)
{
using var cts = new CancellationTokenSource(timeout);