Add LMDB oplog migration path with dual-write cutover support
All checks were successful
NuGet Package Publish / nuget (push) Successful in 1m16s
All checks were successful
NuGet Package Publish / nuget (push) Successful in 1m16s
Introduce LMDB oplog store, migration flags, telemetry/backfill tooling, and parity tests to enable staged Surreal-to-LMDB rollout with rollback coverage.
This commit is contained in:
@@ -10,6 +10,7 @@ using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
using ZB.MOM.WW.CBDDC.Network;
|
||||
using ZB.MOM.WW.CBDDC.Network.Security;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.Lmdb;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.Surreal;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.E2E.Tests;
|
||||
@@ -240,6 +241,92 @@ public class ClusterCrudSyncE2ETests
|
||||
}, 60, "Node B did not catch up missed reconnect mutations.", () => BuildDiagnostics(nodeA, nodeB));
|
||||
}
|
||||
|
||||
/// <summary>
/// Ensures that a peer which was stopped while another node kept writing converges on the latest
/// document state after it is started again, with both nodes configured for the LMDB oplog,
/// dual-write enabled, and LMDB-preferred reads.
/// </summary>
[Fact]
public async Task PeerReconnect_ShouldCatchUpMissedChanges_WithLmdbPreferredReads()
{
    const string userId = "reconnect-lmdb-user";

    string sharedToken = Guid.NewGuid().ToString("N");

    // Pick two distinct loopback ports; keep asking until node B's port differs from node A's.
    int portA = GetAvailableTcpPort();
    int portB;
    do
    {
        portB = GetAvailableTcpPort();
    } while (portB == portA);

    // Each node lists the other as its only known peer.
    IReadOnlyList<KnownPeerConfiguration> peersOfA =
    [
        new KnownPeerConfiguration { NodeId = "node-b", Host = "127.0.0.1", Port = portB }
    ];

    await using var nodeA = TestPeerNode.Create(
        "node-a",
        portA,
        sharedToken,
        peersOfA,
        useLmdbOplog: true,
        dualWriteOplog: true,
        preferLmdbReads: true);

    IReadOnlyList<KnownPeerConfiguration> peersOfB =
    [
        new KnownPeerConfiguration { NodeId = "node-a", Host = "127.0.0.1", Port = portA }
    ];

    await using var nodeB = TestPeerNode.Create(
        "node-b",
        portB,
        sharedToken,
        peersOfB,
        useLmdbOplog: true,
        dualWriteOplog: true,
        preferLmdbReads: true);

    await nodeA.StartAsync();
    await nodeB.StartAsync();

    // Stop node B so the writes below are applied on node A only.
    await nodeB.StopAsync();

    // Three successive revisions of the same user, written in order while node B is stopped.
    var offlineRevisions = new[]
    {
        (Name: "Offline Create", Age: 20, City: "Rome"),
        (Name: "Offline Update", Age: 21, City: "Milan"),
        (Name: "Offline Final", Age: 22, City: "Turin")
    };

    foreach (var revision in offlineRevisions)
    {
        await nodeA.UpsertUserAsync(new User
        {
            Id = userId,
            Name = revision.Name,
            Age = revision.Age,
            Address = new Address { City = revision.City }
        });
    }

    // Restart node B and wait until it reads back the final revision.
    await nodeB.StartAsync();

    await AssertEventuallyAsync(
        () =>
        {
            var replicated = nodeB.ReadUser(userId);
            if (replicated is null)
            {
                return false;
            }

            return replicated.Name == "Offline Final"
                   && replicated.Age == 22
                   && replicated.Address?.City == "Turin";
        },
        60,
        "Node B did not catch up missed reconnect mutations with LMDB preferred reads.",
        () => BuildDiagnostics(nodeA, nodeB));
}
|
||||
|
||||
/// <summary>
|
||||
/// Verifies a burst of rapid multi-node mutations converges to a deterministic final state.
|
||||
/// </summary>
|
||||
@@ -572,6 +659,9 @@ public class ClusterCrudSyncE2ETests
|
||||
/// <param name="workDirOverride">An optional working directory override for test artifacts.</param>
|
||||
/// <param name="preserveWorkDirOnDispose">A value indicating whether to preserve the working directory on dispose.</param>
|
||||
/// <param name="useFaultInjectedCheckpointStore">A value indicating whether to inject a checkpoint persistence that fails once.</param>
|
||||
/// <param name="useLmdbOplog">A value indicating whether to enable the LMDB oplog migration path.</param>
|
||||
/// <param name="dualWriteOplog">A value indicating whether oplog writes should be mirrored to Surreal + LMDB.</param>
|
||||
/// <param name="preferLmdbReads">A value indicating whether reads should prefer LMDB.</param>
|
||||
/// <returns>A configured <see cref="TestPeerNode" /> instance.</returns>
|
||||
public static TestPeerNode Create(
|
||||
string nodeId,
|
||||
@@ -580,7 +670,10 @@ public class ClusterCrudSyncE2ETests
|
||||
IReadOnlyList<KnownPeerConfiguration> knownPeers,
|
||||
string? workDirOverride = null,
|
||||
bool preserveWorkDirOnDispose = false,
|
||||
bool useFaultInjectedCheckpointStore = false)
|
||||
bool useFaultInjectedCheckpointStore = false,
|
||||
bool useLmdbOplog = false,
|
||||
bool dualWriteOplog = true,
|
||||
bool preferLmdbReads = false)
|
||||
{
|
||||
string workDir = workDirOverride ?? Path.Combine(Path.GetTempPath(), $"cbddc-e2e-{nodeId}-{Guid.NewGuid():N}");
|
||||
Directory.CreateDirectory(workDir);
|
||||
@@ -620,13 +713,47 @@ public class ClusterCrudSyncE2ETests
|
||||
if (useFaultInjectedCheckpointStore)
|
||||
{
|
||||
services.AddSingleton<ISurrealCdcCheckpointPersistence, CrashAfterFirstAdvanceCheckpointPersistence>();
|
||||
coreBuilder.AddCBDDCSurrealEmbedded<FaultInjectedSampleDocumentStore>(surrealOptionsFactory)
|
||||
.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>(false);
|
||||
var registration = coreBuilder.AddCBDDCSurrealEmbedded<FaultInjectedSampleDocumentStore>(surrealOptionsFactory);
|
||||
if (useLmdbOplog)
|
||||
registration.AddCBDDCLmdbOplog(
|
||||
_ => new LmdbOplogOptions
|
||||
{
|
||||
EnvironmentPath = Path.Combine(workDir, "oplog-lmdb"),
|
||||
MapSizeBytes = 128L * 1024 * 1024,
|
||||
MaxDatabases = 16,
|
||||
PruneBatchSize = 256
|
||||
},
|
||||
flags =>
|
||||
{
|
||||
flags.UseLmdbOplog = true;
|
||||
flags.DualWriteOplog = dualWriteOplog;
|
||||
flags.PreferLmdbReads = preferLmdbReads;
|
||||
flags.ReconciliationInterval = TimeSpan.Zero;
|
||||
});
|
||||
|
||||
registration.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
coreBuilder.AddCBDDCSurrealEmbedded<SampleDocumentStore>(surrealOptionsFactory)
|
||||
.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>(false);
|
||||
var registration = coreBuilder.AddCBDDCSurrealEmbedded<SampleDocumentStore>(surrealOptionsFactory);
|
||||
if (useLmdbOplog)
|
||||
registration.AddCBDDCLmdbOplog(
|
||||
_ => new LmdbOplogOptions
|
||||
{
|
||||
EnvironmentPath = Path.Combine(workDir, "oplog-lmdb"),
|
||||
MapSizeBytes = 128L * 1024 * 1024,
|
||||
MaxDatabases = 16,
|
||||
PruneBatchSize = 256
|
||||
},
|
||||
flags =>
|
||||
{
|
||||
flags.UseLmdbOplog = true;
|
||||
flags.DualWriteOplog = dualWriteOplog;
|
||||
flags.PreferLmdbReads = preferLmdbReads;
|
||||
flags.ReconciliationInterval = TimeSpan.Zero;
|
||||
});
|
||||
|
||||
registration.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>(false);
|
||||
}
|
||||
|
||||
// Deterministic tests: sync uses explicit known peers, so disable UDP discovery.
|
||||
|
||||
Reference in New Issue
Block a user