Files
CBDDC/tests/ZB.MOM.WW.CBDDC.E2E.Tests/ClusterCrudSyncE2ETests.cs
Joseph Doherty cce24fa8f3
All checks were successful
NuGet Package Publish / nuget (push) Successful in 1m16s
Add LMDB oplog migration path with dual-write cutover support
Introduce LMDB oplog store, migration flags, telemetry/backfill tooling, and parity tests to enable staged Surreal-to-LMDB rollout with rollback coverage.
2026-02-22 17:44:57 -05:00

1330 lines
47 KiB
C#

using System.Collections.Concurrent;
using System.Diagnostics;
using System.Net;
using System.Net.Sockets;
using System.Text.Json;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Core.Sync;
using ZB.MOM.WW.CBDDC.Network;
using ZB.MOM.WW.CBDDC.Network.Security;
using ZB.MOM.WW.CBDDC.Persistence.Lmdb;
using ZB.MOM.WW.CBDDC.Persistence.Surreal;
namespace ZB.MOM.WW.CBDDC.E2E.Tests;
public class ClusterCrudSyncE2ETests
{
/// <summary>
/// Verifies two real peers replicate create, update, and delete operations in both directions.
/// </summary>
[Fact]
public async Task TwoPeers_ShouldReplicateCrudBidirectionally()
{
    var clusterToken = Guid.NewGuid().ToString("N");
    int nodeAPort = GetAvailableTcpPort();
    int nodeBPort = GetAvailableTcpPort();
    while (nodeBPort == nodeAPort) nodeBPort = GetAvailableTcpPort();
    // Fix: nodeA was declared without `await using` (nodeB had it), so it was never
    // disposed and leaked its work directory and embedded database handles each run.
    await using var nodeA = TestPeerNode.Create(
        "node-a",
        nodeAPort,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-b",
                Host = "127.0.0.1",
                Port = nodeBPort
            }
        ]);
    await using var nodeB = TestPeerNode.Create(
        "node-b",
        nodeBPort,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-a",
                Host = "127.0.0.1",
                Port = nodeAPort
            }
        ]);
    await nodeA.StartAsync();
    await nodeB.StartAsync();
    const int timeoutSeconds = 45;

    // A -> B: create.
    var nodeAUserId = "user-from-a";
    await nodeA.UpsertUserAsync(new User
    {
        Id = nodeAUserId,
        Name = "Alice",
        Age = 30,
        Address = new Address { City = "Austin" }
    });
    await AssertEventuallyAsync(() =>
    {
        var replicated = nodeB.ReadUser(nodeAUserId);
        return replicated is not null
               && replicated.Name == "Alice"
               && replicated.Age == 30
               && replicated.Address?.City == "Austin";
    }, timeoutSeconds, "Node B did not receive create from node A.", () => BuildDiagnostics(nodeA, nodeB));
    await AssertEventuallyAsync(
        () => nodeA.ReadUser(nodeAUserId) is not null,
        timeoutSeconds,
        "Node A could not read back its own created user.",
        () => BuildDiagnostics(nodeA, nodeB));

    // A -> B: delete followed by re-create acts as an update.
    await nodeA.DeleteUserAsync(nodeAUserId);
    await nodeA.UpsertUserAsync(new User
    {
        Id = nodeAUserId,
        Name = "Alice Updated",
        Age = 31,
        Address = new Address { City = "Dallas" }
    });
    await AssertEventuallyAsync(() =>
    {
        var replicated = nodeB.ReadUser(nodeAUserId);
        return replicated is not null
               && replicated.Name == "Alice Updated"
               && replicated.Age == 31
               && replicated.Address?.City == "Dallas";
    }, timeoutSeconds, "Node B did not receive update from node A.", () => BuildDiagnostics(nodeA, nodeB));

    // A -> B: delete.
    await nodeA.DeleteUserAsync(nodeAUserId);
    await AssertEventuallyAsync(
        () => nodeB.ReadUser(nodeAUserId) is null,
        timeoutSeconds,
        "Node B did not receive delete from node A.",
        () => BuildDiagnostics(nodeA, nodeB));

    // B -> A: the same CRUD sequence in the opposite direction.
    var nodeBUserId = "user-from-b";
    await nodeB.UpsertUserAsync(new User
    {
        Id = nodeBUserId,
        Name = "Bob",
        Age = 40,
        Address = new Address { City = "Boston" }
    });
    await AssertEventuallyAsync(() =>
    {
        var replicated = nodeA.ReadUser(nodeBUserId);
        return replicated is not null
               && replicated.Name == "Bob"
               && replicated.Age == 40
               && replicated.Address?.City == "Boston";
    }, timeoutSeconds, "Node A did not receive create from node B.", () => BuildDiagnostics(nodeA, nodeB));
    await AssertEventuallyAsync(
        () => nodeB.ReadUser(nodeBUserId) is not null,
        timeoutSeconds,
        "Node B could not read back its own created user.",
        () => BuildDiagnostics(nodeA, nodeB));
    await nodeB.DeleteUserAsync(nodeBUserId);
    await nodeB.UpsertUserAsync(new User
    {
        Id = nodeBUserId,
        Name = "Bob Updated",
        Age = 41,
        Address = new Address { City = "Denver" }
    });
    await AssertEventuallyAsync(() =>
    {
        var replicated = nodeA.ReadUser(nodeBUserId);
        return replicated is not null
               && replicated.Name == "Bob Updated"
               && replicated.Age == 41
               && replicated.Address?.City == "Denver";
    }, timeoutSeconds, "Node A did not receive update from node B.", () => BuildDiagnostics(nodeA, nodeB));
    await nodeB.DeleteUserAsync(nodeBUserId);
    await AssertEventuallyAsync(
        () => nodeA.ReadUser(nodeBUserId) is null,
        timeoutSeconds,
        "Node A did not receive delete from node B.",
        () => BuildDiagnostics(nodeA, nodeB));
}
/// <summary>
/// Verifies a reconnecting peer catches up mutations that happened while it was offline.
/// </summary>
[Fact]
public async Task PeerReconnect_ShouldCatchUpMissedChanges()
{
    var clusterToken = Guid.NewGuid().ToString("N");
    int nodeAPort = GetAvailableTcpPort();
    int nodeBPort = GetAvailableTcpPort();
    while (nodeBPort == nodeAPort) nodeBPort = GetAvailableTcpPort();
    // Fix: nodeA was declared without `await using` (nodeB had it), so it was never
    // disposed and leaked its work directory and embedded database handles each run.
    await using var nodeA = TestPeerNode.Create(
        "node-a",
        nodeAPort,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-b",
                Host = "127.0.0.1",
                Port = nodeBPort
            }
        ]);
    await using var nodeB = TestPeerNode.Create(
        "node-b",
        nodeBPort,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-a",
                Host = "127.0.0.1",
                Port = nodeAPort
            }
        ]);
    await nodeA.StartAsync();
    await nodeB.StartAsync();

    // Take node B offline, then mutate the same document three times on node A.
    await nodeB.StopAsync();
    const string userId = "reconnect-user";
    await nodeA.UpsertUserAsync(new User
    {
        Id = userId,
        Name = "Offline Create",
        Age = 20,
        Address = new Address { City = "Rome" }
    });
    await nodeA.UpsertUserAsync(new User
    {
        Id = userId,
        Name = "Offline Update",
        Age = 21,
        Address = new Address { City = "Milan" }
    });
    await nodeA.UpsertUserAsync(new User
    {
        Id = userId,
        Name = "Offline Final",
        Age = 22,
        Address = new Address { City = "Turin" }
    });

    // On reconnect, node B must converge to the final state only.
    await nodeB.StartAsync();
    await AssertEventuallyAsync(() =>
    {
        var replicated = nodeB.ReadUser(userId);
        return replicated is not null &&
               replicated.Name == "Offline Final" &&
               replicated.Age == 22 &&
               replicated.Address?.City == "Turin";
    }, 60, "Node B did not catch up missed reconnect mutations.", () => BuildDiagnostics(nodeA, nodeB));
}
/// <summary>
/// Verifies reconnect catch-up still works when reads are cut over to LMDB with dual-write enabled.
/// </summary>
[Fact]
public async Task PeerReconnect_ShouldCatchUpMissedChanges_WithLmdbPreferredReads()
{
    var clusterToken = Guid.NewGuid().ToString("N");
    int portA = GetAvailableTcpPort();
    int portB = GetAvailableTcpPort();
    while (portB == portA) portB = GetAvailableTcpPort();
    await using var nodeA = TestPeerNode.Create(
        "node-a",
        portA,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-b",
                Host = "127.0.0.1",
                Port = portB
            }
        ],
        useLmdbOplog: true,
        dualWriteOplog: true,
        preferLmdbReads: true);
    await using var nodeB = TestPeerNode.Create(
        "node-b",
        portB,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-a",
                Host = "127.0.0.1",
                Port = portA
            }
        ],
        useLmdbOplog: true,
        dualWriteOplog: true,
        preferLmdbReads: true);
    await nodeA.StartAsync();
    await nodeB.StartAsync();

    // Take node B offline while node A applies a chain of mutations to one document.
    await nodeB.StopAsync();
    const string targetUserId = "reconnect-lmdb-user";
    (string Name, int Age, string City)[] offlineMutations =
    [
        ("Offline Create", 20, "Rome"),
        ("Offline Update", 21, "Milan"),
        ("Offline Final", 22, "Turin")
    ];
    foreach ((string name, int age, string city) in offlineMutations)
        await nodeA.UpsertUserAsync(new User
        {
            Id = targetUserId,
            Name = name,
            Age = age,
            Address = new Address { City = city }
        });

    // On restart node B must converge to the last mutation only.
    await nodeB.StartAsync();
    await AssertEventuallyAsync(() =>
    {
        var caughtUp = nodeB.ReadUser(targetUserId);
        return caughtUp is not null &&
               caughtUp.Name == "Offline Final" &&
               caughtUp.Age == 22 &&
               caughtUp.Address?.City == "Turin";
    }, 60, "Node B did not catch up missed reconnect mutations with LMDB preferred reads.",
        () => BuildDiagnostics(nodeA, nodeB));
}
/// <summary>
/// Verifies a burst of rapid multi-node mutations converges to a deterministic final state.
/// </summary>
[Fact]
public async Task MultiChangeBurst_ShouldConvergeDeterministically()
{
    var clusterToken = Guid.NewGuid().ToString("N");
    int portA = GetAvailableTcpPort();
    int portB = GetAvailableTcpPort();
    while (portB == portA) portB = GetAvailableTcpPort();
    await using var nodeA = TestPeerNode.Create(
        "node-a",
        portA,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-b",
                Host = "127.0.0.1",
                Port = portB
            }
        ]);
    await using var nodeB = TestPeerNode.Create(
        "node-b",
        portB,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-a",
                Host = "127.0.0.1",
                Port = portA
            }
        ]);
    await nodeA.StartAsync();
    await nodeB.StartAsync();

    // Interleave writes on both nodes to create concurrent replication traffic.
    const int burstCount = 8;
    for (var index = 0; index < burstCount; index++)
    {
        await nodeA.UpsertUserAsync(new User
        {
            Id = $"burst-a-{index:D2}",
            Name = $"A-{index}",
            Age = 30 + index,
            Address = new Address { City = $"CityA-{index}" }
        });
        await nodeB.UpsertUserAsync(new User
        {
            Id = $"burst-b-{index:D2}",
            Name = $"B-{index}",
            Age = 40 + index,
            Address = new Address { City = $"CityB-{index}" }
        });
    }

    // First converge on counts, then verify per-document content on both sides.
    await AssertEventuallyAsync(
        () => nodeA.Context.Users.FindAll().Count() == burstCount * 2 &&
              nodeB.Context.Users.FindAll().Count() == burstCount * 2,
        60,
        "Burst convergence did not reach expected document counts.",
        () => BuildDiagnostics(nodeA, nodeB));
    await AssertEventuallyAsync(() =>
    {
        for (var index = 0; index < burstCount; index++)
        {
            var fromAOnB = nodeB.ReadUser($"burst-a-{index:D2}");
            var fromBOnA = nodeA.ReadUser($"burst-b-{index:D2}");
            if (fromAOnB is null || fromBOnA is null) return false;
            if (fromAOnB.Name != $"A-{index}" || fromBOnA.Name != $"B-{index}") return false;
        }

        return true;
    }, 60, "Burst convergence content mismatch.", () => BuildDiagnostics(nodeA, nodeB));
}
/// <summary>
/// Verifies recovery safety when a process crashes after oplog commit but before checkpoint advance.
/// </summary>
[Fact]
public async Task CrashBetweenOplogAndCheckpoint_ShouldReplaySafelyOnRestart()
{
    var clusterToken = Guid.NewGuid().ToString("N");
    int nodeAPort = GetAvailableTcpPort();
    int nodeBPort = GetAvailableTcpPort();
    while (nodeBPort == nodeAPort) nodeBPort = GetAvailableTcpPort();
    // The work dir is shared between the original and the recovered node-a instance
    // so the restart reopens the same database files.
    string sharedWorkDir = Path.Combine(Path.GetTempPath(), $"cbddc-e2e-crash-{Guid.NewGuid():N}");
    Directory.CreateDirectory(sharedWorkDir);
    // preserveWorkDirOnDispose keeps the on-disk state alive across the simulated
    // crash; useFaultInjectedCheckpointStore makes the first checkpoint advance throw.
    await using var nodeA = TestPeerNode.Create(
        "node-a",
        nodeAPort,
        clusterToken,
        [
            new KnownPeerConfiguration
            {
                NodeId = "node-b",
                Host = "127.0.0.1",
                Port = nodeBPort
            }
        ],
        workDirOverride: sharedWorkDir,
        preserveWorkDirOnDispose: true,
        useFaultInjectedCheckpointStore: true);
    bool nodeADisposed = false;
    try
    {
        await using var nodeB = TestPeerNode.Create(
            "node-b",
            nodeBPort,
            clusterToken,
            [
                new KnownPeerConfiguration
                {
                    NodeId = "node-a",
                    Host = "127.0.0.1",
                    Port = nodeAPort
                }
            ]);
        await nodeA.StartAsync();
        await nodeB.StartAsync();
        const string userId = "crash-window-user";
        var payload = new User
        {
            Id = userId,
            Name = "Crash Recovered",
            Age = 45,
            Address = new Address { City = "Naples" }
        };
        // The injected persistence throws on its first advance, simulating a crash...
        await Should.ThrowAsync<InvalidOperationException>(() => nodeA.UpsertUserAsync(payload));
        // ...but the document and exactly one locally-authored oplog entry must
        // already be committed at that point.
        nodeA.ReadUser(userId).ShouldNotBeNull();
        nodeA.GetLocalOplogCountForKey("Users", userId).ShouldBe(1);
        // "Crash" node A: stop and dispose it while preserving its work directory.
        await nodeA.StopAsync();
        await nodeA.DisposeAsync();
        nodeADisposed = true;
        // Reopen node-a on the same work dir; retry briefly because the embedded
        // RocksDB may not release its file locks immediately after dispose.
        TestPeerNode? recoveredNodeA = null;
        for (var attempt = 0; attempt < 10; attempt++)
            try
            {
                recoveredNodeA = TestPeerNode.Create(
                    "node-a",
                    nodeAPort,
                    clusterToken,
                    [
                        new KnownPeerConfiguration
                        {
                            NodeId = "node-b",
                            Host = "127.0.0.1",
                            Port = nodeBPort
                        }
                    ],
                    workDirOverride: sharedWorkDir);
                break;
            }
            catch (Exception ex) when (IsRocksDbLockContention(ex) && attempt < 9)
            {
                await Task.Delay(100);
            }
        recoveredNodeA.ShouldNotBeNull();
        await using (recoveredNodeA)
        {
            await recoveredNodeA.StartAsync();
            // After restart, the committed mutation must replicate to node B...
            await AssertEventuallyAsync(() =>
            {
                var replicated = nodeB.ReadUser(userId);
                return replicated is not null
                       && replicated.Name == payload.Name
                       && replicated.Age == payload.Age
                       && replicated.Address?.City == payload.Address?.City;
            }, 60, "Node B did not converge after crash-window recovery.", () => BuildDiagnostics(recoveredNodeA, nodeB));
            // ...and replay must be idempotent: still exactly one entry per node.
            await AssertEventuallyAsync(
                () => recoveredNodeA.GetOplogCountForKey("Users", userId) == 1 &&
                      nodeB.GetOplogCountForKey("Users", userId) == 1,
                60,
                "Crash-window recovery created duplicate oplog entries.",
                () => BuildDiagnostics(recoveredNodeA, nodeB));
        }
    }
    finally
    {
        // Ensure node A is disposed even when the test fails before the mid-test
        // dispose that simulates the crash.
        if (!nodeADisposed) await nodeA.DisposeAsync();
    }
}
/// <summary>
/// Polls a predicate until it returns <see langword="true" /> or the timeout elapses,
/// then fails with the supplied message plus optional diagnostics.
/// </summary>
/// <param name="predicate">The condition that must eventually hold.</param>
/// <param name="timeoutSeconds">The maximum number of seconds to keep polling.</param>
/// <param name="failureMessage">The assertion message used when the condition never holds.</param>
/// <param name="diagnostics">An optional factory producing extra failure context, invoked only on failure.</param>
/// <returns>A task that represents the asynchronous operation.</returns>
private static async Task AssertEventuallyAsync(
    Func<bool> predicate,
    int timeoutSeconds,
    string failureMessage,
    Func<string>? diagnostics = null)
{
    var timeout = TimeSpan.FromSeconds(timeoutSeconds);
    // Stopwatch is monotonic; DateTime.UtcNow can jump under system clock
    // adjustments and silently shrink or stretch the polling window.
    var stopwatch = Stopwatch.StartNew();
    while (stopwatch.Elapsed < timeout)
    {
        if (predicate()) return;
        await Task.Delay(250);
    }
    // One last evaluation avoids a flaky failure when the condition becomes true
    // between the final poll and timeout expiry.
    if (predicate()) return;
    string suffix = diagnostics is null ? string.Empty : $"{Environment.NewLine}{diagnostics()}";
    throw new ShouldAssertException($"{failureMessage}{suffix}");
}
/// <summary>
/// Builds a human-readable convergence snapshot of both peers for assertion failures.
/// </summary>
/// <param name="nodeA">The first peer.</param>
/// <param name="nodeB">The second peer.</param>
/// <returns>A multi-line diagnostics string with counts, document details, and recent logs.</returns>
private static string BuildDiagnostics(TestPeerNode nodeA, TestPeerNode nodeB)
{
    // Summarizes one node: user count, oplog count, per-user detail, entries per author.
    static (int UserCount, int OplogCount, string Users, string OplogByAuthor) Summarize(TestPeerNode node)
    {
        var users = node.Context.Users.FindAll().ToList();
        var oplog = node.Context.OplogEntries.FindAll().ToList();
        string userDetail = string.Join(
            ", ",
            users.Select(u => $"{u.Id}:{u.Name}:{u.Age}:{u.Address?.City}"));
        string byAuthor = string.Join(
            ", ",
            oplog.GroupBy(e => e.TimestampNodeId).Select(g => $"{g.Key}:{g.Count()}"));
        return (users.Count, oplog.Count, userDetail, byAuthor);
    }

    var a = Summarize(nodeA);
    var b = Summarize(nodeB);
    return string.Join(
        Environment.NewLine,
        "Diagnostics:",
        $"NodeA users={a.UserCount}, oplog={a.OplogCount}",
        $"NodeA users detail={a.Users}",
        $"NodeA oplog by author={a.OplogByAuthor}",
        $"NodeB users={b.UserCount}, oplog={b.OplogCount}",
        $"NodeB users detail={b.Users}",
        $"NodeB oplog by author={b.OplogByAuthor}",
        "NodeA logs:",
        nodeA.GetRecentLogs(),
        "NodeB logs:",
        nodeB.GetRecentLogs());
}
/// <summary>
/// Reserves an ephemeral loopback TCP port by briefly binding a listener to port 0.
/// </summary>
/// <returns>A TCP port number that was free at the time of the call.</returns>
private static int GetAvailableTcpPort()
{
    var probe = new TcpListener(IPAddress.Loopback, 0);
    probe.Start();
    try
    {
        // Port 0 asks the OS for any free port; read back the one it assigned.
        return ((IPEndPoint)probe.LocalEndpoint).Port;
    }
    finally
    {
        probe.Stop();
    }
}
/// <summary>
/// Determines whether an exception indicates RocksDB file-lock contention.
/// </summary>
/// <param name="exception">The exception to inspect.</param>
/// <returns><see langword="true" /> when the exception text mentions unavailable locks.</returns>
private static bool IsRocksDbLockContention(Exception exception)
{
    // ToString() flattens the whole exception chain, so wrapped inner errors match too.
    string flattened = exception.ToString();
    return flattened.Contains("No locks available", StringComparison.OrdinalIgnoreCase);
}
private sealed class TestPeerNode : IAsyncDisposable
{
    private readonly InMemoryLogSink _logSink; // captures node logs for failure diagnostics
    private readonly ICBDDCNode _node; // the node under test, resolved from DI
    private readonly string _nodeId;
    private readonly IOplogStore _oplogStore; // used by the manual oplog fallback path
    private readonly ServiceProvider _services; // root DI container; disposed with the peer
    private readonly string _workDir; // on-disk artifacts (RocksDB path, LMDB env)
    private readonly bool _preserveWorkDirOnDispose; // true for crash tests that reopen the dir
    // Mutable state for NextTimestamp's HlcTimestamp generation (fallback oplog entries).
    private long _lastPhysicalTime = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    private int _logicalCounter;
    private bool _started;

    // Private: instances are built via Create so DI wiring always happens first.
    private TestPeerNode(
        ServiceProvider services,
        ICBDDCNode node,
        IOplogStore oplogStore,
        SampleDbContext context,
        InMemoryLogSink logSink,
        string workDir,
        string nodeId,
        bool preserveWorkDirOnDispose)
    {
        _services = services;
        _node = node;
        _oplogStore = oplogStore;
        Context = context;
        _logSink = logSink;
        _workDir = workDir;
        _nodeId = nodeId;
        _preserveWorkDirOnDispose = preserveWorkDirOnDispose;
    }

    /// <summary>
    /// Gets the Surreal-backed context used by this test peer.
    /// </summary>
    public SampleDbContext Context { get; }
/// <inheritdoc />
public async ValueTask DisposeAsync()
{
    try
    {
        await StopAsync();
    }
    catch
    {
        // Best-effort shutdown: disposal must not throw during test teardown.
    }
    _services.Dispose();
    // Crash-recovery tests keep the work dir so a restarted node can reopen it.
    if (!_preserveWorkDirOnDispose) TryDeleteDirectory(_workDir);
}
/// <summary>
/// Creates a test peer node and wires all required services.
/// </summary>
/// <param name="nodeId">The unique node identifier.</param>
/// <param name="tcpPort">The TCP port used by the node listener.</param>
/// <param name="authToken">The cluster authentication token.</param>
/// <param name="knownPeers">The known peers this node can connect to.</param>
/// <param name="workDirOverride">An optional working directory override for test artifacts.</param>
/// <param name="preserveWorkDirOnDispose">A value indicating whether to preserve the working directory on dispose.</param>
/// <param name="useFaultInjectedCheckpointStore">A value indicating whether to inject a checkpoint persistence that fails once.</param>
/// <param name="useLmdbOplog">A value indicating whether to enable the LMDB oplog migration path.</param>
/// <param name="dualWriteOplog">A value indicating whether oplog writes should be mirrored to Surreal + LMDB.</param>
/// <param name="preferLmdbReads">A value indicating whether reads should prefer LMDB.</param>
/// <returns>A configured <see cref="TestPeerNode" /> instance.</returns>
public static TestPeerNode Create(
    string nodeId,
    int tcpPort,
    string authToken,
    IReadOnlyList<KnownPeerConfiguration> knownPeers,
    string? workDirOverride = null,
    bool preserveWorkDirOnDispose = false,
    bool useFaultInjectedCheckpointStore = false,
    bool useLmdbOplog = false,
    bool dualWriteOplog = true,
    bool preferLmdbReads = false)
{
    // Each node gets an isolated temp work dir unless the test shares one on
    // purpose (e.g. crash recovery reopening the same database files).
    string workDir = workDirOverride ?? Path.Combine(Path.GetTempPath(), $"cbddc-e2e-{nodeId}-{Guid.NewGuid():N}");
    Directory.CreateDirectory(workDir);
    string dbPath = Path.Combine(workDir, "node.rocksdb");
    // Node id with dashes replaced, used as the Surreal database name.
    string surrealDatabase = nodeId.Replace("-", "_", StringComparison.Ordinal);
    var configProvider = new StaticPeerNodeConfigurationProvider(new PeerNodeConfiguration
    {
        NodeId = nodeId,
        TcpPort = tcpPort,
        AuthToken = authToken,
        KnownPeers = knownPeers.ToList()
    });
    var services = new ServiceCollection();
    // Logs go to an in-memory sink so BuildDiagnostics can dump them on failure.
    services.AddSingleton(new InMemoryLogSink(nodeId));
    services.AddSingleton<ILoggerProvider, InMemoryLoggerProvider>();
    services.AddLogging(builder => builder.SetMinimumLevel(LogLevel.Debug));
    services.AddSingleton(configProvider);
    services.AddSingleton<IPeerNodeConfigurationProvider>(configProvider);
    services.AddSingleton<ICBDDCSurrealSchemaInitializer, SampleSurrealSchemaInitializer>();
    services.AddSingleton<SampleDbContext>();
    var surrealOptionsFactory = new Func<IServiceProvider, CBDDCSurrealEmbeddedOptions>(_ => new CBDDCSurrealEmbeddedOptions
    {
        Endpoint = "rocksdb://local",
        DatabasePath = dbPath,
        Namespace = "cbddc_e2e",
        Database = surrealDatabase,
        Cdc = new CBDDCSurrealCdcOptions
        {
            Enabled = true,
            ConsumerId = $"{nodeId}-main"
        }
    });
    var coreBuilder = services.AddCBDDCCore();
    if (useFaultInjectedCheckpointStore)
    {
        // Crash-window tests: the first checkpoint advance throws, simulating a
        // process crash after the oplog commit but before the checkpoint moves.
        services.AddSingleton<ISurrealCdcCheckpointPersistence, CrashAfterFirstAdvanceCheckpointPersistence>();
        var registration = coreBuilder.AddCBDDCSurrealEmbedded<FaultInjectedSampleDocumentStore>(surrealOptionsFactory);
        if (useLmdbOplog)
            registration.AddCBDDCLmdbOplog(
                _ => new LmdbOplogOptions
                {
                    EnvironmentPath = Path.Combine(workDir, "oplog-lmdb"),
                    MapSizeBytes = 128L * 1024 * 1024,
                    MaxDatabases = 16,
                    PruneBatchSize = 256
                },
                flags =>
                {
                    flags.UseLmdbOplog = true;
                    flags.DualWriteOplog = dualWriteOplog;
                    flags.PreferLmdbReads = preferLmdbReads;
                    // NOTE(review): assumed TimeSpan.Zero means "reconcile as fast as
                    // possible" so parity is observable quickly — confirm flag semantics.
                    flags.ReconciliationInterval = TimeSpan.Zero;
                });
        registration.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>(false);
    }
    else
    {
        // Standard path: same wiring but with the non-fault-injected document store.
        var registration = coreBuilder.AddCBDDCSurrealEmbedded<SampleDocumentStore>(surrealOptionsFactory);
        if (useLmdbOplog)
            registration.AddCBDDCLmdbOplog(
                _ => new LmdbOplogOptions
                {
                    EnvironmentPath = Path.Combine(workDir, "oplog-lmdb"),
                    MapSizeBytes = 128L * 1024 * 1024,
                    MaxDatabases = 16,
                    PruneBatchSize = 256
                },
                flags =>
                {
                    flags.UseLmdbOplog = true;
                    flags.DualWriteOplog = dualWriteOplog;
                    flags.PreferLmdbReads = preferLmdbReads;
                    flags.ReconciliationInterval = TimeSpan.Zero;
                });
        registration.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>(false);
    }
    // Deterministic tests: sync uses explicit known peers, so disable UDP discovery.
    services.AddSingleton<IDiscoveryService, PassiveDiscoveryService>();
    services.AddSingleton<IPeerHandshakeService, NoOpHandshakeService>();
    var provider = services.BuildServiceProvider();
    var node = provider.GetRequiredService<ICBDDCNode>();
    var oplogStore = provider.GetRequiredService<IOplogStore>();
    var context = provider.GetRequiredService<SampleDbContext>();
    var logSink = provider.GetRequiredService<InMemoryLogSink>();
    return new TestPeerNode(
        provider,
        node,
        oplogStore,
        context,
        logSink,
        workDir,
        nodeId,
        preserveWorkDirOnDispose);
}
/// <summary>
/// Starts the underlying node when it has not been started yet.
/// </summary>
/// <returns>A task that represents the asynchronous operation.</returns>
public async Task StartAsync()
{
    // Idempotent: repeated calls after a successful start are no-ops.
    if (!_started)
    {
        await _node.Start();
        _started = true;
    }
}
/// <summary>
/// Stops the underlying node when it is currently running.
/// </summary>
/// <returns>A task that represents the asynchronous operation.</returns>
public async Task StopAsync()
{
    if (!_started) return;
    try
    {
        await _node.Stop();
    }
    catch (ObjectDisposedException)
    {
        // Tests stop/dispose peers in quick succession; an already-disposed node
        // counts as stopped.
    }
    catch (AggregateException ex) when (ex.InnerExceptions.All(e => e is ObjectDisposedException))
    {
        // Same situation, surfaced as an aggregate from background work.
    }
    _started = false;
}
/// <summary>
/// Reads a user document by identifier.
/// </summary>
/// <param name="userId">The identifier of the user to read.</param>
/// <returns>The matching user when found; otherwise <see langword="null" />.</returns>
public User? ReadUser(string userId)
    => Context.Users.Find(u => u.Id == userId).FirstOrDefault();
/// <summary>
/// Gets the local oplog entry count for a collection/key pair produced by this node.
/// </summary>
/// <param name="collection">The collection name.</param>
/// <param name="key">The document key.</param>
/// <returns>The number of local oplog entries matching the key.</returns>
public int GetLocalOplogCountForKey(string collection, string key)
{
    // Only entries authored by this node (TimestampNodeId) are counted.
    return Context.OplogEntries.FindAll()
        .Where(e => string.Equals(e.Collection, collection, StringComparison.Ordinal))
        .Where(e => string.Equals(e.Key, key, StringComparison.Ordinal))
        .Count(e => string.Equals(e.TimestampNodeId, _nodeId, StringComparison.Ordinal));
}
/// <summary>
/// Gets the total oplog entry count for a collection/key pair across nodes.
/// </summary>
/// <param name="collection">The collection name.</param>
/// <param name="key">The document key.</param>
/// <returns>The number of oplog entries matching the key.</returns>
public int GetOplogCountForKey(string collection, string key)
{
    return Context.OplogEntries.FindAll()
        .Where(e => string.Equals(e.Collection, collection, StringComparison.Ordinal))
        .Count(e => string.Equals(e.Key, key, StringComparison.Ordinal));
}
/// <summary>
/// Inserts or updates a user and persists the matching oplog entry.
/// </summary>
/// <param name="user">The user payload to upsert.</param>
/// <returns>A task that represents the asynchronous operation.</returns>
public async Task UpsertUserAsync(User user)
{
    await PersistUserMutationWithOplogFallbackAsync(
        user.Id,
        OperationType.Put,
        JsonSerializer.SerializeToElement(user),
        async () =>
        {
            // Insert when the document is new; otherwise replace the stored copy.
            bool exists = Context.Users.Find(u => u.Id == user.Id).FirstOrDefault() is not null;
            if (exists)
                await Context.Users.UpdateAsync(user);
            else
                await Context.Users.InsertAsync(user);
            await Context.SaveChangesAsync();
        });
}
/// <summary>
/// Deletes a user and persists the matching oplog entry.
/// </summary>
/// <param name="userId">The identifier of the user to delete.</param>
/// <returns>A task that represents the asynchronous operation.</returns>
public async Task DeleteUserAsync(string userId)
{
    // A delete carries no payload; only the key and operation type are recorded.
    await PersistUserMutationWithOplogFallbackAsync(
        userId,
        OperationType.Delete,
        payload: null,
        async () =>
        {
            await Context.Users.DeleteAsync(userId);
            await Context.SaveChangesAsync();
        });
}
/// <summary>
/// Gets recent in-memory logs captured for this node.
/// </summary>
/// <param name="max">The maximum number of log entries to return.</param>
/// <returns>A newline-delimited string of recent log entries.</returns>
public string GetRecentLogs(int max = 50) => _logSink.GetRecent(max);
/// <summary>
/// Runs a user mutation and, when no new oplog entry appears within a short
/// window, appends a manual fallback entry so replication still sees the change.
/// </summary>
/// <param name="userId">The key of the mutated user document.</param>
/// <param name="operationType">The operation recorded in the fallback entry.</param>
/// <param name="payload">The serialized document, or <see langword="null" /> for deletes.</param>
/// <param name="mutation">The actual database mutation to execute.</param>
private async Task PersistUserMutationWithOplogFallbackAsync(
    string userId,
    OperationType operationType,
    JsonElement? payload,
    Func<Task> mutation)
{
    // Snapshot the oplog size so we can detect whether an entry was emitted.
    int oplogCountBefore = Context.OplogEntries.FindAll().Count();
    await mutation();
    // Prefer native CDC path; fallback only when CDC fails to emit.
    var deadline = DateTime.UtcNow.AddSeconds(3);
    while (DateTime.UtcNow < deadline)
    {
        if (Context.OplogEntries.FindAll().Count() > oplogCountBefore) return;
        await Task.Delay(50);
    }
    // No entry appeared: append a synthetic one chained to this node's last hash
    // and stamped with a locally generated HLC timestamp.
    string previousHash = await _oplogStore.GetLastEntryHashAsync(_nodeId) ?? string.Empty;
    var fallbackEntry = new OplogEntry(
        "Users",
        userId,
        operationType,
        payload,
        NextTimestamp(),
        previousHash);
    await _oplogStore.AppendOplogEntryAsync(fallbackEntry);
    await Context.SaveChangesAsync();
}
/// <summary>
/// Produces the next <see cref="HlcTimestamp" /> for fallback oplog entries:
/// wall-clock milliseconds plus a logical counter for same-millisecond (or
/// backwards-moving) clock readings.
/// </summary>
/// <returns>A timestamp that is strictly increasing for this node.</returns>
private HlcTimestamp NextTimestamp()
{
    long physicalNow = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    if (physicalNow <= _lastPhysicalTime)
    {
        // Clock did not advance (or went backwards): bump the logical component.
        _logicalCounter++;
    }
    else
    {
        _lastPhysicalTime = physicalNow;
        _logicalCounter = 0;
    }
    return new HlcTimestamp(_lastPhysicalTime, _logicalCounter, _nodeId);
}
/// <summary>
/// Deletes a directory tree on a best-effort basis; failures are ignored.
/// </summary>
/// <param name="path">The directory to delete.</param>
private static void TryDeleteDirectory(string path)
{
    try
    {
        // Files may still be locked by a closing database; swallow and move on.
        if (Directory.Exists(path)) Directory.Delete(path, true);
    }
    catch
    {
    }
}
}
/// <summary>
/// Document store used by the crash-window test. CDC polling and the live-select
/// accelerator are disabled in the options passed to the base class below, and
/// TestPeerNode.Create pairs this store with
/// <see cref="CrashAfterFirstAdvanceCheckpointPersistence" />.
/// </summary>
private sealed class FaultInjectedSampleDocumentStore : SurrealDocumentStore<SampleDbContext>
{
    private const string UsersCollection = "Users";
    private const string TodoListsCollection = "TodoLists";

    /// <summary>
    /// Initializes a new instance of the <see cref="FaultInjectedSampleDocumentStore" /> class.
    /// </summary>
    /// <param name="context">The sample database context.</param>
    /// <param name="configProvider">The peer node configuration provider.</param>
    /// <param name="vectorClockService">The vector clock service.</param>
    /// <param name="checkpointPersistence">The checkpoint persistence implementation.</param>
    /// <param name="logger">The optional logger instance.</param>
    public FaultInjectedSampleDocumentStore(
        SampleDbContext context,
        IPeerNodeConfigurationProvider configProvider,
        IVectorClockService vectorClockService,
        ISurrealCdcCheckpointPersistence checkpointPersistence,
        ILogger<FaultInjectedSampleDocumentStore>? logger = null)
        : base(
            context,
            context.SurrealEmbeddedClient,
            context.SchemaInitializer,
            configProvider,
            vectorClockService,
            new LastWriteWinsConflictResolver(),
            checkpointPersistence,
            // CDC polling off so mutations flow through the explicit fallback path
            // (see TestPeerNode.PersistUserMutationWithOplogFallbackAsync).
            new SurrealCdcPollingOptions
            {
                Enabled = false,
                EnableLiveSelectAccelerator = false
            },
            logger)
    {
        // Register both sample collections for synchronization, keyed by entity Id.
        WatchCollection(UsersCollection, context.Users, u => u.Id);
        WatchCollection(TodoListsCollection, context.TodoLists, t => t.Id);
    }

    /// <inheritdoc />
    protected override async Task ApplyContentToEntityAsync(
        string collection,
        string key,
        JsonElement content,
        CancellationToken cancellationToken)
    {
        await UpsertEntityAsync(collection, key, content, cancellationToken);
    }

    /// <inheritdoc />
    protected override async Task ApplyContentToEntitiesBatchAsync(
        IEnumerable<(string Collection, string Key, JsonElement Content)> documents,
        CancellationToken cancellationToken)
    {
        // Sequential upserts; batch ordering is preserved.
        foreach ((string collection, string key, var content) in documents)
            await UpsertEntityAsync(collection, key, content, cancellationToken);
    }

    /// <inheritdoc />
    protected override async Task<JsonElement?> GetEntityAsJsonAsync(
        string collection,
        string key,
        CancellationToken cancellationToken)
    {
        // Unknown collections yield null rather than throwing.
        return collection switch
        {
            UsersCollection => SerializeEntity(await _context.Users.FindByIdAsync(key, cancellationToken)),
            TodoListsCollection => SerializeEntity(await _context.TodoLists.FindByIdAsync(key, cancellationToken)),
            _ => null
        };
    }

    /// <inheritdoc />
    protected override async Task RemoveEntityAsync(
        string collection,
        string key,
        CancellationToken cancellationToken)
    {
        await DeleteEntityAsync(collection, key, cancellationToken);
    }

    /// <inheritdoc />
    protected override async Task RemoveEntitiesBatchAsync(
        IEnumerable<(string Collection, string Key)> documents,
        CancellationToken cancellationToken)
    {
        foreach ((string collection, string key) in documents)
            await DeleteEntityAsync(collection, key, cancellationToken);
    }

    /// <inheritdoc />
    protected override async Task<IEnumerable<(string Key, JsonElement Content)>> GetAllEntitiesAsJsonAsync(
        string collection,
        CancellationToken cancellationToken)
    {
        return collection switch
        {
            UsersCollection => (await _context.Users.FindAllAsync(cancellationToken))
                .Select(u => (u.Id, SerializeEntity(u)!.Value))
                .ToList(),
            TodoListsCollection => (await _context.TodoLists.FindAllAsync(cancellationToken))
                .Select(t => (t.Id, SerializeEntity(t)!.Value))
                .ToList(),
            _ => []
        };
    }

    // Deserializes the payload and inserts or updates the matching entity.
    private async Task UpsertEntityAsync(
        string collection,
        string key,
        JsonElement content,
        CancellationToken cancellationToken)
    {
        switch (collection)
        {
            case UsersCollection:
                var user = content.Deserialize<User>() ??
                           throw new InvalidOperationException("Failed to deserialize user.");
                // The sync key is authoritative over any id embedded in the payload.
                user.Id = key;
                if (await _context.Users.FindByIdAsync(key, cancellationToken) == null)
                    await _context.Users.InsertAsync(user, cancellationToken);
                else
                    await _context.Users.UpdateAsync(user, cancellationToken);
                break;
            case TodoListsCollection:
                var todo = content.Deserialize<TodoList>() ??
                           throw new InvalidOperationException("Failed to deserialize todo list.");
                todo.Id = key;
                if (await _context.TodoLists.FindByIdAsync(key, cancellationToken) == null)
                    await _context.TodoLists.InsertAsync(todo, cancellationToken);
                else
                    await _context.TodoLists.UpdateAsync(todo, cancellationToken);
                break;
            default:
                throw new NotSupportedException($"Collection '{collection}' is not supported for sync.");
        }
    }

    // Deletes the entity; unlike upserts, unknown collections are silently ignored.
    private async Task DeleteEntityAsync(string collection, string key, CancellationToken cancellationToken)
    {
        switch (collection)
        {
            case UsersCollection:
                await _context.Users.DeleteAsync(key, cancellationToken);
                break;
            case TodoListsCollection:
                await _context.TodoLists.DeleteAsync(key, cancellationToken);
                break;
        }
    }

    // Serializes an entity to JSON, mapping a missing entity to null.
    private static JsonElement? SerializeEntity<T>(T? entity) where T : class
    {
        return entity == null ? null : JsonSerializer.SerializeToElement(entity);
    }
}
/// <summary>
/// Checkpoint persistence that throws exactly once on the first advance, simulating
/// a crash after oplog commit but before the checkpoint moves forward.
/// </summary>
private sealed class CrashAfterFirstAdvanceCheckpointPersistence : ISurrealCdcCheckpointPersistence
{
    // 1 while the injected failure is still pending; atomically flipped to 0 on first advance.
    private int _pendingFailure = 1;

    /// <inheritdoc />
    public Task<SurrealCdcCheckpoint?> GetCheckpointAsync(
        string? consumerId = null,
        CancellationToken cancellationToken = default)
        => Task.FromResult<SurrealCdcCheckpoint?>(null);

    /// <inheritdoc />
    public Task UpsertCheckpointAsync(
        HlcTimestamp timestamp,
        string lastHash,
        string? consumerId = null,
        CancellationToken cancellationToken = default,
        long? versionstampCursor = null)
        => Task.CompletedTask;

    /// <inheritdoc />
    public Task AdvanceCheckpointAsync(
        OplogEntry entry,
        string? consumerId = null,
        CancellationToken cancellationToken = default)
    {
        // Compare-and-swap guarantees the failure fires once even under concurrency.
        if (Interlocked.CompareExchange(ref _pendingFailure, 0, 1) == 1)
            throw new InvalidOperationException("Injected crash between oplog commit and checkpoint advance.");
        return Task.CompletedTask;
    }
}
/// <summary>
/// Discovery service that never finds peers; tests rely on explicit known-peer
/// configuration instead of UDP discovery.
/// </summary>
private sealed class PassiveDiscoveryService : IDiscoveryService
{
    /// <inheritdoc />
    public IEnumerable<PeerNode> GetActivePeers() => [];

    /// <inheritdoc />
    public Task Start() => Task.CompletedTask;

    /// <inheritdoc />
    public Task Stop() => Task.CompletedTask;
}
/// <summary>
/// Configuration provider backed by a single in-memory configuration object.
/// </summary>
private sealed class StaticPeerNodeConfigurationProvider : IPeerNodeConfigurationProvider
{
    private PeerNodeConfiguration _configuration;

    /// <summary>
    /// Initializes a new instance of the <see cref="StaticPeerNodeConfigurationProvider" /> class.
    /// </summary>
    /// <param name="configuration">The initial peer node configuration.</param>
    public StaticPeerNodeConfigurationProvider(PeerNodeConfiguration configuration) =>
        _configuration = configuration;

    /// <inheritdoc />
    public event PeerNodeConfigurationChangedEventHandler? ConfigurationChanged;

    /// <inheritdoc />
    public Task<PeerNodeConfiguration> GetConfiguration() => Task.FromResult(_configuration);

    /// <inheritdoc />
    public void Update(PeerNodeConfiguration configuration)
    {
        _configuration = configuration;
        // Notify subscribers so a running node can react to the new configuration.
        ConfigurationChanged?.Invoke(this, configuration);
    }
}
/// <summary>
/// Thread-safe in-memory sink that retains the newest 500 formatted log lines.
/// </summary>
private sealed class InMemoryLogSink
{
    private readonly ConcurrentQueue<string> _entries = new();
    private readonly string _nodeId;

    /// <summary>
    /// Initializes a new instance of the <see cref="InMemoryLogSink" /> class.
    /// </summary>
    /// <param name="nodeId">The node identifier associated with emitted logs.</param>
    public InMemoryLogSink(string nodeId) => _nodeId = nodeId;

    /// <summary>
    /// Adds a log entry to the in-memory sink.
    /// </summary>
    /// <param name="category">The log category.</param>
    /// <param name="level">The log level.</param>
    /// <param name="message">The formatted log message.</param>
    /// <param name="exception">The optional exception associated with the log entry.</param>
    public void Add(string category, LogLevel level, string message, Exception? exception)
    {
        string line = exception is null
            ? $"[{DateTime.UtcNow:O}] {_nodeId} {level} {category}: {message}"
            : $"[{DateTime.UtcNow:O}] {_nodeId} {level} {category}: {message}{Environment.NewLine}{exception}";
        _entries.Enqueue(line);
        // Trim oldest entries to keep the sink bounded.
        while (_entries.Count > 500 && _entries.TryDequeue(out _))
        {
        }
    }

    /// <summary>
    /// Gets the most recent log entries from the sink.
    /// </summary>
    /// <param name="max">The maximum number of entries to return.</param>
    /// <returns>A newline-delimited string of recent log entries, or a placeholder when none exist.</returns>
    public string GetRecent(int max)
    {
        string[] snapshot = _entries.ToArray();
        return snapshot.Length == 0
            ? "<no logs>"
            : string.Join(Environment.NewLine, snapshot.TakeLast(max));
    }
}
/// <summary>
/// Logger provider that routes every category to the shared in-memory sink.
/// </summary>
private sealed class InMemoryLoggerProvider : ILoggerProvider
{
    private readonly InMemoryLogSink _sink;

    /// <summary>
    /// Initializes a new instance of the <see cref="InMemoryLoggerProvider" /> class.
    /// </summary>
    /// <param name="sink">The shared sink used to capture log messages.</param>
    public InMemoryLoggerProvider(InMemoryLogSink sink) => _sink = sink;

    /// <inheritdoc />
    public ILogger CreateLogger(string categoryName) => new InMemoryLogger(categoryName, _sink);

    /// <inheritdoc />
    public void Dispose()
    {
        // Nothing to release; loggers only reference the shared sink.
    }
}
/// <summary>
/// Logger that forwards every message, at every level, to the in-memory sink.
/// </summary>
private sealed class InMemoryLogger : ILogger
{
    private readonly string _categoryName;
    private readonly InMemoryLogSink _sink;

    /// <summary>
    /// Initializes a new instance of the <see cref="InMemoryLogger" /> class.
    /// </summary>
    /// <param name="categoryName">The logger category name.</param>
    /// <param name="sink">The sink that stores emitted log messages.</param>
    public InMemoryLogger(string categoryName, InMemoryLogSink sink)
    {
        _categoryName = categoryName;
        _sink = sink;
    }

    /// <inheritdoc />
    public IDisposable BeginScope<TState>(TState state) where TState : notnull => NullScope.Instance;

    /// <inheritdoc />
    public bool IsEnabled(LogLevel logLevel) => true;

    /// <inheritdoc />
    public void Log<TState>(
        LogLevel logLevel,
        EventId eventId,
        TState state,
        Exception? exception,
        Func<TState, Exception?, string> formatter)
        => _sink.Add(_categoryName, logLevel, formatter(state, exception), exception);
}
/// <summary>
/// No-op logging scope; a single shared instance suffices since it carries no state.
/// </summary>
private sealed class NullScope : IDisposable
{
    public static readonly NullScope Instance = new();

    /// <inheritdoc />
    public void Dispose()
    {
        // Intentionally empty: the in-memory logger does not track scopes.
    }
}
}