Files
CBDDC/src/ZB.MOM.WW.CBDDC.Persistence/Lmdb/LmdbOplogBackfillTool.cs
Joseph Doherty cce24fa8f3
All checks were successful
NuGet Package Publish / nuget (push) Successful in 1m16s
Add LMDB oplog migration path with dual-write cutover support
Introduce LMDB oplog store, migration flags, telemetry/backfill tooling, and parity tests to enable staged Surreal-to-LMDB rollout with rollback coverage.
2026-02-22 17:44:57 -05:00

278 lines
11 KiB
C#

using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Persistence.Surreal;
namespace ZB.MOM.WW.CBDDC.Persistence.Lmdb;
/// <summary>
/// Backfills LMDB oplog content from Surreal and validates parity.
/// </summary>
public sealed class LmdbOplogBackfillTool
{
    private readonly LmdbOplogStore _destination;
    private readonly ILogger<LmdbOplogBackfillTool> _logger;
    private readonly SurrealOplogStore _source;

    /// <summary>
    /// Initializes a new instance of the <see cref="LmdbOplogBackfillTool" /> class.
    /// </summary>
    /// <param name="source">Surreal store entries are exported from.</param>
    /// <param name="destination">LMDB store entries are merged into and validated against.</param>
    /// <param name="logger">Optional logger; a no-op logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source" /> or <paramref name="destination" /> is null.</exception>
    public LmdbOplogBackfillTool(
        SurrealOplogStore source,
        LmdbOplogStore destination,
        ILogger<LmdbOplogBackfillTool>? logger = null)
    {
        _source = source ?? throw new ArgumentNullException(nameof(source));
        _destination = destination ?? throw new ArgumentNullException(nameof(destination));
        _logger = logger ?? NullLogger<LmdbOplogBackfillTool>.Instance;
    }

    /// <summary>
    /// Backfills one dataset from Surreal to LMDB and validates parity.
    /// </summary>
    /// <param name="datasetId">Dataset identifier; normalized before use.</param>
    /// <param name="cancellationToken">Token used to cancel the export, merge, and validation.</param>
    /// <returns>The parity report for the dataset after the merge completed.</returns>
    public async Task<LmdbOplogBackfillReport> BackfillAsync(
        string datasetId,
        CancellationToken cancellationToken = default)
    {
        string normalizedDatasetId = DatasetId.Normalize(datasetId);

        // Deterministic ordering keeps the merge input and the subsequent parity
        // comparison stable regardless of the order the source store returns entries in.
        List<OplogEntry> sourceEntries = OrderDeterministically(
            await _source.ExportAsync(normalizedDatasetId, cancellationToken).ConfigureAwait(false));

        await _destination.MergeAsync(sourceEntries, normalizedDatasetId, cancellationToken).ConfigureAwait(false);

        LmdbOplogBackfillReport report =
            await ValidateParityAsync(normalizedDatasetId, sourceEntries, cancellationToken).ConfigureAwait(false);

        _logger.LogInformation(
            "LMDB oplog backfill {Result} for dataset {DatasetId}. Source={SourceCount}, Destination={DestinationCount}, HashSpotChecks={HashSpotChecks}, ChainSpotChecks={ChainSpotChecks}.",
            report.IsSuccess ? "succeeded" : "failed",
            report.DatasetId,
            report.SourceCount,
            report.DestinationCount,
            report.HashSpotCheckCount,
            report.ChainSpotCheckCount);
        return report;
    }

    /// <summary>
    /// Validates parity only without running a backfill merge.
    /// </summary>
    /// <param name="datasetId">Dataset identifier; normalized before use.</param>
    /// <param name="cancellationToken">Token used to cancel the export and validation.</param>
    /// <returns>The parity report comparing the current Surreal and LMDB contents.</returns>
    public async Task<LmdbOplogBackfillReport> ValidateParityAsync(
        string datasetId,
        CancellationToken cancellationToken = default)
    {
        string normalizedDatasetId = DatasetId.Normalize(datasetId);
        var sourceEntries =
            (await _source.ExportAsync(normalizedDatasetId, cancellationToken).ConfigureAwait(false)).ToList();
        return await ValidateParityAsync(normalizedDatasetId, sourceEntries, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Backfills and throws when parity validation fails.
    /// </summary>
    /// <param name="datasetId">Dataset identifier; normalized before use.</param>
    /// <param name="cancellationToken">Token used to cancel the backfill.</param>
    /// <returns>The parity report when every check passed.</returns>
    /// <exception cref="InvalidOperationException">Parity validation failed after the merge.</exception>
    public async Task<LmdbOplogBackfillReport> BackfillOrThrowAsync(
        string datasetId,
        CancellationToken cancellationToken = default)
    {
        LmdbOplogBackfillReport report = await BackfillAsync(datasetId, cancellationToken).ConfigureAwait(false);
        if (report.IsSuccess) return report;
        throw new InvalidOperationException(
            $"LMDB oplog backfill parity failed for dataset '{report.DatasetId}'. " +
            $"Source={report.SourceCount}, Destination={report.DestinationCount}, " +
            $"CountsMatch={report.CountsMatch}, CountsPerNodeMatch={report.CountsPerNodeMatch}, " +
            $"LatestHashPerNodeMatch={report.LatestHashPerNodeMatch}, HashSpotChecksPassed={report.HashSpotChecksPassed}, " +
            $"ChainSpotChecksPassed={report.ChainSpotChecksPassed}.");
    }

    /// <summary>
    /// Compares the given source entries against the destination's current content
    /// and assembles the parity report (counts, per-node counts, latest hashes, spot checks).
    /// </summary>
    private async Task<LmdbOplogBackfillReport> ValidateParityAsync(
        string datasetId,
        List<OplogEntry> sourceEntries,
        CancellationToken cancellationToken)
    {
        // Both sides are put into the same deterministic order so positional
        // comparisons and per-node aggregations are store-order independent.
        List<OplogEntry> sourceOrdered = OrderDeterministically(sourceEntries);
        List<OplogEntry> destinationOrdered = OrderDeterministically(
            await _destination.ExportAsync(datasetId, cancellationToken).ConfigureAwait(false));

        bool countsMatch = sourceOrdered.Count == destinationOrdered.Count;

        IReadOnlyDictionary<string, int> sourceCountByNode = CountByNode(sourceOrdered);
        IReadOnlyDictionary<string, int> destinationCountByNode = CountByNode(destinationOrdered);
        bool countsPerNodeMatch = DictionaryEqual(sourceCountByNode, destinationCountByNode);

        IReadOnlyDictionary<string, string> sourceLatestHashByNode = LatestHashByNode(sourceOrdered);
        IReadOnlyDictionary<string, string> destinationLatestHashByNode = LatestHashByNode(destinationOrdered);
        bool latestHashPerNodeMatch = DictionaryEqual(sourceLatestHashByNode, destinationLatestHashByNode);

        (bool hashSpotChecksPassed, int hashSpotCheckCount) = await RunHashSpotChecksAsync(
            datasetId,
            sourceOrdered,
            cancellationToken).ConfigureAwait(false);
        (bool chainSpotChecksPassed, int chainSpotCheckCount) = await RunChainSpotChecksAsync(
            datasetId,
            sourceOrdered,
            cancellationToken).ConfigureAwait(false);

        return new LmdbOplogBackfillReport(
            datasetId,
            sourceOrdered.Count,
            destinationOrdered.Count,
            sourceCountByNode,
            destinationCountByNode,
            sourceLatestHashByNode,
            destinationLatestHashByNode,
            hashSpotCheckCount,
            chainSpotCheckCount,
            countsMatch,
            countsPerNodeMatch,
            latestHashPerNodeMatch,
            hashSpotChecksPassed,
            chainSpotChecksPassed);
    }

    /// <summary>
    /// Verifies that up to ten evenly-sampled source entries are retrievable
    /// from the destination by hash.
    /// </summary>
    /// <returns>Whether all sampled lookups succeeded, and how many were attempted.</returns>
    private async Task<(bool Passed, int Count)> RunHashSpotChecksAsync(
        string datasetId,
        IReadOnlyList<OplogEntry> sourceEntries,
        CancellationToken cancellationToken)
    {
        if (sourceEntries.Count == 0) return (true, 0);
        var sampleIndexes = BuildSampleIndexes(sourceEntries.Count, Math.Min(10, sourceEntries.Count));
        foreach (int index in sampleIndexes)
        {
            string hash = sourceEntries[index].Hash;
            OplogEntry? destinationEntry =
                await _destination.GetEntryByHashAsync(hash, datasetId, cancellationToken).ConfigureAwait(false);
            if (destinationEntry == null) return (false, sampleIndexes.Count);
        }
        return (true, sampleIndexes.Count);
    }

    /// <summary>
    /// Verifies that up to five (previous, current) hash-chain ranges resolve to
    /// identical hash sequences in the source and the destination.
    /// </summary>
    /// <returns>Whether all sampled ranges matched, and how many were compared.</returns>
    private async Task<(bool Passed, int Count)> RunChainSpotChecksAsync(
        string datasetId,
        IReadOnlyList<OplogEntry> sourceEntries,
        CancellationToken cancellationToken)
    {
        if (sourceEntries.Count < 2) return (true, 0);

        // A set is sufficient here (only membership is tested) and, unlike
        // ToDictionary, it tolerates duplicate hashes instead of throwing.
        var knownHashes = sourceEntries.Select(entry => entry.Hash).ToHashSet(StringComparer.Ordinal);

        var checks = sourceEntries
            .Where(entry => !string.IsNullOrWhiteSpace(entry.PreviousHash) &&
                            knownHashes.Contains(entry.PreviousHash))
            .Take(5)
            .Select(entry => (StartHash: entry.PreviousHash!, EndHash: entry.Hash))
            .ToList();

        foreach (var check in checks)
        {
            string[] sourceChain =
                (await _source.GetChainRangeAsync(check.StartHash, check.EndHash, datasetId, cancellationToken).ConfigureAwait(false))
                .Select(entry => entry.Hash)
                .ToArray();
            string[] destinationChain =
                (await _destination.GetChainRangeAsync(check.StartHash, check.EndHash, datasetId, cancellationToken).ConfigureAwait(false))
                .Select(entry => entry.Hash)
                .ToArray();
            if (!sourceChain.SequenceEqual(destinationChain, StringComparer.Ordinal))
                return (false, checks.Count);
        }
        return (true, checks.Count);
    }

    /// <summary>
    /// Orders entries deterministically by physical time, logical counter, node id,
    /// then hash, so both stores can be compared positionally.
    /// </summary>
    private static List<OplogEntry> OrderDeterministically(IEnumerable<OplogEntry> entries)
    {
        return entries
            .OrderBy(entry => entry.Timestamp.PhysicalTime)
            .ThenBy(entry => entry.Timestamp.LogicalCounter)
            .ThenBy(entry => entry.Timestamp.NodeId, StringComparer.Ordinal)
            .ThenBy(entry => entry.Hash, StringComparer.Ordinal)
            .ToList();
    }

    /// <summary>
    /// Counts entries per node id; entries with a blank node id are ignored.
    /// </summary>
    private static IReadOnlyDictionary<string, int> CountByNode(IEnumerable<OplogEntry> entries)
    {
        return entries
            .Where(entry => !string.IsNullOrWhiteSpace(entry.Timestamp.NodeId))
            .GroupBy(entry => entry.Timestamp.NodeId, StringComparer.Ordinal)
            .ToDictionary(group => group.Key, group => group.Count(), StringComparer.Ordinal);
    }

    /// <summary>
    /// Resolves the latest entry hash per node id, breaking ties by logical counter
    /// then hash; entries with a blank node id are ignored.
    /// </summary>
    private static IReadOnlyDictionary<string, string> LatestHashByNode(IEnumerable<OplogEntry> entries)
    {
        return entries
            .Where(entry => !string.IsNullOrWhiteSpace(entry.Timestamp.NodeId))
            .GroupBy(entry => entry.Timestamp.NodeId, StringComparer.Ordinal)
            .ToDictionary(
                group => group.Key,
                group => group
                    .OrderByDescending(entry => entry.Timestamp.PhysicalTime)
                    .ThenByDescending(entry => entry.Timestamp.LogicalCounter)
                    .ThenByDescending(entry => entry.Hash, StringComparer.Ordinal)
                    .First()
                    .Hash,
                StringComparer.Ordinal);
    }

    /// <summary>
    /// Compares two dictionaries for equal key sets and equal values per key.
    /// </summary>
    private static bool DictionaryEqual<T>(
        IReadOnlyDictionary<string, T> left,
        IReadOnlyDictionary<string, T> right)
    {
        if (left.Count != right.Count) return false;
        foreach (var pair in left)
        {
            if (!right.TryGetValue(pair.Key, out T? rightValue)) return false;
            if (!EqualityComparer<T>.Default.Equals(pair.Value, rightValue)) return false;
        }
        return true;
    }

    /// <summary>
    /// Builds up to <paramref name="sampleCount" /> distinct indexes spread evenly
    /// across [0, <paramref name="totalCount" />), always including both endpoints
    /// when more than one sample is requested.
    /// </summary>
    private static List<int> BuildSampleIndexes(int totalCount, int sampleCount)
    {
        if (sampleCount <= 0 || totalCount <= 0) return [];
        if (sampleCount >= totalCount) return Enumerable.Range(0, totalCount).ToList();
        // A set de-duplicates collisions produced by rounding nearby positions.
        var indexes = new HashSet<int>();
        for (var i = 0; i < sampleCount; i++)
        {
            int index = (int)Math.Round(i * (totalCount - 1d) / (sampleCount - 1d));
            indexes.Add(Math.Clamp(index, 0, totalCount - 1));
        }
        return indexes.OrderBy(value => value).ToList();
    }
}
/// <summary>
/// Parity report produced by the LMDB backfill tool.
/// </summary>
public sealed record LmdbOplogBackfillReport(
    string DatasetId,
    int SourceCount,
    int DestinationCount,
    IReadOnlyDictionary<string, int> SourceCountByNode,
    IReadOnlyDictionary<string, int> DestinationCountByNode,
    IReadOnlyDictionary<string, string> SourceLatestHashByNode,
    IReadOnlyDictionary<string, string> DestinationLatestHashByNode,
    int HashSpotCheckCount,
    int ChainSpotCheckCount,
    bool CountsMatch,
    bool CountsPerNodeMatch,
    bool LatestHashPerNodeMatch,
    bool HashSpotChecksPassed,
    bool ChainSpotChecksPassed)
{
    /// <summary>
    /// Gets a value indicating whether parity validation passed all checks.
    /// </summary>
    public bool IsSuccess
    {
        get
        {
            // Aggregate-level parity: totals, per-node totals, per-node latest hashes.
            bool aggregatesMatch = CountsMatch && CountsPerNodeMatch && LatestHashPerNodeMatch;
            // Sampled parity: individual hash lookups and chain-range comparisons.
            bool spotChecksPassed = HashSpotChecksPassed && ChainSpotChecksPassed;
            return aggregatesMatch && spotChecksPassed;
        }
    }
}