Implement checkpoint modes with docs/tests and reorganize project file layout
All checks were successful
NuGet Publish / build-and-pack (push) Successful in 46s
NuGet Publish / publish-to-gitea (push) Successful in 53s

This commit is contained in:
Joseph Doherty
2026-02-21 07:56:36 -05:00
parent 3ffd468c79
commit 4c6aaa5a3f
96 changed files with 744 additions and 249 deletions

View File

@@ -0,0 +1,176 @@
using System.IO.Compression;
using System.Security.Cryptography;
using ZB.MOM.WW.CBDD.Bson;
using ZB.MOM.WW.CBDD.Core.Compression;
using ZB.MOM.WW.CBDD.Core.Storage;
using ZB.MOM.WW.CBDD.Shared;
namespace ZB.MOM.WW.CBDD.Tests;
public class CompressionCompatibilityTests
{
    /// <summary>
    /// Verifies opening legacy uncompressed files with compression enabled does not mutate database bytes.
    /// </summary>
    [Fact]
    public void OpeningLegacyUncompressedFile_WithCompressionEnabled_ShouldNotMutateDbFile()
    {
        var path = NewDbPath();
        var insertedIds = new List<ObjectId>();
        try
        {
            // Build a legacy database without compression and flush everything to disk.
            using (var legacyDb = new TestDbContext(path))
            {
                insertedIds.Add(legacyDb.Users.Insert(new User { Name = "legacy-a", Age = 10 }));
                insertedIds.Add(legacyDb.Users.Insert(new User { Name = "legacy-b", Age = 11 }));
                legacyDb.SaveChanges();
                legacyDb.ForceCheckpoint();
            }

            // Snapshot the on-disk state before reopening with compression turned on.
            var originalLength = new FileInfo(path).Length;
            var originalHash = ComputeFileHash(path);

            var options = new CompressionOptions
            {
                EnableCompression = true,
                MinSizeBytes = 0,
                MinSavingsPercent = 0,
                Codec = CompressionCodec.Brotli,
                Level = CompressionLevel.Fastest
            };

            // Pure reads with compression enabled must leave the legacy file untouched.
            using (var reopened = new TestDbContext(path, options))
            {
                reopened.Users.FindById(insertedIds[0])!.Name.ShouldBe("legacy-a");
                reopened.Users.FindById(insertedIds[1])!.Name.ShouldBe("legacy-b");
                reopened.Users.Count().ShouldBe(2);
            }

            new FileInfo(path).Length.ShouldBe(originalLength);
            ComputeFileHash(path).ShouldBe(originalHash);
        }
        finally
        {
            CleanupFiles(path);
        }
    }

    /// <summary>
    /// Verifies mixed compressed and uncompressed documents remain readable after partial migration.
    /// </summary>
    [Fact]
    public void MixedFormatDocuments_ShouldRemainReadableAfterPartialMigration()
    {
        var path = NewDbPath();
        ObjectId legacyId;
        ObjectId compressedId;
        try
        {
            // Seed one document while compression is disabled.
            using (var legacyDb = new TestDbContext(path))
            {
                legacyId = legacyDb.Users.Insert(new User { Name = "legacy-uncompressed", Age = 22 });
                legacyDb.SaveChanges();
                legacyDb.ForceCheckpoint();
            }

            var options = new CompressionOptions
            {
                EnableCompression = true,
                MinSizeBytes = 0,
                MinSavingsPercent = 0,
                Codec = CompressionCodec.Brotli,
                Level = CompressionLevel.Fastest
            };

            // Add a second, large document with compression enabled so both formats coexist.
            using (var migrated = new TestDbContext(path, options))
            {
                compressedId = migrated.Users.Insert(new User { Name = BuildPayload(24_000), Age = 33 });
                migrated.SaveChanges();
                migrated.ForceCheckpoint();
            }

            using (var verify = new TestDbContext(path, options))
            {
                verify.Users.FindById(legacyId)!.Name.ShouldBe("legacy-uncompressed");
                verify.Users.FindById(compressedId)!.Name.Length.ShouldBeGreaterThan(10_000);

                // Both storage formats should be present on disk at the same time.
                var counts = CountActiveDataSlots(verify.Storage);
                counts.Compressed.ShouldBeGreaterThanOrEqualTo(1);
                counts.Uncompressed.ShouldBeGreaterThanOrEqualTo(1);
            }
        }
        finally
        {
            CleanupFiles(path);
        }
    }

    /// <summary>
    /// Scans every data page and tallies live slots by compressed/uncompressed flag.
    /// </summary>
    /// <param name="storage">Storage engine whose pages are inspected.</param>
    private static (int Compressed, int Uncompressed) CountActiveDataSlots(StorageEngine storage)
    {
        var page = new byte[storage.PageSize];
        var tally = (Compressed: 0, Uncompressed: 0);
        for (uint pageId = 1; pageId < storage.PageCount; pageId++)
        {
            storage.ReadPage(pageId, null, page);
            var pageHeader = SlottedPageHeader.ReadFrom(page);
            if (pageHeader.PageType != PageType.Data)
                continue;
            for (ushort index = 0; index < pageHeader.SlotCount; index++)
            {
                var entryOffset = SlottedPageHeader.Size + (index * SlotEntry.Size);
                var entry = SlotEntry.ReadFrom(page.AsSpan(entryOffset, SlotEntry.Size));
                if ((entry.Flags & SlotFlags.Deleted) != 0)
                    continue;
                if ((entry.Flags & SlotFlags.Compressed) != 0)
                    tally.Compressed++;
                else
                    tally.Uncompressed++;
            }
        }
        return tally;
    }

    /// <summary>
    /// Computes an uppercase SHA-256 hex digest of the file at <paramref name="path"/>.
    /// </summary>
    private static string ComputeFileHash(string path)
    {
        using var sha256 = SHA256.Create();
        using var stream = File.OpenRead(path);
        return Convert.ToHexString(sha256.ComputeHash(stream));
    }

    /// <summary>
    /// Builds a repetitive string of at least <paramref name="approxLength"/> characters.
    /// </summary>
    private static string BuildPayload(int approxLength)
    {
        var sb = new System.Text.StringBuilder(approxLength + 256);
        for (var n = 0; sb.Length < approxLength; n++)
        {
            sb.Append("compat-payload-").Append(n.ToString("D8")).Append('|');
        }
        return sb.ToString();
    }

    /// <summary>
    /// Creates a unique database path in the temp directory.
    /// </summary>
    private static string NewDbPath()
    {
        return Path.Combine(Path.GetTempPath(), $"compression_compat_{Guid.NewGuid():N}.db");
    }

    /// <summary>
    /// Removes the database file plus its WAL and compaction marker siblings.
    /// </summary>
    private static void CleanupFiles(string dbPath)
    {
        var candidates = new[] { dbPath, Path.ChangeExtension(dbPath, ".wal"), $"{dbPath}.compact.state" };
        foreach (var candidate in candidates)
        {
            if (File.Exists(candidate))
                File.Delete(candidate);
        }
    }
}

View File

@@ -0,0 +1,209 @@
using System.Buffers.Binary;
using System.IO.Compression;
using ZB.MOM.WW.CBDD.Bson;
using ZB.MOM.WW.CBDD.Core.Compression;
using ZB.MOM.WW.CBDD.Core.Storage;
using ZB.MOM.WW.CBDD.Shared;
namespace ZB.MOM.WW.CBDD.Tests;
public class CompressionCorruptionTests
{
    /// <summary>
    /// Verifies corrupted compressed payload checksum triggers invalid data errors.
    /// </summary>
    [Fact]
    public void Read_WithBadChecksum_ShouldThrowInvalidData()
    {
        var dbPath = NewDbPath();
        var options = CompressionEnabledOptions();
        try
        {
            using var db = new TestDbContext(dbPath, options);
            // Bump the stored checksum (header bytes 12..16) so it no longer matches the payload.
            var id = InsertCheckpointAndCorrupt(db, header =>
            {
                var currentChecksum = BinaryPrimitives.ReadUInt32LittleEndian(header.Slice(12, 4));
                BinaryPrimitives.WriteUInt32LittleEndian(header.Slice(12, 4), currentChecksum + 1);
            });
            var ex = Should.Throw<InvalidDataException>(() => db.Users.FindById(id));
            ex.Message.ShouldContain("checksum mismatch");
            // The engine should also record the failure in its compression statistics.
            db.GetCompressionStats().ChecksumFailureCount.ShouldBeGreaterThanOrEqualTo(1);
        }
        finally
        {
            CleanupFiles(dbPath);
        }
    }

    /// <summary>
    /// Verifies invalid original length metadata triggers invalid data errors.
    /// </summary>
    [Fact]
    public void Read_WithBadOriginalLength_ShouldThrowInvalidData()
    {
        var dbPath = NewDbPath();
        var options = CompressionEnabledOptions();
        try
        {
            using var db = new TestDbContext(dbPath, options);
            // Write a negative original-length value (header bytes 4..8), which can never be valid.
            var id = InsertCheckpointAndCorrupt(db, header =>
            {
                BinaryPrimitives.WriteInt32LittleEndian(header.Slice(4, 4), -1);
            });
            var ex = Should.Throw<InvalidDataException>(() => db.Users.FindById(id));
            ex.Message.ShouldContain("decompress");
        }
        finally
        {
            CleanupFiles(dbPath);
        }
    }

    /// <summary>
    /// Verifies oversized declared decompressed length enforces safety guardrails.
    /// </summary>
    [Fact]
    public void Read_WithOversizedDeclaredLength_ShouldEnforceGuardrail()
    {
        var dbPath = NewDbPath();
        // Cap decompression at 2048 bytes so a declared length of 2049 trips the guardrail.
        var options = CompressionEnabledOptions(maxDecompressedSizeBytes: 2048);
        try
        {
            using var db = new TestDbContext(dbPath, options);
            var id = InsertCheckpointAndCorrupt(db, header =>
            {
                BinaryPrimitives.WriteInt32LittleEndian(header.Slice(4, 4), 2049);
            });
            var ex = Should.Throw<InvalidDataException>(() => db.Users.FindById(id));
            ex.Message.ShouldContain("invalid decompressed length");
            db.GetCompressionStats().SafetyLimitRejectionCount.ShouldBeGreaterThanOrEqualTo(1);
        }
        finally
        {
            CleanupFiles(dbPath);
        }
    }

    /// <summary>
    /// Verifies invalid codec identifiers in compressed headers trigger invalid data errors.
    /// </summary>
    [Fact]
    public void Read_WithInvalidCodecId_ShouldThrowInvalidData()
    {
        var dbPath = NewDbPath();
        var options = CompressionEnabledOptions();
        try
        {
            using var db = new TestDbContext(dbPath, options);
            var id = InsertCheckpointAndCorrupt(db, header =>
            {
                header[0] = 0; // CompressionCodec.None is invalid for compressed payload header.
            });
            var ex = Should.Throw<InvalidDataException>(() => db.Users.FindById(id));
            ex.Message.ShouldContain("invalid codec");
        }
        finally
        {
            CleanupFiles(dbPath);
        }
    }

    /// <summary>
    /// Inserts a large document, checkpoints it to disk, then applies <paramref name="mutateHeader"/>
    /// in place to the compressed payload header of the first compressed slot found.
    /// </summary>
    /// <param name="db">Open database context with compression enabled.</param>
    /// <param name="mutateHeader">Mutation applied to the on-page compressed header bytes.</param>
    /// <returns>The id of the inserted (now corrupted) document.</returns>
    private static ObjectId InsertCheckpointAndCorrupt(TestDbContext db, HeaderMutator mutateHeader)
    {
        var user = new User
        {
            Name = BuildPayload(16_000),
            Age = 33
        };
        var id = db.Users.Insert(user);
        db.SaveChanges();
        db.ForceCheckpoint();
        var (pageId, slot, _) = FindFirstCompressedSlot(db.Storage);
        // The in-place corruption below assumes the payload lives entirely on this page.
        ((slot.Flags & SlotFlags.HasOverflow) != 0).ShouldBeFalse();
        var page = new byte[db.Storage.PageSize];
        db.Storage.ReadPage(pageId, null, page);
        var headerSlice = page.AsSpan(slot.Offset, CompressedPayloadHeader.Size);
        mutateHeader(headerSlice);
        // Write the corrupted page straight back, bypassing the normal write pipeline.
        db.Storage.WritePageImmediate(pageId, page);
        return id;
    }

    /// <summary>
    /// Scans data pages for the first live slot flagged as compressed.
    /// </summary>
    /// <param name="storage">Storage engine whose pages are scanned.</param>
    /// <exception cref="InvalidOperationException">Thrown when no active compressed slot exists.</exception>
    private static (uint PageId, SlotEntry Slot, ushort SlotIndex) FindFirstCompressedSlot(StorageEngine storage)
    {
        var buffer = new byte[storage.PageSize];
        for (uint pageId = 1; pageId < storage.PageCount; pageId++)
        {
            storage.ReadPage(pageId, null, buffer);
            var header = SlottedPageHeader.ReadFrom(buffer);
            if (header.PageType != PageType.Data)
                continue;
            for (ushort slotIndex = 0; slotIndex < header.SlotCount; slotIndex++)
            {
                var slotOffset = SlottedPageHeader.Size + (slotIndex * SlotEntry.Size);
                var slot = SlotEntry.ReadFrom(buffer.AsSpan(slotOffset, SlotEntry.Size));
                if ((slot.Flags & SlotFlags.Deleted) != 0)
                    continue;
                if ((slot.Flags & SlotFlags.Compressed) != 0)
                    return (pageId, slot, slotIndex);
            }
        }
        throw new InvalidOperationException("No active compressed slot found for corruption test setup.");
    }

    /// <summary>
    /// Builds compression options that force every insert through the compression path
    /// (no minimum size or savings requirement).
    /// </summary>
    /// <param name="maxDecompressedSizeBytes">Safety cap on the decompressed payload size.</param>
    private static CompressionOptions CompressionEnabledOptions(int maxDecompressedSizeBytes = 32 * 1024)
    {
        return new CompressionOptions
        {
            EnableCompression = true,
            MinSizeBytes = 0,
            MinSavingsPercent = 0,
            Codec = CompressionCodec.Brotli,
            Level = CompressionLevel.Fastest,
            MaxDecompressedSizeBytes = maxDecompressedSizeBytes
        };
    }

    // In-place mutation applied to a compressed payload header span.
    private delegate void HeaderMutator(Span<byte> header);

    /// <summary>
    /// Builds a repetitive string of at least <paramref name="approxLength"/> characters.
    /// </summary>
    private static string BuildPayload(int approxLength)
    {
        var builder = new System.Text.StringBuilder(approxLength + 256);
        var i = 0;
        while (builder.Length < approxLength)
        {
            builder.Append("corruption-payload-");
            builder.Append(i.ToString("D8"));
            builder.Append('|');
            i++;
        }
        return builder.ToString();
    }

    /// <summary>
    /// Creates a unique database path in the temp directory.
    /// </summary>
    private static string NewDbPath()
        => Path.Combine(Path.GetTempPath(), $"compression_corruption_{Guid.NewGuid():N}.db");

    /// <summary>
    /// Removes the database file plus its WAL and compaction marker siblings.
    /// </summary>
    private static void CleanupFiles(string dbPath)
    {
        var walPath = Path.ChangeExtension(dbPath, ".wal");
        var markerPath = $"{dbPath}.compact.state";
        if (File.Exists(dbPath)) File.Delete(dbPath);
        if (File.Exists(walPath)) File.Delete(walPath);
        if (File.Exists(markerPath)) File.Delete(markerPath);
    }
}

View File

@@ -0,0 +1,220 @@
using System.IO.Compression;
using ZB.MOM.WW.CBDD.Bson;
using ZB.MOM.WW.CBDD.Core.Compression;
using ZB.MOM.WW.CBDD.Core.Storage;
using ZB.MOM.WW.CBDD.Shared;
namespace ZB.MOM.WW.CBDD.Tests;
public class CompressionInsertReadTests
{
    /// <summary>
    /// Tests insert with threshold should store mixed compressed and uncompressed slots.
    /// </summary>
    [Fact]
    public void Insert_WithThreshold_ShouldStoreMixedCompressedAndUncompressedSlots()
    {
        var path = NewDbPath();
        var options = new CompressionOptions
        {
            EnableCompression = true,
            MinSizeBytes = 4096,
            MinSavingsPercent = 0,
            Codec = CompressionCodec.Brotli,
            Level = CompressionLevel.Fastest
        };
        try
        {
            using var db = new TestDbContext(path, options);

            // One document below the 4 KiB threshold, one well above it.
            var smallUser = new User { Name = "tiny", Age = 10 };
            var largeUser = new User { Name = BuildPayload(24_000), Age = 11 };
            var smallId = db.Users.Insert(smallUser);
            var largeId = db.Users.Insert(largeUser);
            db.SaveChanges();

            db.Users.FindById(smallId)!.Name.ShouldBe(smallUser.Name);
            db.Users.FindById(largeId)!.Name.ShouldBe(largeUser.Name);

            // At least one slot should be compressed, but never all of them.
            var counts = CountActiveDataSlots(db.Storage);
            counts.Total.ShouldBeGreaterThanOrEqualTo(2);
            counts.Compressed.ShouldBeGreaterThanOrEqualTo(1);
            counts.Compressed.ShouldBeLessThan(counts.Total);
        }
        finally
        {
            CleanupFiles(path);
        }
    }

    /// <summary>
    /// Tests find by id should read mixed compressed and uncompressed documents.
    /// </summary>
    [Fact]
    public void FindById_ShouldReadMixedCompressedAndUncompressedDocuments()
    {
        var path = NewDbPath();
        var options = new CompressionOptions
        {
            EnableCompression = true,
            MinSizeBytes = 512,
            MinSavingsPercent = 0,
            Codec = CompressionCodec.Brotli,
            Level = CompressionLevel.Fastest
        };
        var insertedIds = new List<ObjectId>();
        try
        {
            // Interleave small and large documents, then flush to disk.
            using (var db = new TestDbContext(path, options))
            {
                insertedIds.Add(db.Users.Insert(new User { Name = "small-a", Age = 1 }));
                insertedIds.Add(db.Users.Insert(new User { Name = BuildPayload(18_000), Age = 2 }));
                insertedIds.Add(db.Users.Insert(new User { Name = "small-b", Age = 3 }));
                insertedIds.Add(db.Users.Insert(new User { Name = BuildPayload(26_000), Age = 4 }));
                db.SaveChanges();
                db.ForceCheckpoint();
            }

            // A fresh context must read both formats correctly from disk.
            using (var reopened = new TestDbContext(path, options))
            {
                reopened.Users.FindById(insertedIds[0])!.Name.ShouldBe("small-a");
                reopened.Users.FindById(insertedIds[2])!.Name.ShouldBe("small-b");
                reopened.Users.FindById(insertedIds[1])!.Name.Length.ShouldBeGreaterThan(10_000);
                reopened.Users.FindById(insertedIds[3])!.Name.Length.ShouldBeGreaterThan(10_000);

                var counts = CountActiveDataSlots(reopened.Storage);
                counts.Compressed.ShouldBeGreaterThanOrEqualTo(1);
                counts.Compressed.ShouldBeLessThan(counts.Total);
            }
        }
        finally
        {
            CleanupFiles(path);
        }
    }

    /// <summary>
    /// Tests insert when codec throws should fallback to uncompressed storage.
    /// </summary>
    [Fact]
    public void Insert_WhenCodecThrows_ShouldFallbackToUncompressedStorage()
    {
        var path = NewDbPath();
        var options = new CompressionOptions
        {
            EnableCompression = true,
            MinSizeBytes = 0,
            MinSavingsPercent = 0,
            Codec = CompressionCodec.Brotli,
            Level = CompressionLevel.Fastest
        };
        try
        {
            using var db = new TestDbContext(path, options);

            // Swap in a codec that always fails so every compression attempt errors out.
            db.Storage.CompressionService.RegisterCodec(new FailingBrotliCodec());

            var user = new User { Name = BuildPayload(20_000), Age = 7 };
            var id = db.Users.Insert(user);
            db.SaveChanges();

            // The document must still round trip, stored uncompressed.
            db.Users.FindById(id)!.Name.ShouldBe(user.Name);

            var stats = db.GetCompressionStats();
            stats.CompressionFailureCount.ShouldBeGreaterThanOrEqualTo(1);
            stats.CompressedDocumentCount.ShouldBe(0);
            CountActiveDataSlots(db.Storage).Compressed.ShouldBe(0);
        }
        finally
        {
            CleanupFiles(path);
        }
    }

    /// <summary>
    /// Scans every data page and tallies live slots, counting how many carry the compressed flag.
    /// </summary>
    /// <param name="storage">Storage engine whose pages are inspected.</param>
    private static (int Total, int Compressed) CountActiveDataSlots(StorageEngine storage)
    {
        var page = new byte[storage.PageSize];
        var tally = (Total: 0, Compressed: 0);
        for (uint pageId = 1; pageId < storage.PageCount; pageId++)
        {
            storage.ReadPage(pageId, null, page);
            var pageHeader = SlottedPageHeader.ReadFrom(page);
            if (pageHeader.PageType != PageType.Data)
                continue;
            for (ushort index = 0; index < pageHeader.SlotCount; index++)
            {
                var entryOffset = SlottedPageHeader.Size + (index * SlotEntry.Size);
                var entry = SlotEntry.ReadFrom(page.AsSpan(entryOffset, SlotEntry.Size));
                if ((entry.Flags & SlotFlags.Deleted) != 0)
                    continue;
                tally.Total++;
                if ((entry.Flags & SlotFlags.Compressed) != 0)
                    tally.Compressed++;
            }
        }
        return tally;
    }

    /// <summary>
    /// Builds a repetitive string of at least <paramref name="approxLength"/> characters.
    /// </summary>
    private static string BuildPayload(int approxLength)
    {
        var sb = new System.Text.StringBuilder(approxLength + 256);
        for (var n = 0; sb.Length < approxLength; n++)
        {
            sb.Append("payload-").Append(n.ToString("D8")).Append('|');
        }
        return sb.ToString();
    }

    /// <summary>
    /// Creates a unique database path in the temp directory.
    /// </summary>
    private static string NewDbPath()
    {
        return Path.Combine(Path.GetTempPath(), $"compression_insert_read_{Guid.NewGuid():N}.db");
    }

    /// <summary>
    /// Removes the database file plus its WAL and compaction marker siblings.
    /// </summary>
    private static void CleanupFiles(string dbPath)
    {
        var candidates = new[] { dbPath, Path.ChangeExtension(dbPath, ".wal"), $"{dbPath}.compact.state" };
        foreach (var candidate in candidates)
        {
            if (File.Exists(candidate))
                File.Delete(candidate);
        }
    }

    /// <summary>
    /// Brotli stand-in whose compression always throws, used to exercise the fallback path.
    /// </summary>
    private sealed class FailingBrotliCodec : ICompressionCodec
    {
        /// <summary>
        /// Gets the codec identifier this implementation registers under.
        /// </summary>
        public CompressionCodec Codec => CompressionCodec.Brotli;

        /// <summary>
        /// Always throws to simulate a codec failure during compression.
        /// </summary>
        /// <param name="input">Payload bytes to compress.</param>
        /// <param name="level">Compression level.</param>
        public byte[] Compress(ReadOnlySpan<byte> input, CompressionLevel level)
            => throw new InvalidOperationException("Forced codec failure for test coverage.");

        /// <summary>
        /// Always throws; reads should never reach this codec because nothing was stored compressed.
        /// </summary>
        /// <param name="input">Compressed payload bytes.</param>
        /// <param name="expectedLength">Expected decompressed payload length.</param>
        /// <param name="maxDecompressedSizeBytes">Maximum allowed decompressed size.</param>
        public byte[] Decompress(ReadOnlySpan<byte> input, int expectedLength, int maxDecompressedSizeBytes)
            => throw new InvalidOperationException("This codec should not be used for reads in this scenario.");
    }
}

View File

@@ -0,0 +1,179 @@
using System.IO.Compression;
using System.IO.MemoryMappedFiles;
using ZB.MOM.WW.CBDD.Core.Compression;
using ZB.MOM.WW.CBDD.Core.Storage;
using ZB.MOM.WW.CBDD.Shared;
namespace ZB.MOM.WW.CBDD.Tests;
public class CompressionOverflowTests
{
    /// <summary>
    /// Tests insert compressed document spanning overflow pages should round trip.
    /// </summary>
    [Fact]
    public void Insert_CompressedDocumentSpanningOverflowPages_ShouldRoundTrip()
    {
        var path = NewDbPath();
        var options = new CompressionOptions
        {
            EnableCompression = true,
            MinSizeBytes = 64,
            MinSavingsPercent = 0,
            Codec = CompressionCodec.Deflate,
            Level = CompressionLevel.Fastest
        };
        try
        {
            using var db = new TestDbContext(path, TinyPageConfig(), options);

            // A ~300 KB payload on 16 KiB pages forces the document into overflow pages.
            var payload = BuildPayload(300_000);
            var id = db.Users.Insert(new User { Name = payload, Age = 40 });
            db.SaveChanges();

            var found = db.Users.FindById(id);
            found.ShouldNotBeNull();
            found.Name.ShouldBe(payload);

            var counts = CountSlotModes(db.Storage);
            counts.CompressedOverflow.ShouldBeGreaterThanOrEqualTo(1);
            counts.OverflowPages.ShouldBeGreaterThanOrEqualTo(1);
        }
        finally
        {
            CleanupFiles(path);
        }
    }

    /// <summary>
    /// Tests update should transition across compression thresholds.
    /// </summary>
    [Fact]
    public void Update_ShouldTransitionAcrossCompressionThresholds()
    {
        var path = NewDbPath();
        var options = new CompressionOptions
        {
            EnableCompression = true,
            MinSizeBytes = 2048,
            MinSavingsPercent = 0,
            Codec = CompressionCodec.Brotli,
            Level = CompressionLevel.Fastest
        };
        try
        {
            using var db = new TestDbContext(path, TinyPageConfig(), options);

            // Start below the 2048-byte threshold: nothing should be compressed yet.
            var user = new User { Name = "small", Age = 1 };
            var id = db.Users.Insert(user);
            db.SaveChanges();
            CountSlotModes(db.Storage).Compressed.ShouldBe(0);

            // Grow the document past the threshold: the updated slot should compress.
            user.Name = BuildPayload(120_000);
            db.Users.Update(user).ShouldBeTrue();
            db.SaveChanges();
            var afterLarge = db.Users.FindById(id);
            afterLarge.ShouldNotBeNull();
            afterLarge.Name.ShouldBe(user.Name);
            CountSlotModes(db.Storage).Compressed.ShouldBeGreaterThanOrEqualTo(1);

            // Shrink back below the threshold: compression should disappear again.
            user.Name = "small-again";
            db.Users.Update(user).ShouldBeTrue();
            db.SaveChanges();
            var afterShrink = db.Users.FindById(id);
            afterShrink.ShouldNotBeNull();
            afterShrink.Name.ShouldBe("small-again");
            CountSlotModes(db.Storage).Compressed.ShouldBe(0);
        }
        finally
        {
            CleanupFiles(path);
        }
    }

    /// <summary>
    /// Tallies live compressed slots, compressed slots with overflow chains, and overflow pages.
    /// </summary>
    /// <param name="storage">Storage engine whose pages are inspected.</param>
    private static (int Compressed, int CompressedOverflow, int OverflowPages) CountSlotModes(StorageEngine storage)
    {
        var page = new byte[storage.PageSize];
        var tally = (Compressed: 0, CompressedOverflow: 0, OverflowPages: 0);
        for (uint pageId = 1; pageId < storage.PageCount; pageId++)
        {
            storage.ReadPage(pageId, null, page);
            var pageHeader = SlottedPageHeader.ReadFrom(page);
            if (pageHeader.PageType == PageType.Overflow)
            {
                tally.OverflowPages++;
                continue;
            }
            if (pageHeader.PageType != PageType.Data)
                continue;
            for (ushort index = 0; index < pageHeader.SlotCount; index++)
            {
                var entryOffset = SlottedPageHeader.Size + (index * SlotEntry.Size);
                var entry = SlotEntry.ReadFrom(page.AsSpan(entryOffset, SlotEntry.Size));
                if ((entry.Flags & SlotFlags.Deleted) != 0)
                    continue;
                if ((entry.Flags & SlotFlags.Compressed) == 0)
                    continue;
                tally.Compressed++;
                if ((entry.Flags & SlotFlags.HasOverflow) != 0)
                    tally.CompressedOverflow++;
            }
        }
        return tally;
    }

    /// <summary>
    /// Builds a small page-file layout (16 KiB pages, 1 MiB initial size) to force overflow quickly.
    /// </summary>
    private static PageFileConfig TinyPageConfig()
    {
        return new PageFileConfig
        {
            PageSize = 16 * 1024,
            InitialFileSize = 1024 * 1024,
            Access = MemoryMappedFileAccess.ReadWrite
        };
    }

    /// <summary>
    /// Builds a repetitive string of at least <paramref name="approxLength"/> characters.
    /// </summary>
    private static string BuildPayload(int approxLength)
    {
        var sb = new System.Text.StringBuilder(approxLength + 256);
        for (var n = 0; sb.Length < approxLength; n++)
        {
            sb.Append("overflow-payload-").Append(n.ToString("D7")).Append('|');
        }
        return sb.ToString();
    }

    /// <summary>
    /// Creates a unique database path in the temp directory.
    /// </summary>
    private static string NewDbPath()
    {
        return Path.Combine(Path.GetTempPath(), $"compression_overflow_{Guid.NewGuid():N}.db");
    }

    /// <summary>
    /// Removes the database file plus its WAL and compaction marker siblings.
    /// </summary>
    private static void CleanupFiles(string dbPath)
    {
        var candidates = new[] { dbPath, Path.ChangeExtension(dbPath, ".wal"), $"{dbPath}.compact.state" };
        foreach (var candidate in candidates)
        {
            if (File.Exists(candidate))
                File.Delete(candidate);
        }
    }
}