Initialize CBDD solution and add a .NET-focused gitignore for generated artifacts.

Joseph Doherty
2026-02-20 12:54:07 -05:00
commit b8ed5ec500
214 changed files with 101452 additions and 0 deletions


@@ -0,0 +1,448 @@
using System.Buffers.Binary;
using ZB.MOM.WW.CBDD.Core.Compression;
using ZB.MOM.WW.CBDD.Core.Indexing;
namespace ZB.MOM.WW.CBDD.Core.Storage;
/// <summary>
/// Aggregated page counts grouped by page type.
/// </summary>
public sealed class PageTypeUsageEntry
{
public PageType PageType { get; init; }
public int PageCount { get; init; }
}
/// <summary>
/// Per-collection page usage summary.
/// </summary>
public sealed class CollectionPageUsageEntry
{
public string CollectionName { get; init; } = string.Empty;
public int TotalDistinctPages { get; init; }
public int DataPages { get; init; }
public int OverflowPages { get; init; }
public int IndexPages { get; init; }
public int OtherPages { get; init; }
}
/// <summary>
/// Per-collection compression ratio summary.
/// </summary>
public sealed class CollectionCompressionRatioEntry
{
public string CollectionName { get; init; } = string.Empty;
public long DocumentCount { get; init; }
public long CompressedDocumentCount { get; init; }
public long BytesBeforeCompression { get; init; }
public long BytesAfterCompression { get; init; }
public double CompressionRatio => BytesAfterCompression <= 0 ? 1.0 : (double)BytesBeforeCompression / BytesAfterCompression;
}
/// <summary>
/// Summary of free-list and reclaimable tail information.
/// </summary>
public sealed class FreeListSummary
{
public uint PageCount { get; init; }
public int FreePageCount { get; init; }
public long FreeBytes { get; init; }
public double FragmentationPercent { get; init; }
public uint TailReclaimablePages { get; init; }
}
/// <summary>
/// Single page entry in fragmentation reporting.
/// </summary>
public sealed class FragmentationPageEntry
{
public uint PageId { get; init; }
public PageType PageType { get; init; }
public bool IsFreePage { get; init; }
public int FreeBytes { get; init; }
}
/// <summary>
/// Detailed fragmentation map and totals.
/// </summary>
public sealed class FragmentationMapReport
{
public IReadOnlyList<FragmentationPageEntry> Pages { get; init; } = Array.Empty<FragmentationPageEntry>();
public long TotalFreeBytes { get; init; }
public double FragmentationPercent { get; init; }
public uint TailReclaimablePages { get; init; }
}
public sealed partial class StorageEngine
{
/// <summary>
/// Gets page usage grouped by page type.
/// </summary>
public IReadOnlyList<PageTypeUsageEntry> GetPageUsageByPageType()
{
var pageCount = _pageFile.NextPageId;
var buffer = new byte[_pageFile.PageSize];
var counts = new Dictionary<PageType, int>();
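// Scan every allocated page and tally counts keyed by the page type recorded in each page header.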
for (uint pageId = 0; pageId < pageCount; pageId++)
{
_pageFile.ReadPage(pageId, buffer);
var pageType = PageHeader.ReadFrom(buffer).PageType;
counts[pageType] = counts.TryGetValue(pageType, out var count) ? count + 1 : 1;
}
return counts
.OrderBy(x => (byte)x.Key)
.Select(x => new PageTypeUsageEntry
{
PageType = x.Key,
PageCount = x.Value
})
.ToArray();
}
/// <summary>
/// Gets per-collection page usage by resolving primary-index locations and related index roots.
/// </summary>
public IReadOnlyList<CollectionPageUsageEntry> GetPageUsageByCollection()
{
var metadataEntries = GetAllCollectionMetadata();
var results = new List<CollectionPageUsageEntry>(metadataEntries.Count);
foreach (var metadata in metadataEntries)
{
var pageIds = new HashSet<uint>();
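// Seed the set with the collection's known root pages: primary index, schema, and each secondary index.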
if (metadata.PrimaryRootPageId != 0)
pageIds.Add(metadata.PrimaryRootPageId);
if (metadata.SchemaRootPageId != 0)
pageIds.Add(metadata.SchemaRootPageId);
foreach (var indexMetadata in metadata.Indexes)
{
if (indexMetadata.RootPageId != 0)
pageIds.Add(indexMetadata.RootPageId);
}
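// Walk the primary index to collect every document's data page, following overflow chains for documents that spill past a single page.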
foreach (var location in EnumeratePrimaryLocations(metadata))
{
pageIds.Add(location.PageId);
if (TryReadFirstOverflowPage(location, out var firstOverflowPage))
{
AddOverflowChainPages(pageIds, firstOverflowPage);
}
}
int data = 0;
int overflow = 0;
int indexPages = 0;
int other = 0;
var pageBuffer = new byte[_pageFile.PageSize];
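// Classify each distinct page by reading its header; ids at or beyond NextPageId are skipped.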
foreach (var pageId in pageIds)
{
if (pageId >= _pageFile.NextPageId)
continue;
_pageFile.ReadPage(pageId, pageBuffer);
var pageType = PageHeader.ReadFrom(pageBuffer).PageType;
if (pageType == PageType.Data)
{
data++;
}
else if (pageType == PageType.Overflow)
{
overflow++;
}
else if (pageType == PageType.Index || pageType == PageType.Vector || pageType == PageType.Spatial)
{
indexPages++;
}
else
{
other++;
}
}
results.Add(new CollectionPageUsageEntry
{
CollectionName = metadata.Name,
TotalDistinctPages = pageIds.Count,
DataPages = data,
OverflowPages = overflow,
IndexPages = indexPages,
OtherPages = other
});
}
return results;
}
/// <summary>
/// Gets per-collection logical-vs-stored compression ratios.
/// </summary>
public IReadOnlyList<CollectionCompressionRatioEntry> GetCompressionRatioByCollection()
{
var metadataEntries = GetAllCollectionMetadata();
var results = new List<CollectionCompressionRatioEntry>(metadataEntries.Count);
foreach (var metadata in metadataEntries)
{
long docs = 0;
long compressedDocs = 0;
long bytesBefore = 0;
long bytesAfter = 0;
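// Accumulate logical (pre-compression) and stored (post-compression) byte counts across every primary-index entry.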
foreach (var location in EnumeratePrimaryLocations(metadata))
{
if (!TryReadSlotPayloadStats(location, out var isCompressed, out var originalBytes, out var storedBytes))
continue;
docs++;
if (isCompressed)
compressedDocs++;
bytesBefore += originalBytes;
bytesAfter += storedBytes;
}
results.Add(new CollectionCompressionRatioEntry
{
CollectionName = metadata.Name,
DocumentCount = docs,
CompressedDocumentCount = compressedDocs,
BytesBeforeCompression = bytesBefore,
BytesAfterCompression = bytesAfter
});
}
return results;
}
/// <summary>
/// Gets free-list summary for diagnostics.
/// </summary>
public FreeListSummary GetFreeListSummary()
{
var snapshot = CaptureCompactionSnapshot();
return new FreeListSummary
{
PageCount = snapshot.PageCount,
FreePageCount = snapshot.FreePageCount,
FreeBytes = snapshot.TotalFreeBytes,
FragmentationPercent = snapshot.FragmentationPercent,
TailReclaimablePages = snapshot.TailReclaimablePages
};
}
/// <summary>
/// Gets detailed page-level fragmentation diagnostics.
/// </summary>
public FragmentationMapReport GetFragmentationMap()
{
var freePageSet = new HashSet<uint>(_pageFile.EnumerateFreePages(includeEmptyPages: true));
var pageCount = _pageFile.NextPageId;
var buffer = new byte[_pageFile.PageSize];
var pages = new List<FragmentationPageEntry>((int)pageCount);
long totalFreeBytes = 0;
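// Walk every page: pages on the free list count as entirely free, while slotted pages contribute their unused bytes.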
for (uint pageId = 0; pageId < pageCount; pageId++)
{
_pageFile.ReadPage(pageId, buffer);
var pageHeader = PageHeader.ReadFrom(buffer);
var isFreePage = freePageSet.Contains(pageId);
int freeBytes = 0;
if (isFreePage)
{
freeBytes = _pageFile.PageSize;
}
else if (TryReadSlottedFreeSpace(buffer, out var slottedFreeBytes))
{
freeBytes = slottedFreeBytes;
}
totalFreeBytes += freeBytes;
pages.Add(new FragmentationPageEntry
{
PageId = pageId,
PageType = pageHeader.PageType,
IsFreePage = isFreePage,
FreeBytes = freeBytes
});
}
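// Count the run of free pages at the tail of the file (never dipping below page 2); these are reported as reclaimable.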
uint tailReclaimablePages = 0;
for (var i = pageCount; i > 2; i--)
{
if (!freePageSet.Contains(i - 1))
break;
tailReclaimablePages++;
}
var fileBytes = Math.Max(1L, _pageFile.FileLengthBytes);
return new FragmentationMapReport
{
Pages = pages,
TotalFreeBytes = totalFreeBytes,
FragmentationPercent = (totalFreeBytes * 100d) / fileBytes,
TailReclaimablePages = tailReclaimablePages
};
}
private IEnumerable<DocumentLocation> EnumeratePrimaryLocations(CollectionMetadata metadata)
{
if (metadata.PrimaryRootPageId == 0)
yield break;
var index = new BTreeIndex(this, IndexOptions.CreateUnique("_id"), metadata.PrimaryRootPageId);
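// A full forward range scan over the _id index yields the location of every document in the collection.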
foreach (var entry in index.Range(IndexKey.MinKey, IndexKey.MaxKey, IndexDirection.Forward, transactionId: 0))
{
yield return entry.Location;
}
}
private bool TryReadFirstOverflowPage(in DocumentLocation location, out uint firstOverflowPage)
{
firstOverflowPage = 0;
var pageBuffer = new byte[_pageFile.PageSize];
_pageFile.ReadPage(location.PageId, pageBuffer);
var header = SlottedPageHeader.ReadFrom(pageBuffer);
if (location.SlotIndex >= header.SlotCount)
return false;
var slotOffset = SlottedPageHeader.Size + (location.SlotIndex * SlotEntry.Size);
var slot = SlotEntry.ReadFrom(pageBuffer.AsSpan(slotOffset, SlotEntry.Size));
if ((slot.Flags & SlotFlags.Deleted) != 0)
return false;
if ((slot.Flags & SlotFlags.HasOverflow) == 0)
return false;
if (slot.Length < 8)
return false;
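// The overflowed slot's in-page payload starts with an 8-byte prefix: bytes 4..7 hold the first overflow page id
// (bytes 0..3 hold the total stored length, which TryReadSlotPayloadStats reads).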
firstOverflowPage = BinaryPrimitives.ReadUInt32LittleEndian(pageBuffer.AsSpan(slot.Offset + 4, 4));
return true;
}
private void AddOverflowChainPages(HashSet<uint> pageIds, uint firstOverflowPage)
{
if (firstOverflowPage == 0)
return;
var buffer = new byte[_pageFile.PageSize];
var visited = new HashSet<uint>();
var current = firstOverflowPage;
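// Follow the NextOverflowPage chain; the visited set prevents an infinite loop if the chain ever cycles.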
while (current != 0 && current < _pageFile.NextPageId && visited.Add(current))
{
pageIds.Add(current);
_pageFile.ReadPage(current, buffer);
var header = SlottedPageHeader.ReadFrom(buffer);
if (header.PageType != PageType.Overflow)
break;
current = header.NextOverflowPage;
}
}
private bool TryReadSlotPayloadStats(
in DocumentLocation location,
out bool isCompressed,
out int originalBytes,
out int storedBytes)
{
isCompressed = false;
originalBytes = 0;
storedBytes = 0;
var pageBuffer = new byte[_pageFile.PageSize];
_pageFile.ReadPage(location.PageId, pageBuffer);
var header = SlottedPageHeader.ReadFrom(pageBuffer);
if (location.SlotIndex >= header.SlotCount)
return false;
var slotOffset = SlottedPageHeader.Size + (location.SlotIndex * SlotEntry.Size);
var slot = SlotEntry.ReadFrom(pageBuffer.AsSpan(slotOffset, SlotEntry.Size));
if ((slot.Flags & SlotFlags.Deleted) != 0)
return false;
var hasOverflow = (slot.Flags & SlotFlags.HasOverflow) != 0;
isCompressed = (slot.Flags & SlotFlags.Compressed) != 0;
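// Payloads without overflow live entirely in the slot: the slot length is the stored size, and compressed
// payloads additionally carry their original length in a CompressedPayloadHeader.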
if (!hasOverflow)
{
storedBytes = slot.Length;
if (!isCompressed)
{
originalBytes = slot.Length;
return true;
}
if (slot.Length < CompressedPayloadHeader.Size)
return false;
var compressedHeader = CompressedPayloadHeader.ReadFrom(pageBuffer.AsSpan(slot.Offset, CompressedPayloadHeader.Size));
originalBytes = compressedHeader.OriginalLength;
return true;
}
if (slot.Length < 8)
return false;
var primaryPayload = pageBuffer.AsSpan(slot.Offset, slot.Length);
var totalStoredBytes = BinaryPrimitives.ReadInt32LittleEndian(primaryPayload.Slice(0, 4));
if (totalStoredBytes < 0)
return false;
storedBytes = totalStoredBytes;
if (!isCompressed)
{
originalBytes = totalStoredBytes;
return true;
}
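// For compressed overflowed payloads, the CompressedPayloadHeader sits at the start of the stored data and may be
// split between the in-page prefix and the first overflow page(s), so it is reassembled before reading.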
var storedPrefix = primaryPayload.Slice(8);
Span<byte> headerBuffer = stackalloc byte[CompressedPayloadHeader.Size];
if (storedPrefix.Length >= CompressedPayloadHeader.Size)
{
storedPrefix.Slice(0, CompressedPayloadHeader.Size).CopyTo(headerBuffer);
}
else
{
storedPrefix.CopyTo(headerBuffer);
var copied = storedPrefix.Length;
var nextOverflow = BinaryPrimitives.ReadUInt32LittleEndian(primaryPayload.Slice(4, 4));
var overflowBuffer = new byte[_pageFile.PageSize];
while (copied < CompressedPayloadHeader.Size && nextOverflow != 0 && nextOverflow < _pageFile.NextPageId)
{
_pageFile.ReadPage(nextOverflow, overflowBuffer);
var overflowHeader = SlottedPageHeader.ReadFrom(overflowBuffer);
if (overflowHeader.PageType != PageType.Overflow)
return false;
var available = Math.Min(CompressedPayloadHeader.Size - copied, _pageFile.PageSize - SlottedPageHeader.Size);
overflowBuffer.AsSpan(SlottedPageHeader.Size, available).CopyTo(headerBuffer.Slice(copied));
copied += available;
nextOverflow = overflowHeader.NextOverflowPage;
}
if (copied < CompressedPayloadHeader.Size)
return false;
}
var headerFromPayload = CompressedPayloadHeader.ReadFrom(headerBuffer);
originalBytes = headerFromPayload.OriginalLength;
return true;
}
}