Initialize CBDD solution and add a .NET-focused gitignore for generated artifacts.
This commit is contained in:
174
src/CBDD.Core/Storage/VectorPage.cs
Executable file
174
src/CBDD.Core/Storage/VectorPage.cs
Executable file
@@ -0,0 +1,174 @@
|
||||
using System.Buffers.Binary;
using System.Runtime.InteropServices;

using ZB.MOM.WW.CBDD.Core.Indexing;
|
||||
|
||||
namespace ZB.MOM.WW.CBDD.Core.Storage;
|
||||
|
||||
/// <summary>
/// Page for storing HNSW Vector Index nodes.
/// Each page stores a fixed number of nodes based on vector dimensions and M.
/// </summary>
public struct VectorPage
{
    // Page layout:
    // [PageHeader (32)]
    // [Dimensions (4)]
    // [MaxM (4)]
    // [NodeSize (4)]
    // [NodeCount (4)]
    // [Nodes Data (Contiguous)...]

    private const int DimensionsOffset = 32;
    private const int MaxMOffset = 36;
    private const int NodeSizeOffset = 40;
    private const int NodeCountOffset = 44;
    private const int DataOffset = 48;

    // Per-node layout: DocumentLocation (6) + MaxLevel (1) precede the vector data.
    private const int NodeHeaderSize = 7;

    // Each link is a serialized DocumentLocation (6 bytes).
    private const int LinkSize = 6;

    // Space is reserved per node for level 0 plus up to 15 upper levels.
    // HNSW levels are typically < 16 for any practical collection size.
    private const int MaxLevels = 16;

    /// <summary>
    /// Increments the node count stored in the vector page header.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    public static void IncrementNodeCount(Span<byte> page)
    {
        int count = GetNodeCount(page);
        BinaryPrimitives.WriteInt32LittleEndian(page.Slice(NodeCountOffset), count + 1);
    }

    /// <summary>
    /// Initializes a vector page with header metadata and sizing information.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="pageId">The page identifier.</param>
    /// <param name="dimensions">The vector dimensionality. Must be positive.</param>
    /// <param name="maxM">The HNSW max connections parameter. Must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="dimensions"/> or <paramref name="maxM"/> is not positive;
    /// a non-positive value would yield a zero or negative node size and break
    /// <see cref="GetMaxNodes"/> (division by the stored node size).
    /// </exception>
    public static void Initialize(Span<byte> page, uint pageId, int dimensions, int maxM)
    {
        if (dimensions <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(dimensions), dimensions, "Vector dimensionality must be positive.");
        }

        if (maxM <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(maxM), maxM, "HNSW maxM must be positive.");
        }

        var header = new PageHeader
        {
            PageId = pageId,
            PageType = PageType.Vector,
            FreeBytes = (ushort)(page.Length - DataOffset),
            NextPageId = 0,
            TransactionId = 0
        };
        header.WriteTo(page);

        BinaryPrimitives.WriteInt32LittleEndian(page.Slice(DimensionsOffset), dimensions);
        BinaryPrimitives.WriteInt32LittleEndian(page.Slice(MaxMOffset), maxM);

        // Node size is fixed per index configuration to avoid fragmentation:
        // Location (6) + MaxLevel (1) + Vector (dim * 4) + Links.
        // HNSW standard: level 0 has 2*M links, levels > 0 have M links each,
        // so the reserved link area is (2*M + 15*M) * 6 bytes for 16 levels.
        int linkArea = ((2 * maxM) + ((MaxLevels - 1) * maxM)) * LinkSize;
        int nodeSize = NodeHeaderSize + (dimensions * sizeof(float)) + linkArea;
        BinaryPrimitives.WriteInt32LittleEndian(page.Slice(NodeSizeOffset), nodeSize);
        BinaryPrimitives.WriteInt32LittleEndian(page.Slice(NodeCountOffset), 0);
    }

    /// <summary>
    /// Gets the number of nodes currently stored in the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <returns>The node count.</returns>
    public static int GetNodeCount(ReadOnlySpan<byte> page) =>
        BinaryPrimitives.ReadInt32LittleEndian(page.Slice(NodeCountOffset));

    /// <summary>
    /// Gets the configured node size for the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <returns>The node size in bytes.</returns>
    public static int GetNodeSize(ReadOnlySpan<byte> page) =>
        BinaryPrimitives.ReadInt32LittleEndian(page.Slice(NodeSizeOffset));

    /// <summary>
    /// Gets the maximum number of nodes that can fit in the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <returns>The maximum node count.</returns>
    public static int GetMaxNodes(ReadOnlySpan<byte> page) =>
        (page.Length - DataOffset) / GetNodeSize(page);

    /// <summary>
    /// Writes a node to the page at the specified index.
    /// The node's link region is zeroed so stale bytes from a previously stored
    /// node can never be misread as valid neighbor locations.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="nodeIndex">The zero-based node index.</param>
    /// <param name="loc">The document location for the node.</param>
    /// <param name="maxLevel">The maximum graph level for the node.</param>
    /// <param name="vector">The vector values to store.</param>
    /// <param name="dimensions">The vector dimensionality.</param>
    public static void WriteNode(Span<byte> page, int nodeIndex, DocumentLocation loc, int maxLevel, ReadOnlySpan<float> vector, int dimensions)
    {
        int nodeSize = GetNodeSize(page);
        int offset = DataOffset + (nodeIndex * nodeSize);
        var nodeSpan = page.Slice(offset, nodeSize);

        // 1. Document Location
        loc.WriteTo(nodeSpan.Slice(0, LinkSize));

        // 2. Max Level
        nodeSpan[6] = (byte)maxLevel;

        // 3. Vector
        var vectorSpan = MemoryMarshal.Cast<byte, float>(nodeSpan.Slice(NodeHeaderSize, dimensions * sizeof(float)));
        vector.CopyTo(vectorSpan);

        // 4. Links: zero the entire reserved link region (everything after the
        //    vector up to the end of the fixed-size node slot). A zeroed link
        //    reads back as the 0/empty sentinel the indexer expects.
        nodeSpan.Slice(NodeHeaderSize + (dimensions * sizeof(float))).Clear();
    }

    /// <summary>
    /// Reads node metadata and vector data from the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="nodeIndex">The zero-based node index.</param>
    /// <param name="loc">When this method returns, contains the node document location.</param>
    /// <param name="maxLevel">When this method returns, contains the node max level.</param>
    /// <param name="vector">The destination span for vector values; its length determines how many floats are read.</param>
    public static void ReadNodeData(ReadOnlySpan<byte> page, int nodeIndex, out DocumentLocation loc, out int maxLevel, Span<float> vector)
    {
        int nodeSize = GetNodeSize(page);
        int offset = DataOffset + (nodeIndex * nodeSize);
        var nodeSpan = page.Slice(offset, nodeSize);

        loc = DocumentLocation.ReadFrom(nodeSpan.Slice(0, LinkSize));
        maxLevel = nodeSpan[6];

        var vectorSource = MemoryMarshal.Cast<byte, float>(nodeSpan.Slice(NodeHeaderSize, vector.Length * sizeof(float)));
        vectorSource.CopyTo(vector);
    }

    /// <summary>
    /// Gets the span that stores links for a node at a specific level.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="nodeIndex">The zero-based node index.</param>
    /// <param name="level">The graph level. Must be in [0, 15] — only 16 levels are reserved per node.</param>
    /// <param name="dimensions">The vector dimensionality.</param>
    /// <param name="maxM">The HNSW max connections parameter.</param>
    /// <returns>The span representing the link storage region.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// Thrown when <paramref name="level"/> is outside [0, 15]. Without this guard a
    /// larger level would silently slice into the next node's data, since the
    /// computed offset can remain inside the page bounds.
    /// </exception>
    public static Span<byte> GetLinksSpan(Span<byte> page, int nodeIndex, int level, int dimensions, int maxM)
    {
        if ((uint)level >= MaxLevels)
        {
            throw new ArgumentOutOfRangeException(nameof(level), level, "Only levels 0..15 are reserved per node.");
        }

        int nodeSize = GetNodeSize(page);
        int nodeOffset = DataOffset + (nodeIndex * nodeSize);

        // Links follow the node header and vector: Location(6) + MaxLevel(1) + Vector(dim*4).
        int linkBaseOffset = nodeOffset + NodeHeaderSize + (dimensions * sizeof(float));

        // Level 0 has 2*M links at the start of the link region;
        // each level above it has M links.
        int levelOffset = level == 0
            ? 0
            : (2 * maxM * LinkSize) + ((level - 1) * maxM * LinkSize);
        int count = level == 0 ? 2 * maxM : maxM;

        return page.Slice(linkBaseOffset + levelOffset, count * LinkSize);
    }
}
|
||||
Reference in New Issue
Block a user