using System.Runtime.InteropServices;
using ZB.MOM.WW.CBDD.Core.Indexing;

namespace ZB.MOM.WW.CBDD.Core.Storage;

/// <summary>
/// Page for storing HNSW Vector Index nodes.
/// Each page stores a fixed number of nodes based on vector dimensions and M.
/// </summary>
public struct VectorPage
{
    // Layout:
    // [PageHeader (32)]
    // [Dimensions (4)]
    // [MaxM (4)]
    // [NodeSize (4)]
    // [NodeCount (4)]
    // [Nodes Data (Contiguous)...]
    private const int DimensionsOffset = 32;
    private const int MaxMOffset = 36;
    private const int NodeSizeOffset = 40;
    private const int NodeCountOffset = 44;
    private const int DataOffset = 48;

    /// <summary>
    /// Increments the node count stored in the vector page header.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    public static void IncrementNodeCount(Span<byte> page)
    {
        int count = GetNodeCount(page);
        System.Buffers.Binary.BinaryPrimitives.WriteInt32LittleEndian(page.Slice(NodeCountOffset), count + 1);
    }

    /// <summary>
    /// Initializes a vector page with header metadata and sizing information.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="pageId">The page identifier.</param>
    /// <param name="dimensions">The vector dimensionality.</param>
    /// <param name="maxM">The HNSW max connections parameter.</param>
    public static void Initialize(Span<byte> page, uint pageId, int dimensions, int maxM)
    {
        var header = new PageHeader
        {
            PageId = pageId,
            PageType = PageType.Vector,
            FreeBytes = (ushort)(page.Length - DataOffset),
            NextPageId = 0,
            TransactionId = 0
        };
        header.WriteTo(page);

        System.Buffers.Binary.BinaryPrimitives.WriteInt32LittleEndian(page.Slice(DimensionsOffset), dimensions);
        System.Buffers.Binary.BinaryPrimitives.WriteInt32LittleEndian(page.Slice(MaxMOffset), maxM);

        // Node Size Calculation:
        // Location (6) + MaxLevel (1) + Vector (dim * 4) + Links.
        // Node size is kept fixed per index configuration to avoid fragmentation.
        // HNSW standard: level 0 has 2*M links, levels > 0 have M links, and the
        // max level is typically < 16, so space is reserved for 16 levels:
        // 2*M links at level 0 plus M links for each of the 15 upper levels,
        // at 6 bytes per link => maxM * (2 + 15) * 6.
        int nodeSize = 6 + 1 + (dimensions * 4) + (maxM * (2 + 15) * 6);
        System.Buffers.Binary.BinaryPrimitives.WriteInt32LittleEndian(page.Slice(NodeSizeOffset), nodeSize);
        System.Buffers.Binary.BinaryPrimitives.WriteInt32LittleEndian(page.Slice(NodeCountOffset), 0);
    }

    /// <summary>
    /// Gets the number of nodes currently stored in the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <returns>The node count.</returns>
    public static int GetNodeCount(ReadOnlySpan<byte> page)
        => System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(page.Slice(NodeCountOffset));

    /// <summary>
    /// Gets the configured node size for the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <returns>The node size in bytes.</returns>
    public static int GetNodeSize(ReadOnlySpan<byte> page)
        => System.Buffers.Binary.BinaryPrimitives.ReadInt32LittleEndian(page.Slice(NodeSizeOffset));

    /// <summary>
    /// Gets the maximum number of nodes that can fit in the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <returns>The maximum node count.</returns>
    public static int GetMaxNodes(ReadOnlySpan<byte> page)
        => (page.Length - DataOffset) / GetNodeSize(page);

    /// <summary>
    /// Writes a node to the page at the specified index.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="nodeIndex">The zero-based node index.</param>
    /// <param name="loc">The document location for the node.</param>
    /// <param name="maxLevel">The maximum graph level for the node.</param>
    /// <param name="vector">The vector values to store.</param>
    /// <param name="dimensions">The vector dimensionality.</param>
    public static void WriteNode(Span<byte> page, int nodeIndex, DocumentLocation loc, int maxLevel, ReadOnlySpan<float> vector, int dimensions)
    {
        int nodeSize = GetNodeSize(page);
        int offset = DataOffset + (nodeIndex * nodeSize);
        var nodeSpan = page.Slice(offset, nodeSize);

        // 1. Document Location
        loc.WriteTo(nodeSpan.Slice(0, 6));

        // 2. Max Level
        nodeSpan[6] = (byte)maxLevel;

        // 3. Vector
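        // Note: MemoryMarshal.Cast reinterprets the raw bytes in native (host)
        // endianness and performs no byte swapping, unlike the little-endian
        // BinaryPrimitives writes used for the header fields above. All current
        // .NET targets are little-endian, so in practice this is a straight bulk
        // copy; on a big-endian host the float payload would not be portable.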
        var vectorSpan = MemoryMarshal.Cast<byte, float>(nodeSpan.Slice(7, dimensions * 4));
        vector.CopyTo(vectorSpan);

        // 4. Links (initialize with 0/empty)
        // Links follow the vector. Level 0: 2*M links, levels 1..15: M links.
        // For now, just ensure the region is cleared or handled by the indexer.
    }

    /// <summary>
    /// Reads node metadata and vector data from the page.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="nodeIndex">The zero-based node index.</param>
    /// <param name="loc">When this method returns, contains the node document location.</param>
    /// <param name="maxLevel">When this method returns, contains the node max level.</param>
    /// <param name="vector">The destination span for vector values.</param>
    public static void ReadNodeData(ReadOnlySpan<byte> page, int nodeIndex, out DocumentLocation loc, out int maxLevel, Span<float> vector)
    {
        int nodeSize = GetNodeSize(page);
        int offset = DataOffset + (nodeIndex * nodeSize);
        var nodeSpan = page.Slice(offset, nodeSize);

        loc = DocumentLocation.ReadFrom(nodeSpan.Slice(0, 6));
        maxLevel = nodeSpan[6];

        var vectorSource = MemoryMarshal.Cast<byte, float>(nodeSpan.Slice(7, vector.Length * 4));
        vectorSource.CopyTo(vector);
    }

    /// <summary>
    /// Gets the span that stores links for a node at a specific level.
    /// </summary>
    /// <param name="page">The page buffer.</param>
    /// <param name="nodeIndex">The zero-based node index.</param>
    /// <param name="level">The graph level.</param>
    /// <param name="dimensions">The vector dimensionality.</param>
    /// <param name="maxM">The HNSW max connections parameter.</param>
    /// <returns>The span representing the link storage region.</returns>
    public static Span<byte> GetLinksSpan(Span<byte> page, int nodeIndex, int level, int dimensions, int maxM)
    {
        int nodeSize = GetNodeSize(page);
        int nodeOffset = DataOffset + (nodeIndex * nodeSize);

        // Link offset within the node: Location (6) + MaxLevel (1) + Vector (dim * 4)
        int linkBaseOffset = nodeOffset + 7 + (dimensions * 4);

        int levelOffset;
        if (level == 0)
        {
            levelOffset = 0;
        }
        else
        {
            // Skip level 0 (2*M links), then M links for each level below this one.
            levelOffset = (2 * maxM * 6) + ((level - 1) * maxM * 6);
        }

        int count = (level == 0) ? (2 * maxM) : maxM;
        return page.Slice(linkBaseOffset + levelOffset, count * 6);
    }
}
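// Illustrative usage sketch (documentation only, not part of this type's API):
// one page round-trip for dimensions = 128 and maxM = 16. The 8192-byte page size
// and the `loc`/`embedding` values are placeholders assumed from the surrounding
// codebase, not defined in this file.
//
//     Span<byte> page = new byte[8192];
//     VectorPage.Initialize(page, pageId: 7, dimensions: 128, maxM: 16);
//
//     // nodeSize = 6 + 1 + (128 * 4) + (16 * (2 + 15) * 6) = 2151 bytes,
//     // so GetMaxNodes yields (8192 - 48) / 2151 = 3 nodes per page.
//     VectorPage.WriteNode(page, nodeIndex: 0, loc, maxLevel: 2, embedding, dimensions: 128);
//     VectorPage.IncrementNodeCount(page);
//
//     // Level-0 neighbours live in a raw 2 * maxM * 6 = 192-byte region:
//     Span<byte> level0Links = VectorPage.GetLinksSpan(page, nodeIndex: 0, level: 0, dimensions: 128, maxM: 16);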