Add enterprise docs structure and include pending core maintenance updates.
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -49,6 +49,7 @@ yarn-error.log*
|
|||||||
generated/
|
generated/
|
||||||
generated-data/
|
generated-data/
|
||||||
data/generated/
|
data/generated/
|
||||||
|
.tmp*/
|
||||||
output/
|
output/
|
||||||
out/
|
out/
|
||||||
reports/
|
reports/
|
||||||
|
|||||||
@@ -173,8 +173,3 @@ dotnet run -c Release --project tests/CBDD.Tests.Benchmark
|
|||||||
| 'DocumentDb Single Insert' | 355.8 μs | 19.42 μs | 56.65 μs | 0.12 | 128.89 KB |
|
| 'DocumentDb Single Insert' | 355.8 μs | 19.42 μs | 56.65 μs | 0.12 | 128.89 KB |
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
CBDD is licensed under the MIT License. See [LICENSE](LICENSE) for details.
|
|
||||||
|
|||||||
430
README.md
430
README.md
@@ -1,314 +1,132 @@
|
|||||||
# ⚡ CBDD
|
# CBDD
|
||||||
### High-Performance BSON Database Engine for .NET 10
|
|
||||||
|
|
||||||

|
|
||||||

|
|
||||||

|
|
||||||

|
|
||||||
|
|
||||||
**CBDD** is an embedded, ACID-compliant, document-oriented database built from scratch for **maximum performance** and **zero allocation**. It leverages modern .NET features like `Span<T>`, `Memory<T>`, and Source Generators to eliminate runtime overhead.
|
|
||||||
|
|
||||||
> **Note**: Currently targets **.NET 10** to maximize performance with `Span<T>` and modern hardware intrinsics. Future support for .NET Standard 2.1 (`netstandard2.1`) is being evaluated.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 🚀 Why CBDD?
|
|
||||||
|
|
||||||
Most embedded databases for .NET are either wrappers around C libraries (SQLite, RocksDB) or legacy C# codebases burdened by heavy GC pressure.
|
|
||||||
|
|
||||||
**CBDD is different:**
|
|
||||||
- **Zero Allocation**: I/O and interaction paths use `Span<byte>` and `stackalloc`. No heap allocations for reads/writes.
|
|
||||||
- **Type-Safe**: No reflection. All serialization code is generated at compile-time.
|
|
||||||
- **Developer Experience**: Full LINQ provider (`IQueryable`) that feels like Entity Framework but runs on bare metal.
|
|
||||||
- **Reliable**: Full ACID transactions with Write-Ahead Logging (WAL) and Snapshot Isolation.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## ✨ Key Features
|
|
||||||
|
|
||||||
### 🚄 Zero-Allocation Architecture
|
|
||||||
- **Span-based I/O**: The entire pipeline, from disk to user objects, utilizes `Span<T>` to avoid copying memory.
|
|
||||||
- **Memory-Mapped Files**: OS-level paging and caching for blazing fast access.
|
|
||||||
|
|
||||||
### 🧠 Powerful Query Engine (LINQ)
|
|
||||||
Write queries naturally using LINQ. The engine automatically translates them to optimized B-Tree lookups.
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
// Automatic Index Usage
|
|
||||||
var users = collection.AsQueryable()
|
|
||||||
.Where(x => x.Age > 25 && x.Name.StartsWith("A"))
|
|
||||||
.OrderBy(x => x.Age)
|
|
||||||
.Take(10)
|
|
||||||
.AsEnumerable(); // Executed efficiently on the engine
|
|
||||||
```
|
|
||||||
|
|
||||||
- **Optimized**: Uses B-Tree indexes for `=`, `>`, `<`, `Between`, and `StartsWith`.
|
|
||||||
- **Hybrid Execution**: Combines storage-level optimization with in-memory LINQ to Objects.
|
|
||||||
- **Advanced Features**: Full support for `GroupBy`, `Join`, `Select` (including anonymous types), and Aggregations (`Count`, `Sum`, `Min`, `Max`, `Average`).
|
|
||||||
|
|
||||||
### 🔍 Advanced Indexing
|
|
||||||
- **B-Tree Indexes**: Logarithmic time complexity for lookups.
|
|
||||||
- **Composite Indexes**: Support for multi-column keys.
|
|
||||||
- **Vector Search (HNSW)**: Fast similarity search for AI embeddings using Hierarchical Navigable Small World algorithm.
|
|
||||||
|
|
||||||
### 🤖 AI-Ready Vector Search
|
|
||||||
CBDD natively supports vector embeddings and fast similarity search.
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
// 1. Configure vector index on float[] property
|
|
||||||
modelBuilder.Entity<VectorItem>()
|
|
||||||
.HasVectorIndex(x => x.Embedding, dimensions: 1536, metric: VectorMetric.Cosine);
|
|
||||||
|
|
||||||
// 2. Perform fast similarity search
|
|
||||||
var results = db.Items.AsQueryable()
|
|
||||||
.VectorSearch(x => x.Embedding, queryVector, k: 5)
|
|
||||||
.ToList();
|
|
||||||
```
|
|
||||||
|
|
||||||
### 🌍 High-Performance Geospatial Indexing
|
|
||||||
CBDD features a built-in R-Tree implementation for lightning-fast proximity and bounding box searches.
|
|
||||||
|
|
||||||
- **Zero-Allocation**: Uses coordinate tuples `(double, double)` and `Span`-based BSON arrays.
|
|
||||||
- **LINQ Integrated**: Search naturally using `.Near()` and `.Within()`.
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
// 1. Configure spatial index (uses R-Tree internally)
|
|
||||||
modelBuilder.Entity<Store>()
|
|
||||||
.HasSpatialIndex(x => x.Location);
|
|
||||||
|
|
||||||
// 2. Proximity Search (Find stores within 5km)
|
|
||||||
var stores = db.Stores.AsQueryable()
|
|
||||||
.Where(s => s.Location.Near((45.4642, 9.1899), 5.0))
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
// 3. Bounding Box Search
|
|
||||||
var area = db.Stores.AsQueryable()
|
|
||||||
.Where(s => s.Location.Within((45.0, 9.0), (46.0, 10.0)))
|
|
||||||
.ToList();
|
|
||||||
```
|
|
||||||
|
|
||||||
### 🆔 Custom ID Converters (ValueObjects)
|
|
||||||
Native support for custom primary key types using `ValueConverter<TModel, TProvider>`. Configure them easily via the Fluent API.
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
// 1. Define your ValueObject and Converter
|
|
||||||
public record OrderId(string Value);
|
|
||||||
public class OrderIdConverter : ValueConverter<OrderId, string> { ... }
|
|
||||||
|
|
||||||
// 2. Configure in OnModelCreating
|
|
||||||
modelBuilder.Entity<Order>()
|
|
||||||
.Property(x => x.Id)
|
|
||||||
.HasConversion<OrderIdConverter>();
|
|
||||||
|
|
||||||
// 3. Use it naturally
|
|
||||||
var order = collection.FindById(new OrderId("ORD-123"));
|
|
||||||
```
|
|
||||||
|
|
||||||
### 📡 Change Data Capture (CDC)
|
|
||||||
Real-time event streaming for database changes with transactional consistency.
|
|
||||||
|
|
||||||
- **Zero-Allocation**: Events are only captured when watchers exist; no overhead when disabled.
|
|
||||||
- **Transactional**: Events fire only after successful commit, never on rollback.
|
|
||||||
- **Scalable**: Uses Channel-per-subscriber architecture to support thousands of concurrent listeners.
|
|
||||||
|
|
||||||
```csharp
|
|
||||||
// Watch for changes in a collection
|
|
||||||
using var subscription = db.People.Watch(capturePayload: true)
|
|
||||||
.Subscribe(e =>
|
|
||||||
{
|
|
||||||
Console.WriteLine($"{e.Type}: {e.DocumentId}");
|
|
||||||
if (e.Entity != null)
|
|
||||||
Console.WriteLine($" Name: {e.Entity.Name}");
|
|
||||||
});
|
|
||||||
|
|
||||||
// Perform operations - events fire after commit
|
|
||||||
db.People.Insert(new Person { Id = 1, Name = "Alice" });
|
|
||||||
```
|
|
||||||
|
|
||||||
### 🛡️ Transactions & ACID
|
|
||||||
- **Atomic**: Multi-document transactions.
|
|
||||||
- **Durable**: WAL ensures data safety even in the event of power loss.
|
|
||||||
- **Isolated**: Snapshot isolation allowing concurrent readers and writers.
|
|
||||||
- **Thread-Safe**: Protected with `SemaphoreSlim` to prevent race conditions in concurrent scenarios.
|
|
||||||
- **Async-First**: Full async/await support with proper `CancellationToken` handling.
|
|
||||||
- **Implicit Transactions**: Use `SaveChanges()` / `SaveChangesAsync()` for automatic transaction management (like EF Core).
|
|
||||||
|
|
||||||
### 🔌 Intelligent Source Generation
|
|
||||||
- **Zero Reflection**: Mappers are generated at compile-time for zero overhead.
|
|
||||||
- **Nested Objects & Collections**: Full support for complex graphs, deep nesting, and ref struct handling.
|
|
||||||
- **Robust Serialization**: Correctly handles nested objects, collections, and complex type hierarchies.
|
|
||||||
- **Lowercase Policy**: BSON keys are automatically persisted as `lowercase` for consistency.
|
|
||||||
- **Custom Overrides**: Use `[BsonProperty]` or `[JsonPropertyName]` for manual field naming.
|
|
||||||
|
|
||||||
#### ✅ Supported Scenarios
|
|
||||||
|
|
||||||
The source generator handles a wide range of modern C# patterns:
|
|
||||||
|
|
||||||
| Feature | Support | Description |
|
|
||||||
| :--- | :---: | :--- |
|
|
||||||
| **Property Inheritance** | ✅ | Properties from base classes are automatically included in serialization |
|
|
||||||
| **Private Setters** | ✅ | Properties with `private set` are correctly deserialized using Expression Trees |
|
|
||||||
| **Init-Only Setters** | ✅ | Properties with `init` are supported via runtime compilation |
|
|
||||||
| **Private Constructors** | ✅ | Deserialization works even without parameterless public constructor |
|
|
||||||
| **Advanced Collections** | ✅ | `IEnumerable<T>`, `ICollection<T>`, `IList<T>`, `HashSet<T>`, and more |
|
|
||||||
| **Nullable Value Types** | ✅ | `ObjectId?`, `int?`, `DateTime?` are correctly serialized/deserialized |
|
|
||||||
| **Nullable Collections** | ✅ | `List<T>?`, `string?` with proper null handling |
|
|
||||||
| **Unlimited Nesting** | ✅ | Deeply nested object graphs with circular reference protection |
|
|
||||||
| **Self-Referencing** | ✅ | Entities can reference themselves (e.g., `Manager` property in `Employee`) |
|
|
||||||
| **N-N Relationships** | ✅ | Collections of ObjectIds for efficient document referencing |
|
|
||||||
|
|
||||||
#### ❌ Limitations & Design Choices
|
|
||||||
|
|
||||||
| Scenario | Status | Reason |
|
|
||||||
| :--- | :---: | :--- |
|
|
||||||
| **Computed Properties** | ⚠️ Excluded | Getter-only properties without backing fields are intentionally skipped (e.g., `FullName => $"{First} {Last}"`) |
|
|
||||||
| **Constructor Logic** | ⚠️ Bypassed | Deserialization uses `RuntimeHelpers.GetUninitializedObject()` to avoid constructor execution |
|
|
||||||
| **Constructor Validation** | ⚠️ Not Executed | Validation logic in constructors won't run during deserialization - use Data Annotations instead |
|
|
||||||
|
|
||||||
> **💡 Best Practice**: For relationships between entities, prefer **referencing** (storing ObjectIds) over **embedding** (full nested objects) to avoid data duplication and maintain consistency. See tests in `CircularReferenceTests.cs` for implementation patterns.
|
|
||||||
|
|
||||||
### 🏷️ Supported Attributes
|
|
||||||
CBDD supports standard .NET Data Annotations for mapping and validation:
|
|
||||||
|
|
||||||
| Attribute | Category | Description |
|
|
||||||
| :--- | :--- | :--- |
|
|
||||||
| `[Table("name")]` | Mapping | Sets the collection name. Supports `Schema="s"` for `s.name` grouping. |
|
|
||||||
| `[Column("name")]` | Mapping | Maps property to a specific BSON field name. |
|
|
||||||
| `[Column(TypeName="...")]`| Mapping | Handles special types (e.g., `geopoint` for coordinate tuples). |
|
|
||||||
| `[Key]` | Identity | Explicitly marks the primary key (maps to `_id`). |
|
|
||||||
| `[NotMapped]` | Mapping | Excludes property from BSON serialization. |
|
|
||||||
| `[Required]` | Validation | Ensures string is not null/empty or nullable type is not null. |
|
|
||||||
| `[StringLength(max)]` | Validation | Validates string length (supports `MinimumLength`). |
|
|
||||||
| `[MaxLength(n)]` | Validation | Validates maximum string length. |
|
|
||||||
| `[MinLength(n)]` | Validation | Validates minimum string length. |
|
|
||||||
| `[Range(min, max)]` | Validation | Validates numeric values stay within the specified range. |
|
|
||||||
|
|
||||||
> [!IMPORTANT]
|
|
||||||
> Validation attributes (`[Required]`, `[Range]`, etc.) throw a `System.ComponentModel.DataAnnotations.ValidationException` during serialization if rules are violated.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 📚 Documentation
|
|
||||||
|
|
||||||
For in-depth technical details, see the complete specification documents:
|
CBDD is an embedded, document-oriented database engine for .NET 10. It targets internal platform teams that need predictable ACID behavior, low-latency local persistence, and typed access patterns without running an external database server.
|
||||||
|
|
||||||
- **[RFC.md](RFC.md)** - Full architectural specification covering storage engine, indexing, transactions, WAL protocol, and query processing
|
|
||||||
- **[C-BSON.md](C-BSON.md)** - Detailed wire format specification for CBDD's Compressed BSON format, including hex dumps and performance analysis
|
|
||||||
|
|
||||||
## ✅ Fitness Check
|
## Purpose And Business Context
|
||||||
|
|
||||||
Run the repository fitness suite locally:
|
CBDD provides a local data layer for services and tools that need transactional durability, deterministic startup, and high-throughput reads/writes. The primary business outcome is reducing operational overhead for workloads that do not require a networked database cluster.
|
||||||
|
|
||||||
|
## Ownership And Support
|
||||||
|
|
||||||
|
- Owning team: CBDD maintainers (repository owner: `@dohertj2`)
|
||||||
|
- Primary support path: open a Gitea issue in this repository with labels `incident` or `bug`
|
||||||
|
- Escalation path: follow [`docs/runbook.md`](docs/runbook.md) and page the release maintainer listed in the active release PR
|
||||||
|
|
||||||
|
## Architecture Overview
|
||||||
|
|
||||||
|
CBDD has four primary layers:
|
||||||
|
|
||||||
|
1. Storage and transaction engine (`src/CBDD.Core/Storage`, `src/CBDD.Core/Transactions`)
|
||||||
|
2. BSON serialization (`src/CBDD.Bson`)
|
||||||
|
3. Indexing and query execution (`src/CBDD.Core/Indexing`, `src/CBDD.Core/Query`)
|
||||||
|
4. Source-generated mapping (`src/CBDD.SourceGenerators`)
|
||||||
|
|
||||||
|
Detailed architecture material:
|
||||||
|
- [`docs/architecture.md`](docs/architecture.md)
|
||||||
|
- [`RFC.md`](RFC.md)
|
||||||
|
- [`C-BSON.md`](C-BSON.md)
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- .NET SDK 10.x
|
||||||
|
- Bash (for repository scripts)
|
||||||
|
- Read/write permissions for the local working directory
|
||||||
|
- Gitea access for pull requests and release publishing
|
||||||
|
|
||||||
|
## Setup And Local Run
|
||||||
|
|
||||||
|
1. Clone the repository.
|
||||||
|
```bash
|
||||||
|
git clone https://gitea.dohertylan.com/dohertj2/CBDD.git
|
||||||
|
cd CBDD
|
||||||
|
```
|
||||||
|
Expected outcome: local repository checkout with `CBDD.slnx` present.
|
||||||
|
|
||||||
|
2. Restore dependencies.
|
||||||
|
```bash
|
||||||
|
dotnet restore
|
||||||
|
```
|
||||||
|
Expected outcome: restore completes without package errors.
|
||||||
|
|
||||||
|
3. Build the solution.
|
||||||
|
```bash
|
||||||
|
dotnet build CBDD.slnx -c Release
|
||||||
|
```
|
||||||
|
Expected outcome: solution builds without compiler errors.
|
||||||
|
|
||||||
|
4. Run tests.
|
||||||
|
```bash
|
||||||
|
dotnet test CBDD.slnx -c Release
|
||||||
|
```
|
||||||
|
Expected outcome: all tests pass.
|
||||||
|
|
||||||
|
5. Run the full repository fitness check.
|
||||||
|
```bash
|
||||||
|
bash scripts/fitness-check.sh
|
||||||
|
```
|
||||||
|
Expected outcome: format, build, tests, coverage threshold, and package checks complete.
|
||||||
|
|
||||||
|
## Configuration And Secrets
|
||||||
|
|
||||||
|
- Default local usage requires only a filesystem path for the database file.
|
||||||
|
- Do not commit secrets in source, test fixtures, benchmark assets, or `.env` files.
|
||||||
|
- If publishing packages, keep feed credentials in CI secrets or local keychain-backed credential storage.
|
||||||
|
- Store environment-specific values outside the repository and inject them at runtime.
|
||||||
|
|
||||||
|
## Build, Test, And Quality Gates
|
||||||
|
|
||||||
|
Required pre-merge commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dotnet format --verify-no-changes
|
||||||
|
dotnet build -t:Rebuild
|
||||||
|
dotnet test
|
||||||
|
bash scripts/coverage-check.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Optional full gate:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bash scripts/fitness-check.sh
|
bash scripts/fitness-check.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
It verifies formatting, build/test health, and package risk checks.
|
## Deployment And Rollback
|
||||||
|
|
||||||
---
|
CBDD is released as an internal package.
|
||||||
|
|
||||||
## 📦 Quick Start
|
- Deployment workflow: [`docs/deployment.md`](docs/deployment.md)
|
||||||
|
- Rollback workflow: [`docs/deployment.md#rollback-procedure`](docs/deployment.md#rollback-procedure)
|
||||||
### 1. Installation
|
|
||||||
```
|
## Operations And Incident Response
|
||||||
dotnet add package ZB.MOM.WW.CBDD
|
|
||||||
```
|
Operational procedures, diagnostics, and escalation are documented in:
|
||||||
|
|
||||||
### 2. Basic Usage
|
- [`docs/runbook.md`](docs/runbook.md)
|
||||||
|
- [`docs/troubleshooting.md`](docs/troubleshooting.md)
|
||||||
```csharp
|
|
||||||
// 1. Define your Entities
|
## Security And Compliance Posture
|
||||||
public class User
|
|
||||||
{
|
- CBDD relies on host and process-level access controls.
|
||||||
public ObjectId Id { get; set; }
|
- Sensitive payload classification and handling requirements are defined in [`docs/security.md`](docs/security.md).
|
||||||
public string Name { get; set; }
|
- Role and approval requirements are defined in [`docs/access.md`](docs/access.md).
|
||||||
}
|
|
||||||
|
## Troubleshooting
|
||||||
// 2. Define your DbContext (Source Generator will produce InitializeCollections)
|
|
||||||
public partial class MyDbContext : DocumentDbContext
|
Common issues and remediation:
|
||||||
{
|
|
||||||
public DocumentCollection<ObjectId, User> Users { get; set; } = null!;
|
- Build/test environment failures: [`docs/troubleshooting.md#build-and-test-failures`](docs/troubleshooting.md#build-and-test-failures)
|
||||||
|
- Data-file recovery procedures: [`docs/troubleshooting.md#data-file-and-recovery-issues`](docs/troubleshooting.md#data-file-and-recovery-issues)
|
||||||
public MyDbContext(string path) : base(path)
|
- Query/index behavior verification: [`docs/troubleshooting.md#query-and-index-issues`](docs/troubleshooting.md#query-and-index-issues)
|
||||||
{
|
|
||||||
InitializeCollections();
|
## Change Governance
|
||||||
}
|
|
||||||
}
|
- Use feature branches from `main`.
|
||||||
|
- Open pull requests with at least one reviewer approval before merge.
|
||||||
// 3. Use with Implicit Transactions (Recommended)
|
- Keep release notes in pull request descriptions and tagged release notes.
|
||||||
using var db = new MyDbContext("mydb.db");
|
- Run documented quality gates before requesting review.
|
||||||
|
|
||||||
// Operations are tracked automatically
|
## Documentation Index
|
||||||
db.Users.Insert(new User { Name = "Alice" });
|
|
||||||
db.Users.Insert(new User { Name = "Bob" });
|
- Documentation home: [`docs/README.md`](docs/README.md)
|
||||||
|
- Major feature inventory: [`docs/features/README.md`](docs/features/README.md)
|
||||||
// Commit all changes at once
|
- Architecture decisions: [`docs/adr/0001-storage-engine-and-source-generation.md`](docs/adr/0001-storage-engine-and-source-generation.md)
|
||||||
db.SaveChanges();
|
|
||||||
|
|
||||||
// 4. Query naturally with LINQ
|
|
||||||
var results = db.Users.AsQueryable()
|
|
||||||
.Where(u => u.Name.StartsWith("A"))
|
|
||||||
.AsEnumerable();
|
|
||||||
|
|
||||||
// 5. Or use explicit transactions for fine-grained control
|
|
||||||
using (var txn = db.BeginTransaction())
|
|
||||||
{
|
|
||||||
db.Users.Insert(new User { Name = "Charlie" });
|
|
||||||
txn.Commit(); // Explicit commit
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 🗺️ Roadmap & Status
|
|
||||||
|
|
||||||
We are actively building the core. Here is where we stand:
|
|
||||||
|
|
||||||
- ✅ **Core Storage**: Paged I/O, WAL, Transactions with thread-safe concurrent access.
|
|
||||||
- ✅ **BSON Engine**: Zero-copy Reader/Writer with lowercase policy.
|
|
||||||
- ✅ **Indexing**: B-Tree implementation.
|
|
||||||
- ✅ **Vector Search**: HNSW implementation for Similarity Search.
|
|
||||||
- ✅ **Geospatial Indexing**: Optimized R-Tree with zero-allocation tuple API.
|
|
||||||
- ✅ **Query Engine**: Hybrid execution (Index/Scan + LINQ to Objects).
|
|
||||||
- ✅ **Advanced LINQ**: GroupBy, Joins, Aggregations, Complex Projections.
|
|
||||||
- ✅ **Async I/O**: Full `async`/`await` support with proper `CancellationToken` handling.
|
|
||||||
- ✅ **Source Generators**: Auto-map POCO/DDD classes with robust nested objects, collections, and ref struct support.
|
|
||||||
|
|
||||||
## 🔮 Future Vision
|
|
||||||
|
|
||||||
### 1. Advanced Querying & Specialized Indices
|
|
||||||
- **Graph Traversals**:
|
|
||||||
- Specialized index for "links" (Document IDs) for $O(1)$ navigation without full scans.
|
|
||||||
|
|
||||||
### 2. CDC & Event Integration
|
|
||||||
- **BSON Change Stream**: "Log Miner" that decodes WAL entries and emits structured events.
|
|
||||||
- **Internal Dispatcher**: Keeps specialized indices updated automatically via CDC.
|
|
||||||
|
|
||||||
### 3. Performance & Optimization
|
|
||||||
- **Projection Engine**: Read only specific fields from disk (via BSON offsets) without full document deserialization.
|
|
||||||
- **Portability**: Evaluate `.netstandard2.1` support for broader compatibility (Unity, MAUI, etc.).
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 🤝 Contributing
|
|
||||||
|
|
||||||
We welcome contributions! This is a great project to learn about database internals, B-Trees, and high-performance .NET.
|
|
||||||
|
|
||||||
### How to Build
|
|
||||||
1. **Clone**: `git clone https://github.com/mrdevrobot/CBDD.git`
|
|
||||||
2. **Build**: `dotnet build`
|
|
||||||
3. **Test**: `dotnet test` (We have comprehensive tests for Storage, Indexing, and LINQ).
|
|
||||||
4. **Coverage Gate**: `bash scripts/coverage-check.sh`
|
|
||||||
|
|
||||||
### Areas to Contribute
|
|
||||||
- **Missing LINQ Operators**: Help us implement additional `IQueryable` functions.
|
|
||||||
- **Benchmarks**: Help us prove `CBDD` is faster than the competition.
|
|
||||||
- **Documentation**: Examples, Guides, and Wiki.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 📝 License
|
|
||||||
|
|
||||||
Licensed under the MIT License. Use it freely in personal and commercial projects.
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
25
docs/README.md
Normal file
25
docs/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# CBDD Documentation
|
||||||
|
|
||||||
|
This folder is the canonical source for internal operational and engineering documentation.
|
||||||
|
|
||||||
|
## Core Documents
|
||||||
|
|
||||||
|
- Architecture: [`architecture.md`](architecture.md)
|
||||||
|
- Deployment: [`deployment.md`](deployment.md)
|
||||||
|
- Operations runbook: [`runbook.md`](runbook.md)
|
||||||
|
- Security controls: [`security.md`](security.md)
|
||||||
|
- Access model: [`access.md`](access.md)
|
||||||
|
- Troubleshooting guide: [`troubleshooting.md`](troubleshooting.md)
|
||||||
|
|
||||||
|
## Major Features
|
||||||
|
|
||||||
|
- Feature inventory: [`features/README.md`](features/README.md)
|
||||||
|
|
||||||
|
## Architecture Decisions
|
||||||
|
|
||||||
|
- Initial ADR: [`adr/0001-storage-engine-and-source-generation.md`](adr/0001-storage-engine-and-source-generation.md)
|
||||||
|
|
||||||
|
## Reference Specifications
|
||||||
|
|
||||||
|
- Engine RFC: [`../RFC.md`](../RFC.md)
|
||||||
|
- C-BSON format spec: [`../C-BSON.md`](../C-BSON.md)
|
||||||
32
docs/access.md
Normal file
32
docs/access.md
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# Access And Permissions
|
||||||
|
|
||||||
|
## Roles
|
||||||
|
|
||||||
|
- Maintainer: merge authority, release authority, incident ownership.
|
||||||
|
- Reviewer: approves pull requests and validates architecture/security impact.
|
||||||
|
- Contributor: proposes changes through pull requests.
|
||||||
|
- Consumer: integrates published package versions in downstream applications.
|
||||||
|
|
||||||
|
## Least-Privilege Model
|
||||||
|
|
||||||
|
- Limit maintainer privileges to required release and incident responders.
|
||||||
|
- Use reviewer role for routine code review and documentation updates.
|
||||||
|
- Restrict package publishing credentials to release maintainers.
|
||||||
|
|
||||||
|
## Approval Workflow
|
||||||
|
|
||||||
|
1. Contributor opens pull request.
|
||||||
|
2. Reviewer validates tests, documentation, and risk impact.
|
||||||
|
3. Maintainer approves merge for high-risk or release-impacting changes.
|
||||||
|
4. Release maintainer publishes approved release artifacts.
|
||||||
|
|
||||||
|
## Periodic Access Review
|
||||||
|
|
||||||
|
1. Review maintainer and publisher access quarterly.
|
||||||
|
2. Remove inactive accounts and obsolete credentials.
|
||||||
|
3. Confirm access ownership in repository settings and package feed controls.
|
||||||
|
|
||||||
|
## Emergency Access
|
||||||
|
|
||||||
|
- Temporary elevated access requires a tracked incident issue.
|
||||||
|
- Remove temporary access immediately after incident closure.
|
||||||
32
docs/adr/0001-storage-engine-and-source-generation.md
Normal file
32
docs/adr/0001-storage-engine-and-source-generation.md
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# 0001 Storage Engine And Source Generation
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Accepted
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
CBDD targets embedded workloads where predictable latency and low operational overhead are priorities. Runtime reflection and remote database dependencies increase startup and runtime variance for this workload profile.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
1. Use an embedded storage engine with page-based persistence and WAL-backed transactions.
|
||||||
|
2. Use compile-time source generation for mapping instead of runtime reflection.
|
||||||
|
3. Keep query and indexing execution in-process for deterministic behavior.
|
||||||
|
|
||||||
|
## Consequences
|
||||||
|
|
||||||
|
Positive:
|
||||||
|
- Lower runtime allocation and startup overhead.
|
||||||
|
- Strong control over transaction and recovery behavior.
|
||||||
|
- Predictable deployment for local/offline workloads.
|
||||||
|
|
||||||
|
Trade-offs:
|
||||||
|
- Greater maintenance burden for custom storage/query engine internals.
|
||||||
|
- Source generator complexity requires dedicated regression coverage.
|
||||||
|
|
||||||
|
## Related Documents
|
||||||
|
|
||||||
|
- [`../architecture.md`](../architecture.md)
|
||||||
|
- [`../runbook.md`](../runbook.md)
|
||||||
|
- [`../../RFC.md`](../../RFC.md)
|
||||||
46
docs/architecture.md
Normal file
46
docs/architecture.md
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
# Architecture
|
||||||
|
|
||||||
|
## System Context
|
||||||
|
|
||||||
|
CBDD is an embedded database library consumed in-process by .NET applications. The host application owns process lifecycle, filesystem placement, and operational policy.
|
||||||
|
|
||||||
|
External dependencies:
|
||||||
|
- .NET runtime and SDK
|
||||||
|
- Local filesystem
|
||||||
|
- Optional CI and package registry for build/release
|
||||||
|
|
||||||
|
## Containers And Major Components
|
||||||
|
|
||||||
|
1. `CBDD.Core`
|
||||||
|
- Owns storage engine, transaction protocol, WAL, indexing, query planning, and CDC plumbing.
|
||||||
|
|
||||||
|
2. `CBDD.Bson`
|
||||||
|
- Owns BSON document model and span-based serialization/deserialization primitives.
|
||||||
|
|
||||||
|
3. `CBDD.SourceGenerators`
|
||||||
|
- Generates mapping code at compile time for entity serialization and collection initialization.
|
||||||
|
|
||||||
|
4. Consumer application
|
||||||
|
- Defines entities, `DocumentDbContext` subclasses, and operational behavior.
|
||||||
|
|
||||||
|
## Data Flow
|
||||||
|
|
||||||
|
1. Consumer invokes collection API through `DocumentDbContext`.
|
||||||
|
2. Mapper layer serializes entities to BSON via generated mappers.
|
||||||
|
3. Storage engine writes page data and WAL entries.
|
||||||
|
4. Index subsystem updates primary and secondary indexes.
|
||||||
|
5. Transaction commit persists durable state and emits CDC notifications where enabled.
|
||||||
|
6. Query path evaluates expression plans and uses indexes or scan fallback.
|
||||||
|
|
||||||
|
## Reliability Model
|
||||||
|
|
||||||
|
- Write-ahead logging enforces durability before logical commit completion.
|
||||||
|
- Snapshot isolation supports concurrent reads with transactional correctness.
|
||||||
|
- Recovery logic replays WAL on restart to restore committed state.
|
||||||
|
|
||||||
|
## Cross References
|
||||||
|
|
||||||
|
- Operational procedures: [`runbook.md`](runbook.md)
|
||||||
|
- Deployment and rollback: [`deployment.md`](deployment.md)
|
||||||
|
- Security controls: [`security.md`](security.md)
|
||||||
|
- Detailed protocol reference: [`../RFC.md`](../RFC.md)
|
||||||
60
docs/deployment.md
Normal file
60
docs/deployment.md
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# Deployment
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This workflow covers releasing CBDD as an internal package and promoting a validated version for downstream consumers.
|
||||||
|
|
||||||
|
## Promotion Path
|
||||||
|
|
||||||
|
1. Development validation on feature branch.
|
||||||
|
2. Merge to `main` after required quality gates.
|
||||||
|
3. Tag release and publish package artifact.
|
||||||
|
4. Consumer rollout to target services/tools.
|
||||||
|
|
||||||
|
## Pre-Deployment Checklist
|
||||||
|
|
||||||
|
1. Run repository fitness gate.
|
||||||
|
```bash
|
||||||
|
bash scripts/fitness-check.sh
|
||||||
|
```
|
||||||
|
2. Verify no pending incidents in current release window.
|
||||||
|
3. Confirm release notes include behavioral changes and migration notes.
|
||||||
|
4. Confirm package version bump strategy.
|
||||||
|
|
||||||
|
## Release Procedure
|
||||||
|
|
||||||
|
1. Build release artifacts.
|
||||||
|
```bash
|
||||||
|
dotnet build CBDD.slnx -c Release
|
||||||
|
dotnet test CBDD.slnx -c Release
|
||||||
|
```
|
||||||
|
2. Pack publishable projects.
|
||||||
|
```bash
|
||||||
|
dotnet pack src/CBDD/ZB.MOM.WW.CBDD.csproj -c Release -o nupkgs
|
||||||
|
```
|
||||||
|
3. Publish package to approved internal feed.
|
||||||
|
4. Create release tag and attach release notes.
|
||||||
|
5. Notify downstream teams of version and rollout guidance.
|
||||||
|
|
||||||
|
## Post-Deployment Validation
|
||||||
|
|
||||||
|
1. Install released package in a smoke-test consumer.
|
||||||
|
2. Validate insert, query, transaction commit, and recovery startup behavior.
|
||||||
|
3. Verify CDC, vector, and geospatial capabilities when used by consuming teams.
|
||||||
|
|
||||||
|
## Rollback Procedure
|
||||||
|
|
||||||
|
Trigger rollback when release validation fails or production consumers detect regression.
|
||||||
|
|
||||||
|
1. Stop further promotions of the failing version.
|
||||||
|
2. Revert to previous known-good package version in consumer manifests.
|
||||||
|
3. If package registry supports unlisting/yanking, unlist the bad version.
|
||||||
|
4. Open incident issue with impact, timeline, and containment actions.
|
||||||
|
5. Prepare and validate patch release before re-promotion.
|
||||||
|
|
||||||
|
## Emergency Change Path
|
||||||
|
|
||||||
|
1. Create hotfix branch from last good tag.
|
||||||
|
2. Apply minimal fix and run full quality gates.
|
||||||
|
3. Require maintainer approval.
|
||||||
|
4. Publish patched version and communicate mandatory upgrade guidance.
|
||||||
10
docs/features/README.md
Normal file
10
docs/features/README.md
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Feature Inventory
|
||||||
|
|
||||||
|
The following documents are the canonical reference for major CBDD capabilities:
|
||||||
|
|
||||||
|
- [Storage and transactions](storage-transactions.md)
|
||||||
|
- [Query and indexing](query-and-indexing.md)
|
||||||
|
- [Vector search](vector-search.md)
|
||||||
|
- [Geospatial search](geospatial-search.md)
|
||||||
|
- [Change data capture](change-data-capture.md)
|
||||||
|
- [Source-generated mapping](source-generated-mapping.md)
|
||||||
61
docs/features/change-data-capture.md
Normal file
61
docs/features/change-data-capture.md
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# Change Data Capture
|
||||||
|
|
||||||
|
## Purpose And Business Outcome
|
||||||
|
|
||||||
|
Expose transactional change events so consumers can react to committed inserts, updates, and deletes.
|
||||||
|
|
||||||
|
## Scope And Non-Goals
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
- Collection-level change stream subscriptions
|
||||||
|
- Event publication after successful commit
|
||||||
|
|
||||||
|
Non-goals:
|
||||||
|
- Cross-process event transport guarantees
|
||||||
|
- External message broker delivery semantics
|
||||||
|
|
||||||
|
## User And System Workflows
|
||||||
|
|
||||||
|
1. Consumer subscribes to a collection change stream.
|
||||||
|
2. Application performs data mutations.
|
||||||
|
3. On commit, CDC publishes events to active subscribers.
|
||||||
|
4. Subscriber handlers process entity and metadata payloads.
|
||||||
|
|
||||||
|
## Interfaces And APIs
|
||||||
|
|
||||||
|
- `Watch(...)` collection API
|
||||||
|
- `ChangeStreamObservable`
|
||||||
|
- `ChangeStreamDispatcher`
|
||||||
|
- `ChangeStreamEvent`
|
||||||
|
|
||||||
|
## Permissions And Data Handling
|
||||||
|
|
||||||
|
- CDC payloads can include document identifiers and entity data.
|
||||||
|
- Restrict subscription access to trusted application components.
|
||||||
|
|
||||||
|
## Dependencies And Failure Modes
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- Transaction commit lifecycle
|
||||||
|
- Subscriber channel health
|
||||||
|
|
||||||
|
Failure modes:
|
||||||
|
- Subscriber backpressure or handler exceptions
|
||||||
|
- Event handling assumptions that conflict with rollback behavior
|
||||||
|
|
||||||
|
## Monitoring, Alerts, And Troubleshooting
|
||||||
|
|
||||||
|
- Review CDC behavior in integration and scalability tests.
|
||||||
|
- Follow [`../runbook.md`](../runbook.md) for incident response.
|
||||||
|
- Follow [`../security.md`](../security.md) for event payload handling controls.
|
||||||
|
- Use [`../troubleshooting.md`](../troubleshooting.md) for diagnosis guidance.
|
||||||
|
|
||||||
|
## Rollout And Change Considerations
|
||||||
|
|
||||||
|
- Behavioral CDC changes require explicit release-note callouts.
|
||||||
|
- Maintain compatibility expectations for event payload shape.
|
||||||
|
|
||||||
|
## Validation Guidance
|
||||||
|
|
||||||
|
- Run `CdcTests` and `CdcScalabilityTests` before release.
|
||||||
|
- Validate commit-only emission behavior in regression tests.
|
||||||
60
docs/features/geospatial-search.md
Normal file
60
docs/features/geospatial-search.md
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# Geospatial Search
|
||||||
|
|
||||||
|
## Purpose And Business Outcome
|
||||||
|
|
||||||
|
Support location-aware queries such as nearest-neighbor and bounding-box lookups for geospatial workloads.
|
||||||
|
|
||||||
|
## Scope And Non-Goals
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
- Spatial index configuration
|
||||||
|
- Proximity and bounding-box query operations
|
||||||
|
|
||||||
|
Non-goals:
|
||||||
|
- Full GIS projection transformations
|
||||||
|
- External map tile services
|
||||||
|
|
||||||
|
## User And System Workflows
|
||||||
|
|
||||||
|
1. Consumer configures spatial index for coordinate fields.
|
||||||
|
2. Coordinates are serialized and stored with entity payloads.
|
||||||
|
3. Query path evaluates `Near` or `Within` predicates.
|
||||||
|
4. Engine returns matching entities.
|
||||||
|
|
||||||
|
## Interfaces And APIs
|
||||||
|
|
||||||
|
- Spatial index APIs in model configuration
|
||||||
|
- Query helpers under `GeoSpatialExtensions`
|
||||||
|
- Index implementation in `RTreeIndex`
|
||||||
|
|
||||||
|
## Permissions And Data Handling
|
||||||
|
|
||||||
|
- Geolocation data may be regulated or privacy-sensitive.
|
||||||
|
- Apply least-privilege access and retention limits.
|
||||||
|
|
||||||
|
## Dependencies And Failure Modes
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- Correct coordinate format and units
|
||||||
|
- Spatial index consistency
|
||||||
|
|
||||||
|
Failure modes:
|
||||||
|
- Invalid coordinate values
|
||||||
|
- Unexpected results from bounding definitions or radius units
|
||||||
|
|
||||||
|
## Monitoring, Alerts, And Troubleshooting
|
||||||
|
|
||||||
|
- Validate geospatial paths through dedicated stress and correctness tests.
|
||||||
|
- Use [`../runbook.md`](../runbook.md) for escalation.
|
||||||
|
- Follow [`../security.md`](../security.md) for geolocation data protection controls.
|
||||||
|
- Use [`../troubleshooting.md`](../troubleshooting.md#query-and-index-issues) for issue resolution.
|
||||||
|
|
||||||
|
## Rollout And Change Considerations
|
||||||
|
|
||||||
|
- Document coordinate and unit assumptions in release notes when behavior changes.
|
||||||
|
- Validate backward compatibility for persisted spatial index pages.
|
||||||
|
|
||||||
|
## Validation Guidance
|
||||||
|
|
||||||
|
- Run `GeospatialTests` and `GeospatialStressTests` before release.
|
||||||
|
- Include representative proximity/bounding queries in smoke checks.
|
||||||
62
docs/features/query-and-indexing.md
Normal file
62
docs/features/query-and-indexing.md
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# Query And Indexing
|
||||||
|
|
||||||
|
## Purpose And Business Outcome
|
||||||
|
|
||||||
|
Deliver predictable query correctness and performance using expression translation with index-aware execution.
|
||||||
|
|
||||||
|
## Scope And Non-Goals
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
- LINQ query translation
|
||||||
|
- Primary and secondary index usage
|
||||||
|
- Scan fallback where index optimization is not applicable
|
||||||
|
|
||||||
|
Non-goals:
|
||||||
|
- Full SQL compatibility
|
||||||
|
- Distributed query federation
|
||||||
|
|
||||||
|
## User And System Workflows
|
||||||
|
|
||||||
|
1. Consumer submits LINQ expression.
|
||||||
|
2. Query planner evaluates index opportunities.
|
||||||
|
3. Engine executes index-backed or scan path.
|
||||||
|
4. Results are materialized to consumer entities.
|
||||||
|
|
||||||
|
## Interfaces And APIs
|
||||||
|
|
||||||
|
- `AsQueryable()` and query provider components
|
||||||
|
- `CollectionIndexManager`
|
||||||
|
- Index implementations under `src/CBDD.Core/Indexing`
|
||||||
|
|
||||||
|
## Permissions And Data Handling
|
||||||
|
|
||||||
|
- Query access follows host application authorization policy.
|
||||||
|
- Indexed data inherits the same sensitivity classification as source payloads.
|
||||||
|
|
||||||
|
## Dependencies And Failure Modes
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- Valid index metadata and storage consistency
|
||||||
|
- Expression visitor correctness
|
||||||
|
|
||||||
|
Failure modes:
|
||||||
|
- Incorrect predicate translation
|
||||||
|
- Missing/ineffective indexes
|
||||||
|
- Performance regressions due to scan-heavy paths
|
||||||
|
|
||||||
|
## Monitoring, Alerts, And Troubleshooting
|
||||||
|
|
||||||
|
- Track query-related regressions in automated tests.
|
||||||
|
- Use [`../runbook.md`](../runbook.md) for incident handling.
|
||||||
|
- Follow [`../security.md`](../security.md) for sensitive data and access constraints.
|
||||||
|
- Use [`../troubleshooting.md`](../troubleshooting.md#query-and-index-issues) for remediation.
|
||||||
|
|
||||||
|
## Rollout And Change Considerations
|
||||||
|
|
||||||
|
- Query planner/index behavior changes require benchmark comparison and regression coverage.
|
||||||
|
- Document breaking semantics in release notes.
|
||||||
|
|
||||||
|
## Validation Guidance
|
||||||
|
|
||||||
|
- Run query, index, and optimizer test suites in `tests/CBDD.Tests`.
|
||||||
|
- Confirm coverage gate with `bash scripts/coverage-check.sh`.
|
||||||
61
docs/features/source-generated-mapping.md
Normal file
61
docs/features/source-generated-mapping.md
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# Source-Generated Mapping
|
||||||
|
|
||||||
|
## Purpose And Business Outcome
|
||||||
|
|
||||||
|
Generate compile-time mapping code to reduce runtime overhead and reflection risk for serialization paths.
|
||||||
|
|
||||||
|
## Scope And Non-Goals
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
- Entity metadata analysis
|
||||||
|
- Mapper source generation
|
||||||
|
- Collection initialization helpers
|
||||||
|
|
||||||
|
Non-goals:
|
||||||
|
- Runtime dynamic mapping for unknown schemas
|
||||||
|
- Support for C# patterns that fall outside the generator's design
|
||||||
|
|
||||||
|
## User And System Workflows
|
||||||
|
|
||||||
|
1. Consumer defines entities and context patterns.
|
||||||
|
2. Build invokes source generator.
|
||||||
|
3. Generated mapper code is compiled into target project.
|
||||||
|
4. Runtime serialization path uses generated code.
|
||||||
|
|
||||||
|
## Interfaces And APIs
|
||||||
|
|
||||||
|
- Source generator project under `src/CBDD.SourceGenerators`
|
||||||
|
- Attributes in BSON and data-annotation mapping surface
|
||||||
|
- Generated initialization methods for context collections
|
||||||
|
|
||||||
|
## Permissions And Data Handling
|
||||||
|
|
||||||
|
- Generated code can expose field-level mapping behavior.
|
||||||
|
- Repository write permissions should be limited to trusted contributors.
|
||||||
|
|
||||||
|
## Dependencies And Failure Modes
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- Roslyn source generator execution during build
|
||||||
|
- Entity schema conventions
|
||||||
|
|
||||||
|
Failure modes:
|
||||||
|
- Missing generation due to invalid entity declarations
|
||||||
|
- Serialization mismatch caused by attribute/model changes
|
||||||
|
|
||||||
|
## Monitoring, Alerts, And Troubleshooting
|
||||||
|
|
||||||
|
- Monitor build output for generator diagnostics.
|
||||||
|
- Use [`../runbook.md`](../runbook.md) for escalation.
|
||||||
|
- Follow [`../security.md`](../security.md) for review and control expectations.
|
||||||
|
- Use [`../troubleshooting.md`](../troubleshooting.md#source-generation-issues) for remediation steps.
|
||||||
|
|
||||||
|
## Rollout And Change Considerations
|
||||||
|
|
||||||
|
- Generator behavioral changes require broad regression tests across entities.
|
||||||
|
- Document any new constraints or unsupported patterns in release notes.
|
||||||
|
|
||||||
|
## Validation Guidance
|
||||||
|
|
||||||
|
- Run source generator and serialization tests in `tests/CBDD.Tests`.
|
||||||
|
- Validate mapper generation with clean `dotnet build` in CI.
|
||||||
63
docs/features/storage-transactions.md
Normal file
63
docs/features/storage-transactions.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# Storage And Transactions
|
||||||
|
|
||||||
|
## Purpose And Business Outcome
|
||||||
|
|
||||||
|
Provide durable, ACID-compliant local persistence for embedded workloads that need consistent commit and recovery semantics.
|
||||||
|
|
||||||
|
## Scope And Non-Goals
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
- Page-based storage
|
||||||
|
- Write-ahead logging
|
||||||
|
- Transaction lifecycle and commit/rollback semantics
|
||||||
|
|
||||||
|
Non-goals:
|
||||||
|
- Distributed transactions
|
||||||
|
- Multi-node replication
|
||||||
|
|
||||||
|
## User And System Workflows
|
||||||
|
|
||||||
|
1. Application writes through `DocumentDbContext`.
|
||||||
|
2. Engine records WAL entries.
|
||||||
|
3. Commit persists pages and marks transaction durable.
|
||||||
|
4. Recovery replays WAL to restore committed state after restart.
|
||||||
|
|
||||||
|
## Interfaces And APIs
|
||||||
|
|
||||||
|
- `DocumentDbContext`
|
||||||
|
- `Transaction` and `ITransaction`
|
||||||
|
- `WriteAheadLog`
|
||||||
|
- Storage engine modules under `src/CBDD.Core/Storage`
|
||||||
|
|
||||||
|
## Permissions And Data Handling
|
||||||
|
|
||||||
|
- Database files require host-managed filesystem access controls.
|
||||||
|
- Transaction data should be treated as sensitive if payloads contain regulated information.
|
||||||
|
|
||||||
|
## Dependencies And Failure Modes
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- Local filesystem I/O
|
||||||
|
- WAL and page file consistency
|
||||||
|
|
||||||
|
Failure modes:
|
||||||
|
- Interrupted writes
|
||||||
|
- Corrupted WAL entries
|
||||||
|
- Invalid page metadata after unsafe process termination
|
||||||
|
|
||||||
|
## Monitoring, Alerts, And Troubleshooting
|
||||||
|
|
||||||
|
- Use CI/test failures and incident issues as primary signals.
|
||||||
|
- Follow [`../runbook.md`](../runbook.md) for triage.
|
||||||
|
- Follow [`../security.md`](../security.md) for data handling and control requirements.
|
||||||
|
- Use [`../troubleshooting.md`](../troubleshooting.md#data-file-and-recovery-issues) for recovery issues.
|
||||||
|
|
||||||
|
## Rollout And Change Considerations
|
||||||
|
|
||||||
|
- Any storage format or WAL behavior change requires migration and rollback validation.
|
||||||
|
- Release notes must document backward compatibility impact.
|
||||||
|
|
||||||
|
## Validation Guidance
|
||||||
|
|
||||||
|
- Run transaction and recovery tests in `tests/CBDD.Tests`.
|
||||||
|
- Execute `dotnet test CBDD.slnx -c Release` before merge.
|
||||||
60
docs/features/vector-search.md
Normal file
60
docs/features/vector-search.md
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# Vector Search
|
||||||
|
|
||||||
|
## Purpose And Business Outcome
|
||||||
|
|
||||||
|
Enable similarity search for embedding-driven workloads directly in embedded storage.
|
||||||
|
|
||||||
|
## Scope And Non-Goals
|
||||||
|
|
||||||
|
Scope:
|
||||||
|
- Vector index configuration
|
||||||
|
- Approximate nearest-neighbor query execution
|
||||||
|
|
||||||
|
Non-goals:
|
||||||
|
- External model training
|
||||||
|
- Cross-database vector federation
|
||||||
|
|
||||||
|
## User And System Workflows
|
||||||
|
|
||||||
|
1. Consumer registers vector index for embedding field.
|
||||||
|
2. Documents persist embeddings in collection payloads.
|
||||||
|
3. Query issues vector search request with `k` nearest neighbors.
|
||||||
|
4. Engine returns ranked matches.
|
||||||
|
|
||||||
|
## Interfaces And APIs
|
||||||
|
|
||||||
|
- Vector index configuration via model builder
|
||||||
|
- Query extensions under `VectorSearchExtensions`
|
||||||
|
- Index implementation in `VectorSearchIndex`
|
||||||
|
|
||||||
|
## Permissions And Data Handling
|
||||||
|
|
||||||
|
- Embeddings may contain sensitive semantic information.
|
||||||
|
- Apply host-level access restrictions and retention controls.
|
||||||
|
|
||||||
|
## Dependencies And Failure Modes
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- Correct embedding dimensionality
|
||||||
|
- Index parameter tuning for workload
|
||||||
|
|
||||||
|
Failure modes:
|
||||||
|
- Dimension mismatch between data and query vectors
|
||||||
|
- Poor recall due to incorrect index configuration
|
||||||
|
|
||||||
|
## Monitoring, Alerts, And Troubleshooting
|
||||||
|
|
||||||
|
- Validate vector query quality during release smoke checks.
|
||||||
|
- Use [`../runbook.md`](../runbook.md) for incident handling.
|
||||||
|
- Follow [`../security.md`](../security.md) for embedding-data handling controls.
|
||||||
|
- Use [`../troubleshooting.md`](../troubleshooting.md#query-and-index-issues) for vector query remediation.
|
||||||
|
|
||||||
|
## Rollout And Change Considerations
|
||||||
|
|
||||||
|
- Treat vector index parameter changes as performance-sensitive releases.
|
||||||
|
- Document compatibility impact for existing persisted indexes.
|
||||||
|
|
||||||
|
## Validation Guidance
|
||||||
|
|
||||||
|
- Run vector search tests in `tests/CBDD.Tests/VectorSearchTests.cs`.
|
||||||
|
- Add benchmark runs for large-vector workloads before release.
|
||||||
56
docs/runbook.md
Normal file
56
docs/runbook.md
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# Runbook
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
This runbook provides standard operations, incident triage, escalation, and recovery procedures for CBDD maintainers.
|
||||||
|
|
||||||
|
## Signals And Entry Points
|
||||||
|
|
||||||
|
- CI failures on `main`
|
||||||
|
- Failing integration tests in consumer repositories
|
||||||
|
- Regression issues labeled `incident`
|
||||||
|
- Recovery or corruption reports from consumers
|
||||||
|
|
||||||
|
## Alert Triage Procedure
|
||||||
|
|
||||||
|
1. Capture incident context: version, environment, failing operation, and first failure timestamp.
|
||||||
|
2. Classify severity:
|
||||||
|
- `SEV-1`: data loss risk, persistent startup failure, or transaction correctness risk.
|
||||||
|
- `SEV-2`: feature-level regression without confirmed data loss.
|
||||||
|
- `SEV-3`: non-critical behavior or documentation defects.
|
||||||
|
3. Create or update the incident issue with owner and current mitigation status.
|
||||||
|
4. Reproduce with targeted tests in `tests/CBDD.Tests`.
|
||||||
|
|
||||||
|
## Diagnostics
|
||||||
|
|
||||||
|
1. Validate build and tests.
|
||||||
|
```bash
|
||||||
|
dotnet test CBDD.slnx -c Release
|
||||||
|
```
|
||||||
|
2. Run coverage threshold gate when behavior changed in core paths.
|
||||||
|
```bash
|
||||||
|
bash scripts/coverage-check.sh
|
||||||
|
```
|
||||||
|
3. For storage and recovery incidents, prioritize:
|
||||||
|
- `StorageEngine.Recovery`
|
||||||
|
- `WriteAheadLog`
|
||||||
|
- transaction protocol tests
|
||||||
|
|
||||||
|
## Escalation Path
|
||||||
|
|
||||||
|
1. Initial owner: maintainer on incident issue.
|
||||||
|
2. Escalate to release maintainer when severity is `SEV-1` or rollback is required.
|
||||||
|
3. Communicate status updates on each milestone: triage complete, mitigation active, fix merged, validation complete.
|
||||||
|
|
||||||
|
## Recovery Actions
|
||||||
|
|
||||||
|
1. Contain impact by pinning consumers to last known-good package version.
|
||||||
|
2. Apply rollback steps from [`deployment.md`](deployment.md#rollback-procedure).
|
||||||
|
3. Validate repaired build with targeted and full regression suites.
|
||||||
|
4. Publish fixed package and confirm consumer recovery.
|
||||||
|
|
||||||
|
## Post-Incident Expectations
|
||||||
|
|
||||||
|
1. Document root cause, blast radius, and timeline.
|
||||||
|
2. Add regression tests to prevent recurrence.
|
||||||
|
3. Record follow-up actions in issue tracker with owners and due dates.
|
||||||
34
docs/security.md
Normal file
34
docs/security.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Security
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
CBDD is an embedded data engine. Security controls are shared between the library and the host application that embeds it.
|
||||||
|
|
||||||
|
## Authentication And Authorization Model
|
||||||
|
|
||||||
|
- CBDD does not provide built-in user authentication.
|
||||||
|
- Authorization is enforced by the host process and filesystem permissions.
|
||||||
|
- Access to database files must be limited to trusted service identities.
|
||||||
|
|
||||||
|
## Data Classification And Handling
|
||||||
|
|
||||||
|
- Treat persisted database files as sensitive when they contain customer or regulated data.
|
||||||
|
- Do not store secrets in source, fixtures, or benchmark datasets.
|
||||||
|
- Apply environment-specific retention and backup controls outside this repository.
|
||||||
|
|
||||||
|
## Storage And Cryptography Controls
|
||||||
|
|
||||||
|
- CBDD enforces integrity through WAL and transactional semantics.
|
||||||
|
- Encryption at rest and key management are host responsibilities.
|
||||||
|
- If encryption is required, use filesystem or volume-level encryption managed by platform security controls.
|
||||||
|
|
||||||
|
## Secure Coding Expectations
|
||||||
|
|
||||||
|
1. Require code review for storage, WAL, indexing, query, and serialization changes.
|
||||||
|
2. Add targeted tests for all security-relevant behavior changes.
|
||||||
|
3. Run package vulnerability checks in the fitness pipeline.
|
||||||
|
|
||||||
|
## Incident Handling
|
||||||
|
|
||||||
|
- Follow [`runbook.md`](runbook.md) for incident triage and escalation.
|
||||||
|
- Label security-impacting issues with `security` and prioritize immediate containment.
|
||||||
62
docs/troubleshooting.md
Normal file
62
docs/troubleshooting.md
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
# Troubleshooting
|
||||||
|
|
||||||
|
## Build And Test Failures
|
||||||
|
|
||||||
|
### Symptom
|
||||||
|
`dotnet build` or `dotnet test` fails locally or in CI.
|
||||||
|
|
||||||
|
### Checks
|
||||||
|
1. Verify .NET 10 SDK is installed.
|
||||||
|
2. Run `dotnet restore`.
|
||||||
|
3. Run `dotnet format --verify-no-changes`.
|
||||||
|
4. Re-run tests with `dotnet test CBDD.slnx -c Release`.
|
||||||
|
|
||||||
|
### Resolution
|
||||||
|
- Fix reported compile/test failures before merge.
|
||||||
|
- For flaky tests, isolate affected test class and open an issue with reproduction details.
|
||||||
|
|
||||||
|
## Data File And Recovery Issues
|
||||||
|
|
||||||
|
### Symptom
|
||||||
|
Database startup fails or recovery path throws WAL/storage errors.
|
||||||
|
|
||||||
|
### Checks
|
||||||
|
1. Capture exact exception and stack trace.
|
||||||
|
2. Reproduce with storage/recovery-focused tests.
|
||||||
|
3. Validate rollback path from [`deployment.md`](deployment.md#rollback-procedure).
|
||||||
|
|
||||||
|
### Resolution
|
||||||
|
- Pin consumers to last known-good package.
|
||||||
|
- Apply fix and add regression coverage in recovery/transaction tests.
|
||||||
|
|
||||||
|
## Query And Index Issues
|
||||||
|
|
||||||
|
### Symptom
|
||||||
|
Unexpected query performance or incorrect query results.
|
||||||
|
|
||||||
|
### Checks
|
||||||
|
1. Verify relevant indexes are configured for query predicates.
|
||||||
|
2. Reproduce with test cases in `tests/CBDD.Tests` for query/index modules.
|
||||||
|
3. Validate behavior for scan fallback and expression translation.
|
||||||
|
|
||||||
|
### Resolution
|
||||||
|
- Add or adjust index definitions and query tests.
|
||||||
|
- Document any changed query semantics in release notes.
|
||||||
|
|
||||||
|
## Source Generation Issues
|
||||||
|
|
||||||
|
### Symptom
|
||||||
|
Generated mappers are missing, or serialization behavior differs from expectations.
|
||||||
|
|
||||||
|
### Checks
|
||||||
|
1. Verify entity attributes and accessibility are valid for source generation.
|
||||||
|
2. Build solution to regenerate mapper output.
|
||||||
|
3. Validate related tests in source generator test coverage.
|
||||||
|
|
||||||
|
### Resolution
|
||||||
|
- Update entity annotations or generator logic.
|
||||||
|
- Add focused regression tests for unsupported pattern handling.
|
||||||
|
|
||||||
|
## Escalation
|
||||||
|
|
||||||
|
If troubleshooting steps do not resolve the issue, follow incident escalation in [`runbook.md`](runbook.md).
|
||||||
@@ -124,14 +124,28 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
_indexManager.SetPrimaryRootPageId(_primaryIndex.RootPageId);
|
_indexManager.SetPrimaryRootPageId(_primaryIndex.RootPageId);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Register keys used by the mapper to ensure they are available for compression
|
// Register keys used by the mapper to ensure they are available for compression
|
||||||
_storage.RegisterKeys(_mapper.UsedKeys);
|
_storage.RegisterKeys(_mapper.UsedKeys);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void EnsureSchema()
|
private void RefreshPrimaryIndexRootFromMetadata()
|
||||||
{
|
{
|
||||||
var currentSchema = _mapper.GetSchema();
|
_indexManager.RefreshFromStorageMetadata();
|
||||||
var metadata = _indexManager.GetMetadata();
|
|
||||||
|
var primaryRootPageId = _indexManager.PrimaryRootPageId;
|
||||||
|
if (primaryRootPageId == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (primaryRootPageId != _primaryIndex.RootPageId)
|
||||||
|
{
|
||||||
|
_primaryIndex.SetRootPageId(primaryRootPageId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void EnsureSchema()
|
||||||
|
{
|
||||||
|
var currentSchema = _mapper.GetSchema();
|
||||||
|
var metadata = _indexManager.GetMetadata();
|
||||||
|
|
||||||
var persistedSchemas = _storage.GetSchemas(metadata.SchemaRootPageId);
|
var persistedSchemas = _storage.GetSchemas(metadata.SchemaRootPageId);
|
||||||
var latestPersisted = persistedSchemas.Count > 0 ? persistedSchemas[persistedSchemas.Count - 1] : null;
|
var latestPersisted = persistedSchemas.Count > 0 ? persistedSchemas[persistedSchemas.Count - 1] : null;
|
||||||
@@ -363,12 +377,13 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
/// Rebuilds an index by scanning all existing documents and re-inserting them.
|
/// Rebuilds an index by scanning all existing documents and re-inserting them.
|
||||||
/// Called automatically when creating a new index.
|
/// Called automatically when creating a new index.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private void RebuildIndex(CollectionSecondaryIndex<TId, T> index)
|
private void RebuildIndex(CollectionSecondaryIndex<TId, T> index)
|
||||||
{
|
{
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
// Iterate all documents in the collection via primary index
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
var minKey = new IndexKey(Array.Empty<byte>());
|
// Iterate all documents in the collection via primary index
|
||||||
var maxKey = new IndexKey(Enumerable.Repeat((byte)0xFF, 32).ToArray());
|
var minKey = new IndexKey(Array.Empty<byte>());
|
||||||
|
var maxKey = new IndexKey(Enumerable.Repeat((byte)0xFF, 32).ToArray());
|
||||||
|
|
||||||
foreach (var entry in _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, transaction.TransactionId))
|
foreach (var entry in _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, transaction.TransactionId))
|
||||||
{
|
{
|
||||||
@@ -967,6 +982,7 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
|
|
||||||
private void InsertDataCore(TId id, T entity, ReadOnlySpan<byte> docData)
|
private void InsertDataCore(TId id, T entity, ReadOnlySpan<byte> docData)
|
||||||
{
|
{
|
||||||
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
var (storedPayloadOverride, storedPayloadFlags) = PreparePayloadForStorage(docData);
|
var (storedPayloadOverride, storedPayloadFlags) = PreparePayloadForStorage(docData);
|
||||||
ReadOnlySpan<byte> storedPayload = storedPayloadOverride is null ? docData : storedPayloadOverride;
|
ReadOnlySpan<byte> storedPayload = storedPayloadOverride is null ? docData : storedPayloadOverride;
|
||||||
@@ -1005,11 +1021,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
/// <param name="id">ObjectId of the document</param>
|
/// <param name="id">ObjectId of the document</param>
|
||||||
/// <param name="transaction">Optional transaction for isolation (supports Read Your Own Writes)</param>
|
/// <param name="transaction">Optional transaction for isolation (supports Read Your Own Writes)</param>
|
||||||
/// <returns>The document, or null if not found</returns>
|
/// <returns>The document, or null if not found</returns>
|
||||||
public T? FindById(TId id)
|
public T? FindById(TId id)
|
||||||
{
|
{
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
try
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
{
|
try
|
||||||
|
{
|
||||||
var key = _mapper.ToIndexKey(id);
|
var key = _mapper.ToIndexKey(id);
|
||||||
|
|
||||||
if (!_primaryIndex.TryFind(key, out var location, transaction.TransactionId))
|
if (!_primaryIndex.TryFind(key, out var location, transaction.TransactionId))
|
||||||
@@ -1031,11 +1048,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="transaction">Transaction for isolation (REQUIRED for consistent reads during concurrent writes)</param>
|
/// <param name="transaction">Transaction for isolation (REQUIRED for consistent reads during concurrent writes)</param>
|
||||||
/// <returns>Enumerable of all documents</returns>
|
/// <returns>Enumerable of all documents</returns>
|
||||||
public IEnumerable<T> FindAll()
|
public IEnumerable<T> FindAll()
|
||||||
{
|
{
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
var txnId = transaction?.TransactionId ?? 0;
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
var minKey = new IndexKey(Array.Empty<byte>());
|
var txnId = transaction?.TransactionId ?? 0;
|
||||||
|
var minKey = new IndexKey(Array.Empty<byte>());
|
||||||
var maxKey = new IndexKey(Enumerable.Repeat((byte)0xFF, 32).ToArray());
|
var maxKey = new IndexKey(Enumerable.Repeat((byte)0xFF, 32).ToArray());
|
||||||
|
|
||||||
foreach (var entry in _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, txnId))
|
foreach (var entry in _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, txnId))
|
||||||
@@ -1202,11 +1220,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int UpdateBulkInternal(List<T> entityList)
|
private int UpdateBulkInternal(List<T> entityList)
|
||||||
{
|
{
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
int updateCount = 0;
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
const int BATCH_SIZE = 50;
|
int updateCount = 0;
|
||||||
|
const int BATCH_SIZE = 50;
|
||||||
|
|
||||||
for (int batchStart = 0; batchStart < entityList.Count; batchStart += BATCH_SIZE)
|
for (int batchStart = 0; batchStart < entityList.Count; batchStart += BATCH_SIZE)
|
||||||
{
|
{
|
||||||
@@ -1272,6 +1291,7 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
|
|
||||||
private bool UpdateDataCore(TId id, T entity, ReadOnlySpan<byte> docData)
|
private bool UpdateDataCore(TId id, T entity, ReadOnlySpan<byte> docData)
|
||||||
{
|
{
|
||||||
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
var key = _mapper.ToIndexKey(id);
|
var key = _mapper.ToIndexKey(id);
|
||||||
var (storedPayloadOverride, storedPayloadFlags) = PreparePayloadForStorage(docData);
|
var (storedPayloadOverride, storedPayloadFlags) = PreparePayloadForStorage(docData);
|
||||||
@@ -1438,11 +1458,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
return deleteCount;
|
return deleteCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
private bool DeleteCore(TId id, bool notifyCdc = true)
|
private bool DeleteCore(TId id, bool notifyCdc = true)
|
||||||
{
|
{
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
var key = _mapper.ToIndexKey(id);
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
if (!_primaryIndex.TryFind(key, out var location, transaction.TransactionId))
|
var key = _mapper.ToIndexKey(id);
|
||||||
|
if (!_primaryIndex.TryFind(key, out var location, transaction.TransactionId))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Notify secondary indexes BEFORE deleting document from storage
|
// Notify secondary indexes BEFORE deleting document from storage
|
||||||
@@ -1524,11 +1545,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="transaction">Optional transaction for isolation</param>
|
/// <param name="transaction">Optional transaction for isolation</param>
|
||||||
/// <returns>Number of documents</returns>
|
/// <returns>Number of documents</returns>
|
||||||
public int Count()
|
public int Count()
|
||||||
{
|
{
|
||||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
RefreshPrimaryIndexRootFromMetadata();
|
||||||
// Count all entries in primary index
|
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||||
// Use generic min/max keys for the index
|
// Count all entries in primary index
|
||||||
|
// Use generic min/max keys for the index
|
||||||
var minKey = IndexKey.MinKey;
|
var minKey = IndexKey.MinKey;
|
||||||
var maxKey = IndexKey.MaxKey;
|
var maxKey = IndexKey.MaxKey;
|
||||||
return _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, transaction.TransactionId).Count();
|
return _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, transaction.TransactionId).Count();
|
||||||
|
|||||||
@@ -82,6 +82,18 @@ public sealed class BTreeIndex
|
|||||||
/// Gets the current root page identifier for the B+Tree.
|
/// Gets the current root page identifier for the B+Tree.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public uint RootPageId => _rootPageId;
|
public uint RootPageId => _rootPageId;
|
||||||
|
|
||||||
|
/// <summary>
/// Replaces the root page identifier that this index holds in memory.
/// </summary>
/// <param name="rootPageId">The root page identifier to use for subsequent operations. Must be non-zero.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="rootPageId"/> is 0.</exception>
internal void SetRootPageId(uint rootPageId)
{
    // The zero check is evaluated before the assignment, so a rejected call leaves the current root untouched.
    _rootPageId = rootPageId != 0
        ? rootPageId
        : throw new ArgumentOutOfRangeException(nameof(rootPageId));
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Reads a page using StorageEngine for transaction isolation.
|
/// Reads a page using StorageEngine for transaction isolation.
|
||||||
|
|||||||
@@ -504,6 +504,37 @@ public sealed class CollectionIndexManager<TId, T> : IDisposable where T : class
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public uint PrimaryRootPageId => _metadata.PrimaryRootPageId;
|
public uint PrimaryRootPageId => _metadata.PrimaryRootPageId;
|
||||||
|
|
||||||
|
/// <summary>
/// Reloads the collection metadata from storage and, when it differs from the cached copy,
/// rebuilds every secondary index binding from the reloaded metadata.
/// </summary>
/// <remarks>
/// When storage returns no metadata for the collection, an empty metadata object carrying only the
/// collection name is used for the comparison. Nothing is changed when the cached and reloaded
/// metadata compare equal.
/// </remarks>
/// <exception cref="ObjectDisposedException">Thrown when the manager has already been disposed.</exception>
internal void RefreshFromStorageMetadata()
{
    lock (_lock)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(CollectionIndexManager<TId, T>));
        }

        var stored = _storage.GetCollectionMetadata(_collectionName);
        var current = stored ?? new CollectionMetadata { Name = _collectionName };

        if (!MetadataEquals(_metadata, current))
        {
            // Release the index instances bound to the outdated metadata; a failing Dispose must not
            // prevent the remaining indexes from being released or the rebuild from running.
            foreach (var stale in _indexes.Values)
            {
                try
                {
                    stale.Dispose();
                }
                catch
                {
                    /* Best effort */
                }
            }

            _indexes.Clear();
            _metadata = current;

            // Re-create one secondary index per metadata entry, bound to the root page recorded for it.
            foreach (var entry in _metadata.Indexes)
            {
                var rebuiltDefinition = RebuildDefinition(
                    entry.Name,
                    entry.PropertyPaths,
                    entry.IsUnique,
                    entry.Type,
                    entry.Dimensions,
                    entry.Metric);
                var rebound = new CollectionSecondaryIndex<TId, T>(rebuiltDefinition, _storage, _mapper, entry.RootPageId);
                _indexes[entry.Name] = rebound;
            }
        }
    }
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Sets the root page identifier for the primary index.
|
/// Sets the root page identifier for the primary index.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -526,11 +557,52 @@ public sealed class CollectionIndexManager<TId, T> : IDisposable where T : class
|
|||||||
/// <returns>The collection metadata.</returns>
|
/// <returns>The collection metadata.</returns>
|
||||||
public CollectionMetadata GetMetadata() => _metadata;
|
public CollectionMetadata GetMetadata() => _metadata;
|
||||||
|
|
||||||
private void SaveMetadata()
|
private void SaveMetadata()
|
||||||
{
|
{
|
||||||
UpdateMetadata();
|
UpdateMetadata();
|
||||||
_storage.SaveCollectionMetadata(_metadata);
|
_storage.SaveCollectionMetadata(_metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Compares two collection metadata objects field by field.
/// </summary>
/// <param name="left">The first metadata object.</param>
/// <param name="right">The second metadata object.</param>
/// <returns>
/// <c>true</c> when the collection names match (ignoring case), the primary and schema root pages match,
/// and both index lists contain, position by position, entries with the same name (ignoring case),
/// root page, type, uniqueness, dimensions, metric and property paths (ordinal, order-sensitive);
/// otherwise <c>false</c>.
/// </returns>
private static bool MetadataEquals(CollectionMetadata left, CollectionMetadata right)
{
    if (!string.Equals(left.Name, right.Name, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    if (left.PrimaryRootPageId != right.PrimaryRootPageId)
    {
        return false;
    }

    if (left.SchemaRootPageId != right.SchemaRootPageId)
    {
        return false;
    }

    var indexCount = left.Indexes.Count;
    if (indexCount != right.Indexes.Count)
    {
        return false;
    }

    // Index entries are compared pairwise by position, so a reordered list counts as a difference.
    for (var position = 0; position < indexCount; position++)
    {
        var mine = left.Indexes[position];
        var theirs = right.Indexes[position];

        if (!string.Equals(mine.Name, theirs.Name, StringComparison.OrdinalIgnoreCase) ||
            mine.RootPageId != theirs.RootPageId)
        {
            return false;
        }

        if (mine.Type != theirs.Type ||
            mine.IsUnique != theirs.IsUnique ||
            mine.Dimensions != theirs.Dimensions ||
            mine.Metric != theirs.Metric)
        {
            return false;
        }

        // A missing path array is treated the same as an empty one.
        var minePaths = mine.PropertyPaths ?? Array.Empty<string>();
        var theirPaths = theirs.PropertyPaths ?? Array.Empty<string>();
        if (minePaths.Length != theirPaths.Length)
        {
            return false;
        }

        for (var pathIndex = 0; pathIndex < minePaths.Length; pathIndex++)
        {
            if (!string.Equals(minePaths[pathIndex], theirPaths[pathIndex], StringComparison.Ordinal))
            {
                return false;
            }
        }
    }

    return true;
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Releases resources used by the index manager.
|
/// Releases resources used by the index manager.
|
||||||
|
|||||||
@@ -645,6 +645,39 @@ public sealed class PageFile : IDisposable
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Trims excess physical file capacity beyond the current logical page count.
/// </summary>
/// <remarks>
/// The target length is the next page id multiplied by the configured page size. When the file is
/// already at or below that length nothing is changed. Otherwise the memory mapping is released,
/// the file is truncated and flushed to disk, and a new mapping is created over the shortened file.
/// If truncating or flushing fails, a mapping over the file's actual length is re-created on a
/// best-effort basis before the original exception is rethrown, so a failed trim does not leave
/// the page file without a mapping.
/// </remarks>
/// <returns>The number of bytes removed from the file, or 0 when no trimming was needed.</returns>
public long TrimExcessCapacityToLogicalPageCount()
{
    lock (_lock)
    {
        EnsureFileOpen();

        var stream = _fileStream!;
        var targetLengthBytes = (long)_nextPageId * _config.PageSize;
        var currentLengthBytes = stream.Length;
        if (currentLengthBytes <= targetLengthBytes)
        {
            return 0;
        }

        // The mapping is released before the file is resized.
        // NOTE(review): presumably required because a mapped file cannot be truncated - confirm.
        _mappedFile?.Dispose();
        _mappedFile = null;

        try
        {
            stream.SetLength(targetLengthBytes);
            stream.Flush(flushToDisk: true);
        }
        catch
        {
            // The trim failed after the mapping was released. Re-map whatever length the file has
            // now (unchanged if SetLength failed, shortened if only Flush failed) so that the page
            // file is not left with a null mapping.
            try
            {
                _mappedFile = MemoryMappedFile.CreateFromFile(
                    stream,
                    null,
                    stream.Length,
                    _config.Access,
                    HandleInheritability.None,
                    leaveOpen: true);
            }
            catch
            {
                // Best effort: the original failure below is the one worth reporting.
            }

            throw;
        }

        _mappedFile = MemoryMappedFile.CreateFromFile(
            stream,
            null,
            targetLengthBytes,
            _config.Access,
            HandleInheritability.None,
            leaveOpen: true);

        return currentLengthBytes - targetLengthBytes;
    }
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Defragments a slotted page in place by packing live slot payloads densely at the end of the page.
|
/// Defragments a slotted page in place by packing live slot payloads densely at the end of the page.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user