Add enterprise docs structure and include pending core maintenance updates.
1	.gitignore (vendored)
@@ -49,6 +49,7 @@ yarn-error.log*
generated/
generated-data/
data/generated/
.tmp*/
output/
out/
reports/
@@ -173,8 +173,3 @@ dotnet run -c Release --project tests/CBDD.Tests.Benchmark
| 'DocumentDb Single Insert' | 355.8 μs | 19.42 μs | 56.65 μs | 0.12 | 128.89 KB |
```

---

## License

CBDD is licensed under the MIT License. See [LICENSE](LICENSE) for details.
430	README.md
@@ -1,314 +1,132 @@
# ⚡ CBDD
|
||||
### High-Performance BSON Database Engine for .NET 10
|
||||
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
**CBDD** is an embedded, ACID-compliant, document-oriented database built from scratch for **maximum performance** and **zero allocation**. It leverages modern .NET features like `Span<T>`, `Memory<T>`, and Source Generators to eliminate runtime overhead.
|
||||
|
||||
> **Note**: Currently targets **.NET 10** to maximize performance with `Span<T>` and modern hardware intrinsics. Future support for `netstandard2.1` is being evaluated.
|
||||
|
||||
---
|
||||
|
||||
## 🚀 Why CBDD?
|
||||
|
||||
Most embedded databases for .NET are either wrappers around C libraries (SQLite, RocksDB) or legacy C# codebases burdened by heavy GC pressure.
|
||||
|
||||
**CBDD is different:**
|
||||
- **Zero Allocation**: I/O and interaction paths use `Span<byte>` and `stackalloc`. No heap allocations for reads/writes.
|
||||
- **Type-Safe**: No reflection. All serialization code is generated at compile-time.
|
||||
- **Developer Experience**: Full LINQ provider (`IQueryable`) that feels like Entity Framework but runs on bare metal.
|
||||
- **Reliable**: Full ACID transactions with Write-Ahead Logging (WAL) and Snapshot Isolation.
|
||||
|
||||
---
|
||||
|
||||
## ✨ Key Features
|
||||
|
||||
### 🚄 Zero-Allocation Architecture
|
||||
- **Span-based I/O**: The entire pipeline, from disk to user objects, utilizes `Span<T>` to avoid copying memory.
|
||||
- **Memory-Mapped Files**: OS-level paging and caching for blazing fast access.
|
||||
|
||||
### 🧠 Powerful Query Engine (LINQ)
|
||||
Write queries naturally using LINQ. The engine automatically translates them to optimized B-Tree lookups.
|
||||
|
||||
```csharp
|
||||
// Automatic Index Usage
|
||||
var users = collection.AsQueryable()
|
||||
.Where(x => x.Age > 25 && x.Name.StartsWith("A"))
|
||||
.OrderBy(x => x.Age)
|
||||
.Take(10)
|
||||
.AsEnumerable(); // Executed efficiently on the engine
|
||||
```
|
||||
|
||||
- **Optimized**: Uses B-Tree indexes for `=`, `>`, `<`, `Between`, and `StartsWith`.
|
||||
- **Hybrid Execution**: Combines storage-level optimization with in-memory LINQ to Objects.
|
||||
- **Advanced Features**: Full support for `GroupBy`, `Join`, `Select` (including anonymous types), and Aggregations (`Count`, `Sum`, `Min`, `Max`, `Average`).
|
||||
|
||||
### 🔍 Advanced Indexing
|
||||
- **B-Tree Indexes**: Logarithmic time complexity for lookups.
|
||||
- **Composite Indexes**: Support for multi-column keys.
|
||||
- **Vector Search (HNSW)**: Fast similarity search for AI embeddings using Hierarchical Navigable Small World algorithm.
|
||||
|
||||
### 🤖 AI-Ready Vector Search
|
||||
CBDD natively supports vector embeddings and fast similarity search.
|
||||
|
||||
```csharp
|
||||
// 1. Configure vector index on float[] property
|
||||
modelBuilder.Entity<VectorItem>()
|
||||
.HasVectorIndex(x => x.Embedding, dimensions: 1536, metric: VectorMetric.Cosine);
|
||||
|
||||
// 2. Perform fast similarity search
|
||||
var results = db.Items.AsQueryable()
|
||||
.VectorSearch(x => x.Embedding, queryVector, k: 5)
|
||||
.ToList();
|
||||
```
|
||||
|
||||
### 🌍 High-Performance Geospatial Indexing
|
||||
CBDD features a built-in R-Tree implementation for lightning-fast proximity and bounding box searches.
|
||||
|
||||
- **Zero-Allocation**: Uses coordinate tuples `(double, double)` and `Span`-based BSON arrays.
|
||||
- **LINQ Integrated**: Search naturally using `.Near()` and `.Within()`.
|
||||
|
||||
```csharp
|
||||
// 1. Configure spatial index (uses R-Tree internally)
|
||||
modelBuilder.Entity<Store>()
|
||||
.HasSpatialIndex(x => x.Location);
|
||||
|
||||
// 2. Proximity Search (Find stores within 5km)
|
||||
var stores = db.Stores.AsQueryable()
|
||||
.Where(s => s.Location.Near((45.4642, 9.1899), 5.0))
|
||||
.ToList();
|
||||
|
||||
// 3. Bounding Box Search
|
||||
var area = db.Stores.AsQueryable()
|
||||
.Where(s => s.Location.Within((45.0, 9.0), (46.0, 10.0)))
|
||||
.ToList();
|
||||
```
|
||||
|
||||
### 🆔 Custom ID Converters (ValueObjects)
|
||||
Native support for custom primary key types using `ValueConverter<TModel, TProvider>`. Configure them easily via the Fluent API.
|
||||
|
||||
```csharp
|
||||
// 1. Define your ValueObject and Converter
|
||||
public record OrderId(string Value);
|
||||
public class OrderIdConverter : ValueConverter<OrderId, string> { ... }
|
||||
|
||||
// 2. Configure in OnModelCreating
|
||||
modelBuilder.Entity<Order>()
|
||||
.Property(x => x.Id)
|
||||
.HasConversion<OrderIdConverter>();
|
||||
|
||||
// 3. Use it naturally
|
||||
var order = collection.FindById(new OrderId("ORD-123"));
|
||||
```
|
||||
|
||||
### 📡 Change Data Capture (CDC)
|
||||
Real-time event streaming for database changes with transactional consistency.
|
||||
|
||||
- **Zero-Allocation**: Events are only captured when watchers exist; no overhead when disabled.
|
||||
- **Transactional**: Events fire only after successful commit, never on rollback.
|
||||
- **Scalable**: Uses Channel-per-subscriber architecture to support thousands of concurrent listeners.
|
||||
|
||||
```csharp
|
||||
// Watch for changes in a collection
|
||||
using var subscription = db.People.Watch(capturePayload: true)
|
||||
.Subscribe(e =>
|
||||
{
|
||||
Console.WriteLine($"{e.Type}: {e.DocumentId}");
|
||||
if (e.Entity != null)
|
||||
Console.WriteLine($" Name: {e.Entity.Name}");
|
||||
});
|
||||
|
||||
// Perform operations - events fire after commit
|
||||
db.People.Insert(new Person { Id = 1, Name = "Alice" });
|
||||
```
|
||||
|
||||
### 🛡️ Transactions & ACID
|
||||
- **Atomic**: Multi-document transactions.
|
||||
- **Durable**: WAL ensures data safety even in power loss.
|
||||
- **Isolated**: Snapshot isolation allowing concurrent readers and writers.
|
||||
- **Thread-Safe**: Protected with `SemaphoreSlim` to prevent race conditions in concurrent scenarios.
|
||||
- **Async-First**: Full async/await support with proper `CancellationToken` handling.
|
||||
- **Implicit Transactions**: Use `SaveChanges()` / `SaveChangesAsync()` for automatic transaction management (like EF Core); see the async sketch below.
|
||||
|
||||
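For async workloads, a minimal sketch (assuming `SaveChangesAsync` accepts a `CancellationToken`, and reusing the `MyDbContext`/`User` types from the Quick Start below):

```csharp
// Minimal sketch only: assumes SaveChangesAsync accepts a CancellationToken,
// mirroring the EF Core-style API described above.
using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
using var db = new MyDbContext("mydb.db");

db.Users.Insert(new User { Name = "Dana" });   // tracked by the implicit transaction
await db.SaveChangesAsync(cts.Token);          // committed atomically, cancellable
```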
### 🔌 Intelligent Source Generation
|
||||
- **Zero Reflection**: Mappers are generated at compile-time for zero overhead.
|
||||
- **Nested Objects & Collections**: Full support for complex graphs, deep nesting, and ref struct handling.
|
||||
- **Robust Serialization**: Correctly handles nested objects, collections, and complex type hierarchies.
|
||||
- **Lowercase Policy**: BSON keys are automatically persisted as `lowercase` for consistency.
|
||||
- **Custom Overrides**: Use `[BsonProperty]` or `[JsonPropertyName]` for manual field naming.
|
||||
|
||||
#### ✅ Supported Scenarios
|
||||
|
||||
The source generator handles a wide range of modern C# patterns:
|
||||
|
||||
| Feature | Support | Description |
|
||||
| :--- | :---: | :--- |
|
||||
| **Property Inheritance** | ✅ | Properties from base classes are automatically included in serialization |
|
||||
| **Private Setters** | ✅ | Properties with `private set` are correctly deserialized using Expression Trees |
|
||||
| **Init-Only Setters** | ✅ | Properties with `init` are supported via runtime compilation |
|
||||
| **Private Constructors** | ✅ | Deserialization works even without parameterless public constructor |
|
||||
| **Advanced Collections** | ✅ | `IEnumerable<T>`, `ICollection<T>`, `IList<T>`, `HashSet<T>`, and more |
|
||||
| **Nullable Value Types** | ✅ | `ObjectId?`, `int?`, `DateTime?` are correctly serialized/deserialized |
|
||||
| **Nullable Collections** | ✅ | `List<T>?`, `string?` with proper null handling |
|
||||
| **Unlimited Nesting** | ✅ | Deeply nested object graphs with circular reference protection |
|
||||
| **Self-Referencing** | ✅ | Entities can reference themselves (e.g., `Manager` property in `Employee`) |
|
||||
| **N-N Relationships** | ✅ | Collections of ObjectIds for efficient document referencing |
|
||||
|
||||
#### ❌ Limitations & Design Choices
|
||||
|
||||
| Scenario | Status | Reason |
|
||||
| :--- | :---: | :--- |
|
||||
| **Computed Properties** | ⚠️ Excluded | Getter-only properties without backing fields are intentionally skipped (e.g., `FullName => $"{First} {Last}"`) |
|
||||
| **Constructor Logic** | ⚠️ Bypassed | Deserialization uses `RuntimeHelpers.GetUninitializedObject()` to avoid constructor execution |
|
||||
| **Constructor Validation** | ⚠️ Not Executed | Validation logic in constructors won't run during deserialization - use Data Annotations instead |
|
||||
|
||||
> **💡 Best Practice**: For relationships between entities, prefer **referencing** (storing ObjectIds) over **embedding** (full nested objects) to avoid data duplication and maintain consistency. See tests in `CircularReferenceTests.cs` for implementation patterns.
|
||||
|
||||
### 🏷️ Supported Attributes
|
||||
CBDD supports standard .NET Data Annotations for mapping and validation, as shown in the table and example below:
|
||||
|
||||
| Attribute | Category | Description |
|
||||
| :--- | :--- | :--- |
|
||||
| `[Table("name")]` | Mapping | Sets the collection name. Supports `Schema="s"` for `s.name` grouping. |
|
||||
| `[Column("name")]` | Mapping | Maps property to a specific BSON field name. |
|
||||
| `[Column(TypeName="...")]`| Mapping | Handles special types (e.g., `geopoint` for coordinate tuples). |
|
||||
| `[Key]` | Identity | Explicitly marks the primary key (maps to `_id`). |
|
||||
| `[NotMapped]` | Mapping | Excludes property from BSON serialization. |
|
||||
| `[Required]` | Validation | Ensures string is not null/empty or nullable type is not null. |
|
||||
| `[StringLength(max)]` | Validation | Validates string length (supports `MinimumLength`). |
|
||||
| `[MaxLength(n)]` | Validation | Validates maximum string length. |
|
||||
| `[MinLength(n)]` | Validation | Validates minimum string length. |
|
||||
| `[Range(min, max)]` | Validation | Validates numeric values stay within the specified range. |
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Validation attributes (`[Required]`, `[Range]`, etc.) throw a `System.ComponentModel.DataAnnotations.ValidationException` during serialization if rules are violated.
|
||||
|
||||
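A short example of how these attributes might be combined on an entity (the collection and field names are illustrative):

```csharp
[Table("orders", Schema = "sales")]        // persisted as the "sales.orders" collection
public class Order
{
    [Key]
    public ObjectId Id { get; set; }

    [Column("customer_name")]
    [Required, StringLength(100, MinimumLength = 2)]
    public string CustomerName { get; set; } = string.Empty;

    [Range(1, 10_000)]
    public int Quantity { get; set; }

    [NotMapped]                             // excluded from BSON serialization
    public string? CacheKey { get; set; }
}
```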
---
|
||||
|
||||
## 📚 Documentation
|
||||
# CBDD
|
||||
|
||||
For in-depth technical details, see the complete specification documents:
|
||||
|
||||
- **[RFC.md](RFC.md)** - Full architectural specification covering storage engine, indexing, transactions, WAL protocol, and query processing
|
||||
- **[C-BSON.md](C-BSON.md)** - Detailed wire format specification for CBDD's Compressed BSON format, including hex dumps and performance analysis
|
||||
CBDD is an embedded, document-oriented database engine for .NET 10. It targets internal platform teams that need predictable ACID behavior, low-latency local persistence, and typed access patterns without running an external database server.
|
||||
|
||||
## ✅ Fitness Check
|
||||
## Purpose And Business Context
|
||||
|
||||
Run the repository fitness suite locally:
|
||||
CBDD provides a local data layer for services and tools that need transactional durability, deterministic startup, and high-throughput reads/writes. The primary business outcome is reducing operational overhead for workloads that do not require a networked database cluster.
|
||||
|
||||
## Ownership And Support
|
||||
|
||||
- Owning team: CBDD maintainers (repository owner: `@dohertj2`)
|
||||
- Primary support path: open a Gitea issue in this repository with labels `incident` or `bug`
|
||||
- Escalation path: follow `/Users/dohertj2/Desktop/CBDD/docs/runbook.md` and page the release maintainer listed in the active release PR
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
CBDD has four primary layers:
|
||||
|
||||
1. Storage and transaction engine (`/Users/dohertj2/Desktop/CBDD/src/CBDD.Core/Storage`, `/Users/dohertj2/Desktop/CBDD/src/CBDD.Core/Transactions`)
|
||||
2. BSON serialization (`/Users/dohertj2/Desktop/CBDD/src/CBDD.Bson`)
|
||||
3. Indexing and query execution (`/Users/dohertj2/Desktop/CBDD/src/CBDD.Core/Indexing`, `/Users/dohertj2/Desktop/CBDD/src/CBDD.Core/Query`)
|
||||
4. Source-generated mapping (`/Users/dohertj2/Desktop/CBDD/src/CBDD.SourceGenerators`)
|
||||
|
||||
Detailed architecture material:
|
||||
- `/Users/dohertj2/Desktop/CBDD/docs/architecture.md`
|
||||
- `/Users/dohertj2/Desktop/CBDD/RFC.md`
|
||||
- `/Users/dohertj2/Desktop/CBDD/C-BSON.md`
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- .NET SDK 10.x
|
||||
- Bash (for repository scripts)
|
||||
- Read/write permissions for the local working directory
|
||||
- Gitea access for pull requests and release publishing
|
||||
|
||||
## Setup And Local Run
|
||||
|
||||
1. Clone the repository.
|
||||
```bash
|
||||
git clone https://gitea.dohertylan.com/dohertj2/CBDD.git
|
||||
cd CBDD
|
||||
```
|
||||
Expected outcome: local repository checkout with `CBDD.slnx` present.
|
||||
|
||||
2. Restore dependencies.
|
||||
```bash
|
||||
dotnet restore
|
||||
```
|
||||
Expected outcome: restore completes without package errors.
|
||||
|
||||
3. Build the solution.
|
||||
```bash
|
||||
dotnet build CBDD.slnx -c Release
|
||||
```
|
||||
Expected outcome: solution builds without compiler errors.
|
||||
|
||||
4. Run tests.
|
||||
```bash
|
||||
dotnet test CBDD.slnx -c Release
|
||||
```
|
||||
Expected outcome: all tests pass.
|
||||
|
||||
5. Run the full repository fitness check.
|
||||
```bash
|
||||
bash scripts/fitness-check.sh
|
||||
```
|
||||
Expected outcome: format, build, tests, coverage threshold, and package checks complete.
|
||||
|
||||
## Configuration And Secrets
|
||||
|
||||
- Default local usage requires only a filesystem path for the database file.
|
||||
- Do not commit secrets in source, test fixtures, benchmark assets, or `.env` files.
|
||||
- If publishing packages, keep feed credentials in CI secrets or local keychain-backed credential storage.
|
||||
- Store environment-specific values outside the repository and inject them at runtime.
|
||||
|
||||
## Build, Test, And Quality Gates
|
||||
|
||||
Required pre-merge commands:
|
||||
|
||||
```bash
|
||||
dotnet format --verify-no-changes
|
||||
dotnet build -t:Rebuild
|
||||
dotnet test
|
||||
bash scripts/coverage-check.sh
|
||||
```
|
||||
|
||||
Optional full gate:
|
||||
|
||||
```bash
|
||||
bash scripts/fitness-check.sh
|
||||
```
|
||||
|
||||
The fitness script verifies formatting, build and test health, the coverage threshold, and package risk checks.
|
||||
|
||||
---
|
||||
|
||||
## 📦 Quick Start
|
||||
|
||||
### 1. Installation
|
||||
```bash
dotnet add package ZB.MOM.WW.CBDD
```
|
||||
|
||||
### 2. Basic Usage
|
||||
|
||||
```csharp
|
||||
// 1. Define your Entities
|
||||
public class User
|
||||
{
|
||||
public ObjectId Id { get; set; }
|
||||
public string Name { get; set; }
|
||||
}
|
||||
|
||||
// 2. Define your DbContext (Source Generator will produce InitializeCollections)
|
||||
public partial class MyDbContext : DocumentDbContext
|
||||
{
|
||||
public DocumentCollection<ObjectId, User> Users { get; set; } = null!;
|
||||
|
||||
public MyDbContext(string path) : base(path)
|
||||
{
|
||||
InitializeCollections();
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Use with Implicit Transactions (Recommended)
|
||||
using var db = new MyDbContext("mydb.db");
|
||||
|
||||
// Operations are tracked automatically
|
||||
db.Users.Insert(new User { Name = "Alice" });
|
||||
db.Users.Insert(new User { Name = "Bob" });
|
||||
|
||||
// Commit all changes at once
|
||||
db.SaveChanges();
|
||||
|
||||
// 4. Query naturally with LINQ
|
||||
var results = db.Users.AsQueryable()
|
||||
.Where(u => u.Name.StartsWith("A"))
|
||||
.AsEnumerable();
|
||||
|
||||
// 5. Or use explicit transactions for fine-grained control
|
||||
using (var txn = db.BeginTransaction())
|
||||
{
|
||||
db.Users.Insert(new User { Name = "Charlie" });
|
||||
txn.Commit(); // Explicit commit
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🗺️ Roadmap & Status
|
||||
|
||||
We are actively building the core. Here is where we stand:
|
||||
|
||||
- ✅ **Core Storage**: Paged I/O, WAL, Transactions with thread-safe concurrent access.
|
||||
- ✅ **BSON Engine**: Zero-copy Reader/Writer with lowercase policy.
|
||||
- ✅ **Indexing**: B-Tree implementation.
|
||||
- ✅ **Vector Search**: HNSW implementation for Similarity Search.
|
||||
- ✅ **Geospatial Indexing**: Optimized R-Tree with zero-allocation tuple API.
|
||||
- ✅ **Query Engine**: Hybrid execution (Index/Scan + LINQ to Objects).
|
||||
- ✅ **Advanced LINQ**: GroupBy, Joins, Aggregations, Complex Projections.
|
||||
- ✅ **Async I/O**: Full `async`/`await` support with proper `CancellationToken` handling.
|
||||
- ✅ **Source Generators**: Auto-map POCO/DDD classes with robust nested objects, collections, and ref struct support.
|
||||
|
||||
## 🔮 Future Vision
|
||||
|
||||
### 1. Advanced Querying & Specialized Indices
|
||||
- **Graph Traversals**:
|
||||
- Specialized index for "links" (Document IDs) for $O(1)$ navigation without full scans.
|
||||
|
||||
### 2. CDC & Event Integration
|
||||
- **BSON Change Stream**: "Log Miner" that decodes WAL entries and emits structured events.
|
||||
- **Internal Dispatcher**: Keeps specialized indices updated automatically via CDC.
|
||||
|
||||
### 3. Performance & Optimization
|
||||
- **Projection Engine**: Read only specific fields from disk (via BSON offsets) without full document deserialization.
|
||||
- **Portability**: Evaluate `netstandard2.1` support for broader compatibility (Unity, MAUI, etc.).
|
||||
|
||||
---
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
We welcome contributions! This is a great project to learn about database internals, B-Trees, and high-performance .NET.
|
||||
|
||||
### How to Build
|
||||
1. **Clone**: `git clone https://github.com/mrdevrobot/CBDD.git`
|
||||
2. **Build**: `dotnet build`
|
||||
3. **Test**: `dotnet test` (We have comprehensive tests for Storage, Indexing, and LINQ).
|
||||
4. **Coverage Gate**: `bash scripts/coverage-check.sh`
|
||||
|
||||
### Areas to Contribute
|
||||
- **Missing LINQ Operators**: Help us implement additional `IQueryable` functions.
|
||||
- **Benchmarks**: Help us prove `CBDD` is faster than the competition.
|
||||
- **Documentation**: Examples, Guides, and Wiki.
|
||||
|
||||
---
|
||||
|
||||
## 📝 License
|
||||
|
||||
Licensed under the MIT License. Use it freely in personal and commercial projects.
|
||||
|
||||
|
||||
## Deployment And Rollback
|
||||
|
||||
CBDD is released as an internal package.
|
||||
|
||||
- Deployment workflow: `/Users/dohertj2/Desktop/CBDD/docs/deployment.md`
|
||||
- Rollback workflow: `/Users/dohertj2/Desktop/CBDD/docs/deployment.md#rollback-procedure`
|
||||
|
||||
## Operations And Incident Response
|
||||
|
||||
Operational procedures, diagnostics, and escalation are documented in:
|
||||
|
||||
- `/Users/dohertj2/Desktop/CBDD/docs/runbook.md`
|
||||
- `/Users/dohertj2/Desktop/CBDD/docs/troubleshooting.md`
|
||||
|
||||
## Security And Compliance Posture
|
||||
|
||||
- CBDD relies on host and process-level access controls.
|
||||
- Sensitive payload classification and handling requirements are defined in `/Users/dohertj2/Desktop/CBDD/docs/security.md`.
|
||||
- Role and approval requirements are defined in `/Users/dohertj2/Desktop/CBDD/docs/access.md`.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
Common issues and remediation:
|
||||
|
||||
- Build/test environment failures: `/Users/dohertj2/Desktop/CBDD/docs/troubleshooting.md#build-and-test-failures`
|
||||
- Data-file recovery procedures: `/Users/dohertj2/Desktop/CBDD/docs/troubleshooting.md#data-file-and-recovery-issues`
|
||||
- Query/index behavior verification: `/Users/dohertj2/Desktop/CBDD/docs/troubleshooting.md#query-and-index-issues`
|
||||
|
||||
## Change Governance
|
||||
|
||||
- Use feature branches from `main`.
|
||||
- Open pull requests with at least one reviewer approval before merge.
|
||||
- Keep release notes in pull request descriptions and tagged release notes.
|
||||
- Run documented quality gates before requesting review.
|
||||
|
||||
## Documentation Index
|
||||
|
||||
- Documentation home: `/Users/dohertj2/Desktop/CBDD/docs/README.md`
|
||||
- Major feature inventory: `/Users/dohertj2/Desktop/CBDD/docs/features/README.md`
|
||||
- Architecture decisions: `/Users/dohertj2/Desktop/CBDD/docs/adr/0001-storage-engine-and-source-generation.md`
|
||||
|
||||
25	docs/README.md (new file)
@@ -0,0 +1,25 @@
|
||||
# CBDD Documentation
|
||||
|
||||
This folder is the canonical source for internal operational and engineering documentation.
|
||||
|
||||
## Core Documents
|
||||
|
||||
- Architecture: [`architecture.md`](architecture.md)
|
||||
- Deployment: [`deployment.md`](deployment.md)
|
||||
- Operations runbook: [`runbook.md`](runbook.md)
|
||||
- Security controls: [`security.md`](security.md)
|
||||
- Access model: [`access.md`](access.md)
|
||||
- Troubleshooting guide: [`troubleshooting.md`](troubleshooting.md)
|
||||
|
||||
## Major Features
|
||||
|
||||
- Feature inventory: [`features/README.md`](features/README.md)
|
||||
|
||||
## Architecture Decisions
|
||||
|
||||
- Initial ADR: [`adr/0001-storage-engine-and-source-generation.md`](adr/0001-storage-engine-and-source-generation.md)
|
||||
|
||||
## Reference Specifications
|
||||
|
||||
- Engine RFC: [`../RFC.md`](../RFC.md)
|
||||
- C-BSON format spec: [`../C-BSON.md`](../C-BSON.md)
|
||||
32	docs/access.md (new file)
@@ -0,0 +1,32 @@
|
||||
# Access And Permissions
|
||||
|
||||
## Roles
|
||||
|
||||
- Maintainer: merge authority, release authority, incident ownership.
|
||||
- Reviewer: approves pull requests and validates architecture/security impact.
|
||||
- Contributor: proposes changes through pull requests.
|
||||
- Consumer: integrates published package versions in downstream applications.
|
||||
|
||||
## Least-Privilege Model
|
||||
|
||||
- Limit maintainer privileges to required release and incident responders.
|
||||
- Use reviewer role for routine code review and documentation updates.
|
||||
- Restrict package publishing credentials to release maintainers.
|
||||
|
||||
## Approval Workflow
|
||||
|
||||
1. Contributor opens pull request.
|
||||
2. Reviewer validates tests, documentation, and risk impact.
|
||||
3. Maintainer approves merge for high-risk or release-impacting changes.
|
||||
4. Release maintainer publishes approved release artifacts.
|
||||
|
||||
## Periodic Access Review
|
||||
|
||||
1. Review maintainer and publisher access quarterly.
|
||||
2. Remove inactive accounts and obsolete credentials.
|
||||
3. Confirm access ownership in repository settings and package feed controls.
|
||||
|
||||
## Emergency Access
|
||||
|
||||
- Temporary elevated access requires a tracked incident issue.
|
||||
- Remove temporary access immediately after incident closure.
|
||||
32	docs/adr/0001-storage-engine-and-source-generation.md (new file)
@@ -0,0 +1,32 @@
|
||||
# 0001 Storage Engine And Source Generation
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
CBDD targets embedded workloads where predictable latency and low operational overhead are priorities. Runtime reflection and remote database dependencies increase startup and runtime variance for this workload profile.
|
||||
|
||||
## Decision
|
||||
|
||||
1. Use an embedded storage engine with page-based persistence and WAL-backed transactions.
|
||||
2. Use compile-time source generation for mapping instead of runtime reflection.
|
||||
3. Keep query and indexing execution in-process for deterministic behavior.
|
||||
|
||||
## Consequences
|
||||
|
||||
Positive:
|
||||
- Lower runtime allocation and startup overhead.
|
||||
- Strong control over transaction and recovery behavior.
|
||||
- Predictable deployment for local/offline workloads.
|
||||
|
||||
Trade-offs:
|
||||
- Greater maintenance burden for custom storage/query engine internals.
|
||||
- Source generator complexity requires dedicated regression coverage.
|
||||
|
||||
## Related Documents
|
||||
|
||||
- [`../architecture.md`](../architecture.md)
|
||||
- [`../runbook.md`](../runbook.md)
|
||||
- [`../../RFC.md`](../../RFC.md)
|
||||
46	docs/architecture.md (new file)
@@ -0,0 +1,46 @@
|
||||
# Architecture
|
||||
|
||||
## System Context
|
||||
|
||||
CBDD is an embedded database library consumed in-process by .NET applications. The host application owns process lifecycle, filesystem placement, and operational policy.
|
||||
|
||||
External dependencies:
|
||||
- .NET runtime and SDK
|
||||
- Local filesystem
|
||||
- Optional CI and package registry for build/release
|
||||
|
||||
## Containers And Major Components
|
||||
|
||||
1. `CBDD.Core`
|
||||
- Owns storage engine, transaction protocol, WAL, indexing, query planning, and CDC plumbing.
|
||||
|
||||
2. `CBDD.Bson`
|
||||
- Owns BSON document model and span-based serialization/deserialization primitives.
|
||||
|
||||
3. `CBDD.SourceGenerators`
|
||||
- Generates mapping code at compile time for entity serialization and collection initialization.
|
||||
|
||||
4. Consumer application
|
||||
- Defines entities, `DocumentDbContext` subclasses, and operational behavior.
|
||||
|
||||
## Data Flow
|
||||
|
||||
1. Consumer invokes collection API through `DocumentDbContext`.
|
||||
2. Mapper layer serializes entities to BSON via generated mappers.
|
||||
3. Storage engine writes page data and WAL entries.
|
||||
4. Index subsystem updates primary and secondary indexes.
|
||||
5. Transaction commit persists durable state and emits CDC notifications where enabled.
|
||||
6. Query path evaluates expression plans and uses indexes or scan fallback (a minimal sketch of this flow follows).
|
||||
|
||||
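As a hedged illustration of this flow from the consumer side (the `MyDbContext`, `Orders` collection, and `Order` entity are hypothetical; no new engine APIs are implied):

```csharp
using var db = new MyDbContext("orders.db");          // DocumentDbContext subclass

// Steps 1–4: the insert passes through the generated mapper, WAL, page writes, and index updates
db.Orders.Insert(new Order { Customer = "Acme", Total = 42 });

// Step 5: commit makes the transaction durable (and emits CDC events where enabled)
db.SaveChanges();

// Step 6: the query path picks an index-backed plan or falls back to a scan
var bigOrders = db.Orders.AsQueryable()
    .Where(o => o.Total > 10)
    .ToList();
```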
## Reliability Model
|
||||
|
||||
- Write-ahead logging enforces durability before logical commit completion.
|
||||
- Snapshot isolation supports concurrent reads with transactional correctness.
|
||||
- Recovery logic replays WAL on restart to restore committed state.
|
||||
|
||||
## Cross References
|
||||
|
||||
- Operational procedures: [`runbook.md`](runbook.md)
|
||||
- Deployment and rollback: [`deployment.md`](deployment.md)
|
||||
- Security controls: [`security.md`](security.md)
|
||||
- Detailed protocol reference: [`../RFC.md`](../RFC.md)
|
||||
60	docs/deployment.md (new file)
@@ -0,0 +1,60 @@
|
||||
# Deployment
|
||||
|
||||
## Scope
|
||||
|
||||
This workflow covers releasing CBDD as an internal package and promoting a validated version for downstream consumers.
|
||||
|
||||
## Promotion Path
|
||||
|
||||
1. Development validation on feature branch.
|
||||
2. Merge to `main` after required quality gates.
|
||||
3. Tag release and publish package artifact.
|
||||
4. Consumer rollout to target services/tools.
|
||||
|
||||
## Pre-Deployment Checklist
|
||||
|
||||
1. Run repository fitness gate.
|
||||
```bash
|
||||
bash scripts/fitness-check.sh
|
||||
```
|
||||
2. Verify no pending incidents in current release window.
|
||||
3. Confirm release notes include behavioral changes and migration notes.
|
||||
4. Confirm package version bump strategy.
|
||||
|
||||
## Release Procedure
|
||||
|
||||
1. Build release artifacts.
|
||||
```bash
|
||||
dotnet build CBDD.slnx -c Release
|
||||
dotnet test CBDD.slnx -c Release
|
||||
```
|
||||
2. Pack publishable projects.
|
||||
```bash
|
||||
dotnet pack src/CBDD/ZB.MOM.WW.CBDD.csproj -c Release -o nupkgs
|
||||
```
|
||||
3. Publish package to approved internal feed.
|
||||
4. Create release tag and attach release notes.
|
||||
5. Notify downstream teams of version and rollout guidance.
|
||||
|
||||
## Post-Deployment Validation
|
||||
|
||||
1. Install released package in a smoke-test consumer.
|
||||
2. Validate insert, query, transaction commit, and recovery startup behavior.
|
||||
3. Verify CDC, vector, and geospatial capabilities when used by consuming teams.
|
||||
|
||||
## Rollback Procedure
|
||||
|
||||
Trigger rollback when release validation fails or production consumers detect regression.
|
||||
|
||||
1. Stop further promotions of the failing version.
|
||||
2. Revert to the previous known-good package version in consumer manifests (see the example below).
|
||||
3. If package registry supports unlisting/yanking, unlist the bad version.
|
||||
4. Open incident issue with impact, timeline, and containment actions.
|
||||
5. Prepare and validate patch release before re-promotion.
|
||||
|
||||
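For example, a consumer can pin back to a known-good version with the standard .NET CLI (the version number is illustrative):

```bash
# Pin the consumer project back to the last known-good release
dotnet add package ZB.MOM.WW.CBDD --version 1.4.2
dotnet restore
```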
## Emergency Change Path
|
||||
|
||||
1. Create hotfix branch from last good tag.
|
||||
2. Apply minimal fix and run full quality gates.
|
||||
3. Require maintainer approval.
|
||||
4. Publish patched version and communicate mandatory upgrade guidance.
|
||||
10	docs/features/README.md (new file)
@@ -0,0 +1,10 @@
|
||||
# Feature Inventory
|
||||
|
||||
The following documents are the canonical reference for major CBDD capabilities:
|
||||
|
||||
- [Storage and transactions](storage-transactions.md)
|
||||
- [Query and indexing](query-and-indexing.md)
|
||||
- [Vector search](vector-search.md)
|
||||
- [Geospatial search](geospatial-search.md)
|
||||
- [Change data capture](change-data-capture.md)
|
||||
- [Source-generated mapping](source-generated-mapping.md)
|
||||
61	docs/features/change-data-capture.md (new file)
@@ -0,0 +1,61 @@
|
||||
# Change Data Capture
|
||||
|
||||
## Purpose And Business Outcome
|
||||
|
||||
Expose transactional change events so consumers can react to committed inserts, updates, and deletes.
|
||||
|
||||
## Scope And Non-Goals
|
||||
|
||||
Scope:
|
||||
- Collection-level change stream subscriptions
|
||||
- Event publication after successful commit
|
||||
|
||||
Non-goals:
|
||||
- Cross-process event transport guarantees
|
||||
- External message broker delivery semantics
|
||||
|
||||
## User And System Workflows
|
||||
|
||||
1. Consumer subscribes to a collection change stream.
|
||||
2. Application performs data mutations.
|
||||
3. On commit, CDC publishes events to active subscribers.
|
||||
4. Subscriber handlers process entity and metadata payloads (see the sketch below).
|
||||
|
||||
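A minimal sketch of this workflow, based on the `Watch(...)` API shown in the repository README (the `People` collection and `Person` entity are illustrative):

```csharp
using var db = new MyDbContext("people.db");

// Step 1: subscribe to the collection change stream
using var subscription = db.People.Watch(capturePayload: true)
    .Subscribe(e => Console.WriteLine($"{e.Type}: {e.DocumentId}"));

// Steps 2–3: mutations publish events only after a successful commit
db.People.Insert(new Person { Id = 1, Name = "Alice" });
db.SaveChanges();
```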
## Interfaces And APIs
|
||||
|
||||
- `Watch(...)` collection API
|
||||
- `ChangeStreamObservable`
|
||||
- `ChangeStreamDispatcher`
|
||||
- `ChangeStreamEvent`
|
||||
|
||||
## Permissions And Data Handling
|
||||
|
||||
- CDC payloads can include document identifiers and entity data.
|
||||
- Restrict subscription access to trusted application components.
|
||||
|
||||
## Dependencies And Failure Modes
|
||||
|
||||
Dependencies:
|
||||
- Transaction commit lifecycle
|
||||
- Subscriber channel health
|
||||
|
||||
Failure modes:
|
||||
- Subscriber backpressure or handler exceptions
|
||||
- Event handling assumptions that conflict with rollback behavior
|
||||
|
||||
## Monitoring, Alerts, And Troubleshooting
|
||||
|
||||
- Review CDC behavior in integration and scalability tests.
|
||||
- Follow [`../runbook.md`](../runbook.md) for incident response.
|
||||
- Follow [`../security.md`](../security.md) for event payload handling controls.
|
||||
- Use [`../troubleshooting.md`](../troubleshooting.md) for diagnosis guidance.
|
||||
|
||||
## Rollout And Change Considerations
|
||||
|
||||
- Behavioral CDC changes require explicit release-note callouts.
|
||||
- Maintain compatibility expectations for event payload shape.
|
||||
|
||||
## Validation Guidance
|
||||
|
||||
- Run `CdcTests` and `CdcScalabilityTests` before release.
|
||||
- Validate commit-only emission behavior in regression tests.
|
||||
60	docs/features/geospatial-search.md (new file)
@@ -0,0 +1,60 @@
|
||||
# Geospatial Search
|
||||
|
||||
## Purpose And Business Outcome
|
||||
|
||||
Support location-aware queries such as nearest-neighbor and bounding-box lookups for geospatial workloads.
|
||||
|
||||
## Scope And Non-Goals
|
||||
|
||||
Scope:
|
||||
- Spatial index configuration
|
||||
- Proximity and bounding-box query operations
|
||||
|
||||
Non-goals:
|
||||
- Full GIS projection transformations
|
||||
- External map tile services
|
||||
|
||||
## User And System Workflows
|
||||
|
||||
1. Consumer configures spatial index for coordinate fields.
|
||||
2. Coordinates are serialized and stored with entity payloads.
|
||||
3. Query path evaluates `Near` or `Within` predicates.
|
||||
4. Engine returns matching entities (see the sketch below).
|
||||
|
||||
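A hedged sketch using the `Near`/`Within` helpers documented in the repository README (the `Stores` collection, coordinates, and radius are illustrative):

```csharp
// Proximity: stores within 5 km of a point (latitude, longitude)
var nearby = db.Stores.AsQueryable()
    .Where(s => s.Location.Near((45.4642, 9.1899), 5.0))
    .ToList();

// Bounding box: stores inside a rectangular area
var inArea = db.Stores.AsQueryable()
    .Where(s => s.Location.Within((45.0, 9.0), (46.0, 10.0)))
    .ToList();
```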
## Interfaces And APIs
|
||||
|
||||
- Spatial index APIs in model configuration
|
||||
- Query helpers under `GeoSpatialExtensions`
|
||||
- Index implementation in `RTreeIndex`
|
||||
|
||||
## Permissions And Data Handling
|
||||
|
||||
- Geolocation data may be regulated or privacy-sensitive.
|
||||
- Apply least-privilege access and retention limits.
|
||||
|
||||
## Dependencies And Failure Modes
|
||||
|
||||
Dependencies:
|
||||
- Correct coordinate format and units
|
||||
- Spatial index consistency
|
||||
|
||||
Failure modes:
|
||||
- Invalid coordinate values
|
||||
- Unexpected results from bounding definitions or radius units
|
||||
|
||||
## Monitoring, Alerts, And Troubleshooting
|
||||
|
||||
- Validate geospatial paths through dedicated stress and correctness tests.
|
||||
- Use [`../runbook.md`](../runbook.md) for escalation.
|
||||
- Follow [`../security.md`](../security.md) for geolocation data protection controls.
|
||||
- Use [`../troubleshooting.md`](../troubleshooting.md#query-and-index-issues) for issue resolution.
|
||||
|
||||
## Rollout And Change Considerations
|
||||
|
||||
- Document coordinate and unit assumptions in release notes when behavior changes.
|
||||
- Validate backward compatibility for persisted spatial index pages.
|
||||
|
||||
## Validation Guidance
|
||||
|
||||
- Run `GeospatialTests` and `GeospatialStressTests` before release.
|
||||
- Include representative proximity/bounding queries in smoke checks.
|
||||
62	docs/features/query-and-indexing.md (new file)
@@ -0,0 +1,62 @@
|
||||
# Query And Indexing
|
||||
|
||||
## Purpose And Business Outcome
|
||||
|
||||
Deliver predictable query correctness and performance using expression translation with index-aware execution.
|
||||
|
||||
## Scope And Non-Goals
|
||||
|
||||
Scope:
|
||||
- LINQ query translation
|
||||
- Primary and secondary index usage
|
||||
- Scan fallback where index optimization is not applicable
|
||||
|
||||
Non-goals:
|
||||
- Full SQL compatibility
|
||||
- Distributed query federation
|
||||
|
||||
## User And System Workflows
|
||||
|
||||
1. Consumer submits LINQ expression.
|
||||
2. Query planner evaluates index opportunities.
|
||||
3. Engine executes index-backed or scan path.
|
||||
4. Results are materialized to consumer entities (see the sketch below).
|
||||
|
||||
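A minimal sketch of an index-aware LINQ query, mirroring the repository README (the `Users` collection and predicates are illustrative):

```csharp
// Translated to B-Tree range/prefix lookups where matching indexes exist,
// otherwise executed through the scan fallback path.
var results = db.Users.AsQueryable()
    .Where(u => u.Age > 25 && u.Name.StartsWith("A"))
    .OrderBy(u => u.Age)
    .Take(10)
    .AsEnumerable();
```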
## Interfaces And APIs
|
||||
|
||||
- `AsQueryable()` and query provider components
|
||||
- `CollectionIndexManager`
|
||||
- Index implementations under `src/CBDD.Core/Indexing`
|
||||
|
||||
## Permissions And Data Handling
|
||||
|
||||
- Query access follows host application authorization policy.
|
||||
- Indexed data inherits the same sensitivity classification as source payloads.
|
||||
|
||||
## Dependencies And Failure Modes
|
||||
|
||||
Dependencies:
|
||||
- Valid index metadata and storage consistency
|
||||
- Expression visitor correctness
|
||||
|
||||
Failure modes:
|
||||
- Incorrect predicate translation
|
||||
- Missing/ineffective indexes
|
||||
- Performance regressions due to scan-heavy paths
|
||||
|
||||
## Monitoring, Alerts, And Troubleshooting
|
||||
|
||||
- Track query-related regressions in automated tests.
|
||||
- Use [`../runbook.md`](../runbook.md) for incident handling.
|
||||
- Follow [`../security.md`](../security.md) for sensitive data and access constraints.
|
||||
- Use [`../troubleshooting.md`](../troubleshooting.md#query-and-index-issues) for remediation.
|
||||
|
||||
## Rollout And Change Considerations
|
||||
|
||||
- Query planner/index behavior changes require benchmark comparison and regression coverage.
|
||||
- Document breaking semantics in release notes.
|
||||
|
||||
## Validation Guidance
|
||||
|
||||
- Run query, index, and optimizer test suites in `tests/CBDD.Tests`.
|
||||
- Confirm coverage gate with `bash scripts/coverage-check.sh`.
|
||||
61	docs/features/source-generated-mapping.md (new file)
@@ -0,0 +1,61 @@
|
||||
# Source-Generated Mapping
|
||||
|
||||
## Purpose And Business Outcome
|
||||
|
||||
Generate compile-time mapping code to reduce runtime overhead and reflection risk for serialization paths.
|
||||
|
||||
## Scope And Non-Goals
|
||||
|
||||
Scope:
|
||||
- Entity metadata analysis
|
||||
- Mapper source generation
|
||||
- Collection initialization helpers
|
||||
|
||||
Non-goals:
|
||||
- Runtime dynamic mapping for unknown schemas
|
||||
- Support for unsupported C# patterns outside generator design
|
||||
|
||||
## User And System Workflows
|
||||
|
||||
1. Consumer defines entities and context patterns.
|
||||
2. Build invokes source generator.
|
||||
3. Generated mapper code is compiled into target project.
|
||||
4. Runtime serialization path uses the generated code (see the sketch below).
|
||||
|
||||
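A minimal sketch of the consumer-side pattern that triggers generation, taken from the repository README Quick Start (the entity shape is illustrative):

```csharp
public class User
{
    public ObjectId Id { get; set; }
    public string Name { get; set; } = string.Empty;
}

// The generator emits InitializeCollections() for partial DocumentDbContext subclasses.
public partial class MyDbContext : DocumentDbContext
{
    public DocumentCollection<ObjectId, User> Users { get; set; } = null!;

    public MyDbContext(string path) : base(path) => InitializeCollections();
}
```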
## Interfaces And APIs
|
||||
|
||||
- Source generator project under `src/CBDD.SourceGenerators`
|
||||
- Attributes in BSON and data-annotation mapping surface
|
||||
- Generated initialization methods for context collections
|
||||
|
||||
## Permissions And Data Handling
|
||||
|
||||
- Generated code can expose field-level mapping behavior.
|
||||
- Repository write permissions should be limited to trusted contributors.
|
||||
|
||||
## Dependencies And Failure Modes
|
||||
|
||||
Dependencies:
|
||||
- Roslyn source generator execution during build
|
||||
- Entity schema conventions
|
||||
|
||||
Failure modes:
|
||||
- Missing generation due to invalid entity declarations
|
||||
- Serialization mismatch caused by attribute/model changes
|
||||
|
||||
## Monitoring, Alerts, And Troubleshooting
|
||||
|
||||
- Monitor build output for generator diagnostics.
|
||||
- Use [`../runbook.md`](../runbook.md) for escalation.
|
||||
- Follow [`../security.md`](../security.md) for review and control expectations.
|
||||
- Use [`../troubleshooting.md`](../troubleshooting.md#source-generation-issues) for remediation steps.
|
||||
|
||||
## Rollout And Change Considerations
|
||||
|
||||
- Generator behavioral changes require broad regression tests across entities.
|
||||
- Document any new constraints or unsupported patterns in release notes.
|
||||
|
||||
## Validation Guidance
|
||||
|
||||
- Run source generator and serialization tests in `tests/CBDD.Tests`.
|
||||
- Validate mapper generation with clean `dotnet build` in CI.
|
||||
63	docs/features/storage-transactions.md (new file)
@@ -0,0 +1,63 @@
|
||||
# Storage And Transactions
|
||||
|
||||
## Purpose And Business Outcome
|
||||
|
||||
Provide durable, ACID-compliant local persistence for embedded workloads that need consistent commit and recovery semantics.
|
||||
|
||||
## Scope And Non-Goals
|
||||
|
||||
Scope:
|
||||
- Page-based storage
|
||||
- Write-ahead logging
|
||||
- Transaction lifecycle and commit/rollback semantics
|
||||
|
||||
Non-goals:
|
||||
- Distributed transactions
|
||||
- Multi-node replication
|
||||
|
||||
## User And System Workflows
|
||||
|
||||
1. Application writes through `DocumentDbContext`.
|
||||
2. Engine records WAL entries.
|
||||
3. Commit persists pages and marks transaction durable.
|
||||
4. Recovery replays the WAL to restore committed state after restart (see the sketch below).
|
||||
|
||||
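A hedged sketch of the commit path described above, using the transaction API shown in the repository README (context and entity names are illustrative):

```csharp
using var db = new MyDbContext("ledger.db");

// Explicit transaction: WAL entries are written before the commit is acknowledged
using (var txn = db.BeginTransaction())
{
    db.Users.Insert(new User { Name = "Charlie" });
    txn.Commit();   // durable once Commit() returns
}                   // assumed: disposing without Commit() discards the pending work
```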
## Interfaces And APIs
|
||||
|
||||
- `DocumentDbContext`
|
||||
- `Transaction` and `ITransaction`
|
||||
- `WriteAheadLog`
|
||||
- Storage engine modules under `src/CBDD.Core/Storage`
|
||||
|
||||
## Permissions And Data Handling
|
||||
|
||||
- Database files require host-managed filesystem access controls.
|
||||
- Transaction data should be treated as sensitive if payloads contain regulated information.
|
||||
|
||||
## Dependencies And Failure Modes
|
||||
|
||||
Dependencies:
|
||||
- Local filesystem I/O
|
||||
- WAL and page file consistency
|
||||
|
||||
Failure modes:
|
||||
- Interrupted writes
|
||||
- Corrupted WAL entries
|
||||
- Invalid page metadata after unsafe process termination
|
||||
|
||||
## Monitoring, Alerts, And Troubleshooting
|
||||
|
||||
- Use CI/test failures and incident issues as primary signals.
|
||||
- Follow [`../runbook.md`](../runbook.md) for triage.
|
||||
- Follow [`../security.md`](../security.md) for data handling and control requirements.
|
||||
- Use [`../troubleshooting.md`](../troubleshooting.md#data-file-and-recovery-issues) for recovery issues.
|
||||
|
||||
## Rollout And Change Considerations
|
||||
|
||||
- Any storage format or WAL behavior change requires migration and rollback validation.
|
||||
- Release notes must document backward compatibility impact.
|
||||
|
||||
## Validation Guidance
|
||||
|
||||
- Run transaction and recovery tests in `tests/CBDD.Tests`.
|
||||
- Execute `dotnet test CBDD.slnx -c Release` before merge.
|
||||
60	docs/features/vector-search.md (new file)
@@ -0,0 +1,60 @@
|
||||
# Vector Search
|
||||
|
||||
## Purpose And Business Outcome
|
||||
|
||||
Enable similarity search for embedding-driven workloads directly in embedded storage.
|
||||
|
||||
## Scope And Non-Goals
|
||||
|
||||
Scope:
|
||||
- Vector index configuration
|
||||
- Approximate nearest-neighbor query execution
|
||||
|
||||
Non-goals:
|
||||
- External model training
|
||||
- Cross-database vector federation
|
||||
|
||||
## User And System Workflows
|
||||
|
||||
1. Consumer registers vector index for embedding field.
|
||||
2. Documents persist embeddings in collection payloads.
|
||||
3. Query issues vector search request with `k` nearest neighbors.
|
||||
4. Engine returns ranked matches (see the sketch below).
|
||||
|
||||
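A hedged sketch using the vector index configuration and `VectorSearch` extension shown in the repository README (the entity, dimension, and `k` values are illustrative):

```csharp
// Step 1: register the vector index on a float[] embedding property
modelBuilder.Entity<VectorItem>()
    .HasVectorIndex(x => x.Embedding, dimensions: 1536, metric: VectorMetric.Cosine);

// Steps 3–4: query the k nearest neighbours for a given embedding
var matches = db.Items.AsQueryable()
    .VectorSearch(x => x.Embedding, queryVector, k: 5)
    .ToList();
```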
## Interfaces And APIs
|
||||
|
||||
- Vector index configuration via model builder
|
||||
- Query extensions under `VectorSearchExtensions`
|
||||
- Index implementation in `VectorSearchIndex`
|
||||
|
||||
## Permissions And Data Handling
|
||||
|
||||
- Embeddings may contain sensitive semantic information.
|
||||
- Apply host-level access restrictions and retention controls.
|
||||
|
||||
## Dependencies And Failure Modes
|
||||
|
||||
Dependencies:
|
||||
- Correct embedding dimensionality
|
||||
- Index parameter tuning for workload
|
||||
|
||||
Failure modes:
|
||||
- Dimension mismatch between data and query vectors
|
||||
- Poor recall due to incorrect index configuration
|
||||
|
||||
## Monitoring, Alerts, And Troubleshooting
|
||||
|
||||
- Validate vector query quality during release smoke checks.
|
||||
- Use [`../runbook.md`](../runbook.md) for incident handling.
|
||||
- Follow [`../security.md`](../security.md) for embedding-data handling controls.
|
||||
- Use [`../troubleshooting.md`](../troubleshooting.md#query-and-index-issues) for vector query remediation.
|
||||
|
||||
## Rollout And Change Considerations
|
||||
|
||||
- Treat vector index parameter changes as performance-sensitive releases.
|
||||
- Document compatibility impact for existing persisted indexes.
|
||||
|
||||
## Validation Guidance
|
||||
|
||||
- Run vector search tests in `tests/CBDD.Tests/VectorSearchTests.cs`.
|
||||
- Add benchmark runs for large-vector workloads before release.
|
||||
56	docs/runbook.md (new file)
@@ -0,0 +1,56 @@
|
||||
# Runbook
|
||||
|
||||
## Purpose
|
||||
|
||||
This runbook provides standard operations, incident triage, escalation, and recovery procedures for CBDD maintainers.
|
||||
|
||||
## Signals And Entry Points
|
||||
|
||||
- CI failures on `main`
|
||||
- Failing integration tests in consumer repositories
|
||||
- Regression issues labeled `incident`
|
||||
- Recovery or corruption reports from consumers
|
||||
|
||||
## Alert Triage Procedure
|
||||
|
||||
1. Capture incident context: version, environment, failing operation, and first failure timestamp.
|
||||
2. Classify severity:
|
||||
- `SEV-1`: data loss risk, persistent startup failure, or transaction correctness risk.
|
||||
- `SEV-2`: feature-level regression without confirmed data loss.
|
||||
- `SEV-3`: non-critical behavior or documentation defects.
|
||||
3. Create or update the incident issue with owner and current mitigation status.
|
||||
4. Reproduce with targeted tests in `/Users/dohertj2/Desktop/CBDD/tests/CBDD.Tests`.
|
||||
|
||||
## Diagnostics
|
||||
|
||||
1. Validate build and tests.
|
||||
```bash
|
||||
dotnet test CBDD.slnx -c Release
|
||||
```
|
||||
2. Run coverage threshold gate when behavior changed in core paths.
|
||||
```bash
|
||||
bash scripts/coverage-check.sh
|
||||
```
|
||||
3. For storage and recovery incidents, prioritize:
|
||||
- `StorageEngine.Recovery`
|
||||
- `WriteAheadLog`
|
||||
- transaction protocol tests
|
||||
|
||||
## Escalation Path
|
||||
|
||||
1. Initial owner: maintainer on incident issue.
|
||||
2. Escalate to release maintainer when severity is `SEV-1` or rollback is required.
|
||||
3. Communicate status updates on each milestone: triage complete, mitigation active, fix merged, validation complete.
|
||||
|
||||
## Recovery Actions
|
||||
|
||||
1. Contain impact by pinning consumers to last known-good package version.
|
||||
2. Apply rollback steps from [`deployment.md`](deployment.md#rollback-procedure).
|
||||
3. Validate repaired build with targeted and full regression suites.
|
||||
4. Publish fixed package and confirm consumer recovery.
|
||||
|
||||
## Post-Incident Expectations
|
||||
|
||||
1. Document root cause, blast radius, and timeline.
|
||||
2. Add regression tests to prevent recurrence.
|
||||
3. Record follow-up actions in issue tracker with owners and due dates.
|
||||
34	docs/security.md (new file)
@@ -0,0 +1,34 @@
|
||||
# Security
|
||||
|
||||
## Scope
|
||||
|
||||
CBDD is an embedded data engine. Security controls are shared between the library and the host application that embeds it.
|
||||
|
||||
## Authentication And Authorization Model
|
||||
|
||||
- CBDD does not provide built-in user authentication.
|
||||
- Authorization is enforced by the host process and filesystem permissions.
|
||||
- Access to database files must be limited to trusted service identities.
|
||||
|
||||
## Data Classification And Handling
|
||||
|
||||
- Treat persisted database files as sensitive when they contain customer or regulated data.
|
||||
- Do not store secrets in source, fixtures, or benchmark datasets.
|
||||
- Apply environment-specific retention and backup controls outside this repository.
|
||||
|
||||
## Storage And Cryptography Controls
|
||||
|
||||
- CBDD enforces integrity through WAL and transactional semantics.
|
||||
- Encryption at rest and key management are host responsibilities.
|
||||
- If encryption is required, use filesystem or volume-level encryption managed by platform security controls.
|
||||
|
||||
## Secure Coding Expectations
|
||||
|
||||
1. Require code review for storage, WAL, indexing, query, and serialization changes.
|
||||
2. Add targeted tests for all security-relevant behavior changes.
|
||||
3. Run package vulnerability checks in fitness pipeline.
|
||||
|
||||
## Incident Handling
|
||||
|
||||
- Follow [`runbook.md`](runbook.md) for incident triage and escalation.
|
||||
- Label security-impacting issues with `security` and prioritize immediate containment.
|
||||
62	docs/troubleshooting.md (new file)
@@ -0,0 +1,62 @@
|
||||
# Troubleshooting
|
||||
|
||||
## Build And Test Failures
|
||||
|
||||
### Symptom
|
||||
`dotnet build` or `dotnet test` fails locally or in CI.
|
||||
|
||||
### Checks
|
||||
1. Verify .NET 10 SDK is installed.
|
||||
2. Run `dotnet restore`.
|
||||
3. Run `dotnet format --verify-no-changes`.
|
||||
4. Re-run tests with `dotnet test CBDD.slnx -c Release` (the full sequence is shown below).
|
||||
|
||||
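The same checks as a single shell sequence (commands are taken from the list above; `dotnet --version` is only a sanity check for the installed SDK):

```bash
dotnet --version                       # confirm a .NET 10 SDK is on the PATH
dotnet restore
dotnet format --verify-no-changes
dotnet test CBDD.slnx -c Release
```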
### Resolution
|
||||
- Fix reported compile/test failures before merge.
|
||||
- For flaky tests, isolate affected test class and open an issue with reproduction details.
|
||||
|
||||
## Data File And Recovery Issues
|
||||
|
||||
### Symptom
|
||||
Database startup fails or recovery path throws WAL/storage errors.
|
||||
|
||||
### Checks
|
||||
1. Capture exact exception and stack trace.
|
||||
2. Reproduce with storage/recovery-focused tests.
|
||||
3. Validate rollback path from [`deployment.md`](deployment.md#rollback-procedure).
|
||||
|
||||
### Resolution
|
||||
- Pin consumers to last known-good package.
|
||||
- Apply fix and add regression coverage in recovery/transaction tests.
|
||||
|
||||
## Query And Index Issues
|
||||
|
||||
### Symptom
|
||||
Unexpected query performance or incorrect query results.
|
||||
|
||||
### Checks
|
||||
1. Verify relevant indexes are configured for query predicates.
|
||||
2. Reproduce with test cases in `tests/CBDD.Tests` for query/index modules.
|
||||
3. Validate behavior for scan fallback and expression translation.
|
||||
|
||||
### Resolution
|
||||
- Add or adjust index definitions and query tests.
|
||||
- Document any changed query semantics in release notes.
|
||||
|
||||
## Source Generation Issues
|
||||
|
||||
### Symptom
|
||||
Generated mappers missing or serialization behavior differs from expectations.
|
||||
|
||||
### Checks
|
||||
1. Verify entity attributes and accessibility are valid for source generation.
|
||||
2. Build solution to regenerate mapper output.
|
||||
3. Validate related tests in source generator test coverage.
|
||||
|
||||
### Resolution
|
||||
- Update entity annotations or generator logic.
|
||||
- Add focused regression tests for unsupported pattern handling.
|
||||
|
||||
## Escalation
|
||||
|
||||
If troubleshooting steps do not resolve the issue, follow incident escalation in [`runbook.md`](runbook.md).
|
||||
@@ -124,14 +124,28 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
||||
_indexManager.SetPrimaryRootPageId(_primaryIndex.RootPageId);
|
||||
}
|
||||
|
||||
// Register keys used by the mapper to ensure they are available for compression
|
||||
_storage.RegisterKeys(_mapper.UsedKeys);
|
||||
}
|
||||
|
||||
private void EnsureSchema()
|
||||
{
|
||||
var currentSchema = _mapper.GetSchema();
|
||||
var metadata = _indexManager.GetMetadata();
|
||||
// Register keys used by the mapper to ensure they are available for compression
|
||||
_storage.RegisterKeys(_mapper.UsedKeys);
|
||||
}
|
||||
|
||||
private void RefreshPrimaryIndexRootFromMetadata()
|
||||
{
|
||||
_indexManager.RefreshFromStorageMetadata();
|
||||
|
||||
var primaryRootPageId = _indexManager.PrimaryRootPageId;
|
||||
if (primaryRootPageId == 0)
|
||||
return;
|
||||
|
||||
if (primaryRootPageId != _primaryIndex.RootPageId)
|
||||
{
|
||||
_primaryIndex.SetRootPageId(primaryRootPageId);
|
||||
}
|
||||
}
|
||||
|
||||
private void EnsureSchema()
|
||||
{
|
||||
var currentSchema = _mapper.GetSchema();
|
||||
var metadata = _indexManager.GetMetadata();
|
||||
|
||||
var persistedSchemas = _storage.GetSchemas(metadata.SchemaRootPageId);
|
||||
var latestPersisted = persistedSchemas.Count > 0 ? persistedSchemas[persistedSchemas.Count - 1] : null;
|
||||
@@ -363,12 +377,13 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
||||
/// Rebuilds an index by scanning all existing documents and re-inserting them.
|
||||
/// Called automatically when creating a new index.
|
||||
/// </summary>
|
||||
private void RebuildIndex(CollectionSecondaryIndex<TId, T> index)
|
||||
{
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
// Iterate all documents in the collection via primary index
|
||||
var minKey = new IndexKey(Array.Empty<byte>());
|
||||
var maxKey = new IndexKey(Enumerable.Repeat((byte)0xFF, 32).ToArray());
|
||||
private void RebuildIndex(CollectionSecondaryIndex<TId, T> index)
|
||||
{
|
||||
RefreshPrimaryIndexRootFromMetadata();
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
// Iterate all documents in the collection via primary index
|
||||
var minKey = new IndexKey(Array.Empty<byte>());
|
||||
var maxKey = new IndexKey(Enumerable.Repeat((byte)0xFF, 32).ToArray());
|
||||
|
||||
foreach (var entry in _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, transaction.TransactionId))
|
||||
{
|
||||
@@ -967,6 +982,7 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
||||
|
||||
private void InsertDataCore(TId id, T entity, ReadOnlySpan<byte> docData)
|
||||
{
|
||||
RefreshPrimaryIndexRootFromMetadata();
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
var (storedPayloadOverride, storedPayloadFlags) = PreparePayloadForStorage(docData);
|
||||
ReadOnlySpan<byte> storedPayload = storedPayloadOverride is null ? docData : storedPayloadOverride;
|
||||
@@ -1005,11 +1021,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
||||
/// <param name="id">ObjectId of the document</param>
|
||||
/// <param name="transaction">Optional transaction for isolation (supports Read Your Own Writes)</param>
|
||||
/// <returns>The document, or null if not found</returns>
|
||||
public T? FindById(TId id)
|
||||
{
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
try
|
||||
{
|
||||
public T? FindById(TId id)
|
||||
{
|
||||
RefreshPrimaryIndexRootFromMetadata();
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
try
|
||||
{
|
||||
var key = _mapper.ToIndexKey(id);
|
||||
|
||||
if (!_primaryIndex.TryFind(key, out var location, transaction.TransactionId))
|
||||
@@ -1031,11 +1048,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
||||
/// </summary>
|
||||
/// <param name="transaction">Transaction for isolation (REQUIRED for consistent reads during concurrent writes)</param>
|
||||
/// <returns>Enumerable of all documents</returns>
|
||||
public IEnumerable<T> FindAll()
|
||||
{
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
var txnId = transaction?.TransactionId ?? 0;
|
||||
var minKey = new IndexKey(Array.Empty<byte>());
|
||||
public IEnumerable<T> FindAll()
|
||||
{
|
||||
RefreshPrimaryIndexRootFromMetadata();
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
var txnId = transaction?.TransactionId ?? 0;
|
||||
var minKey = new IndexKey(Array.Empty<byte>());
|
||||
var maxKey = new IndexKey(Enumerable.Repeat((byte)0xFF, 32).ToArray());
|
||||
|
||||
foreach (var entry in _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, txnId))
|
||||
@@ -1202,11 +1220,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
|
||||
}
|
||||
}
|
||||
|
||||
private int UpdateBulkInternal(List<T> entityList)
|
||||
{
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
int updateCount = 0;
|
||||
const int BATCH_SIZE = 50;
|
||||
private int UpdateBulkInternal(List<T> entityList)
|
||||
{
|
||||
RefreshPrimaryIndexRootFromMetadata();
|
||||
var transaction = _transactionHolder.GetCurrentTransactionOrStart();
|
||||
int updateCount = 0;
|
||||
const int BATCH_SIZE = 50;
|
||||
|
||||
for (int batchStart = 0; batchStart < entityList.Count; batchStart += BATCH_SIZE)
|
||||
{
|
||||
@@ -1272,6 +1291,7 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
 
     private bool UpdateDataCore(TId id, T entity, ReadOnlySpan<byte> docData)
     {
+        RefreshPrimaryIndexRootFromMetadata();
         var transaction = _transactionHolder.GetCurrentTransactionOrStart();
         var key = _mapper.ToIndexKey(id);
         var (storedPayloadOverride, storedPayloadFlags) = PreparePayloadForStorage(docData);
@@ -1438,11 +1458,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
         return deleteCount;
     }
 
     private bool DeleteCore(TId id, bool notifyCdc = true)
     {
+        RefreshPrimaryIndexRootFromMetadata();
         var transaction = _transactionHolder.GetCurrentTransactionOrStart();
         var key = _mapper.ToIndexKey(id);
         if (!_primaryIndex.TryFind(key, out var location, transaction.TransactionId))
             return false;
 
         // Notify secondary indexes BEFORE deleting document from storage
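The `// Notify secondary indexes BEFORE deleting document from storage` comment encodes an ordering constraint: secondary index entries are derived from the document's old field values, so the document must still be readable at the moment it is unindexed. A toy illustration of that rule (not the engine's code, just the principle with in-memory dictionaries):

```csharp
// Toy model: `docs` stands in for document storage, `byName` for a secondary index on Name.
var docs = new Dictionary<int, (string Name, int Age)>();
var byName = new Dictionary<string, int>();

void Delete(int id)
{
    if (!docs.TryGetValue(id, out var doc))
        return;                 // nothing to delete

    byName.Remove(doc.Name);    // 1. unindex using the OLD value -- still readable here
    docs.Remove(id);            // 2. only now drop the document itself
}

// Reversing steps 1 and 2 would leave a dangling byName entry, because after
// docs.Remove(id) the old Name needed to locate the index entry is gone.
```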
@@ -1524,11 +1545,12 @@ public partial class DocumentCollection<TId, T> : IDisposable where T : class
     /// </summary>
     /// <param name="transaction">Optional transaction for isolation</param>
     /// <returns>Number of documents</returns>
     public int Count()
     {
+        RefreshPrimaryIndexRootFromMetadata();
         var transaction = _transactionHolder.GetCurrentTransactionOrStart();
         // Count all entries in primary index
         // Use generic min/max keys for the index
         var minKey = IndexKey.MinKey;
         var maxKey = IndexKey.MaxKey;
         return _primaryIndex.Range(minKey, maxKey, IndexDirection.Forward, transaction.TransactionId).Count();
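Note that `Count()` is implemented as a full forward scan of the primary index between the `MinKey`/`MaxKey` sentinels rather than a stored counter, so its cost grows with collection size. A small usage sketch (the `collection` variable and its `User` document type are assumptions carried over from the README examples):

```csharp
// Sketch: only FindAll() and Count() come from this diff; `collection` is assumed.
int total = collection.Count();                 // O(n): walks the whole primary index range

foreach (var user in collection.FindAll())      // lazy enumeration, one entry at a time
{
    Console.WriteLine($"{user.Name} ({user.Age})");
}

Console.WriteLine($"Total documents: {total}");
```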
@@ -82,6 +82,18 @@ public sealed class BTreeIndex
     /// Gets the current root page identifier for the B+Tree.
     /// </summary>
     public uint RootPageId => _rootPageId;
 
+    /// <summary>
+    /// Updates the in-memory root page identifier.
+    /// </summary>
+    /// <param name="rootPageId">The root page identifier to use for subsequent operations.</param>
+    internal void SetRootPageId(uint rootPageId)
+    {
+        if (rootPageId == 0)
+            throw new ArgumentOutOfRangeException(nameof(rootPageId));
+
+        _rootPageId = rootPageId;
+    }
+
     /// <summary>
     /// Reads a page using StorageEngine for transaction isolation.
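`SetRootPageId` only swaps the in-memory binding and rejects page id `0` as invalid; no pages are moved or rewritten. The intended call pattern is presumably something like the following, where the authoritative root page id comes from the collection metadata refreshed in the next hunk (a sketch, not the library's actual wiring):

```csharp
// Sketch: `primaryIndex` (BTreeIndex) and `metadata` (CollectionMetadata) are assumed to be in scope.
// Another handle may have split the root and persisted a new root page id; rebind if it changed.
uint persistedRoot = metadata.PrimaryRootPageId;

if (persistedRoot != 0 && persistedRoot != primaryIndex.RootPageId)
{
    primaryIndex.SetRootPageId(persistedRoot);   // throws ArgumentOutOfRangeException for id 0
}
```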
@@ -504,6 +504,37 @@ public sealed class CollectionIndexManager<TId, T> : IDisposable where T : class
     /// </summary>
     public uint PrimaryRootPageId => _metadata.PrimaryRootPageId;
 
+    /// <summary>
+    /// Refreshes cached metadata and index root bindings from storage.
+    /// </summary>
+    internal void RefreshFromStorageMetadata()
+    {
+        lock (_lock)
+        {
+            if (_disposed)
+                throw new ObjectDisposedException(nameof(CollectionIndexManager<TId, T>));
+
+            var latest = _storage.GetCollectionMetadata(_collectionName) ?? new CollectionMetadata { Name = _collectionName };
+            if (MetadataEquals(_metadata, latest))
+                return;
+
+            foreach (var index in _indexes.Values)
+            {
+                try { index.Dispose(); } catch { /* Best effort */ }
+            }
+
+            _indexes.Clear();
+            _metadata = latest;
+
+            foreach (var idxMeta in _metadata.Indexes)
+            {
+                var definition = RebuildDefinition(idxMeta.Name, idxMeta.PropertyPaths, idxMeta.IsUnique, idxMeta.Type, idxMeta.Dimensions, idxMeta.Metric);
+                var index = new CollectionSecondaryIndex<TId, T>(definition, _storage, _mapper, idxMeta.RootPageId);
+                _indexes[idxMeta.Name] = index;
+            }
+        }
+    }
+
     /// <summary>
     /// Sets the root page identifier for the primary index.
     /// </summary>
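The `RefreshPrimaryIndexRootFromMetadata()` call that now opens every read and write path in `DocumentCollection<TId, T>` is not itself shown in this diff. Assuming it simply chains the manager refresh above with the `SetRootPageId` rebind from the previous hunk, a plausible shape would be (hypothetical field names, not the actual implementation):

```csharp
// Hypothetical sketch: `_indexManager` is an assumed field; RefreshFromStorageMetadata,
// PrimaryRootPageId, RootPageId, and SetRootPageId are the members shown in this commit.
private void RefreshPrimaryIndexRootFromMetadata()
{
    // Re-read persisted metadata; rebuilds secondary index bindings only if it changed.
    _indexManager.RefreshFromStorageMetadata();

    // Rebind the primary B+Tree to the root page recorded in the refreshed metadata.
    var persistedRoot = _indexManager.PrimaryRootPageId;
    if (persistedRoot != 0 && persistedRoot != _primaryIndex.RootPageId)
        _primaryIndex.SetRootPageId(persistedRoot);
}
```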
@@ -526,11 +557,52 @@ public sealed class CollectionIndexManager<TId, T> : IDisposable where T : class
     /// <returns>The collection metadata.</returns>
     public CollectionMetadata GetMetadata() => _metadata;
 
     private void SaveMetadata()
     {
         UpdateMetadata();
         _storage.SaveCollectionMetadata(_metadata);
     }
+
+    private static bool MetadataEquals(CollectionMetadata left, CollectionMetadata right)
+    {
+        if (!string.Equals(left.Name, right.Name, StringComparison.OrdinalIgnoreCase))
+            return false;
+
+        if (left.PrimaryRootPageId != right.PrimaryRootPageId ||
+            left.SchemaRootPageId != right.SchemaRootPageId ||
+            left.Indexes.Count != right.Indexes.Count)
+        {
+            return false;
+        }
+
+        for (var i = 0; i < left.Indexes.Count; i++)
+        {
+            var l = left.Indexes[i];
+            var r = right.Indexes[i];
+            if (!string.Equals(l.Name, r.Name, StringComparison.OrdinalIgnoreCase) ||
+                l.RootPageId != r.RootPageId ||
+                l.Type != r.Type ||
+                l.IsUnique != r.IsUnique ||
+                l.Dimensions != r.Dimensions ||
+                l.Metric != r.Metric)
+            {
+                return false;
+            }
+
+            var lPaths = l.PropertyPaths ?? Array.Empty<string>();
+            var rPaths = r.PropertyPaths ?? Array.Empty<string>();
+            if (lPaths.Length != rPaths.Length)
+                return false;
+
+            for (var p = 0; p < lPaths.Length; p++)
+            {
+                if (!string.Equals(lPaths[p], rPaths[p], StringComparison.Ordinal))
+                    return false;
+            }
+        }
+
+        return true;
+    }
 
     /// <summary>
     /// Releases resources used by the index manager.
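`MetadataEquals` performs a shallow, ordered comparison and is what lets `RefreshFromStorageMetadata` return early without disposing and rebuilding index instances. Two different string comparers are used on purpose: collection and index names are matched case-insensitively, while property paths must match exactly, since they refer to case-sensitive C# member names. A small illustration (values are hypothetical):

```csharp
// Illustration of the two comparers chosen in MetadataEquals:
Console.WriteLine(string.Equals("Users_Age_Idx", "users_age_idx",
    StringComparison.OrdinalIgnoreCase));                        // True  -> index names match
Console.WriteLine(string.Equals("Name", "name",
    StringComparison.Ordinal));                                   // False -> property paths differ
```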
@@ -645,6 +645,39 @@ public sealed class PageFile : IDisposable
         }
     }
 
+    /// <summary>
+    /// Trims excess physical file capacity beyond the current logical page count.
+    /// </summary>
+    /// <returns>The number of bytes removed from the file.</returns>
+    public long TrimExcessCapacityToLogicalPageCount()
+    {
+        lock (_lock)
+        {
+            EnsureFileOpen();
+
+            var targetLengthBytes = (long)_nextPageId * _config.PageSize;
+            var currentLengthBytes = _fileStream!.Length;
+            if (currentLengthBytes <= targetLengthBytes)
+                return 0;
+
+            _mappedFile?.Dispose();
+            _mappedFile = null;
+
+            _fileStream.SetLength(targetLengthBytes);
+            _fileStream.Flush(flushToDisk: true);
+
+            _mappedFile = MemoryMappedFile.CreateFromFile(
+                _fileStream,
+                null,
+                targetLengthBytes,
+                _config.Access,
+                HandleInheritability.None,
+                leaveOpen: true);
+
+            return currentLengthBytes - targetLengthBytes;
+        }
+    }
+
     /// <summary>
     /// Defragments a slotted page in place by packing live slot payloads densely at the end of the page.
     /// </summary>
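A hedged usage sketch for the new trim operation: after a large delete or compaction pass, the physical file can be shrunk back to its logical page count and the reclaimed space reported. How a `PageFile` instance is obtained from user code is an assumption; only `TrimExcessCapacityToLogicalPageCount` itself comes from this diff.

```csharp
// Sketch: `pageFile` is an assumed PageFile instance reachable from maintenance code.
// The call disposes the memory-mapped view, truncates the file, flushes, and remaps it.
long reclaimedBytes = pageFile.TrimExcessCapacityToLogicalPageCount();

Console.WriteLine(reclaimedBytes > 0
    ? $"Trimmed {reclaimedBytes / 1024.0:F1} KiB of pre-allocated capacity."
    : "File already matches its logical size; nothing to trim.");
```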
File diff suppressed because it is too large