Initial import of the CBDDC codebase with docs and tests. Add a .NET-focused gitignore to keep generated artifacts out of source control.
Some checks failed
CI / verify (push) Has been cancelled

This commit is contained in:
Joseph Doherty
2026-02-20 13:03:21 -05:00
commit 08bfc17218
218 changed files with 33910 additions and 0 deletions

View File

@@ -0,0 +1,179 @@
using System;
using System.Collections.Generic;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Cache;
/// <summary>
/// LRU cache entry pairing a document with its linked-list node.
/// </summary>
internal class CacheEntry
{
    /// <summary>
    /// Initializes a new instance of the <see cref="CacheEntry"/> class.
    /// </summary>
    /// <param name="document">The cached document.</param>
    /// <param name="node">The linked-list node used for LRU tracking.</param>
    public CacheEntry(Document document, LinkedListNode<string> node)
    {
        Document = document;
        Node = node;
    }

    /// <summary>
    /// Gets the cached document.
    /// </summary>
    public Document Document { get; }

    /// <summary>
    /// Gets the linked-list node used for LRU tracking.
    /// </summary>
    public LinkedListNode<string> Node { get; }
}
/// <summary>
/// In-memory LRU cache for documents. All cache state is guarded by a single
/// lock, so the instance is safe for concurrent use.
/// </summary>
public class DocumentCache : IDocumentCache
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
    private readonly Dictionary<string, CacheEntry> _cache = new();
    private readonly LinkedList<string> _lru = new();
    private readonly ILogger<DocumentCache> _logger;
    private readonly object _lock = new();

    // Statistics
    private long _hits = 0;
    private long _misses = 0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DocumentCache"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The configuration provider used for cache size limits.</param>
    /// <param name="logger">The logger instance.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="peerNodeConfigurationProvider"/> is null.</exception>
    public DocumentCache(IPeerNodeConfigurationProvider peerNodeConfigurationProvider, ILogger<DocumentCache>? logger = null)
    {
        // Null guard added for consistency with CBDDCHealthCheck's constructor validation.
        _peerNodeConfigurationProvider = peerNodeConfigurationProvider
            ?? throw new ArgumentNullException(nameof(peerNodeConfigurationProvider));
        _logger = logger ?? NullLogger<DocumentCache>.Instance;
    }

    /// <summary>
    /// Gets a document from cache and marks it as most recently used.
    /// </summary>
    /// <param name="collection">The document collection name.</param>
    /// <param name="key">The document key.</param>
    /// <returns>A task whose result is the cached document, or <see langword="null"/> if not found.</returns>
    public Task<Document?> Get(string collection, string key)
    {
        // Not declared async: the method completes synchronously under the lock,
        // so Task.FromResult avoids the CS1998 async-without-await warning.
        lock (_lock)
        {
            var cacheKey = $"{collection}:{key}";
            if (_cache.TryGetValue(cacheKey, out var entry))
            {
                // Move to front (most recently used).
                _lru.Remove(entry.Node);
                _lru.AddFirst(entry.Node);
                _hits++;
                _logger.LogTrace("Cache hit for {Key}", cacheKey);
                return Task.FromResult<Document?>(entry.Document);
            }

            _misses++;
            _logger.LogTrace("Cache miss for {Key}", cacheKey);
            return Task.FromResult<Document?>(null);
        }
    }

    /// <summary>
    /// Sets a document in cache, evicting least-recently-used entries when full.
    /// </summary>
    /// <param name="collection">The document collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="document">The document to cache.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task Set(string collection, string key, Document document)
    {
        // Read the size limit before taking the lock; awaiting inside a lock is illegal.
        var peerConfig = await _peerNodeConfigurationProvider.GetConfiguration();
        lock (_lock)
        {
            var cacheKey = $"{collection}:{key}";

            // If already exists, update and move to front.
            if (_cache.TryGetValue(cacheKey, out var existingEntry))
            {
                _lru.Remove(existingEntry.Node);
                var newNode = _lru.AddFirst(cacheKey);
                _cache[cacheKey] = new CacheEntry(document, newNode);
                _logger.LogTrace("Updated cache for {Key}", cacheKey);
                return;
            }

            // Evict while over capacity. The explicit null check replaces `_lru.Last!`,
            // which would throw NullReferenceException if MaxDocumentCacheSize <= 0.
            while (_cache.Count >= peerConfig.MaxDocumentCacheSize && _lru.Last != null)
            {
                var oldest = _lru.Last.Value;
                _lru.RemoveLast();
                _cache.Remove(oldest);
                _logger.LogTrace("Evicted oldest cache entry {Key}", oldest);
            }

            var node = _lru.AddFirst(cacheKey);
            _cache[cacheKey] = new CacheEntry(document, node);
            _logger.LogTrace("Added to cache: {Key}", cacheKey);
        }
    }

    /// <summary>
    /// Removes a document from cache; a miss is a no-op.
    /// </summary>
    /// <param name="collection">The document collection name.</param>
    /// <param name="key">The document key.</param>
    public void Remove(string collection, string key)
    {
        lock (_lock)
        {
            var cacheKey = $"{collection}:{key}";
            if (_cache.TryGetValue(cacheKey, out var entry))
            {
                _lru.Remove(entry.Node);
                _cache.Remove(cacheKey);
                _logger.LogTrace("Removed from cache: {Key}", cacheKey);
            }
        }
    }

    /// <summary>
    /// Clears all cached documents. Hit/miss counters are intentionally preserved.
    /// </summary>
    public void Clear()
    {
        lock (_lock)
        {
            var count = _cache.Count;
            _cache.Clear();
            _lru.Clear();
            _logger.LogInformation("Cleared cache ({Count} entries)", count);
        }
    }

    /// <summary>
    /// Gets cache statistics.
    /// </summary>
    /// <returns>Hits, misses, current entry count, and hit rate (0 when no lookups yet).</returns>
    public (long Hits, long Misses, int Size, double HitRate) GetStatistics()
    {
        lock (_lock)
        {
            var total = _hits + _misses;
            var hitRate = total > 0 ? (double)_hits / total : 0;
            return (_hits, _misses, _cache.Count, hitRate);
        }
    }
}

View File

@@ -0,0 +1,45 @@
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Cache
{
/// <summary>
/// Defines operations for caching documents by collection and key.
/// </summary>
public interface IDocumentCache
{
    /// <summary>
    /// Gets a cached document by collection and key.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <returns>The cached document, or <see langword="null"/> if not found.</returns>
    Task<Document?> Get(string collection, string key);

    /// <summary>
    /// Adds or updates a cached document.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="document">The document to cache.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task Set(string collection, string key, Document document);

    /// <summary>
    /// Removes a cached document by collection and key.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    void Remove(string collection, string key);

    /// <summary>
    /// Clears all cached documents.
    /// </summary>
    void Clear();

    /// <summary>
    /// Gets cache hit/miss statistics.
    /// </summary>
    /// <returns>A tuple containing hits, misses, current size, and hit rate.</returns>
    (long Hits, long Misses, int Size, double HitRate) GetStatistics();
}
}

View File

@@ -0,0 +1,24 @@
using System;
using System.Collections.Generic;
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Event arguments for when changes are applied to the peer store.
/// </summary>
public class ChangesAppliedEventArgs : EventArgs
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ChangesAppliedEventArgs"/> class.
    /// </summary>
    /// <param name="changes">The changes that were applied.</param>
    public ChangesAppliedEventArgs(IEnumerable<OplogEntry> changes) => Changes = changes;

    /// <summary>
    /// Gets the changes that were applied.
    /// </summary>
    public IEnumerable<OplogEntry> Changes { get; }
}

View File

@@ -0,0 +1,82 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.CBDDC.Core.Storage;
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics;
/// <summary>
/// Provides health check functionality.
/// </summary>
public class CBDDCHealthCheck : ICBDDCHealthCheck
{
    private readonly IOplogStore _store;
    private readonly ISyncStatusTracker _syncTracker;
    private readonly ILogger<CBDDCHealthCheck> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCHealthCheck"/> class.
    /// </summary>
    /// <param name="store">The oplog store used for database health checks.</param>
    /// <param name="syncTracker">The tracker that provides synchronization status.</param>
    /// <param name="logger">The logger instance.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="store"/> or <paramref name="syncTracker"/> is null.</exception>
    public CBDDCHealthCheck(
        IOplogStore store,
        ISyncStatusTracker syncTracker,
        ILogger<CBDDCHealthCheck>? logger = null)
    {
        _store = store ?? throw new ArgumentNullException(nameof(store));
        _syncTracker = syncTracker ?? throw new ArgumentNullException(nameof(syncTracker));
        _logger = logger ?? NullLogger<CBDDCHealthCheck>.Instance;
    }

    /// <summary>
    /// Performs a comprehensive health check covering the database and the peer network.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the health check.</param>
    /// <returns>The aggregated <see cref="HealthStatus"/>.</returns>
    public async Task<HealthStatus> CheckAsync(CancellationToken cancellationToken = default)
    {
        var report = new HealthStatus();

        await ProbeDatabaseAsync(report, cancellationToken);
        CollectSyncDiagnostics(report);

        _logger.LogInformation("Health check completed: Database={DbHealth}, Network={NetHealth}, Peers={Peers}",
            report.DatabaseHealthy, report.NetworkHealthy, report.ConnectedPeers);
        return report;
    }

    // Probes the oplog store with a cheap read; any exception marks the database unhealthy.
    private async Task ProbeDatabaseAsync(HealthStatus report, CancellationToken cancellationToken)
    {
        try
        {
            // Try to get latest timestamp (simple database operation).
            var timestamp = await _store.GetLatestTimestampAsync(cancellationToken);
            report.DatabaseHealthy = true;
            _logger.LogDebug("Database health check passed (latest timestamp: {Timestamp})", timestamp);
        }
        catch (Exception ex)
        {
            report.DatabaseHealthy = false;
            report.Errors.Add($"Database check failed: {ex.Message}");
            _logger.LogError(ex, "Database health check failed");
        }
    }

    // Copies network/peer state, the most recent sync errors (capped at 5),
    // and throughput metadata from the sync tracker into the report.
    private void CollectSyncDiagnostics(HealthStatus report)
    {
        var syncStatus = _syncTracker.GetStatus();
        report.NetworkHealthy = syncStatus.IsOnline;
        report.ConnectedPeers = syncStatus.ActivePeers.Count(p => p.IsConnected);
        report.LastSyncTime = syncStatus.LastSyncTime;

        foreach (var error in syncStatus.SyncErrors.Take(5))
        {
            report.Errors.Add($"{error.Timestamp:yyyy-MM-dd HH:mm:ss} - {error.Message}");
        }

        report.Metadata["TotalDocumentsSynced"] = syncStatus.TotalDocumentsSynced;
        report.Metadata["TotalBytesTransferred"] = syncStatus.TotalBytesTransferred;
        report.Metadata["ActivePeers"] = syncStatus.ActivePeers.Count;
    }
}

View File

@@ -0,0 +1,148 @@
using System;
using System.Collections.Generic;
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics;
/// <summary>
/// Represents the health status of a CBDDC instance.
/// </summary>
public class HealthStatus
{
    /// <summary>
    /// Indicates if the database is healthy.
    /// </summary>
    public bool DatabaseHealthy { get; set; }

    /// <summary>
    /// Indicates if network connectivity is available.
    /// </summary>
    public bool NetworkHealthy { get; set; }

    /// <summary>
    /// Number of currently connected peers.
    /// </summary>
    public int ConnectedPeers { get; set; }

    /// <summary>
    /// Timestamp of the last successful sync operation.
    /// </summary>
    public DateTime? LastSyncTime { get; set; }

    /// <summary>
    /// List of recent errors.
    /// </summary>
    public List<string> Errors { get; set; } = new();

    /// <summary>
    /// Additional diagnostic information.
    /// </summary>
    public Dictionary<string, object> Metadata { get; set; } = new();

    /// <summary>
    /// Overall health: both subsystems healthy and no recorded errors.
    /// </summary>
    public bool IsHealthy => DatabaseHealthy && NetworkHealthy && Errors.Count == 0;
}
/// <summary>
/// Represents the synchronization status.
/// </summary>
public class SyncStatus
{
    /// <summary>
    /// Indicates if the node is currently online.
    /// </summary>
    public bool IsOnline { get; set; }

    /// <summary>
    /// Timestamp of the last sync operation.
    /// </summary>
    public DateTime? LastSyncTime { get; set; }

    /// <summary>
    /// Number of pending operations in the offline queue.
    /// </summary>
    public int PendingOperations { get; set; }

    /// <summary>
    /// Total number of documents synced.
    /// </summary>
    public long TotalDocumentsSynced { get; set; }

    /// <summary>
    /// Total bytes transferred.
    /// </summary>
    public long TotalBytesTransferred { get; set; }

    /// <summary>
    /// List of active peer nodes.
    /// </summary>
    public List<PeerInfo> ActivePeers { get; set; } = new();

    /// <summary>
    /// Recent sync errors.
    /// </summary>
    public List<SyncError> SyncErrors { get; set; } = new();
}
/// <summary>
/// Information about a peer node.
/// </summary>
public class PeerInfo
{
    /// <summary>
    /// Unique identifier of the peer.
    /// </summary>
    public string NodeId { get; set; } = "";

    /// <summary>
    /// Network address of the peer.
    /// </summary>
    public string Address { get; set; } = "";

    /// <summary>
    /// Indicates if the peer is currently connected.
    /// </summary>
    public bool IsConnected { get; set; }

    /// <summary>
    /// Last time the peer was seen.
    /// </summary>
    public DateTime LastSeen { get; set; }

    /// <summary>
    /// Number of successful syncs with this peer.
    /// </summary>
    public int SuccessfulSyncs { get; set; }

    /// <summary>
    /// Number of failed syncs with this peer.
    /// </summary>
    public int FailedSyncs { get; set; }
}
/// <summary>
/// Represents a synchronization error.
/// </summary>
public class SyncError
{
    /// <summary>
    /// Timestamp when the error occurred.
    /// </summary>
    public DateTime Timestamp { get; set; }

    /// <summary>
    /// Error message.
    /// </summary>
    public string Message { get; set; } = "";

    /// <summary>
    /// Peer node ID if applicable.
    /// </summary>
    public string? PeerNodeId { get; set; }

    /// <summary>
    /// Error code.
    /// </summary>
    public string? ErrorCode { get; set; }
}

View File

@@ -0,0 +1,15 @@
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics
{
/// <summary>
/// Defines a health check contract for CBDDC components.
/// </summary>
public interface ICBDDCHealthCheck
{
/// <summary>
/// Performs a health check for the implementing component.
/// </summary>
/// <param name="cancellationToken">Cancellation token.</param>
/// <returns>The resulting health status.</returns>
Task<HealthStatus> CheckAsync(CancellationToken cancellationToken = default);
}
}

View File

@@ -0,0 +1,63 @@
using System;
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics
{
/// <summary>
/// Tracks synchronization status and peer health metrics.
/// </summary>
public interface ISyncStatusTracker
{
    /// <summary>
    /// Sets whether the local node is currently online.
    /// </summary>
    /// <param name="isOnline">A value indicating whether the node is online.</param>
    void SetOnlineStatus(bool isOnline);

    /// <summary>
    /// Records synchronization throughput metrics.
    /// </summary>
    /// <param name="documentCount">The number of synchronized documents.</param>
    /// <param name="bytesTransferred">The number of bytes transferred.</param>
    void RecordSync(int documentCount, long bytesTransferred);

    /// <summary>
    /// Records an error encountered during synchronization.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="peerNodeId">The related peer node identifier, if available.</param>
    /// <param name="errorCode">An optional error code.</param>
    void RecordError(string message, string? peerNodeId = null, string? errorCode = null);

    /// <summary>
    /// Updates peer connectivity details.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    /// <param name="address">The peer network address.</param>
    /// <param name="isConnected">A value indicating whether the peer is connected.</param>
    void UpdatePeer(string nodeId, string address, bool isConnected);

    /// <summary>
    /// Records a successful operation for the specified peer.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    void RecordPeerSuccess(string nodeId);

    /// <summary>
    /// Records a failed operation for the specified peer.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    void RecordPeerFailure(string nodeId);

    /// <summary>
    /// Gets the current synchronization status snapshot.
    /// </summary>
    /// <returns>The current <see cref="SyncStatus"/>.</returns>
    SyncStatus GetStatus();

    /// <summary>
    /// Removes peer entries that have been inactive longer than the specified threshold.
    /// </summary>
    /// <param name="inactiveThreshold">The inactivity threshold used to prune peers.</param>
    void CleanupInactivePeers(TimeSpan inactiveThreshold);
}
}

View File

@@ -0,0 +1,198 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics;
/// <summary>
/// Tracks synchronization status and provides diagnostics.
/// All mutable state is guarded by a single lock, so the tracker is safe for concurrent use.
/// </summary>
public class SyncStatusTracker : ISyncStatusTracker
{
    private const int MaxErrorHistory = 50;

    private readonly ILogger<SyncStatusTracker> _logger;
    private readonly object _lock = new();
    private readonly List<PeerInfo> _activePeers = new();
    private readonly Queue<SyncError> _recentErrors = new();

    private bool _isOnline = false;
    private DateTime? _lastSyncTime;
    private long _totalDocumentsSynced = 0;
    private long _totalBytesTransferred = 0;

    /// <summary>
    /// Initializes a new instance of the <see cref="SyncStatusTracker"/> class.
    /// </summary>
    /// <param name="logger">Optional logger instance.</param>
    public SyncStatusTracker(ILogger<SyncStatusTracker>? logger = null)
        => _logger = logger ?? NullLogger<SyncStatusTracker>.Instance;

    /// <summary>
    /// Updates online status; logs only when the value actually changes.
    /// </summary>
    /// <param name="isOnline">Whether the node is currently online.</param>
    public void SetOnlineStatus(bool isOnline)
    {
        lock (_lock)
        {
            if (_isOnline == isOnline)
            {
                return;
            }

            _isOnline = isOnline;
            _logger.LogInformation("Status changed to {Status}", isOnline ? "Online" : "Offline");
        }
    }

    /// <summary>
    /// Records a successful sync operation and accumulates throughput totals.
    /// </summary>
    /// <param name="documentCount">The number of documents synchronized.</param>
    /// <param name="bytesTransferred">The number of bytes transferred.</param>
    public void RecordSync(int documentCount, long bytesTransferred)
    {
        lock (_lock)
        {
            _lastSyncTime = DateTime.UtcNow;
            _totalDocumentsSynced += documentCount;
            _totalBytesTransferred += bytesTransferred;
            _logger.LogDebug("Synced {Count} documents ({Bytes} bytes)", documentCount, bytesTransferred);
        }
    }

    /// <summary>
    /// Records a sync error, retaining only the most recent <see cref="MaxErrorHistory"/> entries.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="peerNodeId">The related peer node identifier, if available.</param>
    /// <param name="errorCode">The error code, if available.</param>
    public void RecordError(string message, string? peerNodeId = null, string? errorCode = null)
    {
        lock (_lock)
        {
            _recentErrors.Enqueue(new SyncError
            {
                Timestamp = DateTime.UtcNow,
                Message = message,
                PeerNodeId = peerNodeId,
                ErrorCode = errorCode
            });

            // Drop the oldest entries once the bounded history overflows.
            while (_recentErrors.Count > MaxErrorHistory)
            {
                _recentErrors.Dequeue();
            }

            _logger.LogWarning("Sync error recorded: {Message} (Peer: {Peer})", message, peerNodeId ?? "N/A");
        }
    }

    /// <summary>
    /// Updates peer information, registering the peer on first sight.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    /// <param name="address">The peer address.</param>
    /// <param name="isConnected">Whether the peer is currently connected.</param>
    public void UpdatePeer(string nodeId, string address, bool isConnected)
    {
        lock (_lock)
        {
            var known = FindPeer(nodeId);
            if (known is null)
            {
                _activePeers.Add(new PeerInfo
                {
                    NodeId = nodeId,
                    Address = address,
                    IsConnected = isConnected,
                    LastSeen = DateTime.UtcNow
                });
                _logger.LogInformation("New peer discovered: {NodeId} at {Address}", nodeId, address);
            }
            else
            {
                known.Address = address;
                known.IsConnected = isConnected;
                known.LastSeen = DateTime.UtcNow;
            }
        }
    }

    /// <summary>
    /// Records a successful sync with a peer; unknown peers are ignored.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    public void RecordPeerSuccess(string nodeId)
    {
        lock (_lock)
        {
            var peer = FindPeer(nodeId);
            if (peer is not null)
            {
                peer.SuccessfulSyncs++;
            }
        }
    }

    /// <summary>
    /// Records a failed sync with a peer; unknown peers are ignored.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    public void RecordPeerFailure(string nodeId)
    {
        lock (_lock)
        {
            var peer = FindPeer(nodeId);
            if (peer is not null)
            {
                peer.FailedSyncs++;
            }
        }
    }

    /// <summary>
    /// Gets a snapshot of the current sync status. The peer and error lists are
    /// copied so callers cannot mutate internal collections.
    /// </summary>
    public SyncStatus GetStatus()
    {
        lock (_lock)
        {
            return new SyncStatus
            {
                IsOnline = _isOnline,
                LastSyncTime = _lastSyncTime,
                PendingOperations = 0, // Will be set by caller if offline queue is available
                ActivePeers = _activePeers.ToList(),
                SyncErrors = _recentErrors.ToList(),
                TotalDocumentsSynced = _totalDocumentsSynced,
                TotalBytesTransferred = _totalBytesTransferred
            };
        }
    }

    /// <summary>
    /// Removes peers not seen within the given threshold.
    /// </summary>
    /// <param name="inactiveThreshold">The inactivity threshold used to remove peers.</param>
    public void CleanupInactivePeers(TimeSpan inactiveThreshold)
    {
        lock (_lock)
        {
            var staleBefore = DateTime.UtcNow - inactiveThreshold;
            var removedCount = _activePeers.RemoveAll(p => p.LastSeen < staleBefore);
            if (removedCount > 0)
            {
                _logger.LogInformation("Removed {Count} inactive peers", removedCount);
            }
        }
    }

    // Linear scan; assumes peer counts stay small — TODO confirm before peer lists grow large.
    private PeerInfo? FindPeer(string nodeId) => _activePeers.FirstOrDefault(p => p.NodeId == nodeId);
}

View File

@@ -0,0 +1,83 @@
using ZB.MOM.WW.CBDDC.Core.Sync;
using System;
using System.Text.Json;
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Represents a stored document and its synchronization metadata.
/// </summary>
public class Document
{
    /// <summary>
    /// Gets the collection that contains the document.
    /// </summary>
    public string Collection { get; private set; }

    /// <summary>
    /// Gets the document key.
    /// </summary>
    public string Key { get; private set; }

    /// <summary>
    /// Gets the document content.
    /// </summary>
    public JsonElement Content { get; private set; }

    /// <summary>
    /// Gets the timestamp of the latest applied update.
    /// </summary>
    public HlcTimestamp UpdatedAt { get; private set; }

    /// <summary>
    /// Gets a value indicating whether the document is deleted.
    /// </summary>
    public bool IsDeleted { get; private set; }

    /// <summary>
    /// Initializes a new instance of the <see cref="Document"/> class.
    /// </summary>
    /// <param name="collection">The collection that contains the document.</param>
    /// <param name="key">The document key.</param>
    /// <param name="content">The document content.</param>
    /// <param name="updatedAt">The timestamp of the latest applied update.</param>
    /// <param name="isDeleted">Whether the document is marked as deleted.</param>
    public Document(string collection, string key, JsonElement content, HlcTimestamp updatedAt, bool isDeleted)
    {
        Collection = collection;
        Key = key;
        Content = content;
        UpdatedAt = updatedAt;
        IsDeleted = isDeleted;
    }

    /// <summary>
    /// Merges a remote operation into the current document using last-write-wins,
    /// or a custom conflict resolver when one is supplied.
    /// </summary>
    /// <param name="oplogEntry">The remote operation to merge.</param>
    /// <param name="resolver">An optional conflict resolver for custom merge behavior.</param>
    public void Merge(OplogEntry oplogEntry, IConflictResolver? resolver = null)
    {
        // Silently ignore null entries and entries addressed to a different document.
        if (oplogEntry == null || oplogEntry.Collection != Collection || oplogEntry.Key != Key)
        {
            return;
        }

        if (resolver != null)
        {
            // Delegate the merge decision entirely to the resolver.
            var resolution = resolver.Resolve(this, oplogEntry);
            if (resolution.ShouldApply && resolution.MergedDocument != null)
            {
                Content = resolution.MergedDocument.Content;
                UpdatedAt = resolution.MergedDocument.UpdatedAt;
                IsDeleted = resolution.MergedDocument.IsDeleted;
            }
            return;
        }

        // Last-write-wins: apply the remote entry unless it is strictly older.
        if (UpdatedAt <= oplogEntry.Timestamp)
        {
            Content = oplogEntry.Payload ?? default;
            UpdatedAt = oplogEntry.Timestamp;
            IsDeleted = oplogEntry.Operation == OperationType.Delete;
        }
    }
}

View File

@@ -0,0 +1,189 @@
using System;
namespace ZB.MOM.WW.CBDDC.Core.Exceptions;
/// <summary>
/// Base exception for all CBDDC-related errors.
/// </summary>
public class CBDDCException : Exception
{
    /// <summary>
    /// Gets the error code used for programmatic error handling.
    /// </summary>
    public string ErrorCode { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCException"/> class.
    /// </summary>
    /// <param name="errorCode">The application-specific error code.</param>
    /// <param name="message">The exception message.</param>
    public CBDDCException(string errorCode, string message)
        : base(message)
        => ErrorCode = errorCode;

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCException"/> class with an inner exception.
    /// </summary>
    /// <param name="errorCode">The application-specific error code.</param>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public CBDDCException(string errorCode, string message, Exception innerException)
        : base(message, innerException)
        => ErrorCode = errorCode;
}
/// <summary>
/// Exception thrown when network operations fail.
/// Carries the "NETWORK_ERROR" error code.
/// </summary>
public class NetworkException : CBDDCException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="NetworkException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public NetworkException(string message)
        : base("NETWORK_ERROR", message)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="NetworkException"/> class with an inner exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public NetworkException(string message, Exception innerException)
        : base("NETWORK_ERROR", message, innerException)
    {
    }
}
/// <summary>
/// Exception thrown when persistence operations fail.
/// Carries the "PERSISTENCE_ERROR" error code.
/// </summary>
public class PersistenceException : CBDDCException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="PersistenceException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public PersistenceException(string message)
        : base("PERSISTENCE_ERROR", message)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="PersistenceException"/> class with an inner exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public PersistenceException(string message, Exception innerException)
        : base("PERSISTENCE_ERROR", message, innerException)
    {
    }
}
/// <summary>
/// Exception thrown when synchronization operations fail.
/// Carries the "SYNC_ERROR" error code.
/// </summary>
public class SyncException : CBDDCException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="SyncException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public SyncException(string message)
        : base("SYNC_ERROR", message)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="SyncException"/> class with an inner exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public SyncException(string message, Exception innerException)
        : base("SYNC_ERROR", message, innerException)
    {
    }
}
/// <summary>
/// Exception thrown when configuration is invalid.
/// Carries the "CONFIG_ERROR" error code.
/// </summary>
public class ConfigurationException : CBDDCException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ConfigurationException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public ConfigurationException(string message)
        : base("CONFIG_ERROR", message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="ConfigurationException"/> class.
    /// Overload added for consistency: every sibling CBDDC exception type exposes
    /// an inner-exception constructor.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public ConfigurationException(string message, Exception innerException)
        : base("CONFIG_ERROR", message, innerException) { }
}
/// <summary>
/// Exception thrown when database corruption is detected.
/// Inherits the "PERSISTENCE_ERROR" error code from <see cref="PersistenceException"/>.
/// </summary>
public class DatabaseCorruptionException : PersistenceException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseCorruptionException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public DatabaseCorruptionException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseCorruptionException"/> class with an inner exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public DatabaseCorruptionException(string message, Exception innerException)
        : base(message, innerException)
    {
    }
}
/// <summary>
/// Exception thrown when a timeout occurs.
/// NOTE(review): this name shadows <see cref="System.TimeoutException"/>; files importing
/// both namespaces must fully qualify. Renaming would break existing callers, so the
/// name is kept as-is.
/// </summary>
public class TimeoutException : CBDDCException
{
    /// <summary>
    /// Gets the name of the operation that timed out.
    /// </summary>
    public string Operation { get; }

    /// <summary>
    /// Gets the timeout, in milliseconds, that was exceeded.
    /// </summary>
    public int TimeoutMs { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="TimeoutException"/> class.
    /// </summary>
    /// <param name="operation">The operation that timed out.</param>
    /// <param name="timeoutMs">The timeout in milliseconds.</param>
    public TimeoutException(string operation, int timeoutMs)
        : base("TIMEOUT_ERROR", $"Operation '{operation}' timed out after {timeoutMs}ms")
    {
        // Expose the raw values (mirrors DocumentNotFoundException) so handlers
        // do not have to parse the message text.
        Operation = operation;
        TimeoutMs = timeoutMs;
    }
}
/// <summary>
/// Exception thrown when a document is not found in a collection.
/// </summary>
public class DocumentNotFoundException : PersistenceException
{
    /// <summary>
    /// Gets the collection where the document was searched.
    /// </summary>
    public string Collection { get; }

    /// <summary>
    /// Gets the document key that was not found.
    /// </summary>
    public string Key { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="DocumentNotFoundException"/> class.
    /// </summary>
    /// <param name="collection">The collection where the document was searched.</param>
    /// <param name="key">The document key that was not found.</param>
    public DocumentNotFoundException(string collection, string key)
        : base($"Document with key '{key}' not found in collection '{collection}'.")
    {
        Collection = collection;
        Key = key;
    }
}
/// <summary>
/// Exception thrown when a concurrency conflict occurs during persistence operations.
/// </summary>
public class CBDDCConcurrencyException : PersistenceException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCConcurrencyException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public CBDDCConcurrencyException(string message) : base(message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCConcurrencyException"/> class.
    /// Overload added for consistency with the other CBDDC exception types, which all
    /// expose an inner-exception constructor.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public CBDDCConcurrencyException(string message, Exception innerException)
        : base(message, innerException) { }
}

View File

@@ -0,0 +1,137 @@
using System;
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Represents a Hybrid Logical Clock timestamp.
/// Provides a Total Ordering of events in a distributed system.
/// Implements value semantics and comparable interfaces.
/// </summary>
public readonly struct HlcTimestamp : IComparable<HlcTimestamp>, IComparable, IEquatable<HlcTimestamp>
{
/// <summary>
/// Gets the physical time component of the timestamp.
/// </summary>
public long PhysicalTime { get; }
/// <summary>
/// Gets the logical counter component used to order events with equal physical time.
/// </summary>
public int LogicalCounter { get; }
/// <summary>
/// Gets the node identifier that produced this timestamp.
/// </summary>
public string NodeId { get; }
/// <summary>
/// Initializes a new instance of the <see cref="HlcTimestamp"/> struct.
/// </summary>
/// <param name="physicalTime">The physical time component.</param>
/// <param name="logicalCounter">The logical counter component.</param>
/// <param name="nodeId">The node identifier.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="nodeId"/> is null.</exception>
public HlcTimestamp(long physicalTime, int logicalCounter, string nodeId)
{
    NodeId = nodeId ?? throw new ArgumentNullException(nameof(nodeId));
    PhysicalTime = physicalTime;
    LogicalCounter = logicalCounter;
}
/// <summary>
/// Compares two timestamps to establish a total order.
/// Order: PhysicalTime -> LogicalCounter -> NodeId (lexicographical tie-breaker).
/// </summary>
/// <param name="other">The other timestamp to compare with this instance.</param>
/// <returns>
/// A value less than zero if this instance is earlier than <paramref name="other"/>, zero if they are equal,
/// or greater than zero if this instance is later than <paramref name="other"/>.
/// </returns>
public int CompareTo(HlcTimestamp other)
{
    var byTime = PhysicalTime.CompareTo(other.PhysicalTime);
    if (byTime != 0)
    {
        return byTime;
    }

    var byCounter = LogicalCounter.CompareTo(other.LogicalCounter);
    if (byCounter != 0)
    {
        return byCounter;
    }

    // Ordinal node-id comparison keeps the tie-break stable across cultures/platforms.
    return string.CompareOrdinal(NodeId, other.NodeId);
}
/// <summary>
/// Compares this instance with another object.
/// </summary>
/// <param name="obj">The object to compare with this instance.</param>
/// <returns>
/// A value less than zero if this instance is earlier than <paramref name="obj"/>, zero if equal, or greater
/// than zero if later.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="obj"/> is not an <see cref="HlcTimestamp"/>.</exception>
public int CompareTo(object? obj) => obj switch
{
    // Per convention, any instance compares greater than null.
    null => 1,
    HlcTimestamp other => CompareTo(other),
    _ => throw new ArgumentException($"Object must be of type {nameof(HlcTimestamp)}")
};
/// <summary>
/// Determines whether this instance and another timestamp are equal.
/// </summary>
/// <param name="other">The other timestamp to compare.</param>
/// <returns><see langword="true"/> if the timestamps are equal; otherwise, <see langword="false"/>.</returns>
public bool Equals(HlcTimestamp other)
{
return PhysicalTime == other.PhysicalTime &&
LogicalCounter == other.LogicalCounter &&
string.Equals(NodeId, other.NodeId, StringComparison.Ordinal);
}
/// <inheritdoc />
public override bool Equals(object? obj)
{
return obj is HlcTimestamp other && Equals(other);
}
/// <inheritdoc />
public override int GetHashCode()
{
unchecked
{
var hashCode = PhysicalTime.GetHashCode();
hashCode = (hashCode * 397) ^ LogicalCounter;
// Ensure HashCode uses the same comparison logic as Equals/CompareTo
// Handle null NodeId gracefully (possible via default(HlcTimestamp))
hashCode = (hashCode * 397) ^ (NodeId != null ? StringComparer.Ordinal.GetHashCode(NodeId) : 0);
return hashCode;
}
}
    /// <summary>Determines whether two timestamps are equal.</summary>
    public static bool operator ==(HlcTimestamp left, HlcTimestamp right) => left.Equals(right);
    /// <summary>Determines whether two timestamps are not equal.</summary>
    public static bool operator !=(HlcTimestamp left, HlcTimestamp right) => !left.Equals(right);
    // Standard comparison operators making usage in SyncOrchestrator cleaner (e.g., remote > local)
    /// <summary>Determines whether <paramref name="left"/> orders strictly before <paramref name="right"/>.</summary>
    public static bool operator <(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) < 0;
    /// <summary>Determines whether <paramref name="left"/> orders before or equal to <paramref name="right"/>.</summary>
    public static bool operator <=(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) <= 0;
    /// <summary>Determines whether <paramref name="left"/> orders strictly after <paramref name="right"/>.</summary>
    public static bool operator >(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) > 0;
    /// <summary>Determines whether <paramref name="left"/> orders after or equal to <paramref name="right"/>.</summary>
    public static bool operator >=(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) >= 0;
    /// <inheritdoc />
    /// <remarks>Produces the culture-invariant "PhysicalTime:LogicalCounter:NodeId" form consumed by <see cref="Parse"/>.</remarks>
    public override string ToString() => FormattableString.Invariant($"{PhysicalTime}:{LogicalCounter}:{NodeId}");
/// <summary>
/// Parses a timestamp string.
/// </summary>
/// <param name="s">The string to parse, in the format "PhysicalTime:LogicalCounter:NodeId".</param>
/// <returns>The parsed <see cref="HlcTimestamp"/>.</returns>
public static HlcTimestamp Parse(string s)
{
if (string.IsNullOrEmpty(s)) throw new ArgumentNullException(nameof(s));
var parts = s.Split(':');
if (parts.Length != 3) throw new FormatException("Invalid HlcTimestamp format. Expected 'PhysicalTime:LogicalCounter:NodeId'.");
if (!long.TryParse(parts[0], out var physicalTime))
throw new FormatException("Invalid PhysicalTime component in HlcTimestamp.");
if (!int.TryParse(parts[1], out var logicalCounter))
throw new FormatException("Invalid LogicalCounter component in HlcTimestamp.");
var nodeId = parts[2];
return new HlcTimestamp(physicalTime, logicalCounter, nodeId);
}
}

View File

@@ -0,0 +1,60 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;
namespace ZB.MOM.WW.CBDDC.Core.Management;
/// <summary>
/// Service for managing remote peer configurations.
/// Provides CRUD operations for adding, removing, enabling/disabling remote cloud nodes.
/// </summary>
public interface IPeerManagementService
{
    /// <summary>
    /// Adds a static remote peer with simple authentication.
    /// </summary>
    /// <param name="nodeId">Unique identifier for the remote peer.</param>
    /// <param name="address">Network address (hostname:port) of the remote peer.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task AddStaticPeerAsync(string nodeId, string address, CancellationToken cancellationToken = default);
    /// <summary>
    /// Removes a remote peer configuration.
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to remove.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default);
    /// <summary>
    /// Removes confirmation tracking for a peer and optionally removes static remote configuration.
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to untrack.</param>
    /// <param name="removeRemoteConfig">When true, also removes static remote peer configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task RemovePeerTrackingAsync(
        string nodeId,
        bool removeRemoteConfig = true,
        CancellationToken cancellationToken = default);
    /// <summary>
    /// Retrieves all configured remote peers.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task whose result is the collection of remote peer configurations.</returns>
    Task<IEnumerable<RemotePeerConfiguration>> GetAllRemotePeersAsync(CancellationToken cancellationToken = default);
    /// <summary>
    /// Enables synchronization with a remote peer.
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to enable.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task EnablePeerAsync(string nodeId, CancellationToken cancellationToken = default);
    /// <summary>
    /// Disables synchronization with a remote peer (keeps configuration).
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to disable.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task DisablePeerAsync(string nodeId, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,185 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core.Storage;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Core.Management;
/// <summary>
/// Implementation of peer management service.
/// Provides CRUD operations for managing remote peer configurations.
///
/// Remote peer configurations are stored in a synchronized collection and automatically
/// replicated across all nodes in the cluster. Any change made on one node will be
/// synchronized to all other nodes through the normal CBDDC sync process.
/// </summary>
public class PeerManagementService : IPeerManagementService
{
    private readonly IPeerConfigurationStore _store;
    private readonly IPeerOplogConfirmationStore _peerOplogConfirmationStore;
    private readonly ILogger<PeerManagementService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="PeerManagementService"/> class.
    /// </summary>
    /// <param name="store">Store used to persist remote peer configurations.</param>
    /// <param name="peerOplogConfirmationStore">Peer confirmation tracking store.</param>
    /// <param name="logger">Logger instance; a null logger is substituted when omitted.</param>
    /// <exception cref="ArgumentNullException">A required dependency is null.</exception>
    public PeerManagementService(
        IPeerConfigurationStore store,
        IPeerOplogConfirmationStore peerOplogConfirmationStore,
        ILogger<PeerManagementService>? logger = null)
    {
        _store = store ?? throw new ArgumentNullException(nameof(store));
        _peerOplogConfirmationStore = peerOplogConfirmationStore ?? throw new ArgumentNullException(nameof(peerOplogConfirmationStore));
        _logger = logger ?? NullLogger<PeerManagementService>.Instance;
    }

    /// <summary>
    /// Adds or updates a static remote peer configuration.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer.</param>
    /// <param name="address">The peer network address in host:port format.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task AddStaticPeerAsync(string nodeId, string address, CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);
        ValidateAddress(address);
        var config = new RemotePeerConfiguration
        {
            NodeId = nodeId,
            Address = address,
            Type = PeerType.StaticRemote,
            IsEnabled = true
        };
        // ConfigureAwait(false): library code does not need the caller's synchronization
        // context, and not capturing it avoids deadlocks if a caller blocks on the task.
        await _store.SaveRemotePeerAsync(config, cancellationToken).ConfigureAwait(false);
        _logger.LogInformation("Added static remote peer: {NodeId} at {Address} (will sync to all cluster nodes)", nodeId, address);
    }

    /// <summary>
    /// Removes a remote peer configuration.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to remove.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        // Full removal is tracking removal plus configuration removal.
        await RemovePeerTrackingAsync(nodeId, removeRemoteConfig: true, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Removes peer tracking and optionally removes remote peer configuration.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to untrack.</param>
    /// <param name="removeRemoteConfig">When true, also removes static remote peer configuration.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task RemovePeerTrackingAsync(
        string nodeId,
        bool removeRemoteConfig = true,
        CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);
        await _peerOplogConfirmationStore.RemovePeerTrackingAsync(nodeId, cancellationToken).ConfigureAwait(false);
        if (removeRemoteConfig)
        {
            await _store.RemoveRemotePeerAsync(nodeId, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Removed remote peer and tracking: {NodeId} (will sync to all cluster nodes)", nodeId);
            return;
        }
        _logger.LogInformation("Removed peer tracking only: {NodeId}", nodeId);
    }

    /// <summary>
    /// Gets all configured remote peers.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains remote peer configurations.</returns>
    public async Task<IEnumerable<RemotePeerConfiguration>> GetAllRemotePeersAsync(CancellationToken cancellationToken = default)
    {
        return await _store.GetRemotePeersAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Enables a configured remote peer.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to enable.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task EnablePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);
        var peer = await _store.GetRemotePeerAsync(nodeId, cancellationToken).ConfigureAwait(false);
        if (peer == null)
        {
            return; // Peer not found, nothing to enable
        }
        // Only write back (and log) when the flag actually changes, to avoid redundant sync traffic.
        if (!peer.IsEnabled)
        {
            peer.IsEnabled = true;
            await _store.SaveRemotePeerAsync(peer, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Enabled remote peer: {NodeId} (will sync to all cluster nodes)", nodeId);
        }
    }

    /// <summary>
    /// Disables a configured remote peer.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to disable.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task DisablePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);
        var peer = await _store.GetRemotePeerAsync(nodeId, cancellationToken).ConfigureAwait(false);
        if (peer == null)
        {
            return; // Peer not found, nothing to disable
        }
        if (peer.IsEnabled)
        {
            peer.IsEnabled = false;
            await _store.SaveRemotePeerAsync(peer, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Disabled remote peer: {NodeId} (will sync to all cluster nodes)", nodeId);
        }
    }

    // Guard: node identifiers must be non-empty, non-whitespace strings.
    private static void ValidateNodeId(string nodeId)
    {
        if (string.IsNullOrWhiteSpace(nodeId))
        {
            throw new ArgumentException("NodeId cannot be null or empty", nameof(nodeId));
        }
    }

    // Guard: addresses must be non-empty and contain a host/port separator.
    private static void ValidateAddress(string address)
    {
        if (string.IsNullOrWhiteSpace(address))
        {
            throw new ArgumentException("Address cannot be null or empty", nameof(address));
        }
        // Basic format validation (should contain host:port)
        if (!address.Contains(':'))
        {
            throw new ArgumentException("Address must be in format 'host:port'", nameof(address));
        }
    }
}

View File

@@ -0,0 +1,38 @@
using System;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Network;
/// <summary>
/// Represents a method that handles peer node configuration change notifications.
/// </summary>
/// <param name="sender">The source of the event.</param>
/// <param name="newConfig">The updated peer node configuration.</param>
public delegate void PeerNodeConfigurationChangedEventHandler(object? sender, PeerNodeConfiguration newConfig);
/// <summary>
/// Defines a contract for retrieving and monitoring configuration settings for a peer node.
/// </summary>
/// <remarks>Implementations of this interface provide access to the current configuration and notify subscribers
/// when configuration changes occur. This interface is typically used by components that require up-to-date
/// configuration information for peer-to-peer networking scenarios.</remarks>
public interface IPeerNodeConfigurationProvider
{
    /// <summary>
    /// Asynchronously retrieves the current configuration settings for the peer node.
    /// </summary>
    /// <returns>
    /// A task that represents the asynchronous operation. The task result contains the current
    /// <see cref="PeerNodeConfiguration"/>.
    /// </returns>
    /// <remarks>NOTE(review): the name lacks the conventional "Async" suffix for a Task-returning
    /// member; renaming would break implementors, so the contract is kept as-is.</remarks>
    public Task<PeerNodeConfiguration> GetConfiguration();
    /// <summary>
    /// Occurs when the configuration of the peer node changes.
    /// </summary>
    /// <remarks>Subscribe to this event to be notified when any configuration settings for the peer node are
    /// modified. Event handlers can use this notification to update dependent components or respond to configuration
    /// changes as needed.</remarks>
    public event PeerNodeConfigurationChangedEventHandler? ConfigurationChanged;
}

View File

@@ -0,0 +1,20 @@
namespace ZB.MOM.WW.CBDDC.Core.Network;
/// <summary>
/// Defines the role of a node in the distributed network cluster.
/// </summary>
/// <remarks>NOTE(review): the numeric values appear explicit for serialization stability — keep them unchanged.</remarks>
public enum NodeRole
{
    /// <summary>
    /// Standard member node that synchronizes only within the local area network.
    /// Does not connect to cloud remote nodes.
    /// </summary>
    Member = 0,
    /// <summary>
    /// Leader node that acts as a gateway to cloud remote nodes.
    /// Elected via the Bully algorithm (lexicographically smallest NodeId).
    /// Responsible for synchronizing local cluster changes with cloud nodes.
    /// </summary>
    CloudGateway = 1
}

View File

@@ -0,0 +1,76 @@
using System;
using System.Collections.Generic;
using System.Linq;
namespace ZB.MOM.WW.CBDDC.Core.Network;
/// <summary>
/// Represents a peer node in a distributed network, including its unique identifier, network address, and last seen
/// timestamp.
/// </summary>
public class PeerNode
{
    /// <summary>
    /// Gets the unique identifier for the node.
    /// </summary>
    public string NodeId { get; }
    /// <summary>
    /// Gets the network address associated with this peer.
    /// </summary>
    public string Address { get; }
    /// <summary>
    /// Gets the date and time when the peer was last observed or updated.
    /// </summary>
    public DateTimeOffset LastSeen { get; }
    /// <summary>
    /// Gets the configuration settings for the peer node, when known.
    /// </summary>
    public PeerNodeConfiguration? Configuration { get; }
    /// <summary>
    /// Gets the type of the peer node (LanDiscovered, StaticRemote, or CloudRemote).
    /// </summary>
    public PeerType Type { get; }
    /// <summary>
    /// Gets the role assigned to this node within the cluster.
    /// </summary>
    public NodeRole Role { get; }
    /// <summary>
    /// Gets the list of collections this peer is interested in.
    /// </summary>
    public IReadOnlyList<string> InterestingCollections { get; }
    /// <summary>
    /// Initializes a new instance of the <see cref="PeerNode"/> class with the specified node identifier,
    /// network address, and last seen timestamp.
    /// </summary>
    /// <param name="nodeId">The unique identifier for the peer node. Cannot be null or empty.</param>
    /// <param name="address">The network address of the peer node. Cannot be null or empty.</param>
    /// <param name="lastSeen">The date and time when the peer node was last seen, expressed as a DateTimeOffset.</param>
    /// <param name="type">The type of the peer node. Defaults to LanDiscovered.</param>
    /// <param name="role">The role of the peer node. Defaults to Member.</param>
    /// <param name="configuration">The peer node configuration</param>
    /// <param name="interestingCollections">The list of collections this peer is interested in.</param>
    /// <exception cref="ArgumentException"><paramref name="nodeId"/> or <paramref name="address"/> is null or empty.</exception>
    public PeerNode(
        string nodeId,
        string address,
        DateTimeOffset lastSeen,
        PeerType type = PeerType.LanDiscovered,
        NodeRole role = NodeRole.Member,
        PeerNodeConfiguration? configuration = null,
        IEnumerable<string>? interestingCollections = null)
    {
        // Enforce the documented "cannot be null or empty" contract instead of
        // silently accepting an invalid peer identity.
        if (string.IsNullOrEmpty(nodeId))
            throw new ArgumentException("NodeId cannot be null or empty", nameof(nodeId));
        if (string.IsNullOrEmpty(address))
            throw new ArgumentException("Address cannot be null or empty", nameof(address));
        NodeId = nodeId;
        Address = address;
        LastSeen = lastSeen;
        Type = type;
        Role = role;
        Configuration = configuration;
        // Defensive copy so later mutation of the caller's sequence cannot affect this node.
        InterestingCollections = new List<string>(interestingCollections ?? []).AsReadOnly();
    }
}

View File

@@ -0,0 +1,96 @@
using System;
namespace ZB.MOM.WW.CBDDC.Core.Network;
/// <summary>
/// Represents the configuration settings for a peer node in a distributed network.
/// </summary>
/// <remarks>Use this class to specify identification, network port, and authentication details required for a
/// peer node to participate in a cluster or peer-to-peer environment. The <see cref="Default"/> property provides a
/// basic configuration suitable for development or testing scenarios.</remarks>
public class PeerNodeConfiguration
{
    /// <summary>
    /// Gets or sets the unique identifier for the node.
    /// </summary>
    public string NodeId { get; set; } = string.Empty;
    /// <summary>
    /// Gets or sets the TCP port number used for network communication.
    /// </summary>
    public int TcpPort { get; set; }
    /// <summary>
    /// Gets or sets the authentication token used to authorize API requests.
    /// </summary>
    public string AuthToken { get; set; } = string.Empty;
    /// <summary>
    /// Gets or sets the maximum number of items held in the document cache. Default: 100.
    /// </summary>
    public int MaxDocumentCacheSize { get; set; } = 100;
    /// <summary>
    /// Gets or sets the maximum size of the offline queue. Default: 1000.
    /// </summary>
    public int MaxQueueSize { get; set; } = 1000;
    /// <summary>
    /// Gets or sets the number of retry attempts for failed network operations. Default: 3.
    /// </summary>
    public int RetryAttempts { get; set; } = 3;
    /// <summary>
    /// Gets or sets the delay between retry attempts in milliseconds. Default: 1000ms.
    /// </summary>
    public int RetryDelayMs { get; set; } = 1000;
    /// <summary>
    /// Gets or sets the interval between periodic maintenance operations (Oplog pruning) in minutes. Default: 60 minutes.
    /// </summary>
    public int MaintenanceIntervalMinutes { get; set; } = 60;
    /// <summary>
    /// Gets or sets the Oplog retention period in hours. Entries older than this will be pruned. Default: 24 hours.
    /// </summary>
    public int OplogRetentionHours { get; set; } = 24;
    /// <summary>
    /// Gets or sets a list of known peers to connect to directly, bypassing discovery.
    /// </summary>
    public System.Collections.Generic.List<KnownPeerConfiguration> KnownPeers { get; set; } = new();
    /// <summary>
    /// Gets the default configuration settings for a peer node.
    /// </summary>
    /// <remarks>Each access returns a new instance of the configuration with a unique node identifier. The
    /// default settings use TCP port 9000 and a generated authentication token. Modify the returned instance as needed
    /// before use.</remarks>
    public static PeerNodeConfiguration Default => new PeerNodeConfiguration
    {
        NodeId = Guid.NewGuid().ToString(),
        TcpPort = 9000,
        AuthToken = Guid.NewGuid().ToString("N")
    };
}
/// <summary>
/// Configuration for a known peer node.
/// </summary>
public class KnownPeerConfiguration
{
    /// <summary>
    /// Gets or sets the unique identifier of the peer node.
    /// </summary>
    public string NodeId { get; set; } = string.Empty;
    /// <summary>
    /// Gets or sets the hostname or IP address of the peer.
    /// </summary>
    public string Host { get; set; } = string.Empty;
    /// <summary>
    /// Gets or sets the TCP port of the peer.
    /// </summary>
    public int Port { get; set; }
}

View File

@@ -0,0 +1,26 @@
namespace ZB.MOM.WW.CBDDC.Core.Network;
/// <summary>
/// Defines the type of peer node in the distributed network.
/// </summary>
/// <remarks>NOTE(review): the numeric values appear explicit for serialization stability — keep them unchanged.</remarks>
public enum PeerType
{
    /// <summary>
    /// Peer discovered via UDP broadcast on the local area network.
    /// These peers are ephemeral and removed after timeout when no longer broadcasting.
    /// </summary>
    LanDiscovered = 0,
    /// <summary>
    /// Peer manually configured with a static address.
    /// These peers are persistent across restarts and stored in the database.
    /// </summary>
    StaticRemote = 1,
    /// <summary>
    /// Cloud remote node.
    /// Always active if internet connectivity is available.
    /// Synchronized only by the elected leader node to reduce overhead.
    /// </summary>
    CloudRemote = 2
}

View File

@@ -0,0 +1,38 @@
using System.ComponentModel.DataAnnotations;
namespace ZB.MOM.WW.CBDDC.Core.Network;
/// <summary>
/// Persistent configuration record for a remote peer node; survives restarts and is
/// replicated automatically to every node in the cluster.
/// </summary>
public class RemotePeerConfiguration
{
    /// <summary>
    /// Gets or sets the unique identifier for the remote peer node.
    /// </summary>
    [Key]
    public string NodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the network address of the remote peer (hostname:port).
    /// </summary>
    public string Address { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the kind of peer this record describes (StaticRemote or CloudRemote).
    /// </summary>
    public PeerType Type { get; set; }

    /// <summary>
    /// Gets or sets whether synchronization with this peer is active.
    /// A disabled peer stays configured but is skipped during sync.
    /// </summary>
    public bool IsEnabled { get; set; } = true;

    /// <summary>
    /// Gets or sets the collections this peer cares about.
    /// An empty list means the peer is interested in every collection.
    /// </summary>
    public System.Collections.Generic.List<string> InterestingCollections { get; set; } = new();
}

View File

@@ -0,0 +1,59 @@
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Network;
/// <summary>
/// Provides peer node configuration from an in-memory static source.
/// </summary>
public class StaticPeerNodeConfigurationProvider : IPeerNodeConfigurationProvider
{
    private PeerNodeConfiguration _configuration;
    /// <summary>
    /// Gets or sets the current peer node configuration.
    /// Assigning a different instance raises <see cref="ConfigurationChanged"/>.
    /// </summary>
    public PeerNodeConfiguration Configuration
    {
        get => _configuration;
        set
        {
            // Reference comparison: re-assigning the same instance is a no-op and raises no event.
            if (!ReferenceEquals(_configuration, value))
            {
                _configuration = value;
                OnConfigurationChanged(_configuration);
            }
        }
    }
    /// <summary>
    /// Initializes a new instance of the <see cref="StaticPeerNodeConfigurationProvider"/> class.
    /// </summary>
    /// <param name="configuration">The initial peer node configuration.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="configuration"/> is null.</exception>
    public StaticPeerNodeConfigurationProvider(PeerNodeConfiguration configuration)
    {
        // Assign the field directly instead of going through the property: the property
        // setter invokes the virtual OnConfigurationChanged, and calling a virtual member
        // from a constructor would run derived-class overrides before their own
        // constructors have executed. No subscriber can exist at construction time anyway.
        _configuration = configuration ?? throw new System.ArgumentNullException(nameof(configuration));
    }
    /// <summary>
    /// Occurs when the peer node configuration changes.
    /// </summary>
    public event PeerNodeConfigurationChangedEventHandler? ConfigurationChanged;
    /// <summary>
    /// Gets the current peer node configuration.
    /// </summary>
    /// <returns>A task whose result is the current configuration.</returns>
    public Task<PeerNodeConfiguration> GetConfiguration()
    {
        return Task.FromResult(Configuration);
    }
    /// <summary>
    /// Raises the <see cref="ConfigurationChanged"/> event.
    /// </summary>
    /// <param name="newConfig">The new peer node configuration.</param>
    protected virtual void OnConfigurationChanged(PeerNodeConfiguration newConfig)
    {
        ConfigurationChanged?.Invoke(this, newConfig);
    }
}

View File

@@ -0,0 +1,107 @@
using System;
using System.ComponentModel.DataAnnotations;
using System.Text.Json;
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Identifies the kind of mutation recorded in an <see cref="OplogEntry"/>.
/// </summary>
public enum OperationType
{
    /// <summary>A put (write) of a document identified by collection and key.</summary>
    Put,
    /// <summary>A deletion of a document identified by collection and key.</summary>
    Delete
}
/// <summary>
/// Hashing helpers for <see cref="OplogEntry"/>.
/// </summary>
public static class OplogEntryExtensions
{
    /// <summary>
    /// Computes a deterministic hash for the specified oplog entry.
    /// </summary>
    /// <param name="entry">The oplog entry to hash.</param>
    /// <returns>The lowercase hexadecimal SHA-256 hash of the entry.</returns>
    public static string ComputeHash(this OplogEntry entry)
    {
        var sb = new System.Text.StringBuilder();
        sb.Append(entry.Collection);
        sb.Append('|');
        sb.Append(entry.Key);
        sb.Append('|');
        // Ensure stable string representation for Enum (integer value)
        sb.Append(((int)entry.Operation).ToString(System.Globalization.CultureInfo.InvariantCulture));
        sb.Append('|');
        // Payload deliberately excluded from the hash to avoid JSON serialization
        // non-determinism; the empty field between the two separators is kept so the
        // hash input layout (and thus existing hashes) is unchanged.
        sb.Append('|');
        // Timestamp.ToString() is culture-invariant, so this component is stable.
        sb.Append(entry.Timestamp.ToString());
        sb.Append('|');
        sb.Append(entry.PreviousHash);
        var bytes = System.Text.Encoding.UTF8.GetBytes(sb.ToString());
        // SHA256.HashData avoids allocating and disposing a hasher instance per call.
        var hashBytes = System.Security.Cryptography.SHA256.HashData(bytes);
        // Convert.ToHexString yields uppercase hex; lowercase it to preserve the
        // existing hash format exactly.
        return Convert.ToHexString(hashBytes).ToLowerInvariant();
    }
}
/// <summary>
/// An immutable, hash-chained operation-log entry describing a single Put or Delete
/// against a document, ordered by an HLC timestamp.
/// </summary>
public class OplogEntry
{
    /// <summary>
    /// Gets the collection name associated with this entry.
    /// </summary>
    public string Collection { get; }
    /// <summary>
    /// Gets the document key associated with this entry.
    /// </summary>
    public string Key { get; }
    /// <summary>
    /// Gets the operation represented by this entry.
    /// </summary>
    public OperationType Operation { get; }
    /// <summary>
    /// Gets the serialized payload for the operation.
    /// Note: the payload does not participate in <see cref="Hash"/> (see ComputeHash).
    /// </summary>
    public JsonElement? Payload { get; }
    /// <summary>
    /// Gets the logical timestamp for this entry.
    /// </summary>
    public HlcTimestamp Timestamp { get; }
    /// <summary>
    /// Gets the hash of this entry.
    /// </summary>
    public string Hash { get; }
    /// <summary>
    /// Gets the hash of the previous entry in the chain (empty string for a chain head).
    /// </summary>
    public string PreviousHash { get; }
    /// <summary>
    /// Initializes a new instance of the <see cref="OplogEntry"/> class.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="operation">The operation type.</param>
    /// <param name="payload">The serialized payload.</param>
    /// <param name="timestamp">The logical timestamp.</param>
    /// <param name="previousHash">The previous entry hash; null is normalized to empty.</param>
    /// <param name="hash">The current entry hash. If null, it is computed.</param>
    public OplogEntry(string collection, string key, OperationType operation, JsonElement? payload, HlcTimestamp timestamp, string previousHash, string? hash = null)
    {
        Collection = collection;
        Key = key;
        Operation = operation;
        Payload = payload;
        Timestamp = timestamp;
        // PreviousHash must be normalized BEFORE Hash is derived: ComputeHash reads it.
        PreviousHash = previousHash ?? string.Empty;
        Hash = hash ?? this.ComputeHash();
    }
    /// <summary>
    /// Verifies if the stored Hash matches the content.
    /// </summary>
    /// <returns><see langword="true"/> when the stored hash equals the recomputed hash.</returns>
    public bool IsValid()
    {
        return Hash == this.ComputeHash();
    }
}

View File

@@ -0,0 +1,44 @@
using System;
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Persisted confirmation watermark recording how far a tracked peer has acknowledged
/// oplog entries originating from a given source node.
/// </summary>
public class PeerOplogConfirmation
{
    /// <summary>
    /// Gets or sets the tracked peer node identifier.
    /// </summary>
    public string PeerNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the source node identifier this confirmation applies to.
    /// </summary>
    public string SourceNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the physical wall-clock component of the confirmed HLC timestamp.
    /// </summary>
    public long ConfirmedWall { get; set; }

    /// <summary>
    /// Gets or sets the logical counter component of the confirmed HLC timestamp.
    /// </summary>
    public int ConfirmedLogic { get; set; }

    /// <summary>
    /// Gets or sets the confirmed hash at the watermark.
    /// </summary>
    public string ConfirmedHash { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets when this confirmation record was last updated in UTC.
    /// </summary>
    public DateTimeOffset LastConfirmedUtc { get; set; } = DateTimeOffset.UtcNow;

    /// <summary>
    /// Gets or sets whether this tracked peer is active for pruning/sync gating.
    /// </summary>
    public bool IsActive { get; set; } = true;
}

View File

@@ -0,0 +1,225 @@
using System.Text.Json;
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Base type for all nodes of the query expression tree (comparisons and logical combinators).
/// </summary>
public abstract class QueryNode { }
/// <summary>
/// Query node matching documents whose field equals a given value.
/// </summary>
public class Eq : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the value to compare against.
    /// </summary>
    public object Value { get; }

    /// <summary>
    /// Creates an equality comparison node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The value to compare against.</param>
    public Eq(string field, object value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node matching documents whose field is strictly greater than a threshold.
/// </summary>
public class Gt : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the threshold value.
    /// </summary>
    public object Value { get; }

    /// <summary>
    /// Creates a greater-than comparison node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Gt(string field, object value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node matching documents whose field is strictly less than a threshold.
/// </summary>
public class Lt : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the threshold value.
    /// </summary>
    public object Value { get; }

    /// <summary>
    /// Creates a less-than comparison node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Lt(string field, object value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node matching documents whose field is greater than or equal to a threshold.
/// </summary>
public class Gte : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the threshold value.
    /// </summary>
    public object Value { get; }

    /// <summary>
    /// Creates a greater-than-or-equal comparison node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Gte(string field, object value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node matching documents whose field is less than or equal to a threshold.
/// </summary>
public class Lte : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the threshold value.
    /// </summary>
    public object Value { get; }

    /// <summary>
    /// Creates a less-than-or-equal comparison node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Lte(string field, object value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node matching documents whose field differs from a given value.
/// </summary>
public class Neq : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the value to compare against.
    /// </summary>
    public object Value { get; }

    /// <summary>
    /// Creates a not-equal comparison node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The value to compare against.</param>
    public Neq(string field, object value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node matching documents whose field equals any value in a given set.
/// </summary>
public class In : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the set of values to compare against.
    /// </summary>
    public object[] Values { get; }

    /// <summary>
    /// Creates an in-list comparison node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="values">The set of values to compare against.</param>
    public In(string field, object[] values)
    {
        Field = field;
        Values = values;
    }
}
/// <summary>
/// Query node matching documents whose string field contains a given substring.
/// </summary>
public class Contains : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the substring value to search for.
    /// </summary>
    public string Value { get; }

    /// <summary>
    /// Creates a substring-containment node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The substring value to search for.</param>
    public Contains(string field, string value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node matching documents whose string field does not contain a given substring.
/// </summary>
public class NotContains : QueryNode
{
    /// <summary>
    /// Gets the field name to compare.
    /// </summary>
    public string Field { get; }

    /// <summary>
    /// Gets the substring value to exclude.
    /// </summary>
    public string Value { get; }

    /// <summary>
    /// Creates a substring-exclusion node.
    /// </summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The substring value to exclude.</param>
    public NotContains(string field, string value)
    {
        Field = field;
        Value = value;
    }
}
/// <summary>
/// Query node representing the logical conjunction (AND) of two sub-queries.
/// </summary>
public class And : QueryNode
{
    /// <summary>
    /// Gets the left side of the logical operation.
    /// </summary>
    public QueryNode Left { get; }

    /// <summary>
    /// Gets the right side of the logical operation.
    /// </summary>
    public QueryNode Right { get; }

    /// <summary>
    /// Initializes a new logical AND query node from its two operands.
    /// </summary>
    /// <param name="left">The left query node.</param>
    /// <param name="right">The right query node.</param>
    public And(QueryNode left, QueryNode right)
    {
        Left = left;
        Right = right;
    }
}
/// <summary>
/// Query node representing the logical disjunction (OR) of two sub-queries.
/// </summary>
public class Or : QueryNode
{
    /// <summary>
    /// Gets the left side of the logical operation.
    /// </summary>
    public QueryNode Left { get; }

    /// <summary>
    /// Gets the right side of the logical operation.
    /// </summary>
    public QueryNode Right { get; }

    /// <summary>
    /// Initializes a new logical OR query node from its two operands.
    /// </summary>
    /// <param name="left">The left query node.</param>
    /// <param name="right">The right query node.</param>
    public Or(QueryNode left, QueryNode right)
    {
        Left = left;
        Right = right;
    }
}

View File

@@ -0,0 +1,105 @@
# ZB.MOM.WW.CBDDC.Core
Core abstractions and logic for **CBDDC**, a peer-to-peer data synchronization middleware for .NET.
## What Is CBDDC?
CBDDC is **not** a database — it's a sync layer that plugs into your existing data store (BLite) and enables automatic P2P replication across nodes in a mesh network. Your application reads and writes to its database as usual; CBDDC handles synchronization in the background.
## What's In This Package
- **Interfaces**: `IDocumentStore`, `IOplogStore`, `IVectorClockService`, `IConflictResolver`
- **Models**: `OplogEntry`, `Document`, `HlcTimestamp`, `VectorClock`
- **Conflict Resolution**: `LastWriteWinsConflictResolver`, `RecursiveNodeMergeConflictResolver`
- **Production Features**: Document caching (LRU), offline queue, health monitoring, retry policies
## Installation
```bash
# Pick a persistence provider
dotnet add package ZB.MOM.WW.CBDDC.Persistence # Embedded document DB
# Add networking
dotnet add package ZB.MOM.WW.CBDDC.Network
```
## Quick Start
```csharp
// 1. Define your DbContext
public class MyDbContext : CBDDCDocumentDbContext
{
public DocumentCollection<string, User> Users { get; private set; }
public MyDbContext(string path) : base(path) { }
}
// 2. Create your DocumentStore (the sync bridge)
public class MyDocumentStore : BLiteDocumentStore<MyDbContext>
{
public MyDocumentStore(MyDbContext ctx, IPeerNodeConfigurationProvider cfg,
IVectorClockService vc, ILogger<MyDocumentStore>? log = null)
: base(ctx, cfg, vc, logger: log)
{
WatchCollection("Users", ctx.Users, u => u.Id);
}
protected override async Task ApplyContentToEntityAsync(
string collection, string key, JsonElement content, CancellationToken ct)
{
var user = content.Deserialize<User>()!;
user.Id = key;
var existing = _context.Users.Find(u => u.Id == key).FirstOrDefault();
if (existing != null) _context.Users.Update(user);
else _context.Users.Insert(user);
await _context.SaveChangesAsync(ct);
}
// ... implement other abstract methods
}
// 3. Register and use
builder.Services.AddCBDDCCore()
.AddCBDDCBLite<MyDbContext, MyDocumentStore>(
sp => new MyDbContext("data.blite"))
.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>();
```
## Key Concepts
| Concept | Description |
|---------|-------------|
| **CDC** | Change Data Capture — watches collections registered via `WatchCollection()` |
| **Oplog** | Append-only hash-chained journal of changes per node |
| **VectorClock** | Tracks causal ordering across the mesh |
| **DocumentStore** | Your bridge between entities and the sync engine |
## Architecture
```
Your App → DbContext.SaveChangesAsync()
              ↓
              ↓ CDC Trigger
DocumentStore.CreateOplogEntryAsync()
              ↓
    ├── OplogEntry (hash-chained, HLC timestamped)
    └── VectorClockService.Update()
              ↓
              ↓
SyncOrchestrator (background)
    ├── Push to peers
    └── Pull from peers → ApplyBatchAsync
```
## Related Packages
- **ZB.MOM.WW.CBDDC.Persistence** — BLite embedded provider (.NET 10+)
- **ZB.MOM.WW.CBDDC.Network** — P2P networking (UDP discovery, TCP sync, Gossip)
## Documentation
- **[Complete Documentation](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net)**
- **[Sample Application](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/tree/main/samples/ZB.MOM.WW.CBDDC.Sample.Console)**
- **[Integration Guide](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net#integrating-with-your-database)**
## License
MIT — see [LICENSE](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/LICENSE)

View File

@@ -0,0 +1,27 @@
using System;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Resilience
{
    /// <summary>
    /// Defines a policy for executing asynchronous operations with automatic
    /// retry handling (used for transient failures — see <c>RetryPolicy</c>).
    /// </summary>
    public interface IRetryPolicy
    {
        /// <summary>
        /// Executes an asynchronous operation with retry handling.
        /// </summary>
        /// <param name="operation">The operation to execute.</param>
        /// <param name="operationName">The operation name used for diagnostics.</param>
        /// <param name="cancellationToken">A token used to cancel the operation.</param>
        /// <returns>A task that represents the asynchronous execution.</returns>
        Task ExecuteAsync(Func<Task> operation, string operationName, CancellationToken cancellationToken = default);

        /// <summary>
        /// Executes an asynchronous operation with retry handling and returns a result.
        /// </summary>
        /// <typeparam name="T">The result type.</typeparam>
        /// <param name="operation">The operation to execute.</param>
        /// <param name="operationName">The operation name used for diagnostics.</param>
        /// <param name="cancellationToken">A token used to cancel the operation.</param>
        /// <returns>A task that represents the asynchronous execution and yields the operation result.</returns>
        Task<T> ExecuteAsync<T>(Func<Task<T>> operation, string operationName, CancellationToken cancellationToken = default);
    }
}

View File

@@ -0,0 +1,116 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Exceptions;
using ZB.MOM.WW.CBDDC.Core.Network;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Core.Resilience;
/// <summary>
/// Provides retry logic for transient failures. The attempt count and base delay
/// come from the peer node configuration; the delay grows linearly with each attempt.
/// </summary>
public class RetryPolicy : IRetryPolicy
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
    private readonly ILogger<RetryPolicy> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="RetryPolicy"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The provider for retry configuration values.</param>
    /// <param name="logger">The logger instance; a null logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="peerNodeConfigurationProvider"/> is null.</exception>
    public RetryPolicy(IPeerNodeConfigurationProvider peerNodeConfigurationProvider, ILogger<RetryPolicy>? logger = null)
    {
        _logger = logger ?? NullLogger<RetryPolicy>.Instance;
        _peerNodeConfigurationProvider = peerNodeConfigurationProvider
            ?? throw new ArgumentNullException(nameof(peerNodeConfigurationProvider));
    }

    /// <summary>
    /// Executes an operation with retry logic.
    /// </summary>
    /// <typeparam name="T">The result type returned by the operation.</typeparam>
    /// <param name="operation">The asynchronous operation to execute.</param>
    /// <param name="operationName">The operation name used for logging.</param>
    /// <param name="cancellationToken">A token used to cancel retry delays.</param>
    /// <returns>The operation result from the first successful attempt.</returns>
    /// <exception cref="CBDDCException">
    /// Thrown with code RETRY_EXHAUSTED when every attempt fails with a transient error;
    /// the last transient failure is attached as the inner exception. Non-transient
    /// failures propagate unchanged.
    /// </exception>
    public async Task<T> ExecuteAsync<T>(
        Func<Task<T>> operation,
        string operationName,
        CancellationToken cancellationToken = default)
    {
        var config = await _peerNodeConfigurationProvider.GetConfiguration();
        Exception? lastException = null;

        for (int attempt = 1; attempt <= config.RetryAttempts; attempt++)
        {
            try
            {
                _logger.LogDebug("Executing {Operation} (attempt {Attempt}/{Max})",
                    operationName, attempt, config.RetryAttempts);
                return await operation();
            }
            catch (Exception ex) when (IsTransient(ex))
            {
                // Remember the failure so it can surface as the inner exception once
                // all attempts are exhausted. (Previously the final attempt's exception
                // escaped unwrapped, leaving the RETRY_EXHAUSTED path unreachable.)
                lastException = ex;

                if (attempt < config.RetryAttempts)
                {
                    // Linear backoff: delay grows proportionally to the attempt number.
                    var delay = config.RetryDelayMs * attempt;
                    _logger.LogWarning(ex,
                        "Operation {Operation} failed (attempt {Attempt}/{Max}). Retrying in {Delay}ms...",
                        operationName, attempt, config.RetryAttempts, delay);
                    await Task.Delay(delay, cancellationToken);
                }
            }
        }

        // All attempts failed, or RetryAttempts was configured as zero (lastException
        // is null in that case). ILogger accepts a null exception, so one call suffices.
        _logger.LogError(lastException,
            "Operation {Operation} failed after {Attempts} attempts",
            operationName, config.RetryAttempts);

        throw new CBDDCException("RETRY_EXHAUSTED",
            $"Operation '{operationName}' failed after {config.RetryAttempts} attempts",
            lastException!);
    }

    /// <summary>
    /// Executes an operation with retry logic (void return).
    /// </summary>
    /// <param name="operation">The asynchronous operation to execute.</param>
    /// <param name="operationName">The operation name used for logging.</param>
    /// <param name="cancellationToken">A token used to cancel retry delays.</param>
    public async Task ExecuteAsync(
        Func<Task> operation,
        string operationName,
        CancellationToken cancellationToken = default)
    {
        // Delegate to the generic overload using a dummy result value.
        await ExecuteAsync(async () =>
        {
            await operation();
            return true;
        }, operationName, cancellationToken);
    }

    /// <summary>
    /// Determines whether an exception represents a transient failure worth retrying.
    /// </summary>
    private static bool IsTransient(Exception ex)
    {
        // Network errors are typically transient
        if (ex is NetworkException or System.Net.Sockets.SocketException or System.IO.IOException)
            return true;

        // Timeout errors are transient
        if (ex is Exceptions.TimeoutException or OperationCanceledException)
            return true;

        return false;
    }
}

View File

@@ -0,0 +1,21 @@
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Describes a persisted snapshot: the originating node, the HLC timestamp
/// components at which it was taken, and the snapshot hash.
/// </summary>
public class SnapshotMetadata
{
    /// <summary>
    /// Gets or sets the node identifier associated with the snapshot.
    /// </summary>
    public string NodeId { get; set; } = "";

    /// <summary>
    /// Gets or sets the physical time component of the snapshot timestamp.
    /// </summary>
    public long TimestampPhysicalTime { get; set; }

    /// <summary>
    /// Gets or sets the logical counter component of the snapshot timestamp.
    /// </summary>
    public int TimestampLogicalCounter { get; set; }

    /// <summary>
    /// Gets or sets the snapshot hash.
    /// </summary>
    public string Hash { get; set; } = "";
}

View File

@@ -0,0 +1,16 @@
using System;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Represents an error that occurs when a database is found to be corrupt.
/// </summary>
public class CorruptDatabaseException : Exception
{
    /// <summary>
    /// Initializes a new instance of the <see cref="CorruptDatabaseException"/> class.
    /// </summary>
    public CorruptDatabaseException() { }

    /// <summary>
    /// Initializes a new instance of the <see cref="CorruptDatabaseException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public CorruptDatabaseException(string message) : base(message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="CorruptDatabaseException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The underlying exception that caused this error.</param>
    public CorruptDatabaseException(string message, Exception innerException) : base(message, innerException) { }
}

View File

@@ -0,0 +1,108 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Defines the contract for storing and retrieving document metadata for sync tracking.
/// Document metadata stores HLC timestamps and deleted state without modifying application entities.
/// </summary>
public interface IDocumentMetadataStore : ISnapshotable<DocumentMetadata>
{
    /// <summary>
    /// Gets the metadata for a specific document.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>The document metadata if found; otherwise null.</returns>
    Task<DocumentMetadata?> GetMetadataAsync(string collection, string key, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets metadata for all documents in a collection.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Enumerable of document metadata for the collection.</returns>
    Task<IEnumerable<DocumentMetadata>> GetMetadataByCollectionAsync(string collection, CancellationToken cancellationToken = default);

    /// <summary>
    /// Upserts (inserts or updates) metadata for a document.
    /// </summary>
    /// <param name="metadata">The metadata to upsert.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>A task that represents the asynchronous upsert operation.</returns>
    Task UpsertMetadataAsync(DocumentMetadata metadata, CancellationToken cancellationToken = default);

    /// <summary>
    /// Upserts metadata for multiple documents in batch.
    /// </summary>
    /// <param name="metadatas">The metadata items to upsert.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>A task that represents the asynchronous batch upsert operation.</returns>
    Task UpsertMetadataBatchAsync(IEnumerable<DocumentMetadata> metadatas, CancellationToken cancellationToken = default);

    /// <summary>
    /// Marks a document as deleted by setting IsDeleted=true and updating the timestamp.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="timestamp">The HLC timestamp of the deletion.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task MarkDeletedAsync(string collection, string key, HlcTimestamp timestamp, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all document metadata with timestamps after the specified timestamp.
    /// Used for incremental sync to find documents modified since the last sync.
    /// </summary>
    /// <param name="since">The timestamp to compare against.</param>
    /// <param name="collections">Optional collection filter; null means all collections.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Documents modified after the specified timestamp.</returns>
    Task<IEnumerable<DocumentMetadata>> GetMetadataAfterAsync(HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default);
}
/// <summary>
/// Represents metadata for a document used in sync tracking.
/// </summary>
public class DocumentMetadata
{
    /// <summary>
    /// Gets or sets the collection name.
    /// </summary>
    public string Collection { get; set; } = "";

    /// <summary>
    /// Gets or sets the document key.
    /// </summary>
    public string Key { get; set; } = "";

    /// <summary>
    /// Gets or sets the HLC timestamp of the last modification.
    /// </summary>
    public HlcTimestamp UpdatedAt { get; set; }

    /// <summary>
    /// Gets or sets a value indicating whether this document is marked as deleted (tombstone).
    /// </summary>
    public bool IsDeleted { get; set; }

    /// <summary>
    /// Initializes a new instance of the <see cref="DocumentMetadata"/> class.
    /// </summary>
    public DocumentMetadata() { }

    /// <summary>
    /// Initializes a new instance of the <see cref="DocumentMetadata"/> class.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="updatedAt">The last update timestamp.</param>
    /// <param name="isDeleted">Whether the document is marked as deleted.</param>
    public DocumentMetadata(string collection, string key, HlcTimestamp updatedAt, bool isDeleted = false)
    {
        Collection = collection;
        Key = key;
        UpdatedAt = updatedAt;
        IsDeleted = isDeleted;
    }
}

View File

@@ -0,0 +1,103 @@
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Handles basic CRUD operations for documents.
/// </summary>
public interface IDocumentStore : ISnapshotable<Document>
{
    /// <summary>
    /// Gets the collections this store is interested in.
    /// </summary>
    IEnumerable<string> InterestedCollection { get; }

    /// <summary>
    /// Asynchronously retrieves a document from the specified collection by its key.
    /// </summary>
    /// <param name="collection">The name of the collection containing the document to retrieve. Cannot be null or empty.</param>
    /// <param name="key">The unique key identifying the document within the collection. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains the document if found; otherwise, null.</returns>
    Task<Document?> GetDocumentAsync(string collection, string key, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves all documents belonging to the specified collection.
    /// </summary>
    /// <param name="collection">The name of the collection from which to retrieve documents. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains an enumerable collection of
    /// documents in the specified collection. The collection is empty if no documents are found.</returns>
    Task<IEnumerable<Document>> GetDocumentsByCollectionAsync(string collection, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously inserts a batch of documents into the data store.
    /// </summary>
    /// <param name="documents">The collection of documents to insert. Cannot be null or contain null elements.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result is <see langword="true"/> if all documents
    /// were inserted successfully; otherwise, <see langword="false"/>.</returns>
    Task<bool> InsertBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously updates the specified document in the data store.
    /// </summary>
    /// <param name="document">The document to update. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the update operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result is <see langword="true"/> if the document was
    /// successfully updated; otherwise, <see langword="false"/>.</returns>
    Task<bool> PutDocumentAsync(Document document, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously updates a batch of documents in the data store.
    /// </summary>
    /// <param name="documents">The collection of documents to update. Cannot be null or contain null elements.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result is <see langword="true"/> if all documents
    /// were updated successfully; otherwise, <see langword="false"/>.</returns>
    Task<bool> UpdateBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously deletes a document identified by the specified key from the given collection.
    /// </summary>
    /// <param name="collection">The name of the collection containing the document to delete. Cannot be null or empty.</param>
    /// <param name="key">The unique key identifying the document to delete. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the delete operation.</param>
    /// <returns>A task that represents the asynchronous delete operation. The task result is <see langword="true"/> if the
    /// document was successfully deleted; otherwise, <see langword="false"/>.</returns>
    Task<bool> DeleteDocumentAsync(string collection, string key, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously deletes a batch of documents identified by their keys.
    /// </summary>
    /// <remarks>
    /// If any of the specified documents cannot be deleted, the method returns <see langword="false"/> but does not
    /// throw an exception. The operation is performed asynchronously and may complete partially if cancellation is requested.
    /// </remarks>
    /// <param name="documentKeys">A collection of document keys that specify the documents to delete. Cannot be null or contain null or empty
    /// values.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the delete operation.</param>
    /// <returns>A task that represents the asynchronous delete operation. The task result is <see langword="true"/> if all
    /// specified documents were successfully deleted; otherwise, <see langword="false"/>.</returns>
    Task<bool> DeleteBatchDocumentsAsync(IEnumerable<string> documentKeys, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously merges the specified incoming document with existing data and returns the updated document.
    /// </summary>
    /// <param name="incoming">The incoming document to merge. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the merge operation.</param>
    /// <returns>A task that represents the asynchronous merge operation. The task result contains the merged document.</returns>
    Task<Document> MergeAsync(Document incoming, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves documents identified by the specified collection and key pairs.
    /// </summary>
    /// <param name="documentKeys">A list of tuples, each containing the collection name and the document key that uniquely identify the documents
    /// to retrieve. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous retrieval operation.</returns>
    Task<IEnumerable<Document>> GetDocumentsAsync(List<(string Collection, string Key)> documentKeys, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,101 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Handles operations related to the Operation Log (Oplog), synchronization, and logical clocks.
/// </summary>
public interface IOplogStore : ISnapshotable<OplogEntry>
{
    /// <summary>
    /// Occurs when changes are applied to the store from external sources (sync).
    /// </summary>
    event EventHandler<ChangesAppliedEventArgs> ChangesApplied;

    /// <summary>
    /// Appends a new entry to the operation log asynchronously.
    /// </summary>
    /// <param name="entry">The operation log entry to append. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the append operation.</param>
    /// <returns>A task that represents the asynchronous append operation.</returns>
    Task AppendOplogEntryAsync(OplogEntry entry, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves all oplog entries that occurred after the specified timestamp.
    /// </summary>
    /// <param name="timestamp">The timestamp after which oplog entries should be returned.</param>
    /// <param name="collections">An optional collection of collection names to filter the results; null means no filtering.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing the matching oplog entries.</returns>
    Task<IEnumerable<OplogEntry>> GetOplogAfterAsync(HlcTimestamp timestamp, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the latest observed hybrid logical clock (HLC) timestamp.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation containing the latest HLC timestamp.</returns>
    Task<HlcTimestamp> GetLatestTimestampAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the current vector clock representing the state of distributed events.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing the current vector clock.</returns>
    Task<VectorClock> GetVectorClockAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Retrieves a collection of oplog entries for the specified node that occurred after the given timestamp.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to retrieve oplog entries. Cannot be null or empty.</param>
    /// <param name="since">The timestamp after which oplog entries should be returned.</param>
    /// <param name="collections">An optional collection of collection names to filter the oplog entries; null means no filtering.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing oplog entries for the specified node.</returns>
    Task<IEnumerable<OplogEntry>> GetOplogForNodeAfterAsync(string nodeId, HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the hash of the most recent entry for the specified node.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to retrieve the last entry hash. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation containing the hash string of the last entry, or null
    /// when no entry exists for the node.</returns>
    Task<string?> GetLastEntryHashAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves a sequence of oplog entries representing the chain between the specified start and end hashes.
    /// </summary>
    /// <param name="startHash">The hash of the first entry in the chain range. Cannot be null or empty.</param>
    /// <param name="endHash">The hash of the last entry in the chain range. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing OplogEntry objects in chain order.</returns>
    Task<IEnumerable<OplogEntry>> GetChainRangeAsync(string startHash, string endHash, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the oplog entry associated with the specified hash value.
    /// </summary>
    /// <param name="hash">The hash string identifying the oplog entry to retrieve. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task representing the asynchronous operation containing the OplogEntry if found, otherwise null.</returns>
    Task<OplogEntry?> GetEntryByHashAsync(string hash, CancellationToken cancellationToken = default);

    /// <summary>
    /// Applies a batch of oplog entries asynchronously to the target data store.
    /// </summary>
    /// <remarks>
    /// NOTE(review): implementations are presumably expected to raise <see cref="ChangesApplied"/>
    /// after applying entries received from sync — confirm against the concrete store.
    /// </remarks>
    /// <param name="oplogEntries">A collection of OplogEntry objects representing the operations to apply. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the batch operation.</param>
    /// <returns>A task that represents the asynchronous batch apply operation.</returns>
    Task ApplyBatchAsync(IEnumerable<OplogEntry> oplogEntries, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously removes entries from the oplog that are older than the specified cutoff timestamp.
    /// </summary>
    /// <param name="cutoff">The timestamp that defines the upper bound for entries to be pruned.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the prune operation.</param>
    /// <returns>A task that represents the asynchronous prune operation.</returns>
    Task PruneOplogAsync(HlcTimestamp cutoff, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,41 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Handles storage and retrieval of remote peer configurations.
/// </summary>
public interface IPeerConfigurationStore : ISnapshotable<RemotePeerConfiguration>
{
    /// <summary>
    /// Saves or updates a remote peer configuration in the persistent store.
    /// </summary>
    /// <param name="peer">The remote peer configuration to save.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous save operation.</returns>
    Task SaveRemotePeerAsync(RemotePeerConfiguration peer, CancellationToken cancellationToken = default);

    /// <summary>
    /// Retrieves all remote peer configurations from the persistent store.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Collection of remote peer configurations.</returns>
    Task<IEnumerable<RemotePeerConfiguration>> GetRemotePeersAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the configuration for a remote peer identified by the specified node ID.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the remote peer whose configuration is to be retrieved.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task containing the RemotePeerConfiguration if found; otherwise, null.</returns>
    // Default added for consistency: every other member of this interface makes the token optional.
    Task<RemotePeerConfiguration?> GetRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Removes a remote peer configuration from the persistent store.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the peer to remove.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous remove operation.</returns>
    Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,71 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Defines persistence operations for peer oplog confirmation tracking.
/// </summary>
public interface IPeerOplogConfirmationStore : ISnapshotable<PeerOplogConfirmation>
{
    /// <summary>
    /// Ensures the specified peer is tracked for confirmation-based pruning.
    /// </summary>
    /// <param name="peerNodeId">The peer node identifier.</param>
    /// <param name="address">The peer network address.</param>
    /// <param name="type">The peer type.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>A task that represents the asynchronous registration operation.</returns>
    Task EnsurePeerRegisteredAsync(
        string peerNodeId,
        string address,
        PeerType type,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates the confirmation watermark for a tracked peer and source node.
    /// </summary>
    /// <param name="peerNodeId">The tracked peer node identifier.</param>
    /// <param name="sourceNodeId">The source node identifier of the confirmed oplog stream.</param>
    /// <param name="timestamp">The confirmed HLC timestamp.</param>
    /// <param name="hash">The confirmed hash at the specified timestamp.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>A task that represents the asynchronous update operation.</returns>
    Task UpdateConfirmationAsync(
        string peerNodeId,
        string sourceNodeId,
        HlcTimestamp timestamp,
        string hash,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all persisted peer confirmations.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>All peer confirmations.</returns>
    Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets persisted confirmations for a specific tracked peer.
    /// </summary>
    /// <param name="peerNodeId">The peer node identifier.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Peer confirmations for the requested peer.</returns>
    Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsForPeerAsync(
        string peerNodeId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Deactivates tracking for the specified peer.
    /// </summary>
    /// <param name="peerNodeId">The peer node identifier.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>A task that represents the asynchronous removal operation.</returns>
    Task RemovePeerTrackingAsync(string peerNodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all active tracked peer identifiers.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Distinct active tracked peer identifiers.</returns>
    Task<IEnumerable<string>> GetActiveTrackedPeersAsync(CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,48 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Defines persistence operations for per-node snapshot metadata,
/// including lookup of snapshot hashes used during sync coordination.
/// </summary>
public interface ISnapshotMetadataStore : ISnapshotable<SnapshotMetadata>
{
    /// <summary>
    /// Asynchronously retrieves the snapshot metadata associated with the specified node identifier.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to retrieve snapshot metadata. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A token to monitor for cancellation requests.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains the <see cref="SnapshotMetadata"/>
    /// for the specified node if found; otherwise, <see langword="null"/>.</returns>
    Task<SnapshotMetadata?> GetSnapshotMetadataAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously inserts the specified snapshot metadata into the data store.
    /// </summary>
    /// <param name="metadata">The snapshot metadata to insert. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous insert operation.</returns>
    Task InsertSnapshotMetadataAsync(SnapshotMetadata metadata, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously updates the metadata for an existing snapshot.
    /// </summary>
    /// <param name="existingMeta">The metadata object representing the snapshot to update. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous update operation.</returns>
    Task UpdateSnapshotMetadataAsync(SnapshotMetadata existingMeta, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the hash of the current snapshot for the specified node.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to obtain the snapshot hash.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task containing the snapshot hash as a string, or null if no snapshot is available.</returns>
    Task<string?> GetSnapshotHashAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all snapshot metadata entries. Used for initializing VectorClock cache.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>All snapshot metadata entries.</returns>
    Task<IEnumerable<SnapshotMetadata>> GetAllSnapshotMetadataAsync(CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,35 @@
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Handles full database lifecycle operations such as snapshots, replacement, and clearing data.
/// </summary>
/// <remarks>
/// <see cref="ReplaceDatabaseAsync"/> substitutes the entire database with the provided
/// contents, whereas <see cref="MergeSnapshotAsync"/> combines incoming snapshot data
/// with the existing store.
/// </remarks>
public interface ISnapshotService
{
    /// <summary>
    /// Asynchronously creates a snapshot of the current state and writes it to the specified destination stream.
    /// </summary>
    /// <param name="destination">The stream to which the snapshot data will be written.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the snapshot creation operation.</param>
    /// <returns>A task that represents the asynchronous snapshot creation operation.</returns>
    Task CreateSnapshotAsync(Stream destination, CancellationToken cancellationToken = default);

    /// <summary>
    /// Replaces the existing database with the contents provided in the specified stream asynchronously.
    /// </summary>
    /// <param name="databaseStream">A stream containing the new database data to be used for replacement.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous database replacement operation.</returns>
    Task ReplaceDatabaseAsync(Stream databaseStream, CancellationToken cancellationToken = default);

    /// <summary>
    /// Merges the provided snapshot stream into the current data store asynchronously.
    /// </summary>
    /// <param name="snapshotStream">A stream containing the snapshot data to be merged.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the merge operation.</param>
    /// <returns>A task that represents the asynchronous merge operation.</returns>
    Task MergeSnapshotAsync(Stream snapshotStream, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,44 @@
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Defines bulk lifecycle operations (drop, export, import, merge) that allow a
/// store's contents to be captured in and restored from snapshots.
/// </summary>
/// <typeparam name="T">The item type persisted by the implementing store.</typeparam>
public interface ISnapshotable<T>
{
    /// <summary>
    /// Asynchronously deletes the underlying data store and all of its contents.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the drop operation.</param>
    /// <remarks>After calling this method, the data store and all stored data will be permanently removed.
    /// This operation cannot be undone. Any further operations on the data store may result in errors.</remarks>
    /// <returns>A task that represents the asynchronous drop operation.</returns>
    Task DropAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously exports a collection of items of type T.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the export operation.</param>
    /// <returns>A task that represents the asynchronous export operation. The task result contains an enumerable collection of
    /// exported items of type T.</returns>
    Task<IEnumerable<T>> ExportAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Imports the specified collection of items asynchronously.
    /// </summary>
    /// <param name="items">The collection of items to import. Cannot be null. Each item will be processed in sequence.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the import operation.</param>
    /// <returns>A task that represents the asynchronous import operation.</returns>
    Task ImportAsync(IEnumerable<T> items, CancellationToken cancellationToken = default);

    /// <summary>
    /// Merges the specified collection of items into the target data store asynchronously.
    /// </summary>
    /// <remarks>If the operation is canceled via the provided cancellation token, the returned task will be
    /// in a canceled state. The merge operation may update existing items or add new items, depending on the
    /// implementation.</remarks>
    /// <param name="items">The collection of items to merge into the data store. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the merge operation.</param>
    /// <returns>A task that represents the asynchronous merge operation.</returns>
    Task MergeAsync(IEnumerable<T> items, CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,58 @@
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Storage;
/// <summary>
/// Manages Vector Clock state for the local node.
/// Tracks the latest timestamp and hash per node for sync coordination.
/// </summary>
public interface IVectorClockService
{
    /// <summary>
    /// Indicates whether the cache has been populated with initial data.
    /// Reset to false by <see cref="Invalidate"/>.
    /// </summary>
    bool IsInitialized { get; set; }

    /// <summary>
    /// Updates the cache with a new OplogEntry's timestamp and hash.
    /// Called by both DocumentStore (local CDC) and OplogStore (remote sync).
    /// </summary>
    /// <param name="entry">The oplog entry containing timestamp and hash data.</param>
    void Update(OplogEntry entry);

    /// <summary>
    /// Returns the current Vector Clock built from cached node timestamps.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task whose result is the current <see cref="VectorClock"/>.</returns>
    Task<VectorClock> GetVectorClockAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns the latest known timestamp across all nodes.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task whose result is the latest known <see cref="HlcTimestamp"/>.</returns>
    Task<HlcTimestamp> GetLatestTimestampAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns the last known hash for the specified node.
    /// Returns null if the node is unknown.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <returns>The last known hash for the node, or <see langword="null"/> if the node is unknown.</returns>
    string? GetLastHash(string nodeId);

    /// <summary>
    /// Updates the cache with a specific node's timestamp and hash.
    /// Used for snapshot metadata fallback.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <param name="timestamp">The timestamp to store for the node.</param>
    /// <param name="hash">The hash to store for the node.</param>
    void UpdateNode(string nodeId, HlcTimestamp timestamp, string hash);

    /// <summary>
    /// Clears the cache and resets <see cref="IsInitialized"/> to false,
    /// forcing re-initialization on next access.
    /// </summary>
    void Invalidate();
}

View File

@@ -0,0 +1,50 @@
using System.Text.Json;
using ZB.MOM.WW.CBDDC.Core;
namespace ZB.MOM.WW.CBDDC.Core.Sync;
/// <summary>
/// Immutable outcome of resolving a conflict between local state and a remote change.
/// </summary>
public class ConflictResolutionResult
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ConflictResolutionResult"/> class.
    /// </summary>
    /// <param name="shouldApply">Indicates whether the change should be applied.</param>
    /// <param name="mergedDocument">The merged document produced by resolution, if any.</param>
    public ConflictResolutionResult(bool shouldApply, Document? mergedDocument)
    {
        ShouldApply = shouldApply;
        MergedDocument = mergedDocument;
    }

    /// <summary>
    /// Gets a value indicating whether the remote change should be applied.
    /// </summary>
    public bool ShouldApply { get; }

    /// <summary>
    /// Gets the merged document to apply when conflict resolution produced one.
    /// </summary>
    public Document? MergedDocument { get; }

    /// <summary>
    /// Creates a result indicating that the resolved document should be applied.
    /// </summary>
    /// <param name="document">The merged document to apply.</param>
    /// <returns>A resolution result that applies the provided document.</returns>
    public static ConflictResolutionResult Apply(Document document)
    {
        return new ConflictResolutionResult(true, document);
    }

    /// <summary>
    /// Creates a result indicating that the remote change should be ignored.
    /// </summary>
    /// <returns>A resolution result that skips applying the remote change.</returns>
    public static ConflictResolutionResult Ignore()
    {
        return new ConflictResolutionResult(false, null);
    }
}
/// <summary>
/// Defines a strategy for resolving conflicts between a local document and an
/// incoming remote oplog entry during synchronization.
/// </summary>
public interface IConflictResolver
{
    /// <summary>
    /// Resolves a conflict between local state and a remote oplog entry.
    /// </summary>
    /// <param name="local">The local document state, if present.</param>
    /// <param name="remote">The incoming remote oplog entry.</param>
    /// <returns>The resolution outcome indicating whether and how to apply changes.</returns>
    ConflictResolutionResult Resolve(Document? local, OplogEntry remote);
}

View File

@@ -0,0 +1,40 @@
using System;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Sync
{
/// <summary>
/// Represents a queue for operations that should be executed when connectivity is restored.
/// </summary>
public interface IOfflineQueue
{
    /// <summary>
    /// Gets the number of pending operations in the queue.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// Clears all pending operations from the queue.
    /// </summary>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task Clear();

    /// <summary>
    /// Enqueues a pending operation.
    /// </summary>
    /// <param name="operation">The operation to enqueue.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task Enqueue(PendingOperation operation);

    /// <summary>
    /// Flushes the queue by executing each pending operation.
    /// </summary>
    /// <param name="executor">The delegate used to execute each operation.</param>
    /// <param name="cancellationToken">A token used to cancel the flush operation.</param>
    /// <returns>
    /// A task that returns a tuple containing the number of successful and failed operations.
    /// </returns>
    Task<(int Successful, int Failed)> FlushAsync(Func<PendingOperation, Task> executor, CancellationToken cancellationToken = default);
}
}

View File

@@ -0,0 +1,37 @@
using System.Text.Json;
using ZB.MOM.WW.CBDDC.Core;
namespace ZB.MOM.WW.CBDDC.Core.Sync;
/// <summary>
/// Resolves document conflicts with a last-write-wins strategy: the side with the
/// later timestamp wins, and ties keep the local copy.
/// </summary>
public class LastWriteWinsConflictResolver : IConflictResolver
{
    /// <summary>
    /// Resolves document conflicts by preferring the entry with the latest timestamp.
    /// </summary>
    /// <param name="local">The local document, if available.</param>
    /// <param name="remote">The incoming remote oplog entry.</param>
    /// <returns>The conflict resolution result indicating whether to apply or ignore the remote change.</returns>
    public ConflictResolutionResult Resolve(Document? local, OplogEntry remote)
    {
        // If no local document exists, the remote change always wins.
        // Otherwise the remote wins only when its timestamp is strictly newer;
        // ties keep the local document, which makes resolution deterministic.
        if (local == null || remote.Timestamp.CompareTo(local.UpdatedAt) > 0)
        {
            return ConflictResolutionResult.Apply(CreateDocumentFromRemote(remote));
        }

        // Local is newer or equal, ignore remote.
        return ConflictResolutionResult.Ignore();
    }

    /// <summary>
    /// Materializes a <see cref="Document"/> from a remote oplog entry,
    /// marking it as deleted when the entry represents a delete operation.
    /// </summary>
    /// <param name="remote">The remote oplog entry.</param>
    /// <returns>The document described by the oplog entry.</returns>
    private static Document CreateDocumentFromRemote(OplogEntry remote)
    {
        var content = remote.Payload ?? default;
        return new Document(remote.Collection, remote.Key, content, remote.Timestamp, remote.Operation == OperationType.Delete);
    }
}

View File

@@ -0,0 +1,130 @@
using ZB.MOM.WW.CBDDC.Core.Network;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Sync;
/// <summary>
/// In-memory FIFO queue for operations performed while offline.
/// Bounded by the configured <c>MaxQueueSize</c>; when full, the oldest
/// operation is dropped to make room. All queue access is guarded by a lock.
/// </summary>
public class OfflineQueue : IOfflineQueue
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
    private readonly Queue<PendingOperation> _queue = new();
    private readonly ILogger<OfflineQueue> _logger;
    private readonly object _lock = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="OfflineQueue"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The configuration provider used for queue limits.</param>
    /// <param name="logger">The logger instance.</param>
    public OfflineQueue(IPeerNodeConfigurationProvider peerNodeConfigurationProvider, ILogger<OfflineQueue>? logger = null)
    {
        _peerNodeConfigurationProvider = peerNodeConfigurationProvider;
        _logger = logger ?? NullLogger<OfflineQueue>.Instance;
    }

    /// <summary>
    /// Gets the number of pending operations.
    /// </summary>
    public int Count
    {
        get
        {
            lock (_lock)
            {
                return _queue.Count;
            }
        }
    }

    /// <summary>
    /// Enqueues an operation for later execution, dropping the oldest queued
    /// operation if the configured size limit has been reached.
    /// </summary>
    /// <param name="operation">The pending operation to enqueue.</param>
    /// <returns>A task that represents the asynchronous enqueue operation.</returns>
    public async Task Enqueue(PendingOperation operation)
    {
        // Resolve the size limit outside the lock; the provider may be asynchronous.
        var config = await _peerNodeConfigurationProvider.GetConfiguration();
        lock (_lock)
        {
            if (_queue.Count >= config.MaxQueueSize)
            {
                // Bounded queue: discard the oldest entry to keep the newest data.
                var dropped = _queue.Dequeue();
                _logger.LogWarning("Queue full, dropped oldest operation: {Type} {Collection}:{Key}",
                    dropped.Type, dropped.Collection, dropped.Key);
            }
            _queue.Enqueue(operation);
            _logger.LogDebug("Queued {Type} operation for {Collection}:{Key}",
                operation.Type, operation.Collection, operation.Key);
        }
    }

    /// <summary>
    /// Flushes all pending operations, executing each one in FIFO order.
    /// Failures are logged and counted; they do not stop the flush.
    /// </summary>
    /// <param name="executor">The delegate that executes each pending operation.</param>
    /// <param name="cancellationToken">A token used to cancel the operation before the queue is drained.</param>
    /// <returns>A task whose result contains the number of successful and failed operations.</returns>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="cancellationToken"/> is already cancelled.</exception>
    public async Task<(int Successful, int Failed)> FlushAsync(Func<PendingOperation, Task> executor, CancellationToken cancellationToken = default)
    {
        // Honor cancellation before draining the queue; once operations are
        // dequeued, aborting mid-loop would silently drop the remainder, so
        // cancellation is deliberately not checked between operations.
        cancellationToken.ThrowIfCancellationRequested();
        List<PendingOperation> operations;
        lock (_lock)
        {
            operations = _queue.ToList();
            _queue.Clear();
        }
        if (operations.Count == 0)
        {
            _logger.LogDebug("No pending operations to flush");
            return (0, 0);
        }
        _logger.LogInformation("Flushing {Count} pending operations", operations.Count);
        int successful = 0;
        int failed = 0;
        foreach (var op in operations)
        {
            try
            {
                await executor(op);
                successful++;
            }
            catch (Exception ex)
            {
                // A single failing operation must not abort the rest of the batch.
                failed++;
                _logger.LogError(ex, "Failed to execute pending {Type} operation for {Collection}:{Key}",
                    op.Type, op.Collection, op.Key);
            }
        }
        _logger.LogInformation("Flush completed: {Successful} successful, {Failed} failed",
            successful, failed);
        return (successful, failed);
    }

    /// <summary>
    /// Clears all pending operations.
    /// </summary>
    /// <returns>A completed task once the queue has been cleared.</returns>
    public Task Clear()
    {
        // Synchronous work only: return Task.CompletedTask instead of declaring
        // the method async with no awaits (avoids compiler warning CS1998 and
        // the needless async state machine).
        lock (_lock)
        {
            var count = _queue.Count;
            _queue.Clear();
            _logger.LogInformation("Cleared {Count} pending operations", count);
        }
        return Task.CompletedTask;
    }
}

View File

@@ -0,0 +1,32 @@
using System;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Core.Sync;
/// <summary>
/// Represents a pending operation to be executed when connection is restored.
/// </summary>
public class PendingOperation
{
    /// <summary>Gets or sets the operation type.</summary>
    public string Type { get; set; } = string.Empty;

    /// <summary>Gets or sets the collection targeted by the operation.</summary>
    public string Collection { get; set; } = string.Empty;

    /// <summary>Gets or sets the document key targeted by the operation.</summary>
    public string Key { get; set; } = string.Empty;

    /// <summary>Gets or sets the payload associated with the operation.</summary>
    public object? Data { get; set; }

    /// <summary>Gets or sets the UTC time when the operation was queued.</summary>
    public DateTime QueuedAt { get; set; }
}

View File

@@ -0,0 +1,254 @@
using System;
using System.Buffers;
using System.Collections.Generic;
using System.IO;
using System.Text.Json;
namespace ZB.MOM.WW.CBDDC.Core.Sync;
/// <summary>
/// Resolves merge conflicts by recursively merging object and array nodes.
/// Objects are merged key-by-key; arrays of objects are merged by "id"/"_id";
/// primitives and unmergeable shapes fall back to last-write-wins by timestamp.
/// </summary>
public class RecursiveNodeMergeConflictResolver : IConflictResolver
{
    /// <summary>
    /// Resolves a conflict between a local document and a remote operation.
    /// </summary>
    /// <param name="local">The local document, or <see langword="null"/> if none exists.</param>
    /// <param name="remote">The remote operation to apply.</param>
    /// <returns>The conflict resolution result indicating whether and what to apply.</returns>
    public ConflictResolutionResult Resolve(Document? local, OplogEntry remote)
    {
        // No local document: remote always wins.
        if (local == null)
        {
            var content = remote.Payload ?? default;
            var newDoc = new Document(remote.Collection, remote.Key, content, remote.Timestamp, remote.Operation == OperationType.Delete);
            return ConflictResolutionResult.Apply(newDoc);
        }
        // Deletes are resolved by plain LWW: only a strictly newer delete wins.
        if (remote.Operation == OperationType.Delete)
        {
            if (remote.Timestamp.CompareTo(local.UpdatedAt) > 0)
            {
                var newDoc = new Document(remote.Collection, remote.Key, default, remote.Timestamp, true);
                return ConflictResolutionResult.Apply(newDoc);
            }
            return ConflictResolutionResult.Ignore();
        }
        var localJson = local.Content;
        var remoteJson = remote.Payload ?? default;
        var localTs = local.UpdatedAt;
        var remoteTs = remote.Timestamp;
        // If either side has no content, the other side wins outright.
        if (localJson.ValueKind == JsonValueKind.Undefined) return ConflictResolutionResult.Apply(new Document(remote.Collection, remote.Key, remoteJson, remoteTs, false));
        if (remoteJson.ValueKind == JsonValueKind.Undefined) return ConflictResolutionResult.Ignore();
        // Optimization: Use ArrayBufferWriter (Net6.0) or MemoryStream (NS2.0)
        // Utf8JsonWriter works with both, but ArrayBufferWriter is more efficient for high throughput.
        JsonElement mergedDocJson;
#if NET6_0_OR_GREATER
        var bufferWriter = new ArrayBufferWriter<byte>();
        using (var writer = new Utf8JsonWriter(bufferWriter))
        {
            MergeJson(writer, localJson, localTs, remoteJson, remoteTs);
        }
        // JsonDocument rents pooled buffers and must be disposed; Clone() detaches
        // the root element so it stays valid after disposal. The previous code
        // kept RootElement of an undisposed document, leaking the pooled buffers.
        using (var mergedDoc1 = JsonDocument.Parse(bufferWriter.WrittenMemory))
        {
            mergedDocJson = mergedDoc1.RootElement.Clone();
        }
#else
        using (var ms = new MemoryStream())
        {
            using (var writer = new Utf8JsonWriter(ms))
            {
                MergeJson(writer, localJson, localTs, remoteJson, remoteTs);
            }
            // Parse expects ReadOnlyMemory or Byte array.
            // Dispose the document and keep a detached Clone() of the root
            // element to avoid leaking the document's pooled buffers.
            using (var mergedDoc2 = JsonDocument.Parse(ms.ToArray()))
            {
                mergedDocJson = mergedDoc2.RootElement.Clone();
            }
        }
#endif
        var maxTimestamp = remoteTs.CompareTo(localTs) > 0 ? remoteTs : localTs;
        var mergedDoc = new Document(remote.Collection, remote.Key, mergedDocJson, maxTimestamp, false);
        return ConflictResolutionResult.Apply(mergedDoc);
    }

    /// <summary>
    /// Recursively merges two JSON values of any kind, writing the result to <paramref name="writer"/>.
    /// Mismatched kinds and differing primitives fall back to LWW by timestamp.
    /// </summary>
    private void MergeJson(Utf8JsonWriter writer, JsonElement local, HlcTimestamp localTs, JsonElement remote, HlcTimestamp remoteTs)
    {
        if (local.ValueKind != remote.ValueKind)
        {
            // Different kinds cannot be merged structurally: the newer side wins.
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }
        switch (local.ValueKind)
        {
            case JsonValueKind.Object:
                MergeObjects(writer, local, localTs, remote, remoteTs);
                break;
            case JsonValueKind.Array:
                MergeArrays(writer, local, localTs, remote, remoteTs);
                break;
            default:
                // Primitives: identical raw text is written as-is, otherwise LWW.
                if (local.GetRawText() == remote.GetRawText())
                {
                    local.WriteTo(writer);
                }
                else
                {
                    if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
                    else local.WriteTo(writer);
                }
                break;
        }
    }

    /// <summary>
    /// Merges two JSON objects key-by-key: shared keys are merged recursively,
    /// keys present on only one side are copied through.
    /// </summary>
    private void MergeObjects(Utf8JsonWriter writer, JsonElement local, HlcTimestamp localTs, JsonElement remote, HlcTimestamp remoteTs)
    {
        writer.WriteStartObject();
        // Single pass over each side: iterate local keys (merging collisions),
        // track them, then append remote-only keys. Avoids building a union dictionary.
        var processedKeys = new HashSet<string>();
        foreach (var prop in local.EnumerateObject())
        {
            var key = prop.Name;
            processedKeys.Add(key); // Mark as processed
            writer.WritePropertyName(key);
            if (remote.TryGetProperty(key, out var remoteVal))
            {
                // Collision -> Merge
                MergeJson(writer, prop.Value, localTs, remoteVal, remoteTs);
            }
            else
            {
                // Only local
                prop.Value.WriteTo(writer);
            }
        }
        foreach (var prop in remote.EnumerateObject())
        {
            if (!processedKeys.Contains(prop.Name))
            {
                // New from remote
                writer.WritePropertyName(prop.Name);
                prop.Value.WriteTo(writer);
            }
        }
        writer.WriteEndObject();
    }

    /// <summary>
    /// Merges two JSON arrays. Arrays of objects are merged element-wise by
    /// "id"/"_id"; primitive or unidentifiable arrays fall back to LWW by timestamp.
    /// </summary>
    private void MergeArrays(Utf8JsonWriter writer, JsonElement local, HlcTimestamp localTs, JsonElement remote, HlcTimestamp remoteTs)
    {
        // Heuristic: decide per side whether this looks like an array of objects.
        bool localIsObj = HasObjects(local);
        bool remoteIsObj = HasObjects(remote);
        if (!localIsObj && !remoteIsObj)
        {
            // Primitive arrays: LWW.
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }
        if (localIsObj != remoteIsObj)
        {
            // Mixed shape mismatch: LWW.
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }
        // Both object arrays - merge by ID.
        // (JsonElement is a struct over the backing document, so holding them in a map is cheap.)
        var localMap = MapById(local);
        var remoteMap = MapById(remote);
        if (localMap == null || remoteMap == null)
        {
            // Items without usable IDs (or duplicates): fall back to LWW.
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }
        writer.WriteStartArray();
        // Write the union of items by ID: local items first (merged when the ID
        // also exists remotely), then remote-only items. This keeps output deterministic.
        var processedIds = new HashSet<string>();
        // 1. Process local items (merge or write).
        foreach (var kvp in localMap)
        {
            var id = kvp.Key;
            var localItem = kvp.Value;
            processedIds.Add(id);
            if (remoteMap.TryGetValue(id, out var remoteItem))
            {
                // Merge recursively
                MergeJson(writer, localItem, localTs, remoteItem, remoteTs);
            }
            else
            {
                // Keep local item
                localItem.WriteTo(writer);
            }
        }
        // 2. Process new remote items.
        foreach (var kvp in remoteMap)
        {
            if (!processedIds.Contains(kvp.Key))
            {
                kvp.Value.WriteTo(writer);
            }
        }
        writer.WriteEndArray();
    }

    /// <summary>
    /// Heuristically determines whether an array contains objects by inspecting
    /// its first element only. Empty arrays report false.
    /// </summary>
    private bool HasObjects(JsonElement array)
    {
        if (array.GetArrayLength() == 0) return false;
        // Check first item as heuristic
        return array[0].ValueKind == JsonValueKind.Object;
    }

    /// <summary>
    /// Builds an id-to-element map for an array of objects using "id" (preferred)
    /// or "_id". Returns <see langword="null"/> when the array contains a non-object,
    /// an item without an ID, or a duplicate ID - callers then fall back to LWW.
    /// </summary>
    private Dictionary<string, JsonElement>? MapById(JsonElement array)
    {
        var map = new Dictionary<string, JsonElement>();
        foreach (var item in array.EnumerateArray())
        {
            if (item.ValueKind != JsonValueKind.Object) return null; // Abort mixed
            string? id = null;
            if (item.TryGetProperty("id", out var p)) id = p.ToString();
            else if (item.TryGetProperty("_id", out var p2)) id = p2.ToString();
            if (id == null) return null; // Missing ID
            if (map.ContainsKey(id)) return null; // Duplicate ID
            map[id] = item;
        }
        return map;
    }
}

View File

@@ -0,0 +1,196 @@
using System;
using System.Collections.Generic;
using System.Linq;
namespace ZB.MOM.WW.CBDDC.Core;
/// <summary>
/// Represents a Vector Clock for tracking causality in a distributed system.
/// Maps NodeId -> HlcTimestamp to track the latest known state of each node.
/// </summary>
public class VectorClock
{
    private readonly Dictionary<string, HlcTimestamp> _clock;

    /// <summary>
    /// Initializes a new empty vector clock.
    /// </summary>
    public VectorClock()
    {
        _clock = new Dictionary<string, HlcTimestamp>(StringComparer.Ordinal);
    }

    /// <summary>
    /// Initializes a new vector clock from an existing clock state.
    /// </summary>
    /// <param name="clock">The clock state to copy.</param>
    public VectorClock(Dictionary<string, HlcTimestamp> clock)
    {
        // Defensive copy with ordinal key comparison so node IDs compare byte-wise.
        _clock = new Dictionary<string, HlcTimestamp>(clock, StringComparer.Ordinal);
    }

    /// <summary>
    /// Gets all node IDs in this vector clock.
    /// </summary>
    public IEnumerable<string> NodeIds => _clock.Keys;

    /// <summary>
    /// Gets the timestamp for a specific node, or default if not present.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <returns>The node's timestamp, or <c>default</c> when the node is unknown.</returns>
    public HlcTimestamp GetTimestamp(string nodeId)
    {
        return _clock.TryGetValue(nodeId, out var ts) ? ts : default;
    }

    /// <summary>
    /// Sets or updates the timestamp for a specific node.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <param name="timestamp">The timestamp to set.</param>
    public void SetTimestamp(string nodeId, HlcTimestamp timestamp)
    {
        _clock[nodeId] = timestamp;
    }

    /// <summary>
    /// Merges another vector clock into this one, taking the maximum timestamp for each node.
    /// </summary>
    /// <param name="other">The vector clock to merge from.</param>
    public void Merge(VectorClock other)
    {
        foreach (var nodeId in other.NodeIds)
        {
            var otherTs = other.GetTimestamp(nodeId);
            if (!_clock.TryGetValue(nodeId, out var currentTs) || otherTs.CompareTo(currentTs) > 0)
            {
                _clock[nodeId] = otherTs;
            }
        }
    }

    /// <summary>
    /// Compares this vector clock with another to determine causality.
    /// </summary>
    /// <param name="other">The vector clock to compare with.</param>
    /// <returns>
    /// <see cref="CausalityRelation.StrictlyAhead"/> when this clock dominates,
    /// <see cref="CausalityRelation.StrictlyBehind"/> when the other dominates,
    /// <see cref="CausalityRelation.Concurrent"/> when each side has updates the other lacks,
    /// and <see cref="CausalityRelation.Equal"/> when both are identical.
    /// </returns>
    public CausalityRelation CompareTo(VectorClock other)
    {
        bool thisAhead = false;
        bool otherAhead = false;
        var allNodes = new HashSet<string>(_clock.Keys.Union(other._clock.Keys), StringComparer.Ordinal);
        foreach (var nodeId in allNodes)
        {
            var thisTs = GetTimestamp(nodeId);
            var otherTs = other.GetTimestamp(nodeId);
            int cmp = thisTs.CompareTo(otherTs);
            if (cmp > 0)
            {
                thisAhead = true;
            }
            else if (cmp < 0)
            {
                otherAhead = true;
            }
            // Early exit: once both sides are ahead somewhere, the result is Concurrent.
            if (thisAhead && otherAhead)
            {
                return CausalityRelation.Concurrent;
            }
        }
        if (thisAhead && !otherAhead)
            return CausalityRelation.StrictlyAhead;
        if (otherAhead && !thisAhead)
            return CausalityRelation.StrictlyBehind;
        return CausalityRelation.Equal;
    }

    /// <summary>
    /// Determines which nodes have updates that this vector clock doesn't have.
    /// Returns node IDs where the other vector clock is ahead.
    /// </summary>
    /// <param name="other">The vector clock to compare against.</param>
    /// <returns>Node IDs for which <paramref name="other"/> holds a newer timestamp.</returns>
    public IEnumerable<string> GetNodesWithUpdates(VectorClock other)
    {
        // Union of both key sets, built the same way as in GetNodesToPush/CompareTo
        // for consistency.
        var allNodes = new HashSet<string>(_clock.Keys.Union(other._clock.Keys), StringComparer.Ordinal);
        foreach (var nodeId in allNodes)
        {
            var thisTs = GetTimestamp(nodeId);
            var otherTs = other.GetTimestamp(nodeId);
            if (otherTs.CompareTo(thisTs) > 0)
            {
                yield return nodeId;
            }
        }
    }

    /// <summary>
    /// Determines which nodes have updates that the other vector clock doesn't have.
    /// Returns node IDs where this vector clock is ahead.
    /// </summary>
    /// <param name="other">The vector clock to compare against.</param>
    /// <returns>Node IDs for which this clock holds a newer timestamp.</returns>
    public IEnumerable<string> GetNodesToPush(VectorClock other)
    {
        var allNodes = new HashSet<string>(_clock.Keys.Union(other._clock.Keys), StringComparer.Ordinal);
        foreach (var nodeId in allNodes)
        {
            var thisTs = GetTimestamp(nodeId);
            var otherTs = other.GetTimestamp(nodeId);
            if (thisTs.CompareTo(otherTs) > 0)
            {
                yield return nodeId;
            }
        }
    }

    /// <summary>
    /// Creates a copy of this vector clock.
    /// </summary>
    /// <returns>An independent copy; mutations to either clock do not affect the other.</returns>
    public VectorClock Clone()
    {
        return new VectorClock(new Dictionary<string, HlcTimestamp>(_clock, StringComparer.Ordinal));
    }

    /// <inheritdoc />
    public override string ToString()
    {
        if (_clock.Count == 0)
            return "{}";
        var entries = _clock.Select(kvp => $"{kvp.Key}:{kvp.Value}");
        return "{" + string.Join(", ", entries) + "}";
    }
}
/// <summary>
/// Represents the causality relationship between two vector clocks.
/// </summary>
public enum CausalityRelation
{
    /// <summary>Both vector clocks are equal (identical knowledge of every node).</summary>
    Equal,
    /// <summary>This vector clock is strictly ahead (dominates the other).</summary>
    StrictlyAhead,
    /// <summary>This vector clock is strictly behind (dominated by the other).</summary>
    StrictlyBehind,
    /// <summary>Vector clocks are concurrent (each side has updates the other lacks).</summary>
    Concurrent
}

View File

@@ -0,0 +1,33 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<AssemblyName>ZB.MOM.WW.CBDDC.Core</AssemblyName>
<RootNamespace>ZB.MOM.WW.CBDDC.Core</RootNamespace>
<PackageId>ZB.MOM.WW.CBDDC.Core</PackageId>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>latest</LangVersion>
<Nullable>enable</Nullable>
<Version>1.0.3</Version>
<Authors>MrDevRobot</Authors>
<Description>Core abstractions and logic for CBDDC, a lightweight P2P mesh database.</Description>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageTags>p2p;mesh;database;gossip;cbddc;lan;offline-first;distributed</PackageTags>
<PackageProjectUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</PackageProjectUrl>
<RepositoryUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageReadmeFile>README.md</PackageReadmeFile>
</PropertyGroup>
<ItemGroup>
<None Include="README.md" Pack="true" PackagePath="\" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
</ItemGroup>
<ItemGroup>
<Folder Include="Storage\Events\" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,92 @@
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
using ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
using ZB.MOM.WW.CBDDC.Hosting.HostedServices;
using ZB.MOM.WW.CBDDC.Hosting.Services;
using ZB.MOM.WW.CBDDC.Network;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Hosting;
namespace ZB.MOM.WW.CBDDC.Hosting;
/// <summary>
/// Extension methods for configuring CBDDC in ASP.NET Core applications.
/// </summary>
public static class CBDDCHostingExtensions
{
    /// <summary>
    /// Adds CBDDC ASP.NET integration with the specified configuration.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configure">Action to configure CBDDC options.</param>
    /// <returns>The same <see cref="IServiceCollection"/> instance for chaining.</returns>
    public static IServiceCollection AddCBDDCHosting(
        this IServiceCollection services,
        Action<CBDDCHostingOptions> configure)
    {
        // Throw helpers keep the guard style consistent with CBDDCServiceCollectionExtensions.AddCBDDCCore.
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configure);
        var options = new CBDDCHostingOptions();
        configure(options);
        // Register options so dependent services (e.g. the health check) can resolve them.
        services.TryAddSingleton(options);
        // Register cluster-scoped services.
        RegisterSingleClusterServices(services, options.Cluster);
        // Register services shared by all deployment modes.
        RegisterCommonServices(services, options);
        return services;
    }

    /// <summary>
    /// Adds CBDDC ASP.NET integration for single-cluster mode.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configure">Action to configure single-cluster options.</param>
    /// <returns>The same <see cref="IServiceCollection"/> instance for chaining.</returns>
    public static IServiceCollection AddCBDDCHostingSingleCluster(
        this IServiceCollection services,
        Action<ClusterOptions>? configure = null)
    {
        return services.AddCBDDCHosting(options =>
        {
            configure?.Invoke(options.Cluster);
        });
    }

    /// <summary>
    /// Registers discovery, sync-orchestration, and hosted services for cluster mode.
    /// </summary>
    private static void RegisterSingleClusterServices(
        IServiceCollection services,
        ClusterOptions options)
    {
        // Discovery service (no-op in server mode - no UDP broadcast)
        services.TryAddSingleton<IDiscoveryService, NoOpDiscoveryService>();
        // Sync orchestrator - use actual orchestrator to propagate changes between peers
        // Cloud nodes need to act as propagators for scenarios:
        // 1. Services connected to cloud that modify data
        // 2. Separate LAN clusters that connect through the cloud
        services.TryAddSingleton<ISyncOrchestrator, SyncOrchestrator>();
        // Hosted services (TryAddEnumerable avoids duplicate registrations on repeated calls).
        services.TryAddEnumerable(ServiceDescriptor.Singleton<IHostedService, TcpSyncServerHostedService>());
        services.TryAddEnumerable(ServiceDescriptor.Singleton<IHostedService, DiscoveryServiceHostedService>());
    }

    /// <summary>
    /// Registers services common to every deployment mode (currently health checks).
    /// </summary>
    private static void RegisterCommonServices(
        IServiceCollection services,
        CBDDCHostingOptions options)
    {
        // Health checks
        if (options.EnableHealthChecks)
        {
            services.AddHealthChecks()
                .AddCheck<CBDDCHealthCheck>(
                    "cbddc",
                    failureStatus: HealthStatus.Unhealthy,
                    tags: new[] { "db", "ready" });
        }
    }
}

View File

@@ -0,0 +1,18 @@
namespace ZB.MOM.WW.CBDDC.Hosting.Configuration;
/// <summary>
/// Top-level configuration options for the CBDDC ASP.NET Core integration.
/// </summary>
public class CBDDCHostingOptions
{
    /// <summary>
    /// Gets or sets the cluster configuration for this node.
    /// </summary>
    public ClusterOptions Cluster { get; set; } = new();

    /// <summary>
    /// Gets or sets a value indicating whether CBDDC health checks are registered.
    /// Default: <see langword="true"/>.
    /// </summary>
    public bool EnableHealthChecks { get; set; } = true;
}

View File

@@ -0,0 +1,40 @@
using System;
namespace ZB.MOM.WW.CBDDC.Hosting.Configuration;
/// <summary>
/// Configuration options for cluster mode.
/// </summary>
public class ClusterOptions
{
    /// <summary>
    /// Gets or sets the unique identifier of this node.
    /// Defaults to the machine name.
    /// </summary>
    public string NodeId { get; set; } = Environment.MachineName;

    /// <summary>
    /// Gets or sets the TCP port used for sync operations.
    /// Default: 5001.
    /// </summary>
    public int TcpPort { get; set; } = 5001;

    /// <summary>
    /// Gets or sets a value indicating whether UDP peer discovery is enabled.
    /// Default: <see langword="false"/> (disabled in server mode).
    /// </summary>
    public bool EnableUdpDiscovery { get; set; }

    /// <summary>
    /// Gets or sets the lag threshold, in milliseconds, above which a tracked peer
    /// is considered lagging and degrades the reported health status.
    /// Default: 30,000 ms.
    /// </summary>
    public long PeerConfirmationLagThresholdMs { get; set; } = 30_000;

    /// <summary>
    /// Gets or sets the critical lag threshold, in milliseconds, above which a tracked
    /// peer marks the reported health status as unhealthy.
    /// Default: 120,000 ms.
    /// </summary>
    public long PeerConfirmationCriticalLagThresholdMs { get; set; } = 120_000;
}

View File

@@ -0,0 +1,135 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
namespace ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
/// <summary>
/// Health check for CBDDC persistence layer.
/// Verifies that the database connection is healthy.
/// Also inspects per-peer oplog confirmation lag against the configured thresholds:
/// lagging or unconfirmed peers degrade the status; critically lagging peers fail it.
/// </summary>
public class CBDDCHealthCheck : IHealthCheck
{
    // Source of the local oplog head, used as the reference point for peer lag.
    private readonly IOplogStore _oplogStore;
    // Tracks which peers have confirmed replication progress, per source stream.
    private readonly IPeerOplogConfirmationStore _peerOplogConfirmationStore;
    // Supplies the lag thresholds (from ClusterOptions).
    private readonly CBDDCHostingOptions _options;
    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCHealthCheck"/> class.
    /// </summary>
    /// <param name="oplogStore">The oplog store used to verify persistence health.</param>
    /// <param name="peerOplogConfirmationStore">The peer confirmation store used for confirmation lag health checks.</param>
    /// <param name="options">Hosting options containing health lag thresholds.</param>
    public CBDDCHealthCheck(
        IOplogStore oplogStore,
        IPeerOplogConfirmationStore peerOplogConfirmationStore,
        CBDDCHostingOptions options)
    {
        _oplogStore = oplogStore ?? throw new ArgumentNullException(nameof(oplogStore));
        _peerOplogConfirmationStore = peerOplogConfirmationStore ?? throw new ArgumentNullException(nameof(peerOplogConfirmationStore));
        _options = options ?? throw new ArgumentNullException(nameof(options));
    }
    /// <summary>
    /// Performs a health check against the CBDDC persistence layer.
    /// </summary>
    /// <param name="context">The health check execution context.</param>
    /// <param name="cancellationToken">A token used to cancel the health check.</param>
    /// <returns>A <see cref="HealthCheckResult"/> describing the health status.</returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        try
        {
            // Reading the head also doubles as a connectivity probe for the oplog store.
            var localHead = await _oplogStore.GetLatestTimestampAsync(cancellationToken);
            // Normalize tracked peer ids: drop blanks, dedupe, sort for stable reporting.
            var trackedPeers = (await _peerOplogConfirmationStore.GetActiveTrackedPeersAsync(cancellationToken))
                .Where(peerNodeId => !string.IsNullOrWhiteSpace(peerNodeId))
                .Distinct(StringComparer.Ordinal)
                .OrderBy(peerNodeId => peerNodeId, StringComparer.Ordinal)
                .ToList();
            var peersWithNoConfirmation = new List<string>();
            var laggingPeers = new List<string>();
            var criticalLaggingPeers = new List<string>();
            var lastSuccessfulConfirmationUpdateByPeer = new Dictionary<string, DateTimeOffset?>(StringComparer.Ordinal);
            var maxLagMs = 0L;
            // Clamp thresholds: lag threshold must be non-negative, and the critical
            // threshold is never allowed below the degraded threshold.
            var lagThresholdMs = Math.Max(0, _options.Cluster.PeerConfirmationLagThresholdMs);
            var criticalLagThresholdMs = Math.Max(lagThresholdMs, _options.Cluster.PeerConfirmationCriticalLagThresholdMs);
            foreach (var peerNodeId in trackedPeers)
            {
                var confirmations = (await _peerOplogConfirmationStore.GetConfirmationsForPeerAsync(peerNodeId, cancellationToken))
                    .Where(confirmation => confirmation.IsActive)
                    .ToList();
                if (confirmations.Count == 0)
                {
                    // Tracked but never confirmed anything: reported separately from lag.
                    peersWithNoConfirmation.Add(peerNodeId);
                    lastSuccessfulConfirmationUpdateByPeer[peerNodeId] = null;
                    continue;
                }
                // Report worst-case peer lag across source streams.
                var oldestConfirmation = confirmations
                    .OrderBy(confirmation => confirmation.ConfirmedWall)
                    .ThenBy(confirmation => confirmation.ConfirmedLogic)
                    .First();
                // NOTE(review): assumes ConfirmedWall and PhysicalTime share the same
                // clock epoch and unit (presumably milliseconds) — confirm against HlcTimestamp.
                var lagMs = Math.Max(0, localHead.PhysicalTime - oldestConfirmation.ConfirmedWall);
                maxLagMs = Math.Max(maxLagMs, lagMs);
                lastSuccessfulConfirmationUpdateByPeer[peerNodeId] = confirmations.Max(confirmation => confirmation.LastConfirmedUtc);
                if (lagMs > lagThresholdMs)
                {
                    laggingPeers.Add(peerNodeId);
                }
                // Not "else if": a critically lagging peer also appears in laggingPeers.
                if (lagMs > criticalLagThresholdMs)
                {
                    criticalLaggingPeers.Add(peerNodeId);
                }
            }
            // Diagnostic payload attached to every outcome for dashboards/probes.
            var payload = new Dictionary<string, object>
            {
                ["trackedPeerCount"] = trackedPeers.Count,
                ["peersWithNoConfirmation"] = peersWithNoConfirmation,
                ["maxLagMs"] = maxLagMs,
                ["laggingPeers"] = laggingPeers,
                ["lastSuccessfulConfirmationUpdateByPeer"] = lastSuccessfulConfirmationUpdateByPeer
            };
            // Severity order: critical lag wins over degraded conditions.
            if (criticalLaggingPeers.Count > 0)
            {
                return HealthCheckResult.Unhealthy(
                    $"CBDDC is unhealthy. Critical lag detected for {criticalLaggingPeers.Count} tracked peer(s).",
                    data: payload);
            }
            if (peersWithNoConfirmation.Count > 0 || laggingPeers.Count > 0)
            {
                return HealthCheckResult.Degraded(
                    $"CBDDC is degraded. Lagging peers: {laggingPeers.Count}, unconfirmed peers: {peersWithNoConfirmation.Count}.",
                    data: payload);
            }
            return HealthCheckResult.Healthy(
                $"CBDDC is healthy. Latest timestamp: {localHead.PhysicalTime}.",
                payload);
        }
        catch (Exception ex)
        {
            // Any store failure maps to Unhealthy instead of throwing out of the health pipeline.
            return HealthCheckResult.Unhealthy(
                "CBDDC persistence layer is unavailable",
                exception: ex);
        }
    }
}

View File

@@ -0,0 +1,62 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Serilog.Context;
using ZB.MOM.WW.CBDDC.Network;
namespace ZB.MOM.WW.CBDDC.Hosting.HostedServices;
/// <summary>
/// Hosted service that manages the lifecycle of the discovery service.
/// </summary>
public class DiscoveryServiceHostedService : IHostedService
{
    private readonly IDiscoveryService _discoveryService;
    private readonly ILogger<DiscoveryServiceHostedService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="DiscoveryServiceHostedService"/> class.
    /// </summary>
    /// <param name="discoveryService">The discovery service to manage.</param>
    /// <param name="logger">The logger used for service lifecycle events.</param>
    public DiscoveryServiceHostedService(
        IDiscoveryService discoveryService,
        ILogger<DiscoveryServiceHostedService> logger)
    {
        _discoveryService = discoveryService;
        _logger = logger;
    }

    /// <summary>
    /// Starts the discovery service.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the startup operation.</param>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        // Enrich all log events in this scope with service/operation/action context.
        using var serviceScope = LogContext.PushProperty("Service", nameof(DiscoveryServiceHostedService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");
        _logger.LogInformation("Starting Discovery Service...");
        await _discoveryService.Start();
        _logger.LogInformation("Discovery Service started");
    }

    /// <summary>
    /// Stops the discovery service.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the shutdown operation.</param>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(DiscoveryServiceHostedService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");
        _logger.LogInformation("Stopping Discovery Service...");
        await _discoveryService.Stop();
        _logger.LogInformation("Discovery Service stopped");
    }
}

View File

@@ -0,0 +1,60 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Serilog.Context;
using ZB.MOM.WW.CBDDC.Network;
namespace ZB.MOM.WW.CBDDC.Hosting.HostedServices;
/// <summary>
/// Hosted service that manages the lifecycle of the TCP sync server.
/// </summary>
public class TcpSyncServerHostedService : IHostedService
{
    private readonly ISyncServer _syncServer;
    private readonly ILogger<TcpSyncServerHostedService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="TcpSyncServerHostedService"/> class.
    /// </summary>
    /// <param name="syncServer">The sync server to start and stop.</param>
    /// <param name="logger">The logger instance.</param>
    public TcpSyncServerHostedService(
        ISyncServer syncServer,
        ILogger<TcpSyncServerHostedService> logger)
    {
        _syncServer = syncServer;
        _logger = logger;
    }

    /// <summary>
    /// Starts the TCP sync server.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel startup.</param>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        // Enrich all log events in this scope with service/operation/action context.
        using var serviceScope = LogContext.PushProperty("Service", nameof(TcpSyncServerHostedService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");
        _logger.LogInformation("Starting TCP Sync Server...");
        await _syncServer.Start();
        _logger.LogInformation("TCP Sync Server started successfully");
    }

    /// <summary>
    /// Stops the TCP sync server.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel shutdown.</param>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(TcpSyncServerHostedService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");
        _logger.LogInformation("Stopping TCP Sync Server...");
        await _syncServer.Stop();
        _logger.LogInformation("TCP Sync Server stopped");
    }
}

View File

@@ -0,0 +1,90 @@
# ZB.MOM.WW.CBDDC.Hosting
ASP.NET Core integration for CBDDC with health checks and hosted services.
## Features
- Cluster mode
- Built-in health endpoint integration
- Hosted services for sync server lifecycle
- Respond-only server operation
## Installation
```bash
dotnet add package ZB.MOM.WW.CBDDC.Hosting
```
## Quick Start - Cluster
```csharp
var builder = WebApplication.CreateBuilder(args);
// Add CBDDC core + BLite persistence (custom DbContext + DocumentStore required)
builder.Services.AddCBDDCCore()
.AddCBDDCBLite<MyDbContext, MyDocumentStore>(
sp => new MyDbContext("/var/lib/cbddc/data.blite"));
// Add ASP.NET integration (cluster mode)
builder.Services.AddCBDDCHosting(options =>
{
options.Cluster.NodeId = "server-01";
options.Cluster.TcpPort = 5001;
});
var app = builder.Build();
app.MapHealthChecks("/health");
app.Run();
```
## Health Checks
CBDDC registers health checks that verify:
- Database connectivity
- Latest timestamp retrieval
```bash
curl http://localhost:5000/health
```
## Deployment Mode
### Cluster
Best for:
- Dedicated database servers
- Simple deployments
- Development/testing environments
## Server Behavior
CBDDC servers operate in respond-only mode:
- Accept incoming sync connections
- Respond to sync requests
- Do not initiate outbound sync
- Do not perform UDP discovery
## Configuration Options
### ClusterOptions
| Property | Type | Default | Description |
|----------|------|---------|-------------|
| NodeId | string | MachineName | Unique node identifier |
| TcpPort | int | 5001 | TCP port for sync |
| EnableUdpDiscovery | bool | false | Enable UDP discovery |
| PeerConfirmationLagThresholdMs | long | 30000 | Peer confirmation lag (ms) above which health is reported as degraded |
| PeerConfirmationCriticalLagThresholdMs | long | 120000 | Peer confirmation lag (ms) above which health is reported as unhealthy |
## Production Checklist
- Store BLite database files on durable storage in production
- Configure health checks for load balancer
- Set up proper logging and monitoring
- Configure backup/restore for BLite database files
- Configure proper firewall rules for TCP port
- Set unique NodeId per instance
- Test failover scenarios
## License
MIT

View File

@@ -0,0 +1,72 @@
using System;
using System.Collections.Generic;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Serilog.Context;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Network;
namespace ZB.MOM.WW.CBDDC.Hosting.Services;
/// <summary>
/// No-op implementation of IDiscoveryService for server scenarios.
/// Does not perform UDP broadcast discovery - relies on explicit peer configuration.
/// </summary>
public class NoOpDiscoveryService : IDiscoveryService
{
    private readonly ILogger<NoOpDiscoveryService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="NoOpDiscoveryService"/> class.
    /// </summary>
    /// <param name="logger">The logger instance to use, or <see langword="null"/> to use a no-op logger.</param>
    public NoOpDiscoveryService(ILogger<NoOpDiscoveryService>? logger = null)
        => _logger = logger ?? NullLogger<NoOpDiscoveryService>.Instance;

    /// <summary>
    /// Gets the currently active peers.
    /// </summary>
    /// <returns>An empty sequence because discovery is disabled.</returns>
    public IEnumerable<PeerNode> GetActivePeers() => Array.Empty<PeerNode>();

    /// <summary>
    /// Starts the discovery service (logs and returns immediately; nothing runs).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Start()
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(NoOpDiscoveryService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");
        _logger.LogInformation("NoOpDiscoveryService started (passive mode - no UDP discovery)");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops the discovery service (logs and returns immediately).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Stop()
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(NoOpDiscoveryService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");
        _logger.LogInformation("NoOpDiscoveryService stopped");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases resources used by this instance (nothing to release here).
    /// </summary>
    public void Dispose() => _logger.LogDebug("NoOpDiscoveryService disposed");
}

View File

@@ -0,0 +1,61 @@
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using Serilog.Context;
using ZB.MOM.WW.CBDDC.Network;
namespace ZB.MOM.WW.CBDDC.Hosting.Services;
/// <summary>
/// No-op implementation of ISyncOrchestrator for server scenarios.
/// Does not initiate outbound sync - only responds to incoming sync requests.
/// </summary>
public class NoOpSyncOrchestrator : ISyncOrchestrator
{
    private readonly ILogger<NoOpSyncOrchestrator> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="NoOpSyncOrchestrator"/> class.
    /// </summary>
    /// <param name="logger">The logger instance to use, or <see langword="null"/> for a no-op logger.</param>
    public NoOpSyncOrchestrator(ILogger<NoOpSyncOrchestrator>? logger = null)
        => _logger = logger ?? NullLogger<NoOpSyncOrchestrator>.Instance;

    /// <summary>
    /// Starts the orchestrator lifecycle (logs and returns immediately; nothing runs).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Start()
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(NoOpSyncOrchestrator));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");
        _logger.LogInformation("NoOpSyncOrchestrator started (respond-only mode - no outbound sync)");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops the orchestrator lifecycle (logs and returns immediately).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Stop()
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(NoOpSyncOrchestrator));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");
        _logger.LogInformation("NoOpSyncOrchestrator stopped");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases resources used by the orchestrator (nothing to release here).
    /// </summary>
    public void Dispose() => _logger.LogDebug("NoOpSyncOrchestrator disposed");
}

View File

@@ -0,0 +1,37 @@
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<ProjectReference Include="..\ZB.MOM.WW.CBDDC.Network\ZB.MOM.WW.CBDDC.Network.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
<PackageReference Include="Serilog" Version="4.2.0" />
</ItemGroup>
<PropertyGroup>
<AssemblyName>ZB.MOM.WW.CBDDC.Hosting</AssemblyName>
<RootNamespace>ZB.MOM.WW.CBDDC.Hosting</RootNamespace>
<PackageId>ZB.MOM.WW.CBDDC.Hosting</PackageId>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>latest</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Version>1.0.3</Version>
<Authors>MrDevRobot</Authors>
<Description>ASP.NET Core integration for CBDDC with health checks and hosted services.</Description>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageTags>p2p;database;aspnetcore;healthcheck;hosting;cluster</PackageTags>
<PackageProjectUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</PackageProjectUrl>
<RepositoryUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageReadmeFile>README.md</PackageReadmeFile>
</PropertyGroup>
<ItemGroup>
<None Include="README.md" Pack="true" PackagePath="\" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,160 @@
using System;
using Microsoft.Extensions.Logging;
using System.Linq;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Storage;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Represents a single CBDDC Peer Node.
/// Acts as a facade to orchestrate the lifecycle of Networking, Discovery, and Synchronization components.
/// </summary>
public class CBDDCNode : ICBDDCNode
{
    private readonly ILogger<CBDDCNode> _logger;

    /// <summary>
    /// Gets the Sync Server instance.
    /// </summary>
    public ISyncServer Server { get; }

    /// <summary>
    /// Gets the Discovery Service instance.
    /// </summary>
    public IDiscoveryService Discovery { get; }

    /// <summary>
    /// Gets the Synchronization Orchestrator instance.
    /// </summary>
    public ISyncOrchestrator Orchestrator { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCNode"/> class.
    /// </summary>
    /// <param name="server">The TCP server for handling incoming sync requests.</param>
    /// <param name="discovery">The UDP service for peer discovery.</param>
    /// <param name="orchestrator">The orchestrator for managing outgoing sync operations.</param>
    /// <param name="logger">The logger instance.</param>
    public CBDDCNode(
        ISyncServer server,
        IDiscoveryService discovery,
        ISyncOrchestrator orchestrator,
        ILogger<CBDDCNode> logger)
    {
        Server = server;
        Discovery = discovery;
        Orchestrator = orchestrator;
        _logger = logger;
    }

    /// <summary>
    /// Starts all node components (Server, Discovery, Orchestrator) concurrently.
    /// </summary>
    public async Task Start()
    {
        _logger.LogInformation("Starting CBDDC Node...");
        await Task.WhenAll(
            Server.Start(),
            Discovery.Start(),
            Orchestrator.Start()
        );
        _logger.LogInformation("CBDDC Node Started on {Address}", Address);
    }

    /// <summary>
    /// Stops all node components.
    /// </summary>
    public async Task Stop()
    {
        _logger.LogInformation("Stopping CBDDC Node...");
        await Task.WhenAll(
            Orchestrator.Stop(),
            Discovery.Stop(),
            Server.Stop()
        );
        _logger.LogInformation("CBDDC Node Stopped.");
    }

    /// <summary>
    /// Gets the address information of this node.
    /// </summary>
    public NodeAddress Address
    {
        get
        {
            var ep = Server.ListeningEndpoint;
            if (ep != null)
            {
                // If the server is listening on "Any" (0.0.0.0), we cannot advertise that as a connectable address.
                // We must resolve the actual machine IP address that peers can reach.
                if (Equals(ep.Address, System.Net.IPAddress.Any) || Equals(ep.Address, System.Net.IPAddress.IPv6Any))
                {
                    return new NodeAddress(GetLocalIpAddress(), ep.Port);
                }
                return new NodeAddress(ep.Address.ToString(), ep.Port);
            }
            return new NodeAddress("Unknown", 0);
        }
    }

    /// <summary>
    /// Resolves the first connectable IPv4 address of this machine, skipping loopback
    /// interfaces and link-local (APIPA 169.254.x.x) addresses that peers cannot reach.
    /// Falls back to 127.0.0.1 when no suitable address exists or enumeration fails.
    /// </summary>
    private string GetLocalIpAddress()
    {
        try
        {
            var interfaces = System.Net.NetworkInformation.NetworkInterface.GetAllNetworkInterfaces()
                .Where(i => i.OperationalStatus == System.Net.NetworkInformation.OperationalStatus.Up
                    && i.NetworkInterfaceType != System.Net.NetworkInformation.NetworkInterfaceType.Loopback);
            foreach (var i in interfaces)
            {
                var props = i.GetIPProperties();
                var ipInfo = props.UnicastAddresses
                    .FirstOrDefault(u => u.Address.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork // Prefer IPv4
                        && !IsApipaAddress(u.Address)); // 169.254.x.x is not routable to peers
                if (ipInfo != null)
                {
                    return ipInfo.Address.ToString();
                }
            }
            return "127.0.0.1";
        }
        catch (Exception ex)
        {
            // Pass the exception itself so structured sinks capture the full stack trace.
            _logger.LogWarning(ex, "Failed to resolve local IP. Fallback to localhost.");
            return "127.0.0.1";
        }
    }

    /// <summary>
    /// Determines whether an IPv4 address is an APIPA link-local address (169.254.0.0/16).
    /// Such addresses are auto-assigned when DHCP fails and cannot be advertised to peers.
    /// </summary>
    /// <param name="address">The IPv4 address to inspect.</param>
    private static bool IsApipaAddress(System.Net.IPAddress address)
    {
        var bytes = address.GetAddressBytes();
        return bytes.Length == 4 && bytes[0] == 169 && bytes[1] == 254;
    }
}
/// <summary>
/// Immutable host/port pair describing where a CBDDC node can be reached.
/// </summary>
public class NodeAddress
{
    /// <summary>
    /// Gets the host portion of the node address.
    /// </summary>
    public string Host { get; }

    /// <summary>
    /// Gets the port portion of the node address.
    /// </summary>
    public int Port { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="NodeAddress"/> class.
    /// </summary>
    /// <param name="host">The host name or IP address.</param>
    /// <param name="port">The port number.</param>
    public NodeAddress(string host, int port) => (Host, Port) = (host, port);

    /// <inheritdoc />
    public override string ToString() => $"{Host}:{Port}";
}

View File

@@ -0,0 +1,85 @@
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Serilog.Context;
using System;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Hosted service that automatically starts and stops the CBDDC node.
/// </summary>
public class CBDDCNodeService : IHostedService
{
    private readonly ICBDDCNode _node;
    private readonly ILogger<CBDDCNodeService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCNodeService"/> class.
    /// </summary>
    /// <param name="node">The CBDDC node to manage.</param>
    /// <param name="logger">The logger instance.</param>
    public CBDDCNodeService(ICBDDCNode node, ILogger<CBDDCNodeService> logger)
    {
        _node = node;
        _logger = logger;
    }

    /// <summary>
    /// Starts the managed CBDDC node.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel startup.</param>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(CBDDCNodeService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");
        try
        {
            _logger.LogInformation("Starting CBDDC Node Service...");
            // Bail out early if the host is already shutting down.
            cancellationToken.ThrowIfCancellationRequested();
            await _node.Start();
            _logger.LogInformation("CBDDC Node Service started successfully");
        }
        catch (OperationCanceledException)
        {
            _logger.LogWarning("CBDDC Node Service start was cancelled");
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to start CBDDC Node Service");
            throw;
        }
    }

    /// <summary>
    /// Stops the managed CBDDC node.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel shutdown.</param>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        using var serviceScope = LogContext.PushProperty("Service", nameof(CBDDCNodeService));
        using var operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");
        try
        {
            _logger.LogInformation("Stopping CBDDC Node Service...");
            await _node.Stop();
            _logger.LogInformation("CBDDC Node Service stopped successfully");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error occurred while stopping CBDDC Node Service");
            // Swallowed deliberately: rethrowing during shutdown would break the host's shutdown sequence.
        }
    }
}

View File

@@ -0,0 +1,32 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using ZB.MOM.WW.CBDDC.Core.Cache;
using ZB.MOM.WW.CBDDC.Core.Diagnostics;
using ZB.MOM.WW.CBDDC.Core.Resilience;
using ZB.MOM.WW.CBDDC.Core.Sync;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Provides extension methods for registering core CBDDC services.
/// </summary>
public static class CBDDCServiceCollectionExtensions
{
    /// <summary>
    /// Registers the core CBDDC service dependencies (document cache, offline queue,
    /// sync status tracking, retry policy, and health check) as singletons.
    /// Existing registrations are preserved (TryAdd semantics).
    /// </summary>
    /// <param name="services">The service collection to update.</param>
    /// <returns>The same <see cref="IServiceCollection"/> instance for chaining.</returns>
    public static IServiceCollection AddCBDDCCore(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);
        // Caching and offline buffering.
        services.TryAddSingleton<IDocumentCache, DocumentCache>();
        services.TryAddSingleton<IOfflineQueue, OfflineQueue>();
        // Sync diagnostics and resilience.
        services.TryAddSingleton<ISyncStatusTracker, SyncStatusTracker>();
        services.TryAddSingleton<IRetryPolicy, RetryPolicy>();
        services.TryAddSingleton<ICBDDCHealthCheck, CBDDCHealthCheck>();
        return services;
    }
}

View File

@@ -0,0 +1,195 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core.Storage;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Composite discovery service that combines UDP LAN discovery with persistent remote peers from the peer configuration store.
/// Periodically refreshes the remote peer list and merges it with actively discovered LAN peers.
///
/// Remote peer configurations are stored in a synchronized collection that is automatically
/// replicated across all nodes in the cluster. Any node that adds a remote peer will have
/// it synchronized to all other nodes automatically.
/// </summary>
public class CompositeDiscoveryService : IDiscoveryService
{
    private readonly IDiscoveryService _udpDiscovery;
    private readonly IPeerConfigurationStore _peerConfigurationStore;
    private readonly ILogger<CompositeDiscoveryService> _logger;
    private readonly TimeSpan _refreshInterval;

    // NOTE(review): not referenced inside this class; presumably the name of the
    // synchronized collection backing IPeerConfigurationStore — confirm against the store.
    private const string RemotePeersCollectionName = "_system_remote_peers";

    private CancellationTokenSource? _cts;
    private readonly ConcurrentDictionary<string, PeerNode> _remotePeers = new();
    private readonly object _startStopLock = new object();

    /// <summary>
    /// Initializes a new instance of the CompositeDiscoveryService class.
    /// </summary>
    /// <param name="udpDiscovery">UDP-based LAN discovery service.</param>
    /// <param name="peerConfigurationStore">Store for accessing the synchronized remote peers collection.</param>
    /// <param name="logger">Logger instance; defaults to a no-op logger when null.</param>
    /// <param name="refreshInterval">Interval for refreshing remote peers from the store. Defaults to 5 minutes.</param>
    public CompositeDiscoveryService(
        IDiscoveryService udpDiscovery,
        IPeerConfigurationStore peerConfigurationStore,
        ILogger<CompositeDiscoveryService>? logger = null,
        TimeSpan? refreshInterval = null)
    {
        _udpDiscovery = udpDiscovery ?? throw new ArgumentNullException(nameof(udpDiscovery));
        _peerConfigurationStore = peerConfigurationStore ?? throw new ArgumentNullException(nameof(peerConfigurationStore));
        _logger = logger ?? NullLogger<CompositeDiscoveryService>.Instance;
        _refreshInterval = refreshInterval ?? TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Gets the currently active peers: LAN peers from UDP discovery concatenated with configured remote peers.
    /// No de-duplication is performed, so a node present in both sources appears twice.
    /// </summary>
    /// <returns>A sequence of active peer nodes.</returns>
    public IEnumerable<PeerNode> GetActivePeers()
    {
        var lanPeers = _udpDiscovery.GetActivePeers();
        var remotePeers = _remotePeers.Values;
        return lanPeers.Concat(remotePeers);
    }

    /// <summary>
    /// Starts peer discovery and the remote peer refresh loop.
    /// A second call while already started logs a warning and returns.
    /// </summary>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task Start()
    {
        CancellationToken token;
        lock (_startStopLock)
        {
            if (_cts != null)
            {
                _logger.LogWarning("Composite discovery service already started");
                return;
            }
            _cts = new CancellationTokenSource();

            // Capture the token while still holding the lock: reading _cts.Token
            // later could race with a concurrent Stop() that nulls/disposes the field.
            token = _cts.Token;
        }

        // Start UDP discovery
        await _udpDiscovery.Start();

        // Start remote peer refresh loop
        _ = Task.Run(async () =>
        {
            try
            {
                await RefreshLoopAsync(token);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Remote peer refresh loop failed");
            }
        }, token);

        // Initial load of remote peers
        await RefreshRemotePeersAsync();
        _logger.LogInformation("Composite discovery service started (UDP + Synchronized Remote Peers)");
    }

    /// <summary>
    /// Stops peer discovery and the remote peer refresh loop.
    /// Safe to call when already stopped or never started.
    /// </summary>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task Stop()
    {
        CancellationTokenSource? ctsToDispose = null;
        lock (_startStopLock)
        {
            if (_cts == null)
            {
                _logger.LogWarning("Composite discovery service already stopped or never started");
                return;
            }
            ctsToDispose = _cts;
            _cts = null;
        }
        try
        {
            ctsToDispose.Cancel();
        }
        catch (ObjectDisposedException)
        {
            // Already disposed, ignore
        }
        finally
        {
            ctsToDispose.Dispose();
        }
        await _udpDiscovery.Stop();
        _logger.LogInformation("Composite discovery service stopped");
    }

    // Background loop: sleep for the refresh interval, then reload remote peers.
    // Exceptions from a single refresh are logged and the loop continues.
    private async Task RefreshLoopAsync(CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(_refreshInterval, cancellationToken);
                await RefreshRemotePeersAsync();
            }
            catch (OperationCanceledException)
            {
                // Normal shutdown
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error during remote peer refresh");
            }
        }
    }

    // Reloads the enabled remote peer configurations and reconciles _remotePeers.
    // Upserts current entries first and removes stale ones afterwards, so that
    // concurrent GetActivePeers() callers never observe a transiently empty set
    // (a Clear()-then-add approach would expose such a window).
    private async Task RefreshRemotePeersAsync()
    {
        try
        {
            var peers = await _peerConfigurationStore.GetRemotePeersAsync();
            var remoteConfigs = peers.Where(p => p.IsEnabled);
            var now = DateTimeOffset.UtcNow;

            var currentIds = new HashSet<string>(StringComparer.Ordinal);
            foreach (var config in remoteConfigs)
            {
                var peerNode = new PeerNode(
                    config.NodeId,
                    config.Address,
                    now, // LastSeen is now for persistent peers (always considered active)
                    config.Type,
                    NodeRole.Member // Remote peers are always members, never gateways
                );
                _remotePeers[config.NodeId] = peerNode;
                currentIds.Add(config.NodeId);
            }

            // Drop entries that are no longer configured (or no longer enabled).
            foreach (var staleId in _remotePeers.Keys.Where(id => !currentIds.Contains(id)).ToList())
            {
                _remotePeers.TryRemove(staleId, out _);
            }

            _logger.LogInformation("Refreshed remote peers: {Count} enabled peers loaded from synchronized collection", _remotePeers.Count);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to refresh remote peers from database");
        }
    }
}

View File

@@ -0,0 +1,66 @@
using System;
using System.IO;
using System.IO.Compression;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Brotli compression helpers for network payloads.
/// Payloads below <see cref="THRESHOLD"/> bytes, or on targets without Brotli
/// support, are passed through unchanged (the same array instance is returned).
/// </summary>
public static class CompressionHelper
{
    /// <summary>
    /// Minimum payload size, in bytes, for which compression is attempted.
    /// </summary>
    public const int THRESHOLD = 1024; // 1KB

    /// <summary>
    /// Gets a value indicating whether Brotli compression is supported on the current target framework.
    /// </summary>
    public static bool IsBrotliSupported
    {
        get
        {
#if NET6_0_OR_GREATER
            return true;
#else
            return false;
#endif
        }
    }

    /// <summary>
    /// Compresses the specified data when Brotli is supported and the payload exceeds the threshold.
    /// </summary>
    /// <param name="data">The input data to compress.</param>
    /// <returns>
    /// The compressed payload, or the original array instance if compression is skipped.
    /// Callers can compare references against <paramref name="data"/> to detect a skip.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public static byte[] Compress(byte[] data)
    {
        if (data is null) throw new ArgumentNullException(nameof(data));
        if (data.Length < THRESHOLD || !IsBrotliSupported) return data;
#if NET6_0_OR_GREATER
        using var output = new MemoryStream();
        using (var brotli = new BrotliStream(output, CompressionLevel.Fastest))
        {
            brotli.Write(data, 0, data.Length);
        }
        return output.ToArray();
#else
        return data;
#endif
    }

    /// <summary>
    /// Decompresses Brotli-compressed data.
    /// </summary>
    /// <param name="compressedData">The compressed payload.</param>
    /// <returns>The decompressed payload.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="compressedData"/> is null.</exception>
    /// <exception cref="NotSupportedException">Brotli is not available on this target framework.</exception>
    public static byte[] Decompress(byte[] compressedData)
    {
        if (compressedData is null) throw new ArgumentNullException(nameof(compressedData));
#if NET6_0_OR_GREATER
        using var input = new MemoryStream(compressedData);
        using var output = new MemoryStream();
        using (var brotli = new BrotliStream(input, CompressionMode.Decompress))
        {
            brotli.CopyTo(output);
        }
        return output.ToArray();
#else
        throw new NotSupportedException("Brotli decompression not supported on this platform.");
#endif
    }
}

View File

@@ -0,0 +1,35 @@
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network
{
/// <summary>
/// Represents a running CBDDC node: its network address, the services that
/// compose it (discovery, sync orchestration, sync server), and Start/Stop
/// lifecycle control.
/// </summary>
public interface ICBDDCNode
{
/// <summary>
/// Gets the node address.
/// </summary>
NodeAddress Address { get; }
/// <summary>
/// Gets the discovery service.
/// </summary>
IDiscoveryService Discovery { get; }
/// <summary>
/// Gets the synchronization orchestrator.
/// </summary>
ISyncOrchestrator Orchestrator { get; }
/// <summary>
/// Gets the synchronization server.
/// </summary>
ISyncServer Server { get; }
/// <summary>
/// Starts the node services.
/// </summary>
/// <returns>A task that represents the asynchronous start operation.</returns>
Task Start();
/// <summary>
/// Stops the node services.
/// </summary>
/// <returns>A task that represents the asynchronous stop operation.</returns>
Task Stop();
}
}

View File

@@ -0,0 +1,30 @@
using ZB.MOM.WW.CBDDC.Core.Network;
using System.Collections.Generic;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network
{
/// <summary>
/// Defines peer discovery operations.
/// </summary>
/// <remarks>
/// Implementations in this codebase (e.g. the composite discovery service) treat a
/// repeated <see cref="Start"/> or <see cref="Stop"/> as a logged no-op rather than an error.
/// </remarks>
public interface IDiscoveryService
{
/// <summary>
/// Gets the currently active peers.
/// </summary>
/// <returns>The active peer nodes.</returns>
IEnumerable<PeerNode> GetActivePeers();
/// <summary>
/// Starts the discovery service.
/// </summary>
/// <returns>A task that represents the asynchronous operation.</returns>
Task Start();
/// <summary>
/// Stops the discovery service.
/// </summary>
/// <returns>A task that represents the asynchronous operation.</returns>
Task Stop();
}
}

View File

@@ -0,0 +1,21 @@
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Calculates the effective oplog prune cutoff for maintenance.
/// </summary>
/// <remarks>
/// The returned decision indicates whether pruning is allowed (its HasCutoff flag)
/// and, if so, which effective cutoff to apply; when pruning is blocked the decision
/// carries an explanatory reason.
/// </remarks>
public interface IOplogPruneCutoffCalculator
{
/// <summary>
/// Calculates the effective prune cutoff for the provided node configuration.
/// </summary>
/// <param name="configuration">The local node configuration.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>A prune cutoff decision describing whether prune is allowed and at which cutoff.</returns>
Task<OplogPruneCutoffDecision> CalculateEffectiveCutoffAsync(
PeerNodeConfiguration configuration,
CancellationToken cancellationToken = default);
}

View File

@@ -0,0 +1,22 @@
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network
{
/// <summary>
/// Defines lifecycle operations for synchronization orchestration.
/// </summary>
public interface ISyncOrchestrator
{
/// <summary>
/// Starts synchronization orchestration.
/// </summary>
/// <returns>A task that represents the asynchronous start operation.</returns>
Task Start();
/// <summary>
/// Stops synchronization orchestration.
/// </summary>
/// <returns>A task that represents the asynchronous stop operation.</returns>
Task Stop();
}
}

View File

@@ -0,0 +1,31 @@
using System.Net;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Defines the contract for a server that supports starting, stopping, and reporting its listening network endpoint for
/// synchronization operations.
/// </summary>
/// <remarks>Implementations of this interface are expected to provide asynchronous methods for starting and
/// stopping the server. The listening endpoint may be null if the server is not currently active or has not been
/// started.</remarks>
public interface ISyncServer
{
/// <summary>
/// Starts the synchronization server.
/// </summary>
/// <returns>A task that represents the asynchronous operation.</returns>
Task Start();
/// <summary>
/// Stops the synchronization server.
/// </summary>
/// <returns>A task that represents the asynchronous operation.</returns>
Task Stop();
/// <summary>
/// Gets the network endpoint currently used by the server for listening,
/// or <see langword="null"/> when the server is not active.
/// </summary>
IPEndPoint? ListeningEndpoint { get; }
}

View File

@@ -0,0 +1,166 @@
using System;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Network.Leadership;
/// <summary>
/// Implements the Bully algorithm for leader election.
/// The node with the lexicographically smallest NodeId becomes the cloud gateway (leader).
/// Elections run periodically (every 5 seconds by default) to adapt to cluster changes.
/// </summary>
public class BullyLeaderElectionService : ILeaderElectionService
{
    private readonly IDiscoveryService _discoveryService;
    private readonly IPeerNodeConfigurationProvider _configProvider;
    private readonly ILogger<BullyLeaderElectionService> _logger;
    private readonly TimeSpan _electionInterval;

    // Guards _cts so concurrent Start()/Stop() calls cannot race
    // (same pattern as CompositeDiscoveryService).
    private readonly object _startStopLock = new object();
    private CancellationTokenSource? _cts;
    private string? _localNodeId;
    private string? _currentGatewayNodeId;
    private bool _isCloudGateway;

    /// <summary>
    /// Gets a value indicating whether this node is currently the cloud gateway leader.
    /// </summary>
    public bool IsCloudGateway => _isCloudGateway;

    /// <summary>
    /// Gets the current gateway node identifier.
    /// </summary>
    public string? CurrentGatewayNodeId => _currentGatewayNodeId;

    /// <summary>
    /// Occurs when leadership changes.
    /// </summary>
    public event EventHandler<LeadershipChangedEventArgs>? LeadershipChanged;

    /// <summary>
    /// Initializes a new instance of the BullyLeaderElectionService class.
    /// </summary>
    /// <param name="discoveryService">Service providing active peer information.</param>
    /// <param name="configProvider">Provider for local node configuration.</param>
    /// <param name="logger">Logger instance; defaults to a no-op logger when null.</param>
    /// <param name="electionInterval">Interval between elections. Defaults to 5 seconds.</param>
    public BullyLeaderElectionService(
        IDiscoveryService discoveryService,
        IPeerNodeConfigurationProvider configProvider,
        ILogger<BullyLeaderElectionService>? logger = null,
        TimeSpan? electionInterval = null)
    {
        _discoveryService = discoveryService ?? throw new ArgumentNullException(nameof(discoveryService));
        _configProvider = configProvider ?? throw new ArgumentNullException(nameof(configProvider));
        _logger = logger ?? NullLogger<BullyLeaderElectionService>.Instance;
        _electionInterval = electionInterval ?? TimeSpan.FromSeconds(5);
    }

    /// <summary>
    /// Starts the leader election loop. A second call while already started
    /// logs a warning and returns.
    /// </summary>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task Start()
    {
        CancellationTokenSource cts;
        CancellationToken token;
        lock (_startStopLock)
        {
            if (_cts != null)
            {
                _logger.LogWarning("Leader election service already started");
                return;
            }
            cts = new CancellationTokenSource();
            _cts = cts;

            // Capture the token while holding the lock; reading _cts.Token later
            // could race with a concurrent Stop() that nulls/disposes the field.
            token = cts.Token;
        }

        try
        {
            var config = await _configProvider.GetConfiguration();
            _localNodeId = config.NodeId;
        }
        catch
        {
            // Roll back the "started" state so a later Start() can retry.
            lock (_startStopLock)
            {
                if (ReferenceEquals(_cts, cts))
                {
                    _cts = null;
                }
            }
            cts.Dispose();
            throw;
        }

        _ = Task.Run(() => ElectionLoopAsync(token));
        _logger.LogInformation("Leader election service started for node {NodeId}", _localNodeId);
    }

    /// <summary>
    /// Stops the leader election loop. Safe to call when already stopped or never started.
    /// </summary>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public Task Stop()
    {
        CancellationTokenSource? ctsToDispose;
        lock (_startStopLock)
        {
            if (_cts == null) return Task.CompletedTask;
            ctsToDispose = _cts;
            _cts = null;
        }
        ctsToDispose.Cancel();
        ctsToDispose.Dispose();
        _logger.LogInformation("Leader election service stopped");
        return Task.CompletedTask;
    }

    // Background loop: wait for the election interval, then run an election.
    // Individual election failures are logged and the loop continues.
    private async Task ElectionLoopAsync(CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(_electionInterval, cancellationToken);
                RunElection();
            }
            catch (OperationCanceledException)
            {
                // Normal shutdown
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error during leader election");
            }
        }
    }

    // Runs one Bully election round over the LAN peer pool (plus this node) and
    // raises LeadershipChanged when this node's gateway role flips.
    private void RunElection()
    {
        if (_localNodeId == null) return;

        // Get all active LAN peers (excluding remote cloud nodes)
        var lanPeers = _discoveryService.GetActivePeers()
            .Where(p => p.Type == PeerType.LanDiscovered)
            .Select(p => p.NodeId)
            .ToList();

        // Add local node to the pool
        lanPeers.Add(_localNodeId);

        // Bully algorithm: smallest NodeId wins (lexicographic comparison)
        var newLeader = lanPeers.OrderBy(id => id, StringComparer.Ordinal).FirstOrDefault();
        if (newLeader == null)
        {
            // No peers available, local node is leader by default
            newLeader = _localNodeId;
        }

        // Check if leadership changed
        if (newLeader != _currentGatewayNodeId)
        {
            var wasLeader = _isCloudGateway;
            _currentGatewayNodeId = newLeader;
            _isCloudGateway = newLeader == _localNodeId;
            if (wasLeader != _isCloudGateway)
            {
                if (_isCloudGateway)
                {
                    _logger.LogInformation("🔐 This node is now the CLOUD GATEWAY (Leader) - Will sync with remote cloud nodes");
                }
                else
                {
                    _logger.LogInformation("👤 This node is now a MEMBER - Cloud sync handled by gateway: {Gateway}", _currentGatewayNodeId);
                }
                // Raise event
                LeadershipChanged?.Invoke(this, new LeadershipChangedEventArgs(_currentGatewayNodeId, _isCloudGateway));
            }
        }
    }
}

View File

@@ -0,0 +1,65 @@
using System;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network.Leadership;
/// <summary>
/// Describes a leadership change in the cluster: which node is now the cloud
/// gateway (leader) and whether that node is the local one.
/// </summary>
public class LeadershipChangedEventArgs : EventArgs
{
    /// <summary>
    /// Initializes a new instance of the <see cref="LeadershipChangedEventArgs"/> class.
    /// </summary>
    /// <param name="currentGatewayNodeId">The NodeId of the elected gateway node, or <see langword="null"/> when none is elected.</param>
    /// <param name="isLocalNodeGateway">A value indicating whether the local node is the gateway.</param>
    public LeadershipChangedEventArgs(string? currentGatewayNodeId, bool isLocalNodeGateway)
    {
        CurrentGatewayNodeId = currentGatewayNodeId;
        IsLocalNodeGateway = isLocalNodeGateway;
    }

    /// <summary>
    /// Gets the NodeId of the current cloud gateway (leader), or null if no leader is elected.
    /// </summary>
    public string? CurrentGatewayNodeId { get; }

    /// <summary>
    /// Gets a value indicating whether the local node is now the cloud gateway.
    /// </summary>
    public bool IsLocalNodeGateway { get; }
}
/// <summary>
/// Service for managing leader election in a distributed cluster.
/// Uses the Bully algorithm where the node with the lexicographically smallest NodeId becomes the leader.
/// Only the leader (Cloud Gateway) synchronizes with remote cloud nodes.
/// </summary>
public interface ILeaderElectionService
{
/// <summary>
/// Gets whether the local node is currently the cloud gateway (leader).
/// </summary>
bool IsCloudGateway { get; }
/// <summary>
/// Gets the NodeId of the current cloud gateway, or null if no gateway is elected.
/// </summary>
string? CurrentGatewayNodeId { get; }
/// <summary>
/// Event raised when leadership changes.
/// </summary>
event EventHandler<LeadershipChangedEventArgs>? LeadershipChanged;
/// <summary>
/// Starts the leader election service.
/// </summary>
/// <returns>A task that represents the asynchronous start operation.</returns>
Task Start();
/// <summary>
/// Stops the leader election service.
/// </summary>
/// <returns>A task that represents the asynchronous stop operation.</returns>
Task Stop();
}

View File

@@ -0,0 +1,164 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core.Storage;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Default implementation for effective oplog prune cutoff calculation.
/// </summary>
public class OplogPruneCutoffCalculator : IOplogPruneCutoffCalculator
{
    private readonly IOplogStore _oplogStore;
    private readonly IPeerOplogConfirmationStore? _peerOplogConfirmationStore;

    /// <summary>
    /// Initializes a new instance of the <see cref="OplogPruneCutoffCalculator"/> class.
    /// </summary>
    /// <param name="oplogStore">The oplog store.</param>
    /// <param name="peerOplogConfirmationStore">The optional peer confirmation store; when null, only retention gating applies.</param>
    public OplogPruneCutoffCalculator(
        IOplogStore oplogStore,
        IPeerOplogConfirmationStore? peerOplogConfirmationStore = null)
    {
        _oplogStore = oplogStore ?? throw new ArgumentNullException(nameof(oplogStore));
        _peerOplogConfirmationStore = peerOplogConfirmationStore;
    }

    /// <inheritdoc />
    public async Task<OplogPruneCutoffDecision> CalculateEffectiveCutoffAsync(
        PeerNodeConfiguration configuration,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(configuration);
        var retentionCutoff = BuildRetentionCutoff(configuration);

        // Without confirmation tracking, the retention policy alone decides the cutoff.
        if (_peerOplogConfirmationStore == null)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "Confirmation tracking is not configured.");
        }

        var relevantSources = await GetRelevantSourceNodesAsync(cancellationToken);
        if (relevantSources.Count == 0)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "No local non-default oplog/vector-clock sources were found.");
        }

        var activeTrackedPeers = (await _peerOplogConfirmationStore.GetActiveTrackedPeersAsync(cancellationToken))
            .Where(peerNodeId => !string.IsNullOrWhiteSpace(peerNodeId))
            .Distinct(StringComparer.Ordinal)
            .ToList();
        if (activeTrackedPeers.Count == 0)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "No active tracked peers found for confirmation gating.");
        }

        // The confirmation cutoff is the minimum, across all active tracked peers and
        // all relevant sources, of the latest timestamp each peer has confirmed.
        // If any peer is missing a confirmation for any source, pruning is blocked.
        HlcTimestamp? confirmationCutoff = null;
        foreach (var peerNodeId in activeTrackedPeers)
        {
            // Latest confirmed timestamp per source for this peer.
            var confirmationsForPeer = (await _peerOplogConfirmationStore.GetConfirmationsForPeerAsync(peerNodeId, cancellationToken))
                .Where(confirmation => confirmation.IsActive)
                .Where(confirmation => !string.IsNullOrWhiteSpace(confirmation.SourceNodeId))
                .GroupBy(confirmation => confirmation.SourceNodeId, StringComparer.Ordinal)
                .ToDictionary(
                    group => group.Key,
                    // Max() uses the same default comparer as OrderBy(...).Last(),
                    // without sorting the whole group.
                    group => group.Select(ToTimestamp).Max(),
                    StringComparer.Ordinal);

            foreach (var sourceNodeId in relevantSources)
            {
                if (!confirmationsForPeer.TryGetValue(sourceNodeId, out var confirmedTimestamp) || confirmedTimestamp == default)
                {
                    return OplogPruneCutoffDecision.NoCutoff(
                        retentionCutoff,
                        $"Active tracked peer '{peerNodeId}' is missing confirmation for source '{sourceNodeId}'.");
                }
                if (!confirmationCutoff.HasValue || confirmedTimestamp < confirmationCutoff.Value)
                {
                    confirmationCutoff = confirmedTimestamp;
                }
            }
        }

        if (!confirmationCutoff.HasValue)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "No confirmation cutoff could be determined.");
        }

        // Never prune past what peers have confirmed: take the earlier of the two cutoffs.
        var effectiveCutoff = retentionCutoff <= confirmationCutoff.Value
            ? retentionCutoff
            : confirmationCutoff.Value;
        return OplogPruneCutoffDecision.WithCutoff(
            retentionCutoff,
            confirmationCutoff,
            effectiveCutoff);
    }

    // Converts the configured retention window (hours) into an HLC cutoff anchored at "now".
    private static HlcTimestamp BuildRetentionCutoff(PeerNodeConfiguration configuration)
    {
        var retentionTimestamp = DateTimeOffset.UtcNow
            .AddHours(-configuration.OplogRetentionHours)
            .ToUnixTimeMilliseconds();
        return new HlcTimestamp(retentionTimestamp, 0, configuration.NodeId ?? string.Empty);
    }

    // Collects source node ids from the local vector clock that have a non-default timestamp.
    private async Task<HashSet<string>> GetRelevantSourceNodesAsync(CancellationToken cancellationToken)
    {
        var localVectorClock = await _oplogStore.GetVectorClockAsync(cancellationToken);
        var sourceNodes = new HashSet<string>(StringComparer.Ordinal);
        foreach (var sourceNodeId in localVectorClock.NodeIds)
        {
            if (string.IsNullOrWhiteSpace(sourceNodeId))
            {
                continue;
            }
            var timestamp = localVectorClock.GetTimestamp(sourceNodeId);
            if (timestamp == default)
            {
                continue;
            }
            sourceNodes.Add(sourceNodeId);
        }
        return sourceNodes;
    }

    // Maps a peer confirmation record to its HLC timestamp.
    private static HlcTimestamp ToTimestamp(PeerOplogConfirmation confirmation)
    {
        return new HlcTimestamp(
            confirmation.ConfirmedWall,
            confirmation.ConfirmedLogic,
            confirmation.SourceNodeId ?? string.Empty);
    }
}

View File

@@ -0,0 +1,84 @@
using ZB.MOM.WW.CBDDC.Core;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Represents the prune cutoff decision for an oplog maintenance cycle.
/// Instances are produced via <see cref="WithCutoff"/> (prune allowed) or
/// <see cref="NoCutoff"/> (prune blocked).
/// </summary>
public sealed class OplogPruneCutoffDecision
{
    /// <summary>
    /// Gets a value indicating whether pruning is allowed for this decision.
    /// </summary>
    public bool HasCutoff { get; }

    /// <summary>
    /// Gets the retention-based cutoff.
    /// </summary>
    public HlcTimestamp RetentionCutoff { get; }

    /// <summary>
    /// Gets the confirmation-based cutoff, when available.
    /// </summary>
    public HlcTimestamp? ConfirmationCutoff { get; }

    /// <summary>
    /// Gets the effective cutoff to use for pruning when <see cref="HasCutoff"/> is true.
    /// </summary>
    public HlcTimestamp? EffectiveCutoff { get; }

    /// <summary>
    /// Gets the explanatory reason for skip/special handling decisions.
    /// </summary>
    public string Reason { get; }

    private OplogPruneCutoffDecision(
        bool hasCutoff,
        HlcTimestamp retentionCutoff,
        HlcTimestamp? confirmationCutoff,
        HlcTimestamp? effectiveCutoff,
        string reason)
    {
        HasCutoff = hasCutoff;
        RetentionCutoff = retentionCutoff;
        ConfirmationCutoff = confirmationCutoff;
        EffectiveCutoff = effectiveCutoff;
        Reason = reason;
    }

    /// <summary>
    /// Creates a prune-allowed decision with the provided cutoffs.
    /// </summary>
    /// <param name="retentionCutoff">The cutoff derived from retention policy.</param>
    /// <param name="confirmationCutoff">The cutoff derived from peer confirmations, if available.</param>
    /// <param name="effectiveCutoff">The effective cutoff to apply for pruning.</param>
    /// <param name="reason">The explanatory reason associated with the decision.</param>
    public static OplogPruneCutoffDecision WithCutoff(
        HlcTimestamp retentionCutoff,
        HlcTimestamp? confirmationCutoff,
        HlcTimestamp effectiveCutoff,
        string reason = "")
        => new OplogPruneCutoffDecision(true, retentionCutoff, confirmationCutoff, effectiveCutoff, reason);

    /// <summary>
    /// Creates a prune-blocked decision.
    /// </summary>
    /// <param name="retentionCutoff">The cutoff derived from retention policy.</param>
    /// <param name="reason">The explanatory reason associated with the decision.</param>
    public static OplogPruneCutoffDecision NoCutoff(HlcTimestamp retentionCutoff, string reason)
        => new OplogPruneCutoffDecision(false, retentionCutoff, null, null, reason);
}

View File

@@ -0,0 +1,57 @@
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network; // For IMeshNetwork if we implement it
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Network.Security;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Hosting;
using System;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Dependency-injection helpers for registering the CBDDC network services.
/// </summary>
public static class CBDDCNetworkExtensions
{
    /// <summary>
    /// Adds CBDDC network services to the service collection.
    /// </summary>
    /// <typeparam name="TPeerNodeConfigurationProvider">The peer node configuration provider implementation type.</typeparam>
    /// <param name="services">The service collection to register services into.</param>
    /// <param name="useHostedService">If true, registers CBDDCNodeService as IHostedService to automatically start/stop the node.</param>
    /// <returns>The same <see cref="IServiceCollection"/> instance for chaining.</returns>
    public static IServiceCollection AddCBDDCNetwork<TPeerNodeConfigurationProvider>(
        this IServiceCollection services,
        bool useHostedService = true)
        where TPeerNodeConfigurationProvider : class, IPeerNodeConfigurationProvider
    {
        // Guard clause, for parity with AddCBDDCCore.
        ArgumentNullException.ThrowIfNull(services);

        services.TryAddSingleton<IPeerNodeConfigurationProvider, TPeerNodeConfigurationProvider>();
        services.TryAddSingleton<IAuthenticator, ClusterKeyAuthenticator>();
        services.TryAddSingleton<IPeerHandshakeService, SecureHandshakeService>();
        services.TryAddSingleton<IDiscoveryService, UdpDiscoveryService>();
        services.TryAddSingleton<ZB.MOM.WW.CBDDC.Network.Telemetry.INetworkTelemetryService>(sp =>
        {
            var logger = sp.GetRequiredService<ILogger<ZB.MOM.WW.CBDDC.Network.Telemetry.NetworkTelemetryService>>();
            // Metrics are persisted next to the application binaries.
            var path = System.IO.Path.Combine(System.AppContext.BaseDirectory, "cbddc_metrics.bin");
            return new ZB.MOM.WW.CBDDC.Network.Telemetry.NetworkTelemetryService(logger, path);
        });
        services.TryAddSingleton<ISyncServer, TcpSyncServer>();
        services.TryAddSingleton<IOplogPruneCutoffCalculator, OplogPruneCutoffCalculator>();
        services.TryAddSingleton<ISyncOrchestrator, SyncOrchestrator>();
        services.TryAddSingleton<ICBDDCNode, CBDDCNode>();

        // Optionally register hosted service for automatic node lifecycle management
        if (useHostedService)
        {
            services.AddHostedService<CBDDCNodeService>();
        }
        return services;
    }
}

View File

@@ -0,0 +1,259 @@
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Google.Protobuf;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.CBDDC.Network.Proto;
using ZB.MOM.WW.CBDDC.Network.Security;
using ZB.MOM.WW.CBDDC.Network.Telemetry;
namespace ZB.MOM.WW.CBDDC.Network.Protocol
{
/// <summary>
/// Handles the low-level framing, compression, encryption, and serialization of CBDDC messages.
/// Encapsulates the wire format: [Length (4)] [Type (1)] [Compression (1)] [Payload (N)]
/// </summary>
internal class ProtocolHandler
{
// Logger for protocol-level diagnostics (frame sizes, message types).
private readonly ILogger<ProtocolHandler> _logger;
// Optional sink for compression/encryption metrics; null disables recording.
private readonly INetworkTelemetryService? _telemetry;
// Serialize writers and readers respectively, so concurrent callers cannot
// interleave partial frames on the shared stream.
private readonly SemaphoreSlim _writeLock = new SemaphoreSlim(1, 1);
private readonly SemaphoreSlim _readLock = new SemaphoreSlim(1, 1);
/// <summary>
/// Initializes a new instance of the <see cref="ProtocolHandler"/> class.
/// </summary>
/// <param name="logger">The logger used for protocol diagnostics.</param>
/// <param name="telemetry">An optional telemetry service used to record network metrics; null disables metric recording.</param>
public ProtocolHandler(ILogger<ProtocolHandler> logger, INetworkTelemetryService? telemetry = null)
{
_logger = logger;
_telemetry = telemetry;
}
/// <summary>
/// Initializes a new instance of the <see cref="ProtocolHandler"/> class using a non-generic logger,
/// which is wrapped in an adapter that forwards to the typed logger contract.
/// </summary>
/// <param name="logger">The logger used for protocol diagnostics.</param>
/// <param name="telemetry">An optional telemetry service used to record network metrics.</param>
internal ProtocolHandler(ILogger logger, INetworkTelemetryService? telemetry = null)
: this(new ForwardingLogger(logger), telemetry)
{
}
/// <summary>
/// Serializes and sends a protocol message to the provided stream.
/// Wire format: [Length (4)] [Type (1)] [Compression (1)] [Payload (N)].
/// </summary>
/// <param name="stream">The destination stream.</param>
/// <param name="type">The protocol message type.</param>
/// <param name="message">The message payload to serialize.</param>
/// <param name="useCompression">Whether payload compression should be attempted.</param>
/// <param name="cipherState">Optional cipher state used to encrypt outgoing payloads.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>A task that represents the asynchronous send operation.</returns>
public async Task SendMessageAsync(Stream stream, MessageType type, IMessage message, bool useCompression, CipherState? cipherState, CancellationToken token = default)
{
    if (stream == null) throw new ArgumentNullException(nameof(stream));
    if (message == null) throw new ArgumentNullException(nameof(message));

    // 1. Serialize
    byte[] payloadBytes = message.ToByteArray();
    int originalSize = payloadBytes.Length;
    byte compressionFlag = 0x00;

    // 2. Compress (inner payload). SecureEnv frames are never compressed here.
    if (useCompression && payloadBytes.Length > CompressionHelper.THRESHOLD && type != MessageType.SecureEnv)
    {
        var compressed = CompressionHelper.Compress(payloadBytes);

        // CompressionHelper returns the ORIGINAL array instance when Brotli is not
        // available on this target. Only mark the frame as compressed when
        // compression actually happened — otherwise the receiver would try to
        // Brotli-decompress plain bytes and fail.
        if (!ReferenceEquals(compressed, payloadBytes))
        {
            payloadBytes = compressed;
            compressionFlag = 0x01; // Brotli
            if (_telemetry != null && originalSize > 0)
            {
                double ratio = (double)payloadBytes.Length / originalSize;
                _telemetry.RecordValue(MetricType.CompressionRatio, ratio);
            }
        }
    }

    // 3. Encrypt: wrap [Type (1)] [Compression (1)] [Payload (N)] in a SecureEnvelope.
    if (cipherState != null)
    {
        using (_telemetry?.StartMetric(MetricType.EncryptionTime))
        {
            var dataToEncrypt = new byte[2 + payloadBytes.Length];
            dataToEncrypt[0] = (byte)type;
            dataToEncrypt[1] = compressionFlag;
            Buffer.BlockCopy(payloadBytes, 0, dataToEncrypt, 2, payloadBytes.Length);
            var (ciphertext, iv, tag) = CryptoHelper.Encrypt(dataToEncrypt, cipherState.EncryptKey);
            var env = new SecureEnvelope
            {
                Ciphertext = ByteString.CopyFrom(ciphertext),
                Nonce = ByteString.CopyFrom(iv),
                AuthTag = ByteString.CopyFrom(tag)
            };
            payloadBytes = env.ToByteArray();
            type = MessageType.SecureEnv;
            compressionFlag = 0x00; // Outer envelope is not compressed
        }
    }

    // 4. Frame and write under the write lock so concurrent senders cannot interleave frames.
    await _writeLock.WaitAsync(token);
    try
    {
        _logger.LogDebug("Sending Message {Type}, OrgSize: {Org}, WireSize: {Wire}", type, originalSize, payloadBytes.Length);
        // Framing: [Length (4)] [Type (1)] [Compression (1)] [Payload (N)]
        var lengthBytes = BitConverter.GetBytes(payloadBytes.Length);
        await stream.WriteAsync(lengthBytes, 0, 4, token);
        stream.WriteByte((byte)type);
        stream.WriteByte(compressionFlag);
        await stream.WriteAsync(payloadBytes, 0, payloadBytes.Length, token);
        await stream.FlushAsync(token);
    }
    finally
    {
        _writeLock.Release();
    }
}
/// <summary>
/// Reads and decodes the next protocol message from the provided stream.
/// Returns <c>(MessageType.Unknown, null)</c> on a clean end-of-stream between frames.
/// </summary>
/// <param name="stream">The source stream.</param>
/// <param name="cipherState">Optional cipher state used to decrypt incoming payloads.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>A tuple containing the decoded message type and payload bytes.</returns>
/// <exception cref="EndOfStreamException">The connection closed in the middle of a frame.</exception>
/// <exception cref="InvalidDataException">The frame header or decrypted payload is malformed.</exception>
public async Task<(MessageType, byte[])> ReadMessageAsync(Stream stream, CipherState? cipherState, CancellationToken token = default)
{
    await _readLock.WaitAsync(token);
    try
    {
        // Frame header: [Length (4)]
        var lenBuf = new byte[4];
        int read = await ReadExactAsync(stream, lenBuf, 0, 4, token);
        if (read == 0) return (MessageType.Unknown, null!); // clean EOF between frames
        int length = BitConverter.ToInt32(lenBuf, 0);
        if (length < 0) throw new InvalidDataException($"Invalid negative frame length: {length}");
        int typeByte = stream.ReadByte();
        if (typeByte == -1) throw new EndOfStreamException("Connection closed abruptly (type byte)");
        int compByte = stream.ReadByte();
        if (compByte == -1) throw new EndOfStreamException("Connection closed abruptly (comp byte)");
        var payload = new byte[length];
        // A zero return here means the stream ended before the payload arrived;
        // previously this truncation was silently ignored and garbage was parsed.
        if (length > 0 && await ReadExactAsync(stream, payload, 0, length, token) == 0)
        {
            throw new EndOfStreamException("Connection closed abruptly (payload)");
        }
        var msgType = (MessageType)typeByte;
        // Handle Secure Envelope: decrypt, then unpack the inner [Type][Compression][Payload].
        if (msgType == MessageType.SecureEnv)
        {
            if (cipherState == null) throw new InvalidOperationException("Received encrypted message but no cipher state established");
            byte[] decrypted;
            using (_telemetry?.StartMetric(MetricType.DecryptionTime))
            {
                var env = SecureEnvelope.Parser.ParseFrom(payload);
                decrypted = CryptoHelper.Decrypt(
                    env.Ciphertext.ToByteArray(),
                    env.Nonce.ToByteArray(),
                    env.AuthTag.ToByteArray(),
                    cipherState.DecryptKey);
            }
            if (decrypted.Length < 2) throw new InvalidDataException("Decrypted payload too short");
            msgType = (MessageType)decrypted[0];
            int innerComp = decrypted[1];
            var innerPayload = new byte[decrypted.Length - 2];
            Buffer.BlockCopy(decrypted, 2, innerPayload, 0, innerPayload.Length);
            if (innerComp == 0x01)
            {
                innerPayload = CompressionHelper.Decompress(innerPayload);
            }
            return (msgType, innerPayload);
        }
        // Handle Unencrypted Compression
        if (compByte == 0x01)
        {
            payload = CompressionHelper.Decompress(payload);
        }
        _logger.LogDebug("Read Message {Type}, Size: {Size}", msgType, payload.Length);
        return (msgType, payload);
    }
    finally
    {
        _readLock.Release();
    }
}
/// <summary>
/// Fills <paramref name="buffer"/> with exactly <paramref name="count"/> bytes from the stream.
/// </summary>
/// <param name="stream">The source stream.</param>
/// <param name="buffer">The destination buffer.</param>
/// <param name="offset">The offset in <paramref name="buffer"/> at which to start writing.</param>
/// <param name="count">The number of bytes to read.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>
/// <paramref name="count"/> when the read completes, or 0 when the stream is already at
/// end-of-stream before the first byte (a clean close between frames).
/// </returns>
/// <exception cref="EndOfStreamException">The stream ended after some, but not all, requested bytes were read.</exception>
private async Task<int> ReadExactAsync(Stream stream, byte[] buffer, int offset, int count, CancellationToken token)
{
    int total = 0;
    while (total < count)
    {
        int read = await stream.ReadAsync(buffer, offset + total, count - total, token);
        if (read == 0)
        {
            // BUG FIX: previously any EOF returned 0, silently discarding a partial
            // read and leaving the caller with a half-filled buffer. Only a clean EOF
            // before the first byte is reported as 0; a mid-frame close now throws.
            if (total == 0) return 0;
            throw new EndOfStreamException($"Stream ended after {total} of {count} bytes");
        }
        total += read;
    }
    return total;
}
/// <summary>
/// Adapts a non-generic <see cref="ILogger"/> to <see cref="ILogger{ProtocolHandler}"/> by
/// delegating every call to the wrapped instance.
/// </summary>
private sealed class ForwardingLogger : ILogger<ProtocolHandler>
{
    private readonly ILogger _inner;

    /// <summary>
    /// Initializes a new instance of the <see cref="ForwardingLogger"/> class.
    /// </summary>
    /// <param name="inner">The underlying logger instance.</param>
    public ForwardingLogger(ILogger inner)
        => _inner = inner ?? throw new ArgumentNullException(nameof(inner));

    /// <inheritdoc />
    public IDisposable? BeginScope<TState>(TState state) where TState : notnull
        => _inner.BeginScope(state);

    /// <inheritdoc />
    public bool IsEnabled(LogLevel logLevel)
        => _inner.IsEnabled(logLevel);

    /// <inheritdoc />
    public void Log<TState>(
        LogLevel logLevel,
        EventId eventId,
        TState state,
        Exception? exception,
        Func<TState, Exception?, string> formatter)
        => _inner.Log(logLevel, eventId, state, exception, formatter);
}
}
}

View File

@@ -0,0 +1,72 @@
# ZB.MOM.WW.CBDDC.Network
Networking layer for **CBDDC** - provides peer-to-peer mesh networking with automatic discovery and synchronization.
## What's Included
This package handles all networking for CBDDC:
- **UDP Discovery**: Automatic peer discovery on LAN via broadcast
- **TCP Synchronization**: Reliable data sync between nodes
- **Gossip Protocol**: Efficient update propagation
- **Sync Orchestrator**: Manages peer connections and sync operations
- **Anti-Entropy**: Automatic reconciliation between peers
- **Resilience**: Retry policies, timeouts, error handling
## Installation
```bash
dotnet add package ZB.MOM.WW.CBDDC.Core
dotnet add package ZB.MOM.WW.CBDDC.Network
dotnet add package ZB.MOM.WW.CBDDC.Persistence.Sqlite
```
## Quick Start
```csharp
using ZB.MOM.WW.CBDDC.Network;
using Microsoft.Extensions.DependencyInjection;
var services = new ServiceCollection();
// Register networking
services.AddCBDDCNetwork(
nodeId: "my-node",
tcpPort: 5000,
authToken: "shared-secret"
);
var provider = services.BuildServiceProvider();
// Start network node
var node = provider.GetRequiredService<CBDDCNode>();
node.Start();
// Nodes on the same LAN will discover each other automatically!
```
## Features
### Automatic Discovery
Nodes broadcast their presence via UDP and automatically connect to peers on the same network.
### Secure Synchronization
All nodes must share the same authentication token to sync data.
### Scalable Gossip
Updates propagate exponentially - each node tells multiple peers, ensuring fast network-wide propagation.
## Documentation
- **[Architecture](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/docs/architecture.md)**
- **[LAN Deployment](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/docs/deployment-lan.md)**
- **[Network Configuration](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/docs/production-hardening.md)**
## Related Packages
- **ZB.MOM.WW.CBDDC.Core** - Core database abstractions
- **ZB.MOM.WW.CBDDC.Persistence.Sqlite** - SQLite storage provider
## License
MIT - see [LICENSE](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/LICENSE)

View File

@@ -0,0 +1,33 @@
using ZB.MOM.WW.CBDDC.Core.Network;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network.Security;
/// <summary>
/// <see cref="IAuthenticator"/> backed by a shared cluster secret (pre-shared key).
/// A handshake succeeds only when both nodes hold the same key.
/// </summary>
public class ClusterKeyAuthenticator : IAuthenticator
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;

    /// <summary>
    /// Initializes a new instance of the <see cref="ClusterKeyAuthenticator"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The provider for peer node configuration.</param>
    public ClusterKeyAuthenticator(IPeerNodeConfigurationProvider peerNodeConfigurationProvider)
        => _peerNodeConfigurationProvider = peerNodeConfigurationProvider;

    /// <inheritdoc />
    public async Task<bool> ValidateAsync(string nodeId, string token)
    {
        var configuration = await _peerNodeConfigurationProvider.GetConfiguration();
        // Hash both values first so the final comparison is fixed-time even when the
        // configured and presented tokens differ in length.
        var expected = SHA256.HashData(Encoding.UTF8.GetBytes(configuration.AuthToken ?? string.Empty));
        var presented = SHA256.HashData(Encoding.UTF8.GetBytes(token ?? string.Empty));
        return CryptographicOperations.FixedTimeEquals(expected, presented);
    }
}

View File

@@ -0,0 +1,81 @@
using System;
using System.IO;
using System.Security.Cryptography;
namespace ZB.MOM.WW.CBDDC.Network.Security;
/// <summary>
/// Symmetric encryption helpers: AES (default CBC/PKCS7) with an HMAC-SHA256 tag computed
/// over IV || ciphertext in an encrypt-then-MAC arrangement.
/// </summary>
/// <remarks>
/// NOTE(review): the same key is used for both AES and the HMAC. Standard guidance is to
/// derive independent encryption and authentication keys; changing this would break wire
/// compatibility with existing peers, so it is flagged here rather than changed.
/// </remarks>
public static class CryptoHelper
{
    private const int KeySize = 32; // 256 bits
    private const int BlockSize = 16; // 128 bits
    private const int MacSize = 32; // 256 bits (HMACSHA256)

    /// <summary>
    /// Encrypts plaintext and computes an authentication tag.
    /// </summary>
    /// <param name="plaintext">The plaintext bytes to encrypt.</param>
    /// <param name="key">The encryption and HMAC key.</param>
    /// <returns>The ciphertext, IV, and authentication tag.</returns>
    public static (byte[] ciphertext, byte[] iv, byte[] tag) Encrypt(byte[] plaintext, byte[] key)
    {
        byte[] iv;
        byte[] ciphertext;
        using (var aes = Aes.Create())
        {
            aes.Key = key;
            aes.GenerateIV(); // fresh random IV per message
            iv = aes.IV;
            using var transform = aes.CreateEncryptor();
            ciphertext = transform.TransformFinalBlock(plaintext, 0, plaintext.Length);
        }
        return (ciphertext, iv, ComputeTag(key, iv, ciphertext));
    }

    /// <summary>
    /// Verifies and decrypts ciphertext.
    /// </summary>
    /// <param name="ciphertext">The encrypted bytes.</param>
    /// <param name="iv">The initialization vector used during encryption.</param>
    /// <param name="tag">The authentication tag for integrity verification.</param>
    /// <param name="key">The encryption and HMAC key.</param>
    /// <returns>The decrypted plaintext bytes.</returns>
    /// <exception cref="CryptographicException">The authentication tag does not match.</exception>
    public static byte[] Decrypt(byte[] ciphertext, byte[] iv, byte[] tag, byte[] key)
    {
        // Verify the MAC before touching the ciphertext with AES.
        var expectedTag = ComputeTag(key, iv, ciphertext);
        if (!FixedTimeEquals(tag, expectedTag))
        {
            throw new CryptographicException("Authentication failed (HMAC mismatch)");
        }
        using var aes = Aes.Create();
        aes.Key = key;
        aes.IV = iv;
        using var transform = aes.CreateDecryptor();
        return transform.TransformFinalBlock(ciphertext, 0, ciphertext.Length);
    }

    // Computes HMAC-SHA256 over IV || ciphertext (shared by Encrypt and Decrypt).
    private static byte[] ComputeTag(byte[] key, byte[] iv, byte[] ciphertext)
    {
        using var hmac = new HMACSHA256(key);
        var material = new byte[iv.Length + ciphertext.Length];
        Buffer.BlockCopy(iv, 0, material, 0, iv.Length);
        Buffer.BlockCopy(ciphertext, 0, material, iv.Length, ciphertext.Length);
        return hmac.ComputeHash(material);
    }

    // Constant-time comparison; falls back to a manual XOR fold pre-.NET 6.
    private static bool FixedTimeEquals(byte[] left, byte[] right)
    {
#if NET6_0_OR_GREATER
        return CryptographicOperations.FixedTimeEquals(left, right);
#else
        if (left.Length != right.Length) return false;
        int res = 0;
        for (int i = 0; i < left.Length; i++) res |= left[i] ^ right[i];
        return res == 0;
#endif
    }
}

View File

@@ -0,0 +1,14 @@
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network.Security;
/// <summary>
/// Validates peer credentials presented during the connection handshake.
/// </summary>
public interface IAuthenticator
{
    /// <summary>
    /// Validates an authentication token for a node identifier.
    /// </summary>
    /// <param name="nodeId">The node identifier to validate.</param>
    /// <param name="token">The authentication token to validate.</param>
    /// <returns><see langword="true"/> if the token is valid for the node; otherwise <see langword="false"/>.</returns>
    Task<bool> ValidateAsync(string nodeId, string token);
}

View File

@@ -0,0 +1,42 @@
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network.Security;
/// <summary>
/// Performs the initial connection handshake between two peers, optionally negotiating an
/// encrypted channel.
/// </summary>
public interface IPeerHandshakeService
{
    /// <summary>
    /// Performs a handshake to establish identity and an optional security context.
    /// </summary>
    /// <param name="stream">The transport stream used for handshake message exchange.</param>
    /// <param name="isInitiator">A value indicating whether the caller initiated the connection.</param>
    /// <param name="myNodeId">The local node identifier.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>A CipherState if encryption is established, or null if plaintext.</returns>
    Task<CipherState?> HandshakeAsync(System.IO.Stream stream, bool isInitiator, string myNodeId, CancellationToken token);
}
/// <summary>
/// Holds the directional session keys negotiated by a handshake. IV handling is left to the
/// encryption helper; this type is an immutable key container only.
/// </summary>
public class CipherState
{
    /// <summary>
    /// Gets the key used to encrypt outgoing messages.
    /// </summary>
    public byte[] EncryptKey { get; }

    /// <summary>
    /// Gets the key used to decrypt incoming messages.
    /// </summary>
    public byte[] DecryptKey { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="CipherState"/> class.
    /// </summary>
    /// <param name="encryptKey">The key used for encrypting outgoing payloads.</param>
    /// <param name="decryptKey">The key used for decrypting incoming payloads.</param>
    public CipherState(byte[] encryptKey, byte[] decryptKey)
        => (EncryptKey, DecryptKey) = (encryptKey, decryptKey);
}

View File

@@ -0,0 +1,29 @@
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace ZB.MOM.WW.CBDDC.Network.Security;
/// <summary>
/// A no-op <see cref="IPeerHandshakeService"/> that skips the handshake entirely and never
/// establishes a cipher state, leaving the connection in plaintext.
/// </summary>
/// <remarks>Useful when encryption is not required, or as a stand-in for testing.</remarks>
public class NoOpHandshakeService : IPeerHandshakeService
{
    /// <summary>
    /// Completes immediately without exchanging any handshake messages.
    /// </summary>
    /// <param name="stream">The transport stream (unused).</param>
    /// <param name="isInitiator">Whether the local node initiated the connection (unused).</param>
    /// <param name="myNodeId">The local node identifier (unused).</param>
    /// <param name="token">A cancellation token (unused).</param>
    /// <returns>A completed task whose result is always <see langword="null"/>, meaning no encryption is established.</returns>
    public Task<CipherState?> HandshakeAsync(Stream stream, bool isInitiator, string myNodeId, CancellationToken token)
        => Task.FromResult<CipherState?>(null);
}

View File

@@ -0,0 +1,112 @@
using System;
using System.IO;
using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace ZB.MOM.WW.CBDDC.Network.Security;
/// <summary>
/// <see cref="IPeerHandshakeService"/> that performs an ephemeral ECDH key exchange over the
/// stream and derives a pair of directional AES session keys from the shared secret.
/// </summary>
/// <remarks>
/// NOTE(review): the exchanged public keys are not signed or otherwise authenticated here, and
/// neither <c>myNodeId</c> nor any credential is bound into the key derivation — an active
/// man-in-the-middle could interpose unless identity is verified elsewhere in the protocol
/// (e.g. a post-handshake authenticator). Confirm the overall handshake flow covers this.
/// </remarks>
public class SecureHandshakeService : IPeerHandshakeService
{
    private readonly ILogger<SecureHandshakeService>? _logger;
    /// <summary>
    /// Initializes a new instance of the <see cref="SecureHandshakeService"/> class.
    /// </summary>
    /// <param name="logger">The optional logger instance.</param>
    public SecureHandshakeService(ILogger<SecureHandshakeService>? logger = null)
    {
        _logger = logger;
    }
    // Simple protocol:
    // Initiator -> [Public Key Length (4) + Public Key]
    // Responder -> [Public Key Length (4) + Public Key]
    // Both derive shared secret -> split into Send/Recv keys via a SHA-256 based
    // expansion (HKDF-like, not a real HKDF).
    /// <summary>
    /// Performs a secure key exchange handshake over the provided stream.
    /// </summary>
    /// <param name="stream">The transport stream used for the handshake.</param>
    /// <param name="isInitiator">A value indicating whether the local node initiated the handshake.</param>
    /// <param name="myNodeId">The local node identifier. NOTE(review): currently unused by this implementation.</param>
    /// <param name="token">A token used to cancel the handshake.</param>
    /// <returns>
    /// A task that returns the negotiated <see cref="CipherState"/>, or <see langword="null"/> if unavailable.
    /// </returns>
    public async Task<CipherState?> HandshakeAsync(Stream stream, bool isInitiator, string myNodeId, CancellationToken token)
    {
#if NET6_0_OR_GREATER
        using var ecdh = ECDiffieHellman.Create();
        ecdh.KeySize = 256;
        // 1. ExportAsync & Send Public Key
        // Both sides write first and read second, so neither blocks waiting for the other.
        // NOTE(review): the 4-byte length prefix uses BitConverter (host endianness); both
        // peers are assumed to share byte order — confirm for cross-platform deployments.
        var myPublicKey = ecdh.ExportSubjectPublicKeyInfo();
        var lenBytes = BitConverter.GetBytes(myPublicKey.Length);
        await stream.WriteAsync(lenBytes, 0, 4, token);
        await stream.WriteAsync(myPublicKey, 0, myPublicKey.Length, token);
        await stream.FlushAsync(token); // CRITICAL: Ensure data is sent immediately
        // 2. Receive Peer Public Key
        var peerLenBuf = new byte[4];
        await ReadExactAsync(stream, peerLenBuf, 0, 4, token);
        int peerLen = BitConverter.ToInt32(peerLenBuf, 0);
        // Validate peer key length to prevent DoS
        if (peerLen <= 0 || peerLen > 10000)
        {
            throw new InvalidOperationException($"Invalid peer key length: {peerLen}");
        }
        var peerKeyBytes = new byte[peerLen];
        await ReadExactAsync(stream, peerKeyBytes, 0, peerLen, token);
        // 3. Import Peer Key & Derive Shared Secret
        using var peerEcdh = ECDiffieHellman.Create();
        peerEcdh.ImportSubjectPublicKeyInfo(peerKeyBytes, out _);
        byte[] sharedSecret = ecdh.DeriveKeyMaterial(peerEcdh.PublicKey);
        // 4. Derive Session Keys (HKDF-like expansion)
        // Use SHA256 to split/expand secret into EncryptKey and DecryptKey
        // Simple approach: Hash(secret + "0") -> Key1, Hash(secret + "1") -> Key2
        using var sha = SHA256.Create();
        var k1Input = new byte[sharedSecret.Length + 1];
        Buffer.BlockCopy(sharedSecret, 0, k1Input, 0, sharedSecret.Length);
        k1Input[sharedSecret.Length] = 0; // "0"
        var key1 = sha.ComputeHash(k1Input);
        var k2Input = new byte[sharedSecret.Length + 1];
        Buffer.BlockCopy(sharedSecret, 0, k2Input, 0, sharedSecret.Length);
        k2Input[sharedSecret.Length] = 1; // "1"
        var key2 = sha.ComputeHash(k2Input);
        // If initiator: Encrypt with Key1, Decrypt with Key2
        // If responder: Encrypt with Key2, Decrypt with Key1
        // (mirrored assignment keeps the two directions consistent on both ends)
        var encryptKey = isInitiator ? key1 : key2;
        var decryptKey = isInitiator ? key2 : key1;
        return new CipherState(encryptKey, decryptKey);
#else
        // For netstandard2.0, standard ECDH import is broken/hard without external libs.
        // Returning null or throwing.
        throw new PlatformNotSupportedException("Secure handshake requires .NET 6.0+");
#endif
    }
    /// <summary>
    /// Fills <paramref name="buffer"/> with exactly <paramref name="count"/> bytes, throwing
    /// <see cref="EndOfStreamException"/> if the stream ends first.
    /// </summary>
    private async Task<int> ReadExactAsync(Stream stream, byte[] buffer, int offset, int count, CancellationToken token)
    {
        int total = 0;
        while (total < count)
        {
            int read = await stream.ReadAsync(buffer, offset + total, count - total, token);
            if (read == 0) throw new EndOfStreamException();
            total += read;
        }
        return total;
    }
}

View File

@@ -0,0 +1,919 @@
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Network.Security;
using ZB.MOM.WW.CBDDC.Network.Telemetry;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Sockets;
using System.Threading;
using System.Threading.Tasks;
using Serilog.Context;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Orchestrates the synchronization process between the local node and discovered peers.
/// Manages anti-entropy sessions and data exchange.
/// </summary>
public class SyncOrchestrator : ISyncOrchestrator
{
private readonly IDiscoveryService _discovery;
private readonly IOplogStore _oplogStore;
private readonly IOplogPruneCutoffCalculator? _oplogPruneCutoffCalculator;
private readonly IPeerOplogConfirmationStore? _peerOplogConfirmationStore;
private readonly IDocumentStore _documentStore;
private readonly ISnapshotMetadataStore _snapshotMetadataStore;
private readonly ISnapshotService _snapshotService;
private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
private readonly ILogger<SyncOrchestrator> _logger;
private readonly ILoggerFactory _loggerFactory;
private CancellationTokenSource? _cts;
private readonly Random _random = new Random();
private readonly object _startStopLock = new object();
// Persistent clients pool
private readonly ConcurrentDictionary<string, TcpPeerClient> _clients = new();
private readonly ConcurrentDictionary<string, PeerStatus> _peerStates = new();
private readonly IPeerHandshakeService? _handshakeService;
private readonly INetworkTelemetryService? _telemetry;
/// <summary>
/// Per-peer health bookkeeping used to throttle sync attempts to failing peers via a
/// retry-backoff window.
/// </summary>
private class PeerStatus
{
    /// <summary>
    /// Gets or sets the number of consecutive failures for the peer.
    /// </summary>
    public int FailureCount { get; set; }
    /// <summary>
    /// Gets or sets the next time a retry attempt is allowed.
    /// </summary>
    public DateTime NextRetryTime { get; set; }
}
private DateTime _lastMaintenanceTime = DateTime.MinValue;
/// <summary>
/// Initializes a new instance of the <see cref="SyncOrchestrator"/> class.
/// </summary>
/// <param name="discovery">The discovery service.</param>
/// <param name="oplogStore">The oplog store.</param>
/// <param name="documentStore">The document store.</param>
/// <param name="snapshotStore">The snapshot metadata store.</param>
/// <param name="snapshotService">The snapshot service.</param>
/// <param name="peerNodeConfigurationProvider">The peer configuration provider.</param>
/// <param name="loggerFactory">The logger factory.</param>
/// <param name="peerOplogConfirmationStore">The optional peer confirmation watermark store.</param>
/// <param name="handshakeService">The optional peer handshake service.</param>
/// <param name="telemetry">The optional network telemetry service.</param>
/// <param name="oplogPruneCutoffCalculator">The optional cutoff calculator for safe maintenance pruning.</param>
public SyncOrchestrator(
    IDiscoveryService discovery,
    IOplogStore oplogStore,
    IDocumentStore documentStore,
    ISnapshotMetadataStore snapshotStore,
    ISnapshotService snapshotService,
    IPeerNodeConfigurationProvider peerNodeConfigurationProvider,
    ILoggerFactory loggerFactory,
    IPeerOplogConfirmationStore? peerOplogConfirmationStore = null,
    IPeerHandshakeService? handshakeService = null,
    INetworkTelemetryService? telemetry = null,
    IOplogPruneCutoffCalculator? oplogPruneCutoffCalculator = null)
{
    // Required collaborators.
    _discovery = discovery;
    _oplogStore = oplogStore;
    _documentStore = documentStore;
    _snapshotMetadataStore = snapshotStore;
    _snapshotService = snapshotService;
    _peerNodeConfigurationProvider = peerNodeConfigurationProvider;
    _loggerFactory = loggerFactory;
    _logger = loggerFactory.CreateLogger<SyncOrchestrator>();
    // Optional collaborators; related features are skipped when these are null.
    _peerOplogConfirmationStore = peerOplogConfirmationStore;
    _handshakeService = handshakeService;
    _telemetry = telemetry;
    _oplogPruneCutoffCalculator = oplogPruneCutoffCalculator;
}
/// <summary>
/// Starts the synchronization orchestrator loop. Safe to call multiple times; subsequent
/// calls while running are ignored with a warning.
/// </summary>
/// <returns>A completed task once startup has been triggered.</returns>
public async Task Start()
{
    CancellationTokenSource cts;
    lock (_startStopLock)
    {
        if (_cts != null)
        {
            _logger.LogWarning("Sync Orchestrator already started");
            return;
        }
        cts = new CancellationTokenSource();
        _cts = cts;
    }
    // BUG FIX: read the token from the locally captured CTS. The previous code read
    // `_cts.Token` after releasing the lock, so a concurrent Stop() could null out and
    // dispose the field in between, causing a NullReferenceException or
    // ObjectDisposedException here.
    var token = cts.Token;
    // Fire-and-forget the loop; failures are logged rather than propagated to the caller.
    _ = Task.Run(async () =>
    {
        try
        {
            await SyncLoopAsync(token);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Sync Loop task failed");
        }
    }, token);
    await Task.CompletedTask;
}
/// <summary>
/// Stops the synchronization orchestrator loop and releases client resources.
/// </summary>
/// <returns>A completed task once shutdown has been triggered.</returns>
public async Task Stop()
{
    // Swap the CTS out under the lock, then cancel/dispose outside of it.
    CancellationTokenSource? ctsToDispose;
    lock (_startStopLock)
    {
        ctsToDispose = _cts;
        _cts = null;
    }
    if (ctsToDispose == null)
    {
        _logger.LogWarning("Sync Orchestrator already stopped or never started");
        return;
    }
    try
    {
        ctsToDispose.Cancel();
    }
    catch (ObjectDisposedException)
    {
        // Already disposed, ignore
    }
    finally
    {
        ctsToDispose.Dispose();
    }
    // Tear down every pooled peer connection; individual disposal failures are logged
    // and do not abort shutdown.
    foreach (var client in _clients.Values)
    {
        try
        {
            client.Dispose();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error disposing client during shutdown");
        }
    }
    _clients.Clear();
    await Task.CompletedTask;
}
/// <summary>
/// Main synchronization loop. Periodically selects random peers to gossip with.
/// Each pass merges discovered and statically configured peers, filters out peers in
/// retry backoff, prioritizes peers sharing collection interests, syncs with up to
/// three of them in parallel, then runs maintenance if due.
/// </summary>
/// <param name="token">Cancellation token that ends the loop.</param>
private async Task SyncLoopAsync(CancellationToken token)
{
    _logger.LogInformation("Sync Orchestrator Started (Parallel P2P)");
    while (!token.IsCancellationRequested)
    {
        // Re-read configuration every pass so runtime config changes take effect.
        var config = await _peerNodeConfigurationProvider.GetConfiguration();
        try
        {
            var discoveredPeers = _discovery.GetActivePeers();
            var knownPeers = config.KnownPeers.Select(k => new PeerNode(
                k.NodeId,
                $"{k.Host}:{k.Port}",
                DateTimeOffset.UtcNow,
                PeerType.StaticRemote));
            var allPeers = BuildMergedPeerList(discoveredPeers, knownPeers, config.NodeId);
            await EnsurePeersRegisteredAsync(allPeers, config.NodeId, token);
            // Filter peers based on backoff: skip any peer whose NextRetryTime is in the future.
            var now = DateTime.UtcNow;
            var eligiblePeers = allPeers.Where(p =>
            {
                if (_peerStates.TryGetValue(p.NodeId, out var status))
                {
                    return status.NextRetryTime <= now;
                }
                return true;
            }).ToList();
            // Interest-Aware Gossip: Prioritize peers sharing interests with us,
            // break ties randomly, and fan out to at most 3 peers per pass.
            var localInterests = _documentStore.InterestedCollection.ToList();
            var targets = eligiblePeers
                .OrderByDescending(p => p.InterestingCollections.Any(ci => localInterests.Contains(ci)))
                .ThenBy(x => _random.Next())
                .Take(3)
                .ToList();
            // NetStandard 2.0 fallback: Use Task.WhenAll
            var tasks = targets.Select(peer => TrySyncWithPeer(peer, token));
            await Task.WhenAll(tasks);
            await RunMaintenanceIfDueAsync(config, now, token);
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Sync Loop Cancelled");
            break;
        }
        catch (Exception ex)
        {
            // Keep the loop alive on unexpected errors; individual peer failures are
            // already handled inside TrySyncWithPeer.
            _logger.LogError(ex, "Sync Loop Error");
        }
        try
        {
            // Gossip cadence: one pass every 2 seconds.
            await Task.Delay(2000, token);
        }
        catch (OperationCanceledException)
        {
            break;
        }
    }
}
/// <summary>
/// Runs periodic maintenance when the configured interval has elapsed.
/// Currently this prunes the oplog up to a safely computed cutoff timestamp.
/// </summary>
/// <param name="config">The current peer node configuration.</param>
/// <param name="now">The current UTC time used for interval evaluation.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous maintenance operation.</returns>
internal async Task RunMaintenanceIfDueAsync(PeerNodeConfiguration config, DateTime now, CancellationToken token)
{
    var maintenanceInterval = TimeSpan.FromMinutes(config.MaintenanceIntervalMinutes);
    if ((now - _lastMaintenanceTime) < maintenanceInterval)
    {
        return;
    }
    _logger.LogInformation("Running periodic maintenance (Oplog pruning)...");
    try
    {
        var cutoffDecision = await CalculatePruneCutoffAsync(config, token);
        if (!cutoffDecision.HasCutoff || !cutoffDecision.EffectiveCutoff.HasValue)
        {
            // Advance the watermark even when skipping, so the skip decision is not
            // re-evaluated on every loop pass until the next interval elapses.
            _lastMaintenanceTime = now;
            var reason = string.IsNullOrWhiteSpace(cutoffDecision.Reason)
                ? "No effective cutoff was produced."
                : cutoffDecision.Reason;
            _logger.LogInformation("Skipping oplog prune for this maintenance cycle: {Reason}", reason);
            return;
        }
        await _oplogStore.PruneOplogAsync(cutoffDecision.EffectiveCutoff.Value, token);
        _lastMaintenanceTime = now;
        if (cutoffDecision.ConfirmationCutoff.HasValue)
        {
            _logger.LogInformation(
                "Maintenance completed successfully (Retention: {RetentionHours}h, EffectiveCutoff: {EffectiveCutoff}, ConfirmationCutoff: {ConfirmationCutoff}).",
                config.OplogRetentionHours,
                cutoffDecision.EffectiveCutoff.Value,
                cutoffDecision.ConfirmationCutoff.Value);
        }
        else
        {
            _logger.LogInformation(
                "Maintenance completed successfully (Retention: {RetentionHours}h, EffectiveCutoff: {EffectiveCutoff}).",
                config.OplogRetentionHours,
                cutoffDecision.EffectiveCutoff.Value);
        }
    }
    catch (Exception maintenanceEx)
    {
        // NOTE(review): _lastMaintenanceTime is NOT advanced on failure, so maintenance
        // is retried on every subsequent loop pass until it succeeds — presumably
        // intentional; confirm this is the desired retry behavior.
        _logger.LogError(maintenanceEx, "Maintenance failed.");
    }
}
/// <summary>
/// Computes the effective oplog prune cutoff. Delegates to the configured cutoff
/// calculator when available; otherwise falls back to a pure retention-window cutoff
/// derived from <c>OplogRetentionHours</c> (no peer-confirmation safety check).
/// </summary>
/// <param name="config">The current peer node configuration.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>The cutoff decision used by maintenance pruning.</returns>
private async Task<OplogPruneCutoffDecision> CalculatePruneCutoffAsync(PeerNodeConfiguration config, CancellationToken token)
{
    if (_oplogPruneCutoffCalculator == null)
    {
        var retentionCutoff = new HlcTimestamp(
            DateTimeOffset.UtcNow.AddHours(-config.OplogRetentionHours).ToUnixTimeMilliseconds(),
            0,
            config.NodeId);
        return OplogPruneCutoffDecision.WithCutoff(
            retentionCutoff,
            confirmationCutoff: null,
            effectiveCutoff: retentionCutoff,
            reason: "Oplog prune cutoff calculator is not configured.");
    }
    return await _oplogPruneCutoffCalculator.CalculateEffectiveCutoffAsync(config, token);
}
/// <summary>
/// Attempts to synchronize with a specific peer.
/// Uses Vector Clock comparison to determine what to pull/push for each node.
/// Performs handshake, vector clock exchange, and data exchange (Push/Pull per node).
/// Failures are recorded for backoff; the persistent client is discarded on transport
/// or protocol errors so the next attempt reconnects cleanly.
/// </summary>
/// <param name="peer">The peer to synchronize with.</param>
/// <param name="token">The cancellation token.</param>
private async Task TrySyncWithPeer(PeerNode peer, CancellationToken token)
{
    // Enrich all log entries for this session with a correlation id and peer identity.
    using var operationContext = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
    using var peerContext = LogContext.PushProperty("PeerNodeId", peer.NodeId);
    using var peerAddressContext = LogContext.PushProperty("PeerAddress", peer.Address);
    TcpPeerClient? client = null;
    bool shouldRemoveClient = false;
    bool syncSuccessful = false;
    try
    {
        var config = await _peerNodeConfigurationProvider.GetConfiguration();
        // Get or create persistent client
        client = _clients.GetOrAdd(peer.NodeId, id => new TcpPeerClient(
            peer.Address,
            _loggerFactory.CreateLogger<TcpPeerClient>(),
            _handshakeService,
            _telemetry));
        // Reconnect if disconnected
        if (!client.IsConnected)
        {
            await client.ConnectAsync(token);
        }
        // Handshake (idempotent)
        if (!await client.HandshakeAsync(config.NodeId, config.AuthToken, _documentStore.InterestedCollection, token))
        {
            _logger.LogWarning("Handshake rejected by {NodeId}", peer.NodeId);
            shouldRemoveClient = true;
            // NOTE(review): throwing the base Exception type is an anti-pattern; it is
            // handled by the generic catch below, but a more specific type (e.g.
            // InvalidOperationException) would be clearer.
            throw new Exception("Handshake rejected");
        }
        // 1. Exchange Vector Clocks
        var remoteVectorClock = await client.GetVectorClockAsync(token);
        var localVectorClock = await _oplogStore.GetVectorClockAsync(token);
        _logger.LogDebug("Vector Clock - Local: {Local}, Remote: {Remote}", localVectorClock, remoteVectorClock);
        await AdvanceConfirmationsFromVectorClockAsync(peer.NodeId, localVectorClock, remoteVectorClock, token);
        // 2. Determine causality relationship
        var causality = localVectorClock.CompareTo(remoteVectorClock);
        // 3. PULL: Identify nodes where remote is ahead
        var nodesToPull = localVectorClock.GetNodesWithUpdates(remoteVectorClock).ToList();
        var nodesToPush = localVectorClock.GetNodesToPush(remoteVectorClock).ToList();
        if (nodesToPull.Any())
        {
            _logger.LogInformation("Pulling changes from {PeerNodeId} for {Count} nodes: {Nodes}",
                peer.NodeId, nodesToPull.Count, string.Join(", ", nodesToPull));
            foreach (var nodeId in nodesToPull)
            {
                var localTs = localVectorClock.GetTimestamp(nodeId);
                var remoteTs = remoteVectorClock.GetTimestamp(nodeId);
                _logger.LogDebug("Pulling Node {NodeId}: Local={LocalTs}, Remote={RemoteTs}",
                    nodeId, localTs, remoteTs);
                // PASS LOCAL INTERESTS TO PULL
                var changes = await client.PullChangesFromNodeAsync(nodeId, localTs, _documentStore.InterestedCollection, token);
                if (changes != null && changes.Count > 0)
                {
                    var result = await ProcessInboundBatchAsync(client, peer.NodeId, changes, token);
                    if (result != SyncBatchResult.Success)
                    {
                        // Abort the whole session on a failed batch so state stays consistent.
                        _logger.LogWarning("Inbound batch processing failed with status {Status}. Aborting sync for this session.", result);
                        RecordFailure(peer.NodeId);
                        return;
                    }
                }
            }
        }
        // 4. PUSH: Identify nodes where local is ahead
        if (nodesToPush.Any())
        {
            _logger.LogInformation("Pushing changes to {PeerNodeId} for {Count} nodes: {Nodes}",
                peer.NodeId, nodesToPush.Count, string.Join(", ", nodesToPush));
            foreach (var nodeId in nodesToPush)
            {
                var remoteTs = remoteVectorClock.GetTimestamp(nodeId);
                // PUSH FILTERING: Pass remote receiver's interests to oplogStore for efficient retrieval
                var remoteInterests = client.RemoteInterests;
                var changes = (await _oplogStore.GetOplogForNodeAfterAsync(nodeId, remoteTs, remoteInterests, token)).ToList();
                if (changes.Any())
                {
                    _logger.LogDebug("Pushing {Count} filtered changes for Node {NodeId}", changes.Count, nodeId);
                    await client.PushChangesAsync(changes, token);
                    await AdvanceConfirmationForPushedBatchAsync(peer.NodeId, nodeId, changes, token);
                }
            }
        }
        // 5. Handle Concurrent/Equal cases
        if (causality == CausalityRelation.Equal)
        {
            _logger.LogDebug("Vector clocks are equal with {PeerNodeId}. No sync needed.", peer.NodeId);
        }
        else if (causality == CausalityRelation.Concurrent && !nodesToPull.Any() && !nodesToPush.Any())
        {
            _logger.LogDebug("Vector clocks are concurrent with {PeerNodeId}, but no divergence detected.", peer.NodeId);
        }
        syncSuccessful = true;
        RecordSuccess(peer.NodeId);
    }
    catch (SnapshotRequiredException)
    {
        // Incremental sync is not possible (e.g. oplog pruned past what the peer needs);
        // fall back to a merge-style snapshot sync over the existing connection.
        _logger.LogWarning("Snapshot required for peer {NodeId}. Initiating merge sync.", peer.NodeId);
        if (client != null && client.IsConnected)
        {
            try
            {
                await PerformSnapshotSyncAsync(client, true, token);
                syncSuccessful = true;
                RecordSuccess(peer.NodeId);
            }
            catch
            {
                RecordFailure(peer.NodeId);
                shouldRemoveClient = true;
            }
        }
        else
        {
            RecordFailure(peer.NodeId);
            shouldRemoveClient = true;
        }
    }
    catch (CorruptDatabaseException cex)
    {
        _logger.LogCritical(cex, "Local database corruption detected during sync with {NodeId}. Initiating EMERGENCY SNAPSHOT RECOVERY.", peer.NodeId);
        if (client != null && client.IsConnected)
        {
            try
            {
                // EMERGENCY RECOVERY: Replace local DB with remote snapshot (mergeOnly: false)
                await PerformSnapshotSyncAsync(client, false, token);
                syncSuccessful = true;
                RecordSuccess(peer.NodeId);
                _logger.LogInformation("Emergency recovery successful. Local database replaced.");
            }
            catch (Exception recoveryEx)
            {
                _logger.LogCritical(recoveryEx, "Emergency recovery failed. App state is critical.");
                RecordFailure(peer.NodeId);
                shouldRemoveClient = true;
            }
        }
        else
        {
            RecordFailure(peer.NodeId);
            shouldRemoveClient = true;
        }
    }
    catch (TimeoutException tex)
    {
        _logger.LogWarning("Sync with {NodeId} timed out: {Message}. Will retry later.", peer.NodeId, tex.Message);
        shouldRemoveClient = true;
        RecordFailure(peer.NodeId);
    }
    catch (SocketException sex)
    {
        _logger.LogWarning("Network error syncing with {NodeId}: {Message}. Will retry later.", peer.NodeId, sex.Message);
        shouldRemoveClient = true;
        RecordFailure(peer.NodeId);
    }
    catch (Exception ex)
    {
        // Catch-all: any unexpected failure resets the connection and records backoff.
        _logger.LogWarning("Sync failed with {NodeId}: {Message}. Resetting connection.", peer.NodeId, ex.Message);
        shouldRemoveClient = true;
        RecordFailure(peer.NodeId);
    }
    finally
    {
        // Discard the pooled client after errors so the next attempt starts fresh.
        if (shouldRemoveClient && client != null)
        {
            if (_clients.TryRemove(peer.NodeId, out var removedClient))
            {
                try { removedClient.Dispose(); } catch { /* Ignore disposal errors */ }
            }
        }
        // Log successful sync outcome (failures are already logged in catch blocks)
        if (syncSuccessful)
        {
            _logger.LogInformation("Sync with {NodeId} completed successfully.", peer.NodeId);
        }
    }
}
/// <summary>
/// Resets the failure count and retry-backoff window for a peer after a successful sync.
/// </summary>
/// <param name="nodeId">The peer node identifier.</param>
private void RecordSuccess(string nodeId)
{
    _peerStates.AddOrUpdate(nodeId,
        new PeerStatus { FailureCount = 0, NextRetryTime = DateTime.MinValue },
        (k, v) => { v.FailureCount = 0; v.NextRetryTime = DateTime.MinValue; return v; });
}
/// <summary>
/// Merges discovered and configured peers into a distinct list that excludes the local node.
/// Discovered peers take precedence: when the same node id appears in both inputs,
/// the first occurrence (discovery order first, then configuration order) wins.
/// </summary>
/// <param name="discoveredPeers">The peers discovered dynamically.</param>
/// <param name="knownPeers">The peers configured statically.</param>
/// <param name="localNodeId">The local node identifier to exclude from results.</param>
/// <returns>A de-duplicated list of peers eligible for synchronization.</returns>
internal static IReadOnlyList<PeerNode> BuildMergedPeerList(
    IEnumerable<PeerNode> discoveredPeers,
    IEnumerable<PeerNode> knownPeers,
    string localNodeId)
{
    var seenNodeIds = new HashSet<string>(StringComparer.Ordinal);
    var merged = new List<PeerNode>();

    foreach (var peer in discoveredPeers.Concat(knownPeers))
    {
        // Never sync with ourselves.
        if (string.Equals(peer.NodeId, localNodeId, StringComparison.Ordinal))
        {
            continue;
        }

        // HashSet.Add returns false for duplicates, keeping the first occurrence only.
        if (seenNodeIds.Add(peer.NodeId))
        {
            merged.Add(peer);
        }
    }

    return merged;
}
/// <summary>
/// Ensures peers are registered in the confirmation store when that store is available.
/// Registration is best-effort: a failure for one peer is logged and does not block the rest.
/// </summary>
/// <param name="peers">The peers to register.</param>
/// <param name="localNodeId">The local node identifier used to skip self-registration.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous registration operation.</returns>
internal async Task EnsurePeersRegisteredAsync(IEnumerable<PeerNode> peers, string localNodeId, CancellationToken token)
{
    var store = _peerOplogConfirmationStore;
    if (store == null)
    {
        return;
    }

    foreach (var peer in peers)
    {
        // Never register the local node with itself.
        if (string.Equals(peer.NodeId, localNodeId, StringComparison.Ordinal))
        {
            continue;
        }

        try
        {
            await store.EnsurePeerRegisteredAsync(peer.NodeId, peer.Address, peer.Type, token);
        }
        catch (OperationCanceledException) when (token.IsCancellationRequested)
        {
            // Genuine cancellation must propagate; it is not a per-peer failure.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to register peer {PeerNodeId} in confirmation store", peer.NodeId);
        }
    }
}
/// <summary>
/// Advances peer confirmation watermarks using local and remote vector clock state.
/// For every source node both sides know about, the peer is confirmed up to the local
/// timestamp — but only when the remote clock has reached at least that far.
/// </summary>
/// <param name="peerNodeId">The peer node identifier whose confirmations are being updated.</param>
/// <param name="localVectorClock">The local vector clock.</param>
/// <param name="remoteVectorClock">The remote vector clock.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous confirmation update operation.</returns>
internal async Task AdvanceConfirmationsFromVectorClockAsync(
    string peerNodeId,
    VectorClock localVectorClock,
    VectorClock remoteVectorClock,
    CancellationToken token)
{
    if (_peerOplogConfirmationStore == null)
    {
        return;
    }

    // Union of every source node either clock has seen.
    var allSourceNodes = new HashSet<string>(StringComparer.Ordinal);
    allSourceNodes.UnionWith(localVectorClock.NodeIds);
    allSourceNodes.UnionWith(remoteVectorClock.NodeIds);

    foreach (var sourceNodeId in allSourceNodes)
    {
        var localTimestamp = localVectorClock.GetTimestamp(sourceNodeId);

        // No local knowledge of this source; nothing to confirm.
        if (localTimestamp == default)
        {
            continue;
        }

        // The peer can only be confirmed up to what it has actually seen;
        // skip sources where the remote clock lags behind ours.
        if (remoteVectorClock.GetTimestamp(sourceNodeId) < localTimestamp)
        {
            continue;
        }

        await UpdatePeerConfirmationAsync(peerNodeId, sourceNodeId, localTimestamp, token);
    }
}
/// <summary>
/// Advances the peer confirmation watermark after successfully pushing a batch of changes.
/// The watermark is moved to the newest pushed entry; failures are logged and not propagated
/// (a missed advance is recovered by later vector-clock based confirmation updates).
/// </summary>
/// <param name="peerNodeId">The peer node identifier that received the changes.</param>
/// <param name="sourceNodeId">The source node identifier associated with the pushed changes.</param>
/// <param name="pushedChanges">The pushed oplog entries.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous confirmation update operation.</returns>
internal async Task AdvanceConfirmationForPushedBatchAsync(
    string peerNodeId,
    string sourceNodeId,
    IReadOnlyCollection<OplogEntry> pushedChanges,
    CancellationToken token)
{
    if (_peerOplogConfirmationStore == null || pushedChanges.Count == 0)
    {
        return;
    }
    // Find the newest pushed entry in O(n) instead of sorting the whole batch.
    // HLC timestamps within one source chain are strictly ordered, so no tie-break
    // is needed; Count > 0 guarantees MaxBy returns a non-null entry.
    var maxPushed = pushedChanges.MaxBy(entry => entry.Timestamp)!;
    try
    {
        await _peerOplogConfirmationStore.UpdateConfirmationAsync(
            peerNodeId,
            sourceNodeId,
            maxPushed.Timestamp,
            maxPushed.Hash ?? string.Empty,
            token);
    }
    catch (OperationCanceledException) when (token.IsCancellationRequested)
    {
        // Genuine cancellation must propagate.
        throw;
    }
    catch (Exception ex)
    {
        // Best-effort: log and continue rather than failing the sync that already succeeded.
        _logger.LogWarning(ex,
            "Failed to advance push confirmation watermark for peer {PeerNodeId} and source {SourceNodeId}",
            peerNodeId, sourceNodeId);
    }
}
/// <summary>
/// Advances the confirmation watermark for a single (peer, source) pair.
/// Failures other than cancellation are logged and swallowed (best-effort).
/// </summary>
/// <param name="peerNodeId">The peer whose watermark is being advanced.</param>
/// <param name="sourceNodeId">The source node the watermark applies to.</param>
/// <param name="timestamp">The timestamp to confirm up to.</param>
/// <param name="token">The cancellation token.</param>
private async Task UpdatePeerConfirmationAsync(
    string peerNodeId,
    string sourceNodeId,
    HlcTimestamp timestamp,
    CancellationToken token)
{
    var store = _peerOplogConfirmationStore;
    if (store == null)
    {
        return;
    }

    try
    {
        // Best-effort hash lookup: IOplogStore exposes latest hash per source node.
        var headHash = await _oplogStore.GetLastEntryHashAsync(sourceNodeId, token);
        await store.UpdateConfirmationAsync(peerNodeId, sourceNodeId, timestamp, headHash ?? string.Empty, token);
    }
    catch (OperationCanceledException) when (token.IsCancellationRequested)
    {
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Failed to advance confirmation watermark for peer {PeerNodeId} and source {SourceNodeId}",
            peerNodeId, sourceNodeId);
    }
}
/// <summary>
/// Records a failed sync attempt for a peer and schedules the next retry with
/// exponential backoff.
/// </summary>
/// <param name="nodeId">The peer node identifier.</param>
private void RecordFailure(string nodeId)
{
    _peerStates.AddOrUpdate(nodeId,
        // First tracked failure for this peer: retry after 1s.
        new PeerStatus { FailureCount = 1, NextRetryTime = DateTime.UtcNow.AddSeconds(1) },
        (k, v) =>
        {
            v.FailureCount++;
            // Exponential backoff, capped at 60s. Because the delay is 2^FailureCount
            // AFTER the increment, the actual sequence is 1s (add path above), then
            // 4s, 8s, 16s, 32s, 60s, 60s, ... — the 2s step is skipped.
            var delaySeconds = Math.Min(Math.Pow(2, v.FailureCount), 60);
            v.NextRetryTime = DateTime.UtcNow.AddSeconds(delaySeconds);
            return v;
        });
}
/// <summary>
/// Outcome of processing one inbound batch of oplog changes from a peer.
/// </summary>
private enum SyncBatchResult
{
    /// <summary>The batch (including any recovered gap entries) was applied successfully.</summary>
    Success,
    /// <summary>A gap to the local chain head was detected and could not be recovered.</summary>
    GapDetected,
    /// <summary>An entry failed its integrity (hash) check.
    /// NOTE(review): not returned by the visible processing path, which soft-accepts hash mismatches.</summary>
    IntegrityError,
    /// <summary>Entries within the batch do not form a contiguous hash chain.</summary>
    ChainBroken
}
/// <summary>
/// Validates an inbound batch of changes, checks for gaps, performs recovery if needed, and applies to oplogStore.
/// Extracted to enforce Single Responsibility Principle.
/// </summary>
/// <param name="client">The peer connection, used for gap-recovery range requests.</param>
/// <param name="peerNodeId">The node identifier of the peer that sent the batch.</param>
/// <param name="changes">The inbound oplog entries; may contain entries authored by multiple nodes.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>The outcome of applying the batch.</returns>
/// <exception cref="SnapshotRequiredException">Propagated from gap recovery to trigger a full snapshot sync.</exception>
private async Task<SyncBatchResult> ProcessInboundBatchAsync(TcpPeerClient client, string peerNodeId, IList<OplogEntry> changes, CancellationToken token)
{
    _logger.LogInformation("Received {Count} changes from {NodeId}", changes.Count, peerNodeId);
    // 1. Validate internal integrity of the batch (Hash check)
    foreach (var entry in changes)
    {
        if (!entry.IsValid())
        {
            // CHANGED: Log Critical Error but ACCEPT the entry to allow sync to progress (Soft Validation).
            // Throwing here would cause an unrecoverable state where this batch blocks sync forever.
            _logger.LogError("Integrity Check Failed for Entry {Hash} (Node: {NodeId}). Expected: {computedHash}. ACCEPTING payload despite mismatch to maintain availability.",
                entry.Hash, entry.Timestamp.NodeId, entry.ComputeHash());
        }
    }
    // 2. Group changes by Author Node to validate Source Chains independently
    var changesByNode = changes.GroupBy(c => c.Timestamp.NodeId);
    foreach (var group in changesByNode)
    {
        var authorNodeId = group.Key;
        // FIX: Order by the full Timestamp (Physical + Logical), not just LogicalCounter.
        // LogicalCounter resets when PhysicalTime advances, so sorting by Counter alone breaks chronological order.
        var authorChain = group.OrderBy(c => c.Timestamp).ToList();
        // Check linkage within the batch: each entry must reference the hash of its predecessor.
        for (int i = 1; i < authorChain.Count; i++)
        {
            if (authorChain[i].PreviousHash != authorChain[i - 1].Hash)
            {
                _logger.LogError("Chain Broken in Batch for Node {AuthorId}", authorNodeId);
                return SyncBatchResult.ChainBroken;
            }
        }
        // Check linkage with Local State: the batch must connect to our stored chain head.
        var firstEntry = authorChain[0];
        var localHeadHash = await _oplogStore.GetLastEntryHashAsync(authorNodeId, token);
        _logger.LogDebug("Processing chain for Node {AuthorId}: FirstEntry.PrevHash={PrevHash}, FirstEntry.Hash={Hash}, LocalHeadHash={LocalHead}",
            authorNodeId, firstEntry.PreviousHash, firstEntry.Hash, localHeadHash ?? "(null)");
        if (localHeadHash != null && firstEntry.PreviousHash != localHeadHash)
        {
            // Check if entry starts from snapshot boundary (valid case after pruning)
            var snapshotHash = await _snapshotMetadataStore.GetSnapshotHashAsync(authorNodeId, token);
            if (snapshotHash != null && firstEntry.PreviousHash == snapshotHash)
            {
                // Entry connects to snapshot boundary - this is expected after pruning/snapshot sync
                // This prevents infinite snapshot request loops when old nodes reconnect
                _logger.LogInformation(
                    "Entry for Node {AuthorId} connects to snapshot boundary (Hash: {SnapshotHash}). Accepting without gap recovery. Network convergence in effect.",
                    authorNodeId, snapshotHash);
                // No gap recovery needed - this is a valid state
            }
            else
            {
                // GAP DETECTED (not a snapshot boundary case)
                _logger.LogWarning("Gap Detected for Node {AuthorId}. Local Head: {Local}, Remote Prev: {Prev}. Initiating Recovery.",
                    authorNodeId, localHeadHash, firstEntry.PreviousHash);
                // Gap Recovery (Range Sync): ask the peer for the entries between our head and the batch start.
                List<OplogEntry>? missingChain = null;
                try
                {
                    missingChain = await client.GetChainRangeAsync(localHeadHash, firstEntry.PreviousHash, token);
                }
                catch (SnapshotRequiredException)
                {
                    throw; // Propagate up to trigger full sync
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Gap Recovery failed.");
                    /* Fallthrough to decision logic */
                }
                if (missingChain != null && missingChain.Any())
                {
                    _logger.LogInformation("Gap Recovery: Retrieved {Count} missing entries.", missingChain.Count);
                    // Validate Recovery Chain Linkage: must start at our head, be internally
                    // contiguous, and end exactly where the inbound batch begins.
                    bool linkValid = true;
                    if (missingChain[0].PreviousHash != localHeadHash) linkValid = false;
                    for (int i = 1; i < missingChain.Count; i++)
                        if (missingChain[i].PreviousHash != missingChain[i - 1].Hash) linkValid = false;
                    if (missingChain.Last().Hash != firstEntry.PreviousHash) linkValid = false;
                    if (!linkValid)
                    {
                        _logger.LogError("Recovery Chain Invalid Linkage. Aborting Gap Recovery.");
                        return SyncBatchResult.GapDetected;
                    }
                    // Apply Missing Chain First
                    await _oplogStore.ApplyBatchAsync(missingChain, token);
                    _logger.LogInformation("Gap Recovery Applied Successfully.");
                }
                else
                {
                    // Gap recovery failed. This can happen if:
                    // 1. This is actually our first contact with this node's history
                    // 2. The peer doesn't have the full history
                    // 3. There's a true gap that cannot be recovered
                    // DECISION: Accept the entries anyway but log a warning
                    // This allows forward progress even with partial history
                    _logger.LogWarning("Could not recover gap for Node {AuthorId}. Local Head: {Local}, Remote Prev: {Prev}. Accepting entries anyway (partial sync).",
                        authorNodeId, localHeadHash, firstEntry.PreviousHash);
                    // Optionally: Mark this as a partial sync in metadata
                    // For now, we proceed and let the chain continue from this point
                }
            }
        }
        else if (localHeadHash == null && !string.IsNullOrEmpty(firstEntry.PreviousHash))
        {
            // Implicit Accept / Partial Sync warning
            _logger.LogWarning("First contact with Node {AuthorId} at explicit state (Not Genesis). Accepting.", authorNodeId);
        }
        // Apply original batch (grouped by node for clarity, but oplogStore usually handles bulk)
        await _oplogStore.ApplyBatchAsync(authorChain, token);
    }
    return SyncBatchResult.Success;
}
/// <summary>
/// Downloads a full snapshot from the peer into a temporary file and applies it,
/// either merging into the local store or replacing the local database entirely.
/// The temporary file is always removed, even on failure.
/// </summary>
/// <param name="client">The connected peer client to download from.</param>
/// <param name="mergeOnly">If true, merges the snapshot; otherwise replaces the local database.</param>
/// <param name="token">The cancellation token.</param>
private async Task PerformSnapshotSyncAsync(TcpPeerClient client, bool mergeOnly, CancellationToken token)
{
    if (mergeOnly)
    {
        _logger.LogInformation("Starting Snapshot Merge...");
    }
    else
    {
        _logger.LogInformation("Starting Full Database Replacement...");
    }

    var snapshotPath = Path.GetTempFileName();
    try
    {
        _logger.LogInformation("Downloading snapshot to {TempFile}...", snapshotPath);

        // Stream the snapshot to disk first so a partial download never touches the store.
        using (var downloadTarget = File.Create(snapshotPath))
        {
            await client.GetSnapshotAsync(downloadTarget, token);
        }

        _logger.LogInformation("Snapshot Downloaded. applying to store...");

        using (var snapshotSource = File.OpenRead(snapshotPath))
        {
            if (mergeOnly)
            {
                await _snapshotService.MergeSnapshotAsync(snapshotSource, token);
            }
            else
            {
                await _snapshotService.ReplaceDatabaseAsync(snapshotSource, token);
            }
        }

        _logger.LogInformation("Snapshot applied successfully.");
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to perform snapshot sync");
        throw;
    }
    finally
    {
        // Best-effort cleanup of the temporary download.
        try
        {
            if (File.Exists(snapshotPath))
            {
                File.Delete(snapshotPath);
            }
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to delete temporary snapshot file {TempFile}", snapshotPath);
        }
    }
}
}

View File

@@ -0,0 +1,502 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net.Sockets;
using System.Threading;
using System.Threading.Tasks;
using Google.Protobuf;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Network.Proto;
using ZB.MOM.WW.CBDDC.Network.Security;
using ZB.MOM.WW.CBDDC.Network.Protocol;
using ZB.MOM.WW.CBDDC.Network.Telemetry;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Represents a TCP client connection to a remote peer for synchronization.
/// </summary>
public class TcpPeerClient : IDisposable
{
    private readonly TcpClient _client;
    private readonly string _peerAddress;
    private readonly ILogger<TcpPeerClient> _logger;
    private readonly IPeerHandshakeService? _handshakeService;
    private NetworkStream? _stream;
    private CipherState? _cipherState;
    private readonly object _connectionLock = new object();
    private bool _disposed = false;
    private bool _useCompression = false; // Negotiated after handshake (Brotli).
    private const int ConnectionTimeoutMs = 5000;
    private const int OperationTimeoutMs = 30000;
    private readonly ProtocolHandler _protocol;
    private readonly INetworkTelemetryService? _telemetry;
    private List<string> _remoteInterests = new();
    /// <summary>
    /// Gets a value indicating whether the client currently has an active connection.
    /// </summary>
    public bool IsConnected
    {
        get
        {
            lock (_connectionLock)
            {
                return _client != null && _client.Connected && _stream != null && !_disposed;
            }
        }
    }
    /// <summary>
    /// Gets a value indicating whether the handshake with the remote peer has completed successfully.
    /// </summary>
    public bool HasHandshaked { get; private set; }
    /// <summary>
    /// Gets the list of collections the remote peer is interested in.
    /// Populated by <see cref="HandshakeAsync(string, string, IEnumerable{string}?, CancellationToken)"/>.
    /// </summary>
    public System.Collections.Generic.IReadOnlyList<string> RemoteInterests => _remoteInterests.AsReadOnly();
    /// <summary>
    /// Initializes a new instance of the <see cref="TcpPeerClient"/> class.
    /// </summary>
    /// <param name="peerAddress">The remote peer address in <c>host:port</c> format.</param>
    /// <param name="logger">The logger used for connection and protocol events.</param>
    /// <param name="handshakeService">The optional handshake service used to establish secure sessions.</param>
    /// <param name="telemetry">The optional telemetry service for network metrics.</param>
    public TcpPeerClient(string peerAddress, ILogger<TcpPeerClient> logger, IPeerHandshakeService? handshakeService = null, INetworkTelemetryService? telemetry = null)
    {
        _client = new TcpClient();
        _peerAddress = peerAddress;
        _logger = logger;
        _handshakeService = handshakeService;
        _telemetry = telemetry;
        _protocol = new ProtocolHandler(logger, telemetry);
    }
    /// <summary>
    /// Connects to the configured remote peer.
    /// </summary>
    /// <param name="token">A token used to cancel the connection attempt.</param>
    /// <returns>A task that represents the asynchronous connect operation.</returns>
    /// <exception cref="ArgumentException">The peer address is not in <c>host:port</c> format or the port is invalid.</exception>
    /// <exception cref="TimeoutException">The connection attempt exceeded <see cref="ConnectionTimeoutMs"/>.</exception>
    /// <exception cref="ObjectDisposedException">The client has been disposed.</exception>
    public async Task ConnectAsync(CancellationToken token)
    {
        lock (_connectionLock)
        {
            if (_disposed)
            {
                throw new ObjectDisposedException(nameof(TcpPeerClient));
            }
            if (IsConnected) return;
        }
        var parts = _peerAddress.Split(':');
        if (parts.Length != 2)
        {
            throw new ArgumentException($"Invalid address format: {_peerAddress}. Expected format: host:port");
        }
        if (!int.TryParse(parts[1], out int port) || port <= 0 || port > 65535)
        {
            throw new ArgumentException($"Invalid port number: {parts[1]}");
        }
        // Connect with timeout
        using var timeoutCts = new CancellationTokenSource(ConnectionTimeoutMs);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(token, timeoutCts.Token);
        try
        {
            // FIX: the linked token must be passed to ConnectAsync — previously it was
            // created but never observed, so the 5s connection timeout never fired.
            await _client.ConnectAsync(parts[0], port, linkedCts.Token);
            lock (_connectionLock)
            {
                if (_disposed)
                {
                    throw new ObjectDisposedException(nameof(TcpPeerClient));
                }
                _stream = _client.GetStream();
                // CRITICAL for Android: Disable Nagle's algorithm to prevent buffering delays
                // This ensures immediate packet transmission for handshake data
                _client.NoDelay = true;
                // Configure TCP keepalive
                _client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);
                // Set read/write timeouts
                _stream.ReadTimeout = OperationTimeoutMs;
                _stream.WriteTimeout = OperationTimeoutMs;
            }
            _logger.LogDebug("Connected to peer: {Address} (NoDelay=true for immediate send)", _peerAddress);
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested && !token.IsCancellationRequested)
        {
            // Only translate to TimeoutException when the timeout (not the caller) cancelled.
            throw new TimeoutException($"Connection to {_peerAddress} timed out after {ConnectionTimeoutMs}ms");
        }
    }
    /// <summary>
    /// Performs authentication handshake with the remote peer.
    /// </summary>
    /// <param name="myNodeId">The local node identifier.</param>
    /// <param name="authToken">The authentication token.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>True if handshake was accepted, false otherwise.</returns>
    public async Task<bool> HandshakeAsync(string myNodeId, string authToken, CancellationToken token)
    {
        return await HandshakeAsync(myNodeId, authToken, null, token);
    }
    /// <summary>
    /// Performs authentication handshake with the remote peer, including collection interests.
    /// Also performs the secure (encrypted) handshake when a handshake service was supplied,
    /// and negotiates Brotli compression when both sides support it.
    /// </summary>
    /// <param name="myNodeId">The local node identifier.</param>
    /// <param name="authToken">The authentication token.</param>
    /// <param name="interestingCollections">Optional collection names this node is interested in receiving.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns><see langword="true"/> if handshake was accepted; otherwise <see langword="false"/>.</returns>
    public async Task<bool> HandshakeAsync(string myNodeId, string authToken, IEnumerable<string>? interestingCollections, CancellationToken token)
    {
        if (HasHandshaked) return true;
        if (_handshakeService != null)
        {
            // Perform secure handshake if service is available
            // We assume we are initiator here
            _cipherState = await _handshakeService.HandshakeAsync(_stream!, true, myNodeId, token);
        }
        var req = new HandshakeRequest { NodeId = myNodeId, AuthToken = authToken ?? "" };
        if (interestingCollections != null)
        {
            foreach (var coll in interestingCollections)
            {
                req.InterestingCollections.Add(coll);
            }
        }
        if (CompressionHelper.IsBrotliSupported)
        {
            req.SupportedCompression.Add("brotli");
        }
        _logger.LogDebug("Sending HandshakeReq to {Address}", _peerAddress);
        await _protocol.SendMessageAsync(_stream!, MessageType.HandshakeReq, req, false, _cipherState, token);
        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        _logger.LogDebug("Received Handshake response type: {Type}", type);
        if (type != MessageType.HandshakeRes) return false;
        var res = HandshakeResponse.Parser.ParseFrom(payload);
        // Store remote interests
        _remoteInterests = res.InterestingCollections.ToList();
        // Negotiation Result
        if (res.SelectedCompression == "brotli")
        {
            _useCompression = true;
            _logger.LogInformation("Brotli compression negotiated.");
        }
        HasHandshaked = res.Accepted;
        return res.Accepted;
    }
    /// <summary>
    /// Retrieves the remote peer's latest HLC timestamp.
    /// </summary>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The latest remote hybrid logical clock timestamp.</returns>
    /// <exception cref="InvalidOperationException">The peer returned an unexpected message type.</exception>
    public async Task<HlcTimestamp> GetClockAsync(CancellationToken token)
    {
        using (_telemetry?.StartMetric(MetricType.RoundTripTime))
        {
            await _protocol.SendMessageAsync(_stream!, MessageType.GetClockReq, new GetClockRequest(), _useCompression, _cipherState, token);
            var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
            if (type != MessageType.ClockRes) throw new InvalidOperationException("Unexpected response");
            var res = ClockResponse.Parser.ParseFrom(payload);
            return new HlcTimestamp(res.HlcWall, res.HlcLogic, res.HlcNode);
        }
    }
    /// <summary>
    /// Retrieves the remote peer's vector clock (latest timestamp per node).
    /// </summary>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The remote vector clock.</returns>
    /// <exception cref="InvalidOperationException">The peer returned an unexpected message type.</exception>
    public async Task<VectorClock> GetVectorClockAsync(CancellationToken token)
    {
        using (_telemetry?.StartMetric(MetricType.RoundTripTime))
        {
            await _protocol.SendMessageAsync(_stream!, MessageType.GetVectorClockReq, new GetVectorClockRequest(), _useCompression, _cipherState, token);
            var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
            if (type != MessageType.VectorClockRes) throw new InvalidOperationException("Unexpected response");
            var res = VectorClockResponse.Parser.ParseFrom(payload);
            var vectorClock = new VectorClock();
            foreach (var entry in res.Entries)
            {
                vectorClock.SetTimestamp(entry.NodeId, new HlcTimestamp(entry.HlcWall, entry.HlcLogic, entry.NodeId));
            }
            return vectorClock;
        }
    }
    /// <summary>
    /// Pulls oplog changes from the remote peer since the specified timestamp.
    /// </summary>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    public async Task<List<OplogEntry>> PullChangesAsync(HlcTimestamp since, CancellationToken token)
    {
        return await PullChangesAsync(since, null, token);
    }
    /// <summary>
    /// Pulls oplog changes from the remote peer since the specified timestamp, filtered by collections.
    /// </summary>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="collections">Optional collection names used to filter the returned entries.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    /// <exception cref="InvalidOperationException">The peer returned an unexpected message type.</exception>
    public async Task<List<OplogEntry>> PullChangesAsync(HlcTimestamp since, IEnumerable<string>? collections, CancellationToken token)
    {
        var req = new PullChangesRequest
        {
            SinceWall = since.PhysicalTime,
            SinceLogic = since.LogicalCounter,
            // Empty SinceNode indicates a global pull (not source-node filtered).
            SinceNode = string.Empty
        };
        if (collections != null)
        {
            foreach (var coll in collections)
            {
                req.Collections.Add(coll);
            }
        }
        await _protocol.SendMessageAsync(_stream!, MessageType.PullChangesReq, req, _useCompression, _cipherState, token);
        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.ChangeSetRes) throw new InvalidOperationException("Unexpected response");
        var res = ChangeSetResponse.Parser.ParseFrom(payload);
        return res.Entries.Select(MapProtoEntry).ToList();
    }
    /// <summary>
    /// Pulls oplog changes for a specific node from the remote peer since the specified timestamp.
    /// </summary>
    /// <param name="nodeId">The node identifier to filter changes by.</param>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    public async Task<List<OplogEntry>> PullChangesFromNodeAsync(string nodeId, HlcTimestamp since, CancellationToken token)
    {
        return await PullChangesFromNodeAsync(nodeId, since, null, token);
    }
    /// <summary>
    /// Pulls oplog changes for a specific node from the remote peer since the specified timestamp, filtered by collections.
    /// </summary>
    /// <param name="nodeId">The node identifier to filter changes by.</param>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="collections">Optional collection names used to filter the returned entries.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    /// <exception cref="InvalidOperationException">The peer returned an unexpected message type.</exception>
    public async Task<List<OplogEntry>> PullChangesFromNodeAsync(string nodeId, HlcTimestamp since, IEnumerable<string>? collections, CancellationToken token)
    {
        var req = new PullChangesRequest
        {
            SinceNode = nodeId,
            SinceWall = since.PhysicalTime,
            SinceLogic = since.LogicalCounter
        };
        if (collections != null)
        {
            foreach (var coll in collections)
            {
                req.Collections.Add(coll);
            }
        }
        await _protocol.SendMessageAsync(_stream!, MessageType.PullChangesReq, req, _useCompression, _cipherState, token);
        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.ChangeSetRes) throw new InvalidOperationException("Unexpected response");
        var res = ChangeSetResponse.Parser.ParseFrom(payload);
        return res.Entries.Select(MapProtoEntry).ToList();
    }
    /// <summary>
    /// Retrieves a range of oplog entries connecting two hashes (Gap Recovery).
    /// </summary>
    /// <param name="startHash">The starting hash in the chain.</param>
    /// <param name="endHash">The ending hash in the chain.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The chain entries connecting the requested hash range.</returns>
    /// <exception cref="SnapshotRequiredException">The peer cannot serve the range and requires a full snapshot sync.</exception>
    /// <exception cref="InvalidOperationException">The peer returned an unexpected message type.</exception>
    public virtual async Task<List<OplogEntry>> GetChainRangeAsync(string startHash, string endHash, CancellationToken token)
    {
        var req = new GetChainRangeRequest { StartHash = startHash, EndHash = endHash };
        await _protocol.SendMessageAsync(_stream!, MessageType.GetChainRangeReq, req, _useCompression, _cipherState, token);
        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.ChainRangeRes) throw new InvalidOperationException($"Unexpected response for ChainRange: {type}");
        var res = ChainRangeResponse.Parser.ParseFrom(payload);
        if (res.SnapshotRequired) throw new SnapshotRequiredException();
        return res.Entries.Select(MapProtoEntry).ToList();
    }
    /// <summary>
    /// Maps a wire-format oplog entry to the domain <see cref="OplogEntry"/>.
    /// Shared by all pull/range operations so the deserialization logic lives in one place.
    /// </summary>
    /// <param name="e">The protobuf entry received from the peer.</param>
    /// <returns>The domain oplog entry.</returns>
    private static OplogEntry MapProtoEntry(ProtoOplogEntry e) => new OplogEntry(
        e.Collection,
        e.Key,
        ParseOp(e.Operation),
        string.IsNullOrEmpty(e.JsonData) ? default : System.Text.Json.JsonSerializer.Deserialize<System.Text.Json.JsonElement>(e.JsonData),
        new HlcTimestamp(e.HlcWall, e.HlcLogic, e.HlcNode),
        e.PreviousHash,
        e.Hash // Pass the received hash to preserve integrity reference
    );
    /// <summary>
    /// Pushes local oplog changes to the remote peer.
    /// </summary>
    /// <param name="entries">The oplog entries to push.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous push operation.</returns>
    /// <exception cref="SnapshotRequiredException">The peer requires a full snapshot sync before accepting pushes.</exception>
    /// <exception cref="InvalidOperationException">The push was rejected or answered with an unexpected message type.</exception>
    public async Task PushChangesAsync(IEnumerable<OplogEntry> entries, CancellationToken token)
    {
        var req = new PushChangesRequest();
        var entryList = entries.ToList();
        if (entryList.Count == 0) return;
        foreach (var e in entryList)
        {
            req.Entries.Add(new ProtoOplogEntry
            {
                Collection = e.Collection,
                Key = e.Key,
                Operation = e.Operation.ToString(),
                JsonData = e.Payload?.GetRawText() ?? "",
                HlcWall = e.Timestamp.PhysicalTime,
                HlcLogic = e.Timestamp.LogicalCounter,
                HlcNode = e.Timestamp.NodeId,
                Hash = e.Hash,
                PreviousHash = e.PreviousHash
            });
        }
        await _protocol.SendMessageAsync(_stream!, MessageType.PushChangesReq, req, _useCompression, _cipherState, token);
        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.AckRes) throw new InvalidOperationException("Push failed");
        var res = AckResponse.Parser.ParseFrom(payload);
        if (res.SnapshotRequired) throw new SnapshotRequiredException();
        if (!res.Success) throw new InvalidOperationException("Push failed");
    }
    // Unknown operation strings default to Put to keep sync tolerant of newer peers.
    private static OperationType ParseOp(string op) => Enum.TryParse<OperationType>(op, out var val) ? val : OperationType.Put;
    /// <summary>
    /// Downloads a full snapshot from the remote peer to the provided destination stream.
    /// </summary>
    /// <param name="destination">The stream that receives snapshot bytes.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous snapshot transfer operation.</returns>
    /// <exception cref="InvalidOperationException">The peer sent an unexpected message type mid-transfer.</exception>
    public async Task GetSnapshotAsync(Stream destination, CancellationToken token)
    {
        await _protocol.SendMessageAsync(_stream!, MessageType.GetSnapshotReq, new GetSnapshotRequest(), _useCompression, _cipherState, token);
        while (true)
        {
            var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
            if (type != MessageType.SnapshotChunkMsg) throw new InvalidOperationException($"Unexpected message type during snapshot: {type}");
            var chunk = SnapshotChunk.Parser.ParseFrom(payload);
            if (chunk.Data.Length > 0)
            {
                await destination.WriteAsync(chunk.Data.ToByteArray(), 0, chunk.Data.Length, token);
            }
            if (chunk.IsLast) break;
        }
    }
    /// <summary>
    /// Releases resources used by the peer client.
    /// </summary>
    public void Dispose()
    {
        lock (_connectionLock)
        {
            if (_disposed) return;
            _disposed = true;
        }
        try
        {
            _stream?.Dispose();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error disposing network stream");
        }
        try
        {
            _client?.Dispose();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error disposing TCP client");
        }
        _logger.LogDebug("Disposed connection to peer: {Address}", _peerAddress);
    }
}
/// <summary>
/// Thrown when a peer indicates that incremental sync is impossible and a full snapshot transfer is required.
/// </summary>
public class SnapshotRequiredException : Exception
{
    /// <summary>
    /// Initializes a new instance of the <see cref="SnapshotRequiredException"/> class.
    /// </summary>
    public SnapshotRequiredException() : base("Peer requires a full snapshot sync.") { }

    /// <summary>
    /// Initializes a new instance of the <see cref="SnapshotRequiredException"/> class with a custom message.
    /// </summary>
    /// <param name="message">The error message.</param>
    public SnapshotRequiredException(string message) : base(message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="SnapshotRequiredException"/> class with a custom message and inner exception.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="innerException">The exception that caused this one.</param>
    public SnapshotRequiredException(string message, Exception innerException) : base(message, innerException) { }
}

View File

@@ -0,0 +1,474 @@
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Network.Proto;
using ZB.MOM.WW.CBDDC.Network.Security;
using ZB.MOM.WW.CBDDC.Network.Protocol;
using ZB.MOM.WW.CBDDC.Network.Telemetry;
using Google.Protobuf;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Threading;
using System.Threading.Tasks;
using Serilog.Context;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// TCP server that handles incoming synchronization requests from remote peers.
/// </summary>
internal class TcpSyncServer : ISyncServer
{
private readonly IOplogStore _oplogStore;
private readonly IDocumentStore _documentStore;
private readonly ISnapshotService _snapshotStore;
private readonly ILogger<TcpSyncServer> _logger;
private readonly IPeerNodeConfigurationProvider _configProvider;
private CancellationTokenSource? _cts;
private TcpListener? _listener;
private readonly object _startStopLock = new object();
private int _activeConnections = 0;
internal int MaxConnections = 100;
private const int ClientOperationTimeoutMs = 60000;
private readonly IAuthenticator _authenticator;
private readonly IPeerHandshakeService _handshakeService;
private readonly INetworkTelemetryService? _telemetry;
/// <summary>
/// Initializes a new instance of the TcpSyncServer class with the specified peer oplogStore, configuration provider,
/// logger, and authenticator.
/// </summary>
/// <remarks>The server automatically restarts when the configuration provided by
/// peerNodeConfigurationProvider changes. This ensures that configuration updates are applied without requiring
/// manual intervention.</remarks>
/// <param name="oplogStore">The peer oplogStore used to manage and persist peer information for the server.</param>
/// <param name="documentStore">The document store used to read and apply synchronized documents.</param>
/// <param name="snapshotStore">The snapshot store used to create and manage database snapshots for synchronization.</param>
/// <param name="peerNodeConfigurationProvider">The provider that supplies configuration settings for the peer node and notifies the server of configuration
/// changes.</param>
/// <param name="logger">The logger used to record informational and error messages for the server instance.</param>
/// <param name="authenticator">The authenticator responsible for validating peer connections to the server.</param>
/// <param name="handshakeService">The service used to perform secure handshake (optional).</param>
/// <param name="telemetry">The optional telemetry service used to record network performance metrics.</param>
public TcpSyncServer(
    IOplogStore oplogStore,
    IDocumentStore documentStore,
    ISnapshotService snapshotStore,
    IPeerNodeConfigurationProvider peerNodeConfigurationProvider,
    ILogger<TcpSyncServer> logger,
    IAuthenticator authenticator,
    IPeerHandshakeService handshakeService,
    INetworkTelemetryService? telemetry = null)
{
    _oplogStore = oplogStore;
    _documentStore = documentStore;
    _snapshotStore = snapshotStore;
    _logger = logger;
    _authenticator = authenticator;
    _handshakeService = handshakeService;
    _configProvider = peerNodeConfigurationProvider;
    _telemetry = telemetry;
    _configProvider.ConfigurationChanged += async (s, e) =>
    {
        // FIX: this lambda is an async-void event handler — an unhandled exception
        // from Stop()/Start() here would escape the handler and crash the process.
        try
        {
            _logger.LogInformation("Configuration changed, restarting TCP Sync Server...");
            await Stop();
            await Start();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to restart TCP Sync Server after configuration change");
        }
    };
}
/// <summary>
/// Starts the TCP synchronization server and begins listening for incoming connections asynchronously.
/// </summary>
/// <remarks>If the server is already running, this method returns immediately without starting a new
/// listener. The server will listen on the TCP port specified in the current configuration.</remarks>
/// <returns>A task that represents the asynchronous start operation.</returns>
public async Task Start()
{
    var config = await _configProvider.GetConfiguration();
    CancellationTokenSource cts;
    lock (_startStopLock)
    {
        if (_cts != null)
        {
            _logger.LogWarning("TCP Sync Server already started");
            return;
        }
        cts = new CancellationTokenSource();
        _cts = cts;
    }
    _listener = new TcpListener(IPAddress.Any, config.TcpPort);
    _listener.Start();
    _logger.LogInformation("TCP Sync Server Listening on port {Port}", config.TcpPort);
    // Read the token from the local reference, not from _cts: a concurrent Stop()
    // can null the field between releasing the lock and this line, which would
    // otherwise throw a NullReferenceException here.
    var token = cts.Token;
    _ = Task.Run(async () =>
    {
        try
        {
            await ListenAsync(token);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "TCP Listen task failed");
        }
    }, token);
    await Task.CompletedTask;
}
/// <summary>
/// Stops the listener and cancels any pending operations.
/// </summary>
/// <remarks>Once stopped, no new connections are accepted and no further requests are processed.
/// Calling this method repeatedly is safe: if the listener is already stopped, subsequent calls
/// do nothing.</remarks>
/// <returns>A task that represents the asynchronous stop operation.</returns>
public async Task Stop()
{
    CancellationTokenSource? cts;
    TcpListener? listener;
    // Atomically take ownership of the current cts/listener pair and clear the fields,
    // so a concurrent Start() sees a cleanly stopped server.
    lock (_startStopLock)
    {
        cts = _cts;
        listener = _listener;
        if (cts == null)
        {
            _logger.LogWarning("TCP Sync Server already stopped or never started");
            return;
        }
        _cts = null;
        _listener = null;
    }
    try
    {
        cts.Cancel();
    }
    catch (ObjectDisposedException)
    {
        // The token source was already disposed; nothing left to cancel.
    }
    finally
    {
        cts.Dispose();
    }
    listener?.Stop();
    await Task.CompletedTask;
}
/// <summary>
/// Gets the full local endpoint on which the server is listening, or null when stopped.
/// </summary>
public IPEndPoint? ListeningEndpoint
{
    get { return _listener?.LocalEndpoint as IPEndPoint; }
}
/// <summary>
/// Gets the port on which the server is listening, or null when stopped.
/// </summary>
public int? ListeningPort
{
    get { return ListeningEndpoint?.Port; }
}
/// <summary>
/// Accept loop: admits clients up to <see cref="MaxConnections"/> and dispatches each
/// to <see cref="HandleClientAsync"/> on the thread pool.
/// </summary>
/// <param name="token">Token that stops the loop when the server shuts down.</param>
private async Task ListenAsync(CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        try
        {
            // Snapshot the field: Stop() may null _listener concurrently.
            var listener = _listener;
            if (listener == null) break;
            // Pass the token so a pending accept is aborted promptly on Stop();
            // without it the loop only re-checks cancellation after the next client.
            var client = await listener.AcceptTcpClientAsync(token);
            if (_activeConnections >= MaxConnections)
            {
                _logger.LogWarning("Max connections reached ({Max}). Rejecting client.", MaxConnections);
                client.Close();
                continue;
            }
            Interlocked.Increment(ref _activeConnections);
            _ = Task.Run(async () =>
            {
                try
                {
                    await HandleClientAsync(client, token);
                }
                finally
                {
                    // Always release the slot, even if the handler throws.
                    Interlocked.Decrement(ref _activeConnections);
                }
            }, token);
        }
        catch (OperationCanceledException) { break; }
        catch (ObjectDisposedException) { break; }
        catch (Exception ex)
        {
            _logger.LogError(ex, "TCP Accept Error");
        }
    }
}
/// <summary>
/// Services a single client connection: performs the secure channel handshake (when a
/// handshake service is configured), authenticates the peer via HandshakeReq/Res, then
/// processes protocol messages until disconnect, error, or cancellation.
/// </summary>
/// <param name="client">The accepted TCP client; disposed when this method returns.</param>
/// <param name="token">Token that aborts the message loop on server shutdown.</param>
private async Task HandleClientAsync(TcpClient client, CancellationToken token)
{
    var remoteEp = client.Client.RemoteEndPoint;
    using var operationContext = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
    using var endpointContext = LogContext.PushProperty("RemoteEndpoint", remoteEp?.ToString() ?? "unknown");
    _logger.LogDebug("Client Connected: {Endpoint}", remoteEp);
    try
    {
        using (client)
        using (var stream = client.GetStream())
        {
            // CRITICAL for Android: Disable Nagle's algorithm for immediate packet send
            client.NoDelay = true;
            // Configure TCP keepalive
            client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);
            // Set stream timeouts
            stream.ReadTimeout = ClientOperationTimeoutMs;
            stream.WriteTimeout = ClientOperationTimeoutMs;
            var protocol = new ProtocolHandler(_logger, _telemetry);
            bool useCompression = false;
            CipherState? cipherState = null;
            List<string> remoteInterests = new();
            // Perform Secure Handshake (if service is available)
            var config = await _configProvider.GetConfiguration();
            if (_handshakeService != null)
            {
                try
                {
                    // We are NOT initiator
                    _logger.LogDebug("Starting Secure Handshake as Responder.");
                    cipherState = await _handshakeService.HandshakeAsync(stream, false, config.NodeId, token);
                    _logger.LogDebug("Secure Handshake Completed.");
                }
                catch (Exception ex)
                {
                    _logger.LogError(ex, "Secure handshake failed");
                    return;
                }
            }
            while (client.Connected && !token.IsCancellationRequested)
            {
                // Re-fetch config if needed, though usually stable
                config = await _configProvider.GetConfiguration();
                var (type, payload) = await protocol.ReadMessageAsync(stream, cipherState, token);
                if (type == MessageType.Unknown) break; // EOF or Error
                // Handshake Loop
                if (type == MessageType.HandshakeReq)
                {
                    var hReq = HandshakeRequest.Parser.ParseFrom(payload);
                    _logger.LogDebug("Received HandshakeReq from Node {NodeId}", hReq.NodeId);
                    // Track remote peer interests
                    remoteInterests = hReq.InterestingCollections.ToList();
                    bool valid = await _authenticator.ValidateAsync(hReq.NodeId, hReq.AuthToken);
                    if (!valid)
                    {
                        _logger.LogWarning("Authentication failed for Node {NodeId}", hReq.NodeId);
                        await protocol.SendMessageAsync(stream, MessageType.HandshakeRes, new HandshakeResponse { NodeId = config.NodeId, Accepted = false }, false, cipherState, token);
                        return;
                    }
                    var hRes = new HandshakeResponse { NodeId = config.NodeId, Accepted = true };
                    // Include local interests from IDocumentStore in response for push filtering
                    foreach (var coll in _documentStore.InterestedCollection)
                    {
                        hRes.InterestingCollections.Add(coll);
                    }
                    if (CompressionHelper.IsBrotliSupported && hReq.SupportedCompression.Contains("brotli"))
                    {
                        hRes.SelectedCompression = "brotli";
                        useCompression = true;
                    }
                    await protocol.SendMessageAsync(stream, MessageType.HandshakeRes, hRes, false, cipherState, token);
                    continue;
                }
                IMessage? response = null;
                MessageType resType = MessageType.Unknown;
                switch (type)
                {
                    case MessageType.GetClockReq:
                        var clock = await _oplogStore.GetLatestTimestampAsync(token);
                        response = new ClockResponse
                        {
                            HlcWall = clock.PhysicalTime,
                            HlcLogic = clock.LogicalCounter,
                            HlcNode = clock.NodeId
                        };
                        resType = MessageType.ClockRes;
                        break;
                    case MessageType.GetVectorClockReq:
                        var vectorClock = await _oplogStore.GetVectorClockAsync(token);
                        var vcRes = new VectorClockResponse();
                        foreach (var nodeId in vectorClock.NodeIds)
                        {
                            var ts = vectorClock.GetTimestamp(nodeId);
                            vcRes.Entries.Add(new VectorClockEntry
                            {
                                NodeId = nodeId,
                                HlcWall = ts.PhysicalTime,
                                HlcLogic = ts.LogicalCounter
                            });
                        }
                        response = vcRes;
                        resType = MessageType.VectorClockRes;
                        break;
                    case MessageType.PullChangesReq:
                        var pReq = PullChangesRequest.Parser.ParseFrom(payload);
                        var since = new HlcTimestamp(pReq.SinceWall, pReq.SinceLogic, pReq.SinceNode);
                        // Use collection filter from request
                        var filter = pReq.Collections.Any() ? pReq.Collections : null;
                        // An empty SinceNode means "all nodes"; otherwise restrict to that node's chain.
                        var oplog = string.IsNullOrWhiteSpace(pReq.SinceNode)
                            ? await _oplogStore.GetOplogAfterAsync(since, filter, token)
                            : await _oplogStore.GetOplogForNodeAfterAsync(pReq.SinceNode, since, filter, token);
                        var csRes = new ChangeSetResponse();
                        foreach (var e in oplog)
                        {
                            csRes.Entries.Add(new ProtoOplogEntry
                            {
                                Collection = e.Collection,
                                Key = e.Key,
                                Operation = e.Operation.ToString(),
                                JsonData = e.Payload?.GetRawText() ?? "",
                                HlcWall = e.Timestamp.PhysicalTime,
                                HlcLogic = e.Timestamp.LogicalCounter,
                                HlcNode = e.Timestamp.NodeId,
                                Hash = e.Hash,
                                PreviousHash = e.PreviousHash
                            });
                        }
                        response = csRes;
                        resType = MessageType.ChangeSetRes;
                        break;
                    case MessageType.PushChangesReq:
                        var pushReq = PushChangesRequest.Parser.ParseFrom(payload);
                        var entries = pushReq.Entries.Select(e => new OplogEntry(
                            e.Collection,
                            e.Key,
                            Enum.Parse<OperationType>(e.Operation),
                            string.IsNullOrEmpty(e.JsonData) ? (System.Text.Json.JsonElement?)null : System.Text.Json.JsonSerializer.Deserialize<System.Text.Json.JsonElement>(e.JsonData),
                            new HlcTimestamp(e.HlcWall, e.HlcLogic, e.HlcNode),
                            e.PreviousHash, // Restore PreviousHash
                            e.Hash // Restore Hash
                        ));
                        await _oplogStore.ApplyBatchAsync(entries, token);
                        response = new AckResponse { Success = true };
                        resType = MessageType.AckRes;
                        break;
                    case MessageType.GetChainRangeReq:
                        var rangeReq = GetChainRangeRequest.Parser.ParseFrom(payload);
                        var rangeEntries = await _oplogStore.GetChainRangeAsync(rangeReq.StartHash, rangeReq.EndHash, token);
                        var rangeRes = new ChainRangeResponse();
                        if (!rangeEntries.Any() && rangeReq.StartHash != rangeReq.EndHash)
                        {
                            // Gap cannot be filled (likely pruned or unknown branch)
                            rangeRes.SnapshotRequired = true;
                        }
                        else
                        {
                            foreach (var e in rangeEntries)
                            {
                                rangeRes.Entries.Add(new ProtoOplogEntry
                                {
                                    Collection = e.Collection,
                                    Key = e.Key,
                                    Operation = e.Operation.ToString(),
                                    JsonData = e.Payload?.GetRawText() ?? "",
                                    HlcWall = e.Timestamp.PhysicalTime,
                                    HlcLogic = e.Timestamp.LogicalCounter,
                                    HlcNode = e.Timestamp.NodeId,
                                    Hash = e.Hash,
                                    PreviousHash = e.PreviousHash
                                });
                            }
                        }
                        response = rangeRes;
                        resType = MessageType.ChainRangeRes;
                        break;
                    case MessageType.GetSnapshotReq:
                        _logger.LogInformation("Processing GetSnapshotReq from {Endpoint}", remoteEp);
                        // Snapshots are streamed manually as chunk messages; no single response object.
                        var tempFile = Path.GetTempFileName();
                        try
                        {
                            // Create backup
                            using (var fs = File.Create(tempFile))
                            {
                                await _snapshotStore.CreateSnapshotAsync(fs, token);
                            }
                            using (var fs = File.OpenRead(tempFile))
                            {
                                byte[] buffer = new byte[80 * 1024]; // 80KB chunks
                                int bytesRead;
                                while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length, token)) > 0)
                                {
                                    var chunk = new SnapshotChunk
                                    {
                                        Data = ByteString.CopyFrom(buffer, 0, bytesRead),
                                        IsLast = false
                                    };
                                    await protocol.SendMessageAsync(stream, MessageType.SnapshotChunkMsg, chunk, false, cipherState, token);
                                }
                                // Send End of Snapshot
                                await protocol.SendMessageAsync(stream, MessageType.SnapshotChunkMsg, new SnapshotChunk { IsLast = true }, false, cipherState, token);
                            }
                        }
                        finally
                        {
                            if (File.Exists(tempFile)) File.Delete(tempFile);
                        }
                        break;
                }
                if (response != null)
                {
                    await protocol.SendMessageAsync(stream, resType, response, useCompression, cipherState, token);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Pass the exception to the logger so the stack trace is preserved,
        // not just ex.Message.
        _logger.LogWarning(ex, "Client Handler Error from {Endpoint}", remoteEp);
    }
    finally
    {
        _logger.LogDebug("Client Disconnected: {Endpoint}", remoteEp);
    }
}
}

View File

@@ -0,0 +1,60 @@
using System;
using System.Diagnostics;
namespace ZB.MOM.WW.CBDDC.Network.Telemetry;
/// <summary>
/// Records network performance metrics and exposes aggregated snapshots of them.
/// </summary>
public interface INetworkTelemetryService
{
/// <summary>
/// Records a metric value for the specified metric type.
/// </summary>
/// <param name="type">The metric type to record.</param>
/// <param name="value">The metric value.</param>
void RecordValue(MetricType type, double value);
/// <summary>
/// Starts timing a metric for the specified metric type.
/// </summary>
/// <param name="type">The metric type to time.</param>
/// <returns>A timer that records elapsed time when disposed.</returns>
MetricTimer StartMetric(MetricType type);
/// <summary>
/// Gets a snapshot of all recorded metric values.
/// </summary>
/// <returns>A dictionary of metric values grouped by metric type and bucket.</returns>
System.Collections.Generic.Dictionary<MetricType, System.Collections.Generic.Dictionary<int, double>> GetSnapshot();
}
/// <summary>
/// Disposable stopwatch that reports its elapsed time in milliseconds to an
/// <see cref="INetworkTelemetryService"/> when disposed. Intended for use in a
/// <c>using</c> statement around the operation being measured.
/// </summary>
public readonly struct MetricTimer : IDisposable
{
    private readonly INetworkTelemetryService _service;
    private readonly MetricType _type;
    private readonly long _startTimestamp;
    /// <summary>
    /// Initializes a new metric timer.
    /// </summary>
    /// <param name="service">The telemetry service that receives the recorded value.</param>
    /// <param name="type">The metric type being timed.</param>
    public MetricTimer(INetworkTelemetryService service, MetricType type)
    {
        _service = service;
        _type = type;
        _startTimestamp = Stopwatch.GetTimestamp();
    }
    /// <summary>
    /// Stops timing and records the elapsed duration in milliseconds.
    /// </summary>
    public void Dispose()
    {
        // A default(MetricTimer) carries no service reference; disposing it
        // must be a no-op rather than a NullReferenceException.
        if (_service is null) return;
        var elapsed = Stopwatch.GetTimestamp() - _startTimestamp;
        // Stopwatch ticks are hardware dependent (Stopwatch.Frequency), so
        // normalize to milliseconds before recording.
        double ms = (double)elapsed * 1000 / Stopwatch.Frequency;
        _service.RecordValue(_type, ms);
    }
}

View File

@@ -0,0 +1,9 @@
namespace ZB.MOM.WW.CBDDC.Network.Telemetry;
/// <summary>
/// Categories of network performance metrics tracked by the telemetry service.
/// </summary>
public enum MetricType
{
/// <summary>Ratio of compressed to uncompressed payload size.</summary>
CompressionRatio = 0,
/// <summary>Time spent encrypting outbound payloads, in milliseconds.</summary>
EncryptionTime = 1,
/// <summary>Time spent decrypting inbound payloads, in milliseconds.</summary>
DecryptionTime = 2,
/// <summary>Request/response round-trip time, in milliseconds.</summary>
RoundTripTime = 3
}

View File

@@ -0,0 +1,283 @@
using System;
using System.Buffers;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Channels;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
namespace ZB.MOM.WW.CBDDC.Network.Telemetry;
/// <summary>
/// Aggregates network metrics into per-second buckets covering a 30-minute rolling
/// window, and periodically persists rolling averages to a binary file.
/// Metric ingestion is lock-free for producers (an unbounded channel); a single
/// background task folds values into the shared bucket ring under <c>_lock</c>.
/// </summary>
public class NetworkTelemetryService : INetworkTelemetryService, IDisposable
{
    private readonly Channel<(MetricType Type, double Value)> _metricChannel;
    private readonly CancellationTokenSource _cts;
    private readonly ILogger<NetworkTelemetryService> _logger;
    private readonly string _persistencePath;
    // Aggregation State
    // We keep 30m of history with 1s resolution = 1800 buckets.
    private const int MaxHistorySeconds = 1800;
    private readonly object _lock = new object();
    private readonly MetricBucket[] _history;
    private int _headIndex = 0; // Points to current second
    private long _currentSecondTimestamp; // Unix timestamp of current bucket
    /// <summary>
    /// Initializes a new instance of the <see cref="NetworkTelemetryService"/> class.
    /// </summary>
    /// <param name="logger">The logger used to report telemetry processing and persistence errors.</param>
    /// <param name="persistencePath">The file path where persisted telemetry snapshots are written.</param>
    public NetworkTelemetryService(ILogger<NetworkTelemetryService> logger, string persistencePath)
    {
        _logger = logger;
        _persistencePath = persistencePath;
        _metricChannel = Channel.CreateUnbounded<(MetricType, double)>(new UnboundedChannelOptions
        {
            SingleReader = true,
            SingleWriter = false
        });
        _cts = new CancellationTokenSource();
        _history = new MetricBucket[MaxHistorySeconds];
        for (int i = 0; i < MaxHistorySeconds; i++) _history[i] = new MetricBucket();
        _currentSecondTimestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
        _ = Task.Run(ProcessMetricsLoop);
        _ = Task.Run(PersistenceLoop);
    }
    /// <summary>
    /// Records a metric value for the specified metric type.
    /// </summary>
    /// <param name="type">The metric category to update.</param>
    /// <param name="value">The metric value to record.</param>
    public void RecordValue(MetricType type, double value)
    {
        _metricChannel.Writer.TryWrite((type, value));
    }
    /// <summary>
    /// Starts a timer for the specified metric type.
    /// </summary>
    /// <param name="type">The metric category to time.</param>
    /// <returns>A metric timer that records elapsed time when disposed.</returns>
    public MetricTimer StartMetric(MetricType type)
    {
        return new MetricTimer(this, type);
    }
    /// <summary>
    /// Gets a point-in-time snapshot of rolling averages for each metric type.
    /// </summary>
    /// <returns>A dictionary keyed by metric type containing average values by window size in seconds.</returns>
    public Dictionary<MetricType, Dictionary<int, double>> GetSnapshot()
    {
        var snapshot = new Dictionary<MetricType, Dictionary<int, double>>();
        var windows = new[] { 60, 300, 600, 1800 };
        lock (_lock)
        {
            foreach (var type in Enum.GetValues(typeof(MetricType)).Cast<MetricType>())
            {
                var typeDict = new Dictionary<int, double>();
                foreach (var w in windows)
                {
                    typeDict[w] = CalculateAverage(type, w);
                }
                snapshot[type] = typeDict;
            }
        }
        return snapshot;
    }
    /// <summary>
    /// Single-reader loop draining the metric channel into the bucket ring.
    /// Exits when the channel is completed or the service is disposed.
    /// </summary>
    private async Task ProcessMetricsLoop()
    {
        var reader = _metricChannel.Reader;
        while (!_cts.IsCancellationRequested)
        {
            try
            {
                if (await reader.WaitToReadAsync(_cts.Token))
                {
                    while (reader.TryRead(out var item))
                    {
                        AddMetricToCurrentBucket(item.Type, item.Value);
                    }
                }
                else
                {
                    // Channel completed (Dispose) — nothing more will arrive; avoid
                    // spinning on WaitToReadAsync returning false.
                    break;
                }
            }
            catch (OperationCanceledException) { break; }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error processing metrics");
            }
        }
    }
    /// <summary>
    /// Folds a single value into the bucket for the current wall-clock second,
    /// rotating (and clearing) buckets when the second has advanced.
    /// </summary>
    private void AddMetricToCurrentBucket(MetricType type, double value)
    {
        long now = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
        lock (_lock)
        {
            // Rotate bucket if second changed
            if (now > _currentSecondTimestamp)
            {
                long diff = now - _currentSecondTimestamp;
                // Move head forward, clearing buckets in between if gap > 1s
                for (int i = 0; i < diff && i < MaxHistorySeconds; i++)
                {
                    _headIndex = (_headIndex + 1) % MaxHistorySeconds;
                    _history[_headIndex].Reset();
                }
                _currentSecondTimestamp = now;
            }
            _history[_headIndex].Add(type, value);
        }
    }
    /// <summary>
    /// Persists rolling averages once per minute until disposed.
    /// </summary>
    private async Task PersistenceLoop()
    {
        while (!_cts.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(TimeSpan.FromMinutes(1), _cts.Token);
                CalculateAndPersist();
            }
            catch (OperationCanceledException) { break; }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error persisting metrics");
            }
        }
    }
    private void CalculateAndPersist()
    {
        var windows = new[] { 60, 300, 600, 1800 }; // 1m, 5m, 10m, 30m
        var types = Enum.GetValues(typeof(MetricType)).Cast<MetricType>().ToArray();
        var averages = new double[types.Length, windows.Length];
        // Compute the averages under the lock, but perform file I/O outside it so
        // slow disks never block metric ingestion.
        lock (_lock)
        {
            for (int t = 0; t < types.Length; t++)
            {
                for (int w = 0; w < windows.Length; w++)
                {
                    averages[t, w] = CalculateAverage(types[t], windows[w]);
                }
            }
        }
        using var fs = new FileStream(_persistencePath, FileMode.Create, FileAccess.Write);
        using var bw = new BinaryWriter(fs);
        // Header
        bw.Write((byte)1); // Version
        bw.Write(DateTimeOffset.UtcNow.ToUnixTimeSeconds()); // Timestamp
        for (int t = 0; t < types.Length; t++)
        {
            bw.Write((int)types[t]);
            for (int w = 0; w < windows.Length; w++)
            {
                bw.Write(windows[w]); // Window Seconds
                bw.Write(averages[t, w]); // Average Value
            }
        }
    }
    /// <summary>
    /// Forces immediate calculation and persistence of telemetry data.
    /// </summary>
    internal void ForcePersist()
    {
        CalculateAndPersist();
    }
    // Averages the last `seconds` worth of buckets for `type`.
    // Callers must hold _lock.
    private double CalculateAverage(MetricType type, int seconds)
    {
        // Go backwards from head
        double sum = 0;
        int count = 0;
        int scanned = 0;
        int idx = _headIndex;
        while (scanned < seconds && scanned < MaxHistorySeconds)
        {
            var bucket = _history[idx];
            sum += bucket.GetSum(type);
            count += bucket.GetCount(type);
            idx--;
            if (idx < 0) idx = MaxHistorySeconds - 1;
            scanned++;
        }
        return count == 0 ? 0 : sum / count;
    }
    /// <summary>
    /// Releases resources used by the telemetry service.
    /// </summary>
    public void Dispose()
    {
        // Complete the channel first so the processing loop drains cleanly
        // instead of waiting on a channel that will never be completed.
        _metricChannel.Writer.TryComplete();
        _cts.Cancel();
        _cts.Dispose();
    }
}
/// <summary>
/// One second's worth of accumulated metric samples: a running sum and sample
/// count per <see cref="MetricType"/>. Not thread-safe on its own; callers
/// serialize access via the telemetry service's global lock.
/// </summary>
internal class MetricBucket
{
    // Accumulators indexed by (int)MetricType.
    private readonly double[] _totals;
    private readonly int[] _samples;
    /// <summary>
    /// Initializes a new instance of the <see cref="MetricBucket"/> class.
    /// </summary>
    public MetricBucket()
    {
        int size = Enum.GetValues(typeof(MetricType)).Length;
        _totals = new double[size];
        _samples = new int[size];
    }
    /// <summary>
    /// Clears all accumulated metric sums and counts in this bucket.
    /// </summary>
    public void Reset()
    {
        Array.Clear(_totals, 0, _totals.Length);
        Array.Clear(_samples, 0, _samples.Length);
    }
    /// <summary>
    /// Adds a metric value to the bucket.
    /// </summary>
    /// <param name="type">The metric category to update.</param>
    /// <param name="value">The value to accumulate.</param>
    public void Add(MetricType type, double value)
    {
        var slot = (int)type;
        _totals[slot] += value;
        _samples[slot] += 1;
    }
    /// <summary>
    /// Gets the accumulated sum for a metric type.
    /// </summary>
    /// <param name="type">The metric category to read.</param>
    /// <returns>The accumulated sum for the specified metric type.</returns>
    public double GetSum(MetricType type) => _totals[(int)type];
    /// <summary>
    /// Gets the accumulated count for a metric type.
    /// </summary>
    /// <param name="type">The metric category to read.</param>
    /// <returns>The accumulated sample count for the specified metric type.</returns>
    public int GetCount(MetricType type) => _samples[(int)type];
}

View File

@@ -0,0 +1,317 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Storage;
using Microsoft.Extensions.Logging;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core;
namespace ZB.MOM.WW.CBDDC.Network;
/// <summary>
/// Provides UDP-based peer discovery for the CBDDC network.
/// Broadcasts presence beacons and listens for other nodes on the local network.
/// </summary>
/// <summary>
/// Provides UDP-based peer discovery for the CBDDC network.
/// Broadcasts presence beacons and listens for other nodes on the local network.
/// </summary>
internal class UdpDiscoveryService : IDiscoveryService
{
    private const int DiscoveryPort = 25000;
    private readonly ILogger<UdpDiscoveryService> _logger;
    private readonly IPeerNodeConfigurationProvider _configProvider;
    private readonly IDocumentStore _documentStore;
    // Non-null only while the service is running; guarded by _startStopLock.
    private CancellationTokenSource? _cts;
    // Peers seen recently, keyed by NodeId; pruned by CleanupAsync.
    private readonly ConcurrentDictionary<string, PeerNode> _activePeers = new();
    private readonly object _startStopLock = new object();
    /// <summary>
    /// Initializes a new instance of the <see cref="UdpDiscoveryService"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">Provider for peer node configuration.</param>
    /// <param name="documentStore">Document store used to obtain collection interests.</param>
    /// <param name="logger">Logger for discovery service events.</param>
    public UdpDiscoveryService(
        IPeerNodeConfigurationProvider peerNodeConfigurationProvider,
        IDocumentStore documentStore,
        ILogger<UdpDiscoveryService> logger)
    {
        _configProvider = peerNodeConfigurationProvider ?? throw new ArgumentNullException(nameof(peerNodeConfigurationProvider));
        _documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
        _logger = logger;
    }
    /// <summary>
    /// Starts the discovery service, initiating listener, broadcaster, and cleanup tasks.
    /// </summary>
    public async Task Start()
    {
        lock (_startStopLock)
        {
            if (_cts != null)
            {
                _logger.LogWarning("UDP Discovery Service already started");
                return;
            }
            _cts = new CancellationTokenSource();
        }
        var token = _cts.Token;
        _ = Task.Run(async () =>
        {
            try
            {
                await ListenAsync(token);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "UDP Listen task failed");
            }
        }, token);
        _ = Task.Run(async () =>
        {
            try
            {
                await BroadcastAsync(token);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "UDP Broadcast task failed");
            }
        }, token);
        _ = Task.Run(async () =>
        {
            try
            {
                await CleanupAsync(token);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "UDP Cleanup task failed");
            }
        }, token);
        await Task.CompletedTask;
    }
    /// <summary>
    /// Periodically removes peers that have not been heard from recently.
    /// </summary>
    private async Task CleanupAsync(CancellationToken token)
    {
        while (!token.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(10000, token); // Check every 10s
                var now = DateTimeOffset.UtcNow;
                var expired = new List<string>();
                foreach (var pair in _activePeers)
                {
                    // Expiry: 15 seconds (broadcast is every 5s, so 3 missed beats = dead)
                    if ((now - pair.Value.LastSeen).TotalSeconds > 15)
                    {
                        expired.Add(pair.Key);
                    }
                }
                foreach (var id in expired)
                {
                    if (_activePeers.TryRemove(id, out var removed))
                    {
                        _logger.LogInformation("Peer Expired: {NodeId} at {Endpoint}", removed.NodeId, removed.Address);
                    }
                }
            }
            catch (OperationCanceledException) { break; }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Cleanup Loop Error");
            }
        }
    }
    /// <summary>
    /// Registers or refreshes the peer that sent a beacon.
    /// </summary>
    private void HandleBeacon(DiscoveryBeacon beacon, IPAddress address)
    {
        var peerId = beacon.NodeId;
        var endpoint = $"{address}:{beacon.TcpPort}";
        var peer = new PeerNode(peerId, endpoint, DateTimeOffset.UtcNow, interestingCollections: beacon.InterestingCollections);
        _activePeers.AddOrUpdate(peerId, peer, (key, old) => peer);
    }
    /// <summary>
    /// Stops the discovery service.
    /// </summary>
    /// <returns>A task that completes when stop processing has finished.</returns>
    public async Task Stop()
    {
        CancellationTokenSource? ctsToDispose = null;
        lock (_startStopLock)
        {
            if (_cts == null)
            {
                _logger.LogWarning("UDP Discovery Service already stopped or never started");
                return;
            }
            ctsToDispose = _cts;
            _cts = null;
        }
        try
        {
            ctsToDispose.Cancel();
        }
        catch (ObjectDisposedException)
        {
            // Already disposed, ignore
        }
        finally
        {
            ctsToDispose.Dispose();
        }
        await Task.CompletedTask;
    }
    /// <summary>
    /// Gets the currently active peers discovered on the network.
    /// </summary>
    /// <returns>The collection of active peers.</returns>
    public IEnumerable<PeerNode> GetActivePeers() => _activePeers.Values;
    /// <summary>
    /// Receives discovery beacons and registers peers from the same cluster.
    /// </summary>
    private async Task ListenAsync(CancellationToken token)
    {
        using var udp = new UdpClient();
        udp.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, true);
        udp.Client.Bind(new IPEndPoint(IPAddress.Any, DiscoveryPort));
        _logger.LogInformation("UDP Discovery Listening on port {Port}", DiscoveryPort);
        while (!token.IsCancellationRequested)
        {
            try
            {
                // Pass the token: without it a pending receive keeps this task alive
                // indefinitely after Stop(), because the loop condition is only
                // re-checked after the next datagram arrives.
                var result = await udp.ReceiveAsync(token);
                var json = Encoding.UTF8.GetString(result.Buffer);
                try
                {
                    var config = await _configProvider.GetConfiguration();
                    var _nodeId = config.NodeId;
                    var localClusterHash = ComputeClusterHash(config.AuthToken);
                    var beacon = JsonSerializer.Deserialize<DiscoveryBeacon>(json);
                    if (beacon != null && beacon.NodeId != _nodeId)
                    {
                        // Filter by ClusterHash to reduce congestion from different clusters
                        if (!string.Equals(beacon.ClusterHash, localClusterHash, StringComparison.Ordinal))
                        {
                            // Optional: Log trace if needed, but keeping it silent avoids flooding logs during congestion
                            continue;
                        }
                        HandleBeacon(beacon, result.RemoteEndPoint.Address);
                    }
                }
                catch (Exception ex)
                {
                    _logger.LogWarning(ex, "Failed to parse beacon from {Address}", result.RemoteEndPoint.Address);
                }
            }
            catch (OperationCanceledException) { break; }
            catch (ObjectDisposedException) { break; }
            catch (Exception ex)
            {
                _logger.LogError(ex, "UDP Listener Error");
            }
        }
    }
    /// <summary>
    /// Broadcasts this node's beacon every 5 seconds until cancelled.
    /// </summary>
    private async Task BroadcastAsync(CancellationToken token)
    {
        using var udp = new UdpClient();
        udp.EnableBroadcast = true;
        var endpoint = new IPEndPoint(IPAddress.Broadcast, DiscoveryPort);
        while (!token.IsCancellationRequested)
        {
            try
            {
                // Re-fetch config each time in case it changes (though usually static)
                var conf = await _configProvider.GetConfiguration();
                var beacon = new DiscoveryBeacon
                {
                    NodeId = conf.NodeId,
                    TcpPort = conf.TcpPort,
                    ClusterHash = ComputeClusterHash(conf.AuthToken),
                    InterestingCollections = _documentStore.InterestedCollection.ToList()
                };
                var json = JsonSerializer.Serialize(beacon);
                var bytes = Encoding.UTF8.GetBytes(json);
                await udp.SendAsync(bytes, bytes.Length, endpoint);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "UDP Broadcast Error");
            }
            try
            {
                await Task.Delay(5000, token);
            }
            catch (OperationCanceledException)
            {
                // Normal shutdown; don't let cancellation surface as a task failure.
                break;
            }
        }
    }
    /// <summary>
    /// Derives a short cluster identifier from the shared auth token so beacons
    /// from unrelated clusters can be filtered without revealing the token.
    /// </summary>
    private string ComputeClusterHash(string authToken)
    {
        if (string.IsNullOrEmpty(authToken)) return "";
        using var sha256 = System.Security.Cryptography.SHA256.Create();
        var bytes = Encoding.UTF8.GetBytes(authToken);
        var hash = sha256.ComputeHash(bytes);
        // Return first 8 chars (4 bytes hex) is enough for filtering
        return BitConverter.ToString(hash).Replace("-", "").Substring(0, 8);
    }
    private class DiscoveryBeacon
    {
        /// <summary>
        /// Gets or sets the broadcasting node identifier.
        /// </summary>
        [System.Text.Json.Serialization.JsonPropertyName("node_id")]
        public string NodeId { get; set; } = "";
        /// <summary>
        /// Gets or sets the TCP port used by the broadcasting node.
        /// </summary>
        [System.Text.Json.Serialization.JsonPropertyName("tcp_port")]
        public int TcpPort { get; set; }
        /// <summary>
        /// Gets or sets the cluster hash used for discovery filtering.
        /// </summary>
        [System.Text.Json.Serialization.JsonPropertyName("cluster_hash")]
        public string ClusterHash { get; set; } = "";
        /// <summary>
        /// Gets or sets the collections the node is interested in.
        /// </summary>
        [System.Text.Json.Serialization.JsonPropertyName("interests")]
        public List<string> InterestingCollections { get; set; } = new();
    }
}

View File

@@ -0,0 +1,52 @@
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<ProjectReference Include="..\ZB.MOM.WW.CBDDC.Core\ZB.MOM.WW.CBDDC.Core.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Google.Protobuf" Version="3.25.1" />
<!-- Grpc.Tools is build-time only (protoc code generation); it must not flow to consumers. -->
<PackageReference Include="Grpc.Tools" Version="2.76.0">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
<PrivateAssets>all</PrivateAssets>
</PackageReference>
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="8.0.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
<PackageReference Include="Serilog" Version="4.2.0" />
</ItemGroup>
<ItemGroup>
<!-- Messages only; GrpcServices="None" generates message classes without gRPC service stubs. -->
<Protobuf Include="sync.proto" GrpcServices="None" />
</ItemGroup>
<PropertyGroup>
<AssemblyName>ZB.MOM.WW.CBDDC.Network</AssemblyName>
<RootNamespace>ZB.MOM.WW.CBDDC.Network</RootNamespace>
<PackageId>ZB.MOM.WW.CBDDC.Network</PackageId>
<TargetFramework>net10.0</TargetFramework>
<LangVersion>latest</LangVersion>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<Version>1.0.3</Version>
<Authors>MrDevRobot</Authors>
<Description>Networking layer (TCP/UDP/Gossip) for CBDDC.</Description>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
<PackageTags>p2p;mesh;network;gossip;lan;udp;tcp;discovery</PackageTags>
<PackageProjectUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</PackageProjectUrl>
<RepositoryUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageReadmeFile>README.md</PackageReadmeFile>
</PropertyGroup>
<ItemGroup>
<None Include="README.md" Pack="true" PackagePath="\" />
</ItemGroup>
<ItemGroup>
<!-- Expose internals to the test assembly without a separate AssemblyInfo file. -->
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
<_Parameter1>ZB.MOM.WW.CBDDC.Network.Tests</_Parameter1>
</AssemblyAttribute>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,117 @@
syntax = "proto3";
package ZB.MOM.WW.CBDDC.Network.Proto;
option csharp_namespace = "ZB.MOM.WW.CBDDC.Network.Proto";
// Application-level authentication / capability exchange, sent after the
// (optional) secure channel handshake.
message HandshakeRequest {
string node_id = 1;
string auth_token = 2;
repeated string supported_compression = 3; // v4
repeated string interesting_collections = 4; // v5
}
message HandshakeResponse {
string node_id = 1;
bool accepted = 2;
string selected_compression = 3; // v4
repeated string interesting_collections = 4; // v5
}
message GetClockRequest {
}
// Latest hybrid logical clock (HLC) of the responding node.
message ClockResponse {
int64 hlc_wall = 1;
int32 hlc_logic = 2;
string hlc_node = 3;
}
message GetVectorClockRequest {
}
message VectorClockResponse {
repeated VectorClockEntry entries = 1;
}
// One node's latest known HLC within a vector clock.
message VectorClockEntry {
string node_id = 1;
int64 hlc_wall = 2;
int32 hlc_logic = 3;
}
// Requests oplog entries newer than the given HLC; empty since_node means "all nodes".
message PullChangesRequest {
int64 since_wall = 1;
int32 since_logic = 2;
string since_node = 3;
repeated string collections = 4; // v5: Filter by collection
}
message ChangeSetResponse {
repeated ProtoOplogEntry entries = 1;
}
message PushChangesRequest {
repeated ProtoOplogEntry entries = 1;
}
// Requests the hash-chain segment between two known entry hashes (gap repair).
message GetChainRangeRequest {
string start_hash = 1;
string end_hash = 2;
}
message ChainRangeResponse {
repeated ProtoOplogEntry entries = 1;
bool snapshot_required = 2;
}
message AckResponse {
bool success = 1;
bool snapshot_required = 2;
}
// Wire form of a single oplog entry, including its hash-chain links.
message ProtoOplogEntry {
string collection = 1;
string key = 2;
string operation = 3; // "Put" or "Delete"
string json_data = 4;
int64 hlc_wall = 5;
int32 hlc_logic = 6;
string hlc_node = 7;
string hash = 8;
string previous_hash = 9;
}
message GetSnapshotRequest {
}
// One streamed chunk of a database snapshot; is_last marks end of stream.
message SnapshotChunk {
bytes data = 1;
bool is_last = 2;
}
// Enum for wire framing (1 byte)
enum MessageType {
Unknown = 0;
HandshakeReq = 1;
HandshakeRes = 2;
GetClockReq = 3;
ClockRes = 4;
PullChangesReq = 5;
ChangeSetRes = 6;
PushChangesReq = 7;
AckRes = 8;
SecureEnv = 9;
GetChainRangeReq = 10;
ChainRangeRes = 11;
GetVectorClockReq = 12;
VectorClockRes = 13;
GetSnapshotReq = 14;
SnapshotChunkMsg = 15;
}
message SecureEnvelope {
bytes ciphertext = 1; // Encrypted payload
bytes nonce = 2; // IV or Nonce
bytes auth_tag = 3; // HMAC or Auth Tag if using AEAD (optional if concatenated)
}

View File

@@ -0,0 +1,230 @@
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
/// <summary>
/// BLite implementation of document metadata storage for sync tracking.
/// </summary>
/// <typeparam name="TDbContext">The type of CBDDCDocumentDbContext.</typeparam>
public class BLiteDocumentMetadataStore<TDbContext> : DocumentMetadataStore where TDbContext : CBDDCDocumentDbContext
{
    private readonly TDbContext _context;
    private readonly ILogger<BLiteDocumentMetadataStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BLiteDocumentMetadataStore{TDbContext}"/> class.
    /// </summary>
    /// <param name="context">The BLite document database context.</param>
    /// <param name="logger">The optional logger instance.</param>
    public BLiteDocumentMetadataStore(TDbContext context, ILogger<BLiteDocumentMetadataStore<TDbContext>>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<BLiteDocumentMetadataStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    public override Task<DocumentMetadata?> GetMetadataAsync(string collection, string key, CancellationToken cancellationToken = default)
    {
        // BLite reads are synchronous; return a completed task rather than an
        // async state machine that never awaits (fixes CS1998).
        var entity = FindEntity(collection, key);
        return Task.FromResult<DocumentMetadata?>(entity != null ? ToDomain(entity) : null);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<DocumentMetadata>> GetMetadataByCollectionAsync(string collection, CancellationToken cancellationToken = default)
    {
        IEnumerable<DocumentMetadata> result = _context.DocumentMetadatas
            .Find(m => m.Collection == collection)
            .Select(ToDomain)
            .ToList();
        return Task.FromResult(result);
    }

    /// <inheritdoc />
    public override async Task UpsertMetadataAsync(DocumentMetadata metadata, CancellationToken cancellationToken = default)
    {
        await UpsertCoreAsync(metadata);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task UpsertMetadataBatchAsync(IEnumerable<DocumentMetadata> metadatas, CancellationToken cancellationToken = default)
    {
        foreach (var metadata in metadatas)
        {
            cancellationToken.ThrowIfCancellationRequested();
            await UpsertCoreAsync(metadata);
        }
        // Single commit for the whole batch.
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task MarkDeletedAsync(string collection, string key, HlcTimestamp timestamp, CancellationToken cancellationToken = default)
    {
        var existing = FindEntity(collection, key);
        if (existing == null)
        {
            // No row yet: create a tombstone so the deletion still replicates.
            await _context.DocumentMetadatas.InsertAsync(new DocumentMetadataEntity
            {
                Id = Guid.NewGuid().ToString(),
                Collection = collection,
                Key = key,
                HlcPhysicalTime = timestamp.PhysicalTime,
                HlcLogicalCounter = timestamp.LogicalCounter,
                HlcNodeId = timestamp.NodeId,
                IsDeleted = true
            });
        }
        else
        {
            ApplyTimestamp(existing, timestamp, isDeleted: true);
            await _context.DocumentMetadatas.UpdateAsync(existing);
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<DocumentMetadata>> GetMetadataAfterAsync(HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default)
    {
        // NOTE(review): filtering/ordering compares (physical, logical) only and
        // ignores the HLC node-id tie-break — confirm this matches sync semantics.
        var query = _context.DocumentMetadatas.AsQueryable()
            .Where(m => (m.HlcPhysicalTime > since.PhysicalTime) ||
                        (m.HlcPhysicalTime == since.PhysicalTime && m.HlcLogicalCounter > since.LogicalCounter));
        if (collections != null)
        {
            var collectionSet = new HashSet<string>(collections);
            query = query.Where(m => collectionSet.Contains(m.Collection));
        }
        IEnumerable<DocumentMetadata> result = query
            .OrderBy(m => m.HlcPhysicalTime)
            .ThenBy(m => m.HlcLogicalCounter)
            .Select(ToDomain)
            .ToList();
        return Task.FromResult(result);
    }

    /// <inheritdoc />
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        var allIds = _context.DocumentMetadatas.FindAll().Select(m => m.Id).ToList();
        await _context.DocumentMetadatas.DeleteBulkAsync(allIds);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<DocumentMetadata>> ExportAsync(CancellationToken cancellationToken = default)
    {
        IEnumerable<DocumentMetadata> result = _context.DocumentMetadatas.FindAll().Select(ToDomain).ToList();
        return Task.FromResult(result);
    }

    /// <inheritdoc />
    public override async Task ImportAsync(IEnumerable<DocumentMetadata> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            await _context.DocumentMetadatas.InsertAsync(ToEntity(item));
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task MergeAsync(IEnumerable<DocumentMetadata> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            cancellationToken.ThrowIfCancellationRequested();
            var existing = FindEntity(item.Collection, item.Key);
            if (existing == null)
            {
                await _context.DocumentMetadatas.InsertAsync(ToEntity(item));
                continue;
            }
            // Last-writer-wins: apply the incoming record only if it is newer.
            var existingTs = new HlcTimestamp(existing.HlcPhysicalTime, existing.HlcLogicalCounter, existing.HlcNodeId);
            if (item.UpdatedAt.CompareTo(existingTs) > 0)
            {
                ApplyTimestamp(existing, item.UpdatedAt, item.IsDeleted);
                await _context.DocumentMetadatas.UpdateAsync(existing);
            }
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    #region Helpers

    /// <summary>Looks up the stored entity for a collection/key pair, or null.</summary>
    private DocumentMetadataEntity? FindEntity(string collection, string key)
    {
        return _context.DocumentMetadatas
            .Find(m => m.Collection == collection && m.Key == key)
            .FirstOrDefault();
    }

    /// <summary>Copies an HLC timestamp and the tombstone flag onto an entity.</summary>
    private static void ApplyTimestamp(DocumentMetadataEntity entity, HlcTimestamp timestamp, bool isDeleted)
    {
        entity.HlcPhysicalTime = timestamp.PhysicalTime;
        entity.HlcLogicalCounter = timestamp.LogicalCounter;
        entity.HlcNodeId = timestamp.NodeId;
        entity.IsDeleted = isDeleted;
    }

    /// <summary>Inserts or updates a single metadata row without committing.</summary>
    private async Task UpsertCoreAsync(DocumentMetadata metadata)
    {
        var existing = FindEntity(metadata.Collection, metadata.Key);
        if (existing == null)
        {
            await _context.DocumentMetadatas.InsertAsync(ToEntity(metadata));
        }
        else
        {
            ApplyTimestamp(existing, metadata.UpdatedAt, metadata.IsDeleted);
            await _context.DocumentMetadatas.UpdateAsync(existing);
        }
    }

    #endregion

    #region Mappers

    /// <summary>Maps a persistence entity to the domain model.</summary>
    private static DocumentMetadata ToDomain(DocumentMetadataEntity entity)
    {
        return new DocumentMetadata(
            entity.Collection,
            entity.Key,
            new HlcTimestamp(entity.HlcPhysicalTime, entity.HlcLogicalCounter, entity.HlcNodeId),
            entity.IsDeleted
        );
    }

    /// <summary>Maps the domain model to a new persistence entity (fresh Id).</summary>
    private static DocumentMetadataEntity ToEntity(DocumentMetadata metadata)
    {
        return new DocumentMetadataEntity
        {
            Id = Guid.NewGuid().ToString(),
            Collection = metadata.Collection,
            Key = metadata.Key,
            HlcPhysicalTime = metadata.UpdatedAt.PhysicalTime,
            HlcLogicalCounter = metadata.UpdatedAt.LogicalCounter,
            HlcNodeId = metadata.UpdatedAt.NodeId,
            IsDeleted = metadata.IsDeleted
        };
    }

    #endregion
}

View File

@@ -0,0 +1,209 @@
# BLiteDocumentStore - Usage Guide
## Overview
`BLiteDocumentStore<TDbContext>` is an abstract base class that simplifies creating document stores for CBDDC with BLite persistence. It handles all Oplog management internally, so you only need to implement entity-to-JSON mapping methods.
## Key Features
- ✅ **Automatic Oplog Creation** - Local changes automatically create Oplog entries
- ✅ **Remote Sync Handling** - AsyncLocal flag suppresses Oplog during sync (prevents duplicates)
- ✅ **No CDC Events Needed** - Direct Oplog management eliminates event loops
- ✅ **Simple API** - Only 4 abstract methods to implement
## Architecture
```
User Code → SampleDocumentStore (extends BLiteDocumentStore)
                ↓
        BLiteDocumentStore
        ├── _context.Users / TodoLists (read/write entities)
        └── _context.OplogEntries (write oplog directly)

Remote Sync → OplogStore.ApplyBatchAsync()
                ↓
        BLiteDocumentStore.PutDocumentAsync(fromSync=true)
        ├── _context.Users / TodoLists (write only)
        └── _context.OplogEntries (skip - already exists)
```
**Key Advantage**: No circular dependency! `BLiteDocumentStore` writes directly to `CBDDCDocumentDbContext.OplogEntries` collection.
## Implementation Example
```csharp
public class SampleDocumentStore : BLiteDocumentStore<SampleDbContext>
{
public SampleDocumentStore(
SampleDbContext context,
IPeerNodeConfigurationProvider configProvider,
ILogger<SampleDocumentStore>? logger = null)
: base(context, configProvider, new LastWriteWinsConflictResolver(), logger)
{
}
public override IEnumerable<string> InterestedCollection => new[] { "Users", "TodoLists" };
protected override async Task ApplyContentToEntityAsync(
string collection, string key, JsonElement content, CancellationToken ct)
{
switch (collection)
{
case "Users":
var user = content.Deserialize<User>()!;
user.Id = key;
var existingUser = _context.Users.FindById(key);
if (existingUser != null)
await _context.Users.UpdateAsync(user);
else
await _context.Users.InsertAsync(user);
await _context.SaveChangesAsync(ct);
break;
case "TodoLists":
var todoList = content.Deserialize<TodoList>()!;
todoList.Id = key;
var existingTodoList = _context.TodoLists.FindById(key);
if (existingTodoList != null)
await _context.TodoLists.UpdateAsync(todoList);
else
await _context.TodoLists.InsertAsync(todoList);
await _context.SaveChangesAsync(ct);
break;
default:
throw new NotSupportedException($"Collection '{collection}' is not supported");
}
}
protected override Task<JsonElement?> GetEntityAsJsonAsync(
string collection, string key, CancellationToken ct)
{
return Task.FromResult<JsonElement?>(collection switch
{
"Users" => SerializeEntity(_context.Users.FindById(key)),
"TodoLists" => SerializeEntity(_context.TodoLists.FindById(key)),
_ => null
});
}
protected override async Task RemoveEntityAsync(
string collection, string key, CancellationToken ct)
{
switch (collection)
{
case "Users":
await _context.Users.DeleteAsync(key);
await _context.SaveChangesAsync(ct);
break;
case "TodoLists":
await _context.TodoLists.DeleteAsync(key);
await _context.SaveChangesAsync(ct);
break;
}
}
protected override async Task<IEnumerable<(string Key, JsonElement Content)>> GetAllEntitiesAsJsonAsync(
string collection, CancellationToken ct)
{
return await Task.Run(() => collection switch
{
"Users" => _context.Users.FindAll()
.Select(u => (u.Id, SerializeEntity(u)!.Value)),
"TodoLists" => _context.TodoLists.FindAll()
.Select(t => (t.Id, SerializeEntity(t)!.Value)),
_ => Enumerable.Empty<(string, JsonElement)>()
}, ct);
}
private static JsonElement? SerializeEntity<T>(T? entity) where T : class
{
if (entity == null) return null;
return JsonSerializer.SerializeToElement(entity);
}
}
```
## Usage in Application
### Setup (DI Container)
```csharp
services.AddSingleton<SampleDbContext>(sp =>
new SampleDbContext("data/sample.blite"));
// No OplogStore dependency needed!
services.AddSingleton<IDocumentStore, SampleDocumentStore>();
services.AddSingleton<IOplogStore, BLiteOplogStore<SampleDbContext>>();
```
### Local Changes (User operations)
```csharp
// User inserts a new user
var user = new User { Id = "user-1", Name = "Alice" };
await _context.Users.InsertAsync(user);
await _context.SaveChangesAsync();
// The application then needs to notify the DocumentStore:
var document = new Document(
"Users",
"user-1",
JsonSerializer.SerializeToElement(user),
new HlcTimestamp(0, 0, ""),
false);
await documentStore.PutDocumentAsync(document);
// ✅ This creates an OplogEntry automatically
```
### Remote Sync (Automatic)
```csharp
// When OplogStore.ApplyBatchAsync receives remote changes:
await oplogStore.ApplyBatchAsync(remoteEntries, cancellationToken);
// Internally, this calls:
using (documentStore.BeginRemoteSync()) // ← Suppresses Oplog creation
{
foreach (var entry in remoteEntries)
{
await documentStore.PutDocumentAsync(entryAsDocument);
// ← Writes to DB only, no Oplog duplication
}
}
```
## Migration from Old CDC-based Approach
### Before (with CDC Events)
```csharp
// SampleDocumentStore subscribes to BLite CDC
// CDC emits events → OplogCoordinator creates Oplog
// Problem: Remote sync also triggers CDC → duplicate Oplog entries
```
### After (with BLiteDocumentStore)
```csharp
// Direct Oplog management in DocumentStore
// AsyncLocal flag prevents duplicates during sync
// No CDC events needed
```
## Benefits
1. **No Event Loops** - Direct control over Oplog creation
2. **Thread-Safe** - AsyncLocal handles concurrent operations
3. **Simpler** - Only 4 methods to implement vs full CDC subscription
4. **Transparent** - Oplog management is hidden from user code
## Next Steps
After implementing your DocumentStore:
1. Remove CDC subscriptions from your code
2. Remove `OplogCoordinator` from DI (no longer needed)
3. Test local operations create Oplog entries
4. Test remote sync doesn't create duplicate entries

View File

@@ -0,0 +1,733 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
using BLite.Core.CDC;
using BLite.Core.Collections;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Core.Sync;
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using BLiteOperationType = BLite.Core.Transactions.OperationType;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
/// <summary>
/// Abstract base class for BLite-based document stores.
/// Handles Oplog creation internally - subclasses only implement entity mapping.
/// </summary>
/// <typeparam name="TDbContext">The BLite DbContext type.</typeparam>
public abstract class BLiteDocumentStore<TDbContext> : IDocumentStore, IDisposable
where TDbContext : CBDDCDocumentDbContext
{
// BLite context owning both the user entity collections and OplogEntries.
protected readonly TDbContext _context;
// Supplies the local node id used when stamping new oplog entries.
protected readonly IPeerNodeConfigurationProvider _configProvider;
// Resolves concurrent writes during MergeAsync (defaults to last-write-wins).
protected readonly IConflictResolver _conflictResolver;
// Injected at construction; not used in the code visible here.
protected readonly IVectorClockService _vectorClock;
protected readonly ILogger<BLiteDocumentStore<TDbContext>> _logger;
/// <summary>
/// Semaphore used to suppress CDC-triggered OplogEntry creation during remote sync.
/// CurrentCount == 0 - sync in progress, CDC must skip.
/// CurrentCount == 1 - no sync, CDC creates OplogEntry.
/// </summary>
private readonly SemaphoreSlim _remoteSyncGuard = new SemaphoreSlim(1, 1);
// Active CDC subscriptions, collected so they can be disposed with the store.
private readonly List<IDisposable> _cdcWatchers = new();
// Logical collection names registered via WatchCollection.
private readonly HashSet<string> _registeredCollections = new();
// HLC state for generating timestamps for local changes
private long _lastPhysicalTime; // last issued wall-clock time (unix ms)
private int _logicalCounter; // tie-break counter within one millisecond
private readonly object _clockLock = new object(); // guards the two fields above
/// <summary>
/// Initializes a new instance of the <see cref="BLiteDocumentStore{TDbContext}"/> class.
/// </summary>
/// <param name="context">The BLite database context.</param>
/// <param name="configProvider">The peer node configuration provider.</param>
/// <param name="vectorClockService">The vector clock service.</param>
/// <param name="conflictResolver">The conflict resolver to use for merges; defaults to last-write-wins.</param>
/// <param name="logger">The logger instance; a null logger is used when omitted.</param>
protected BLiteDocumentStore(
    TDbContext context,
    IPeerNodeConfigurationProvider configProvider,
    IVectorClockService vectorClockService,
    IConflictResolver? conflictResolver = null,
    ILogger? logger = null)
{
    ArgumentNullException.ThrowIfNull(context);
    ArgumentNullException.ThrowIfNull(configProvider);
    ArgumentNullException.ThrowIfNull(vectorClockService);

    _context = context;
    _configProvider = configProvider;
    _vectorClock = vectorClockService;
    _conflictResolver = conflictResolver ?? new LastWriteWinsConflictResolver();
    _logger = CreateTypedLogger(logger);

    // Seed the HLC from the current wall clock.
    _lastPhysicalTime = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    _logicalCounter = 0;
}
// Adapts whatever logger the caller supplied into the typed logger this class
// stores: null becomes a no-op logger, an already-typed logger passes through,
// anything else is wrapped in a forwarding adapter.
private static ILogger<BLiteDocumentStore<TDbContext>> CreateTypedLogger(ILogger? logger) => logger switch
{
    null => NullLogger<BLiteDocumentStore<TDbContext>>.Instance,
    ILogger<BLiteDocumentStore<TDbContext>> typed => typed,
    _ => new ForwardingLogger(logger),
};
/// <summary>
/// Adapter that exposes an untyped <see cref="ILogger"/> through the typed
/// logger contract; every call is delegated unchanged to the inner logger.
/// </summary>
private sealed class ForwardingLogger : ILogger<BLiteDocumentStore<TDbContext>>
{
    private readonly ILogger _inner;

    /// <summary>
    /// Initializes a new instance of the <see cref="ForwardingLogger"/> class.
    /// </summary>
    /// <param name="inner">The underlying logger instance.</param>
    public ForwardingLogger(ILogger inner) => _inner = inner;

    /// <inheritdoc />
    public IDisposable? BeginScope<TState>(TState state) where TState : notnull
        => _inner.BeginScope(state);

    /// <inheritdoc />
    public bool IsEnabled(LogLevel logLevel)
        => _inner.IsEnabled(logLevel);

    /// <inheritdoc />
    public void Log<TState>(
        LogLevel logLevel,
        EventId eventId,
        TState state,
        Exception? exception,
        Func<TState, Exception?, string> formatter)
        => _inner.Log(logLevel, eventId, state, exception, formatter);
}
#region CDC Registration
/// <summary>
/// Registers a BLite collection for CDC tracking.
/// Call in subclass constructor for each collection to sync.
/// </summary>
/// <typeparam name="TEntity">The entity type.</typeparam>
/// <param name="collectionName">The logical collection name used in Oplog.</param>
/// <param name="collection">The BLite DocumentCollection.</param>
/// <param name="keySelector">Function to extract the entity key.</param>
protected void WatchCollection<TEntity>(
    string collectionName,
    DocumentCollection<string, TEntity> collection,
    Func<TEntity, string> keySelector)
    where TEntity : class
{
    _registeredCollections.Add(collectionName);

    // Subscribe with payload capture so Put events carry the full entity.
    var observer = new CdcObserver<TEntity>(collectionName, keySelector, this);
    var subscription = collection.Watch(capturePayload: true).Subscribe(observer);
    _cdcWatchers.Add(subscription);
}
/// <summary>
/// Generic CDC observer. Forwards BLite change events to OnLocalChangeDetectedAsync.
/// Automatically skips events when remote sync is in progress.
/// </summary>
private class CdcObserver<TEntity> : IObserver<ChangeStreamEvent<string, TEntity>>
    where TEntity : class
{
    private readonly string _collectionName;
    private readonly Func<TEntity, string> _keySelector;
    private readonly BLiteDocumentStore<TDbContext> _store;

    /// <summary>
    /// Initializes a new instance of the <see cref="CdcObserver{TEntity}"/> class.
    /// </summary>
    /// <param name="collectionName">The logical collection name.</param>
    /// <param name="keySelector">The key selector for observed entities.</param>
    /// <param name="store">The owning document store instance.</param>
    public CdcObserver(
        string collectionName,
        Func<TEntity, string> keySelector,
        BLiteDocumentStore<TDbContext> store)
    {
        _collectionName = collectionName;
        _keySelector = keySelector;
        _store = store;
    }

    /// <summary>
    /// Handles a change stream event from BLite CDC.
    /// </summary>
    /// <param name="changeEvent">The change event payload.</param>
    public void OnNext(ChangeStreamEvent<string, TEntity> changeEvent)
    {
        // NOTE(review): reading CurrentCount is a point-in-time check (TOCTOU);
        // a sync could begin right after it. OnLocalChangeDetectedAsync re-checks
        // the same flag, which narrows but does not close the window.
        if (_store._remoteSyncGuard.CurrentCount == 0) return;
        var entityId = changeEvent.DocumentId?.ToString() ?? "";
        if (changeEvent.Type == BLiteOperationType.Delete)
        {
            // NOTE(review): sync-over-async — .GetAwaiter().GetResult() blocks the
            // CDC callback thread until the oplog write completes; deadlock-prone
            // if BLite dispatches callbacks on a synchronization context.
            _store.OnLocalChangeDetectedAsync(_collectionName, entityId, OperationType.Delete, null)
                .GetAwaiter().GetResult();
        }
        else if (changeEvent.Entity != null)
        {
            // Put path: serialize the captured entity and derive its key.
            var content = JsonSerializer.SerializeToElement(changeEvent.Entity);
            var key = _keySelector(changeEvent.Entity);
            _store.OnLocalChangeDetectedAsync(_collectionName, key, OperationType.Put, content)
                .GetAwaiter().GetResult();
        }
    }

    /// <summary>
    /// Handles CDC observer errors. Intentionally a no-op so a CDC failure does
    /// not crash the store; note that any dropped event means a missed oplog entry.
    /// </summary>
    /// <param name="error">The observed exception.</param>
    public void OnError(Exception error) { }

    /// <summary>
    /// Handles completion of the CDC stream. No cleanup is required here.
    /// </summary>
    public void OnCompleted() { }
}
#endregion
#region Abstract Methods - Implemented by subclass
/// <summary>
/// Applies JSON content to a single entity (insert or update) and commits changes.
/// Called for single-document operations.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="content">The document content to apply.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task ApplyContentToEntityAsync(
    string collection, string key, JsonElement content, CancellationToken cancellationToken);
/// <summary>
/// Applies JSON content to multiple entities (insert or update) with a single commit.
/// Called for batch operations. Must commit all changes in a single SaveChanges.
/// </summary>
/// <param name="documents">The documents to apply in one batch.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task ApplyContentToEntitiesBatchAsync(
    IEnumerable<(string Collection, string Key, JsonElement Content)> documents, CancellationToken cancellationToken);
/// <summary>
/// Reads an entity from the DbContext and returns it as JsonElement.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The entity serialized as JSON, or <see langword="null"/> when not found.</returns>
protected abstract Task<JsonElement?> GetEntityAsJsonAsync(
    string collection, string key, CancellationToken cancellationToken);
/// <summary>
/// Removes a single entity from the DbContext and commits changes.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task RemoveEntityAsync(
    string collection, string key, CancellationToken cancellationToken);
/// <summary>
/// Removes multiple entities from the DbContext with a single commit.
/// </summary>
/// <param name="documents">The documents to remove in one batch.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task RemoveEntitiesBatchAsync(
    IEnumerable<(string Collection, string Key)> documents, CancellationToken cancellationToken);
/// <summary>
/// Reads all entities from a collection as JsonElements.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>Key/content pairs for every entity in the collection.</returns>
protected abstract Task<IEnumerable<(string Key, JsonElement Content)>> GetAllEntitiesAsJsonAsync(
    string collection, CancellationToken cancellationToken);
#endregion
#region IDocumentStore Implementation
/// <summary>
/// Returns the collections registered via WatchCollection.
/// Note: this exposes the live set, so it grows as WatchCollection is called.
/// </summary>
public IEnumerable<string> InterestedCollection => _registeredCollections;
/// <summary>
/// Gets a document by collection and key.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The matching document, or <see langword="null"/> when not found.</returns>
public async Task<Document?> GetDocumentAsync(string collection, string key, CancellationToken cancellationToken = default)
{
    var json = await GetEntityAsJsonAsync(collection, key, cancellationToken);
    if (json is null)
    {
        return null;
    }

    // Entities do not carry their HLC; callers needing the real timestamp
    // must consult the metadata store.
    var placeholderTimestamp = new HlcTimestamp(0, 0, "");
    return new Document(collection, key, json.Value, placeholderTimestamp, false);
}
/// <summary>
/// Gets all documents for a collection.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The documents in the specified collection (lazily projected).</returns>
public async Task<IEnumerable<Document>> GetDocumentsByCollectionAsync(string collection, CancellationToken cancellationToken = default)
{
    var entities = await GetAllEntitiesAsJsonAsync(collection, cancellationToken);

    // Placeholder HLC — real timestamps live in the metadata store.
    var placeholderTimestamp = new HlcTimestamp(0, 0, "");
    return from entity in entities
           select new Document(collection, entity.Key, entity.Content, placeholderTimestamp, false);
}
/// <summary>
/// Gets documents for the specified collection and key pairs.
/// </summary>
/// <param name="documentKeys">The collection and key pairs to resolve.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The documents that were found; missing keys are skipped.</returns>
public async Task<IEnumerable<Document>> GetDocumentsAsync(List<(string Collection, string Key)> documentKeys, CancellationToken cancellationToken)
{
    var results = new List<Document>(documentKeys.Count);

    // Resolve keys sequentially so subclass lookups never run concurrently.
    foreach (var pair in documentKeys)
    {
        var document = await GetDocumentAsync(pair.Collection, pair.Key, cancellationToken);
        if (document is not null)
        {
            results.Add(document);
        }
    }

    return results;
}
/// <summary>
/// Inserts or updates a single document.
/// </summary>
/// <param name="document">The document to persist.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> PutDocumentAsync(Document document, CancellationToken cancellationToken = default)
{
    // Hold the guard so CDC observers do not mirror this write into the oplog.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await PutDocumentInternalAsync(document, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
// Applies the payload without touching the sync guard; callers manage it.
private async Task PutDocumentInternalAsync(Document document, CancellationToken ct)
{
    await ApplyContentToEntityAsync(document.Collection, document.Key, document.Content, ct);
}
/// <summary>
/// Updates a batch of documents.
/// </summary>
/// <param name="documents">The documents to update.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> UpdateBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default)
{
    // Deferred projection: enumeration happens inside the batch apply call.
    var payload = documents.Select(d => (d.Collection, d.Key, d.Content));

    // Hold the guard so CDC stays quiet for the whole batch.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await ApplyContentToEntitiesBatchAsync(payload, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
/// <summary>
/// Inserts a batch of documents.
/// Note: inserts and updates share the same upsert path in the subclass hook.
/// </summary>
/// <param name="documents">The documents to insert.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> InsertBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default)
{
    var payload = documents.Select(d => (d.Collection, d.Key, d.Content));

    // Suppress CDC-driven oplog creation while the batch is applied.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await ApplyContentToEntitiesBatchAsync(payload, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
/// <summary>
/// Deletes a single document.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> DeleteDocumentAsync(string collection, string key, CancellationToken cancellationToken = default)
{
    // Hold the guard so the deletion is not echoed into the oplog by CDC.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await DeleteDocumentInternalAsync(collection, key, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
// Removes the entity without touching the sync guard; callers manage it.
private async Task DeleteDocumentInternalAsync(string collection, string key, CancellationToken ct)
{
    await RemoveEntityAsync(collection, key, ct);
}
/// <summary>
/// Deletes a batch of documents by composite keys.
/// </summary>
/// <param name="documentKeys">The document keys in collection/key format.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> DeleteBatchDocumentsAsync(IEnumerable<string> documentKeys, CancellationToken cancellationToken = default)
{
    var parsedKeys = new List<(string Collection, string Key)>();
    foreach (var compositeKey in documentKeys)
    {
        // Split on the FIRST separator only, so document keys that themselves
        // contain '/' are still deleted (Split('/') used to reject them).
        var parts = compositeKey.Split('/', 2);
        if (parts.Length == 2)
        {
            parsedKeys.Add((parts[0], parts[1]));
        }
        else
        {
            _logger.LogWarning("Invalid document key format: {Key}", compositeKey);
        }
    }

    // Nothing valid to delete: report success without touching the guard.
    if (parsedKeys.Count == 0) return true;

    // Hold the guard so CDC does not mirror these deletions into the oplog.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await RemoveEntitiesBatchAsync(parsedKeys, cancellationToken);
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
    return true;
}
/// <summary>
/// Merges an incoming document with the current stored document.
/// </summary>
/// <param name="incoming">The incoming document.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The stored document after merge resolution.</returns>
public async Task<Document> MergeAsync(Document incoming, CancellationToken cancellationToken = default)
{
    var existing = await GetDocumentAsync(incoming.Collection, incoming.Key, cancellationToken);
    if (existing == null)
    {
        // Use internal method - guard not acquired yet in single-document merge
        // NOTE(review): when called directly (not via the batch MergeAsync that
        // holds _remoteSyncGuard), this write is visible to CDC and will create
        // an oplog entry — confirm that is intended for direct callers.
        await PutDocumentInternalAsync(incoming, cancellationToken);
        return incoming;
    }
    // Use conflict resolver to merge
    // The incoming document is wrapped in a synthetic OplogEntry (Put, empty
    // previous hash) purely so the resolver can compare it against `existing`.
    // NOTE(review): `existing` carries the placeholder timestamp (0, 0, "")
    // from GetDocumentAsync, so a timestamp-based resolver will effectively
    // always prefer `incoming` — verify against the metadata store.
    var resolution = _conflictResolver.Resolve(existing, new OplogEntry(
        incoming.Collection,
        incoming.Key,
        OperationType.Put,
        incoming.Content,
        incoming.UpdatedAt,
        ""));
    if (resolution.ShouldApply && resolution.MergedDocument != null)
    {
        await PutDocumentInternalAsync(resolution.MergedDocument, cancellationToken);
        return resolution.MergedDocument;
    }
    // Resolver kept the stored version; nothing is written.
    return existing;
}
#endregion
#region ISnapshotable Implementation
/// <summary>
/// Removes all tracked documents from registered collections.
/// </summary>
/// <param name="cancellationToken">The cancellation token.</param>
public async Task DropAsync(CancellationToken cancellationToken = default)
{
    foreach (var collectionName in InterestedCollection)
    {
        // Snapshot the keys first, then delete one by one via the subclass hook.
        var snapshot = await GetAllEntitiesAsJsonAsync(collectionName, cancellationToken);
        foreach (var entry in snapshot)
        {
            await RemoveEntityAsync(collectionName, entry.Key, cancellationToken);
        }
    }
}
/// <summary>
/// Exports all tracked documents from registered collections.
/// </summary>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The exported documents.</returns>
public async Task<IEnumerable<Document>> ExportAsync(CancellationToken cancellationToken = default)
{
    var exported = new List<Document>();
    foreach (var collectionName in InterestedCollection)
    {
        exported.AddRange(await GetDocumentsByCollectionAsync(collectionName, cancellationToken));
    }
    return exported;
}
/// <summary>
/// Imports a batch of documents.
/// </summary>
/// <param name="items">The documents to import.</param>
/// <param name="cancellationToken">The cancellation token.</param>
public async Task ImportAsync(IEnumerable<Document> items, CancellationToken cancellationToken = default)
{
    // Deferred projection; enumerated inside the batch apply while the
    // guard is held so CDC does not echo the import into the oplog.
    var payload = items.Select(d => (d.Collection, d.Key, d.Content));

    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await ApplyContentToEntitiesBatchAsync(payload, cancellationToken);
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
/// <summary>
/// Merges a batch of incoming documents.
/// </summary>
/// <param name="items">The incoming documents.</param>
/// <param name="cancellationToken">The cancellation token.</param>
public async Task MergeAsync(IEnumerable<Document> items, CancellationToken cancellationToken = default)
{
    // Hold the guard for the whole batch so CDC-driven oplog creation stays
    // suppressed while each document is merged.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        foreach (var incoming in items)
        {
            await MergeAsync(incoming, cancellationToken);
        }
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
#endregion
#region Oplog Management
/// <summary>
/// Returns true if a remote sync operation is in progress (guard acquired).
/// CDC listeners should check this before creating OplogEntry.
/// Note: this is a point-in-time snapshot of the semaphore count; the state
/// can change immediately after it is read.
/// </summary>
protected bool IsRemoteSyncInProgress => _remoteSyncGuard.CurrentCount == 0;
/// <summary>
/// Called by subclass CDC listeners when a local change is detected.
/// Creates an OplogEntry and DocumentMetadata unless a remote sync is in progress.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="operationType">The detected operation type.</param>
/// <param name="content">The document content when available.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected async Task OnLocalChangeDetectedAsync(
    string collection,
    string key,
    OperationType operationType,
    JsonElement? content,
    CancellationToken cancellationToken = default)
{
    if (!IsRemoteSyncInProgress)
    {
        await CreateOplogEntryAsync(collection, key, operationType, content, cancellationToken);
    }
}
/// <summary>
/// Produces the next hybrid logical clock timestamp for this node: the wall clock
/// when it has advanced, otherwise the previous physical time with an incremented
/// logical counter. Serialized by <c>_clockLock</c>.
/// </summary>
/// <param name="nodeId">The node identifier embedded in the timestamp.</param>
private HlcTimestamp GenerateTimestamp(string nodeId)
{
    lock (_clockLock)
    {
        var wallClock = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        if (wallClock <= _lastPhysicalTime)
        {
            // Wall clock stalled or went backwards: advance the logical counter instead.
            _logicalCounter++;
        }
        else
        {
            _lastPhysicalTime = wallClock;
            _logicalCounter = 0;
        }
        return new HlcTimestamp(_lastPhysicalTime, _logicalCounter, nodeId);
    }
}
/// <summary>
/// Creates an OplogEntry plus DocumentMetadata for a locally detected change and
/// advances the vector clock so sync sees the new local state.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="operationType">The detected operation type.</param>
/// <param name="content">The document content when available.</param>
/// <param name="cancellationToken">The cancellation token.</param>
private async Task CreateOplogEntryAsync(
    string collection,
    string key,
    OperationType operationType,
    JsonElement? content,
    CancellationToken cancellationToken)
{
    var config = await _configProvider.GetConfiguration();
    var nodeId = config.NodeId;
    // Get last hash from OplogEntries collection directly so the new entry chains onto
    // this node's most recent entry (HLC order: physical time, then logical counter).
    var lastEntry = _context.OplogEntries
        .Find(e => e.TimestampNodeId == nodeId)
        .OrderByDescending(e => e.TimestampPhysicalTime)
        .ThenByDescending(e => e.TimestampLogicalCounter)
        .FirstOrDefault();
    // The first entry for this node starts the chain with an empty previous hash.
    var previousHash = lastEntry?.Hash ?? string.Empty;
    var timestamp = GenerateTimestamp(nodeId);
    var oplogEntry = new OplogEntry(
        collection,
        key,
        operationType,
        content,
        timestamp,
        previousHash);
    // Write directly to OplogEntries collection
    await _context.OplogEntries.InsertAsync(oplogEntry.ToEntity());
    // Write DocumentMetadata for sync tracking
    var docMetadata = EntityMappers.CreateDocumentMetadata(
        collection,
        key,
        timestamp,
        isDeleted: operationType == OperationType.Delete);
    var existingMetadata = _context.DocumentMetadatas
        .Find(m => m.Collection == collection && m.Key == key)
        .FirstOrDefault();
    if (existingMetadata != null)
    {
        // Update existing metadata in place with the new HLC point and deletion flag.
        existingMetadata.HlcPhysicalTime = timestamp.PhysicalTime;
        existingMetadata.HlcLogicalCounter = timestamp.LogicalCounter;
        existingMetadata.HlcNodeId = timestamp.NodeId;
        existingMetadata.IsDeleted = operationType == OperationType.Delete;
        await _context.DocumentMetadatas.UpdateAsync(existingMetadata);
    }
    else
    {
        await _context.DocumentMetadatas.InsertAsync(docMetadata);
    }
    await _context.SaveChangesAsync(cancellationToken);
    // Notify VectorClockService so sync sees local changes
    _vectorClock.Update(oplogEntry);
    _logger.LogDebug(
        "Created Oplog entry: {Operation} {Collection}/{Key} at {Timestamp} (hash: {Hash})",
        operationType, collection, key, timestamp, oplogEntry.Hash);
}
/// <summary>
/// Marks the start of remote sync operations (suppresses CDC-triggered Oplog creation).
/// Use in using statement: using (store.BeginRemoteSync()) { ... }
/// </summary>
/// <returns>A scope that releases the guard when disposed.</returns>
public IDisposable BeginRemoteSync()
{
    // Block until the guard is free, then hand ownership of it to the scope.
    _remoteSyncGuard.Wait();
    var scope = new RemoteSyncScope(_remoteSyncGuard);
    return scope;
}
private class RemoteSyncScope : IDisposable
{
    private readonly SemaphoreSlim _guard;
    // 0 = not yet disposed, 1 = disposed; flipped atomically in Dispose.
    private int _disposed;

    /// <summary>
    /// Initializes a new instance of the <see cref="RemoteSyncScope"/> class.
    /// </summary>
    /// <param name="guard">The semaphore guarding remote sync operations.</param>
    public RemoteSyncScope(SemaphoreSlim guard)
    {
        _guard = guard;
    }

    /// <summary>
    /// Releases the remote sync guard. Idempotent: the guard is released at most
    /// once even if Dispose is called repeatedly (a double release would corrupt
    /// the semaphore count or throw).
    /// </summary>
    public void Dispose()
    {
        if (Interlocked.Exchange(ref _disposed, 1) == 0)
        {
            _guard.Release();
        }
    }
}
#endregion
/// <summary>
/// Releases managed resources used by this document store: disposes every CDC
/// watcher (best-effort), clears the watcher list, and disposes the sync guard.
/// </summary>
public virtual void Dispose()
{
    foreach (var watcher in _cdcWatchers)
    {
        try
        {
            watcher.Dispose();
        }
        catch (Exception ex)
        {
            // Best-effort cleanup: a failing watcher must not prevent disposing the
            // rest, but the failure should not be silently swallowed either.
            _logger.LogWarning(ex, "Failed to dispose CDC watcher.");
        }
    }
    _cdcWatchers.Clear();
    _remoteSyncGuard.Dispose();
    GC.SuppressFinalize(this);
}
}

View File

@@ -0,0 +1,249 @@
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Core.Sync;
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
/// <summary>
/// BLite-backed oplog store that persists <see cref="OplogEntry"/> records through a
/// <typeparamref name="TDbContext"/> and keeps the vector clock in sync with them.
/// </summary>
/// <typeparam name="TDbContext">The BLite document database context type. Must inherit from CBDDCDocumentDbContext.</typeparam>
public class BLiteOplogStore<TDbContext> : OplogStore where TDbContext : CBDDCDocumentDbContext
{
    /// <summary>
    /// The BLite document context used for all persistence operations.
    /// </summary>
    protected readonly TDbContext _context;

    /// <summary>
    /// The logger used for diagnostic output.
    /// </summary>
    protected readonly ILogger<BLiteOplogStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BLiteOplogStore{TDbContext}"/> class.
    /// </summary>
    /// <param name="dbContext">The BLite database context.</param>
    /// <param name="documentStore">The document store used by the oplog store.</param>
    /// <param name="conflictResolver">The conflict resolver used during merges.</param>
    /// <param name="vectorClockService">The vector clock service used for timestamp coordination.</param>
    /// <param name="snapshotMetadataStore">Optional snapshot metadata store used for initialization.</param>
    /// <param name="logger">Optional logger instance.</param>
    public BLiteOplogStore(
        TDbContext dbContext,
        IDocumentStore documentStore,
        IConflictResolver conflictResolver,
        IVectorClockService vectorClockService,
        ISnapshotMetadataStore? snapshotMetadataStore = null,
        ILogger<BLiteOplogStore<TDbContext>>? logger = null) : base(documentStore, conflictResolver, vectorClockService, snapshotMetadataStore)
    {
        _context = dbContext ?? throw new ArgumentNullException(nameof(dbContext));
        _logger = logger ?? NullLogger<BLiteOplogStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    public override async Task ApplyBatchAsync(IEnumerable<OplogEntry> oplogEntries, CancellationToken cancellationToken = default)
    {
        // BLite transactions are committed by each SaveChangesAsync internally.
        // Wrapping in an explicit transaction causes "Cannot rollback committed transaction"
        // because PutDocumentAsync → SaveChangesAsync already commits.
        await base.ApplyBatchAsync(oplogEntries, cancellationToken);
    }

    /// <inheritdoc />
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        // Use Id (technical key) for deletion, not Hash (business key)
        await _context.OplogEntries.DeleteBulkAsync(_context.OplogEntries.FindAll().Select(e => e.Id));
        await _context.SaveChangesAsync(cancellationToken);
        _vectorClock.Invalidate();
    }

    /// <inheritdoc />
    public override async Task<IEnumerable<OplogEntry>> ExportAsync(CancellationToken cancellationToken = default)
    {
        return _context.OplogEntries.FindAll().ToDomain();
    }

    /// <inheritdoc />
    public override async Task<IEnumerable<OplogEntry>> GetChainRangeAsync(string startHash, string endHash, CancellationToken cancellationToken = default)
    {
        var startRow = _context.OplogEntries.Find(o => o.Hash == startHash).FirstOrDefault();
        var endRow = _context.OplogEntries.Find(o => o.Hash == endHash).FirstOrDefault();
        if (startRow == null || endRow == null) return [];
        var nodeId = startRow.TimestampNodeId;
        // Fetch range (Start < Entry <= End) on the start entry's node chain,
        // ordered by HLC point (physical time, then logical counter).
        var entities = _context.OplogEntries
            .Find(o => o.TimestampNodeId == nodeId &&
                       ((o.TimestampPhysicalTime > startRow.TimestampPhysicalTime) ||
                        (o.TimestampPhysicalTime == startRow.TimestampPhysicalTime && o.TimestampLogicalCounter > startRow.TimestampLogicalCounter)) &&
                       ((o.TimestampPhysicalTime < endRow.TimestampPhysicalTime) ||
                        (o.TimestampPhysicalTime == endRow.TimestampPhysicalTime && o.TimestampLogicalCounter <= endRow.TimestampLogicalCounter)))
            .OrderBy(o => o.TimestampPhysicalTime)
            .ThenBy(o => o.TimestampLogicalCounter)
            .ToList();
        return entities.ToDomain();
    }

    /// <inheritdoc />
    public override async Task<OplogEntry?> GetEntryByHashAsync(string hash, CancellationToken cancellationToken = default)
    {
        // Hash is now a regular indexed property, not the Key
        return _context.OplogEntries.Find(o => o.Hash == hash).FirstOrDefault()?.ToDomain();
    }

    /// <inheritdoc />
    public override async Task<IEnumerable<OplogEntry>> GetOplogAfterAsync(HlcTimestamp timestamp, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default)
    {
        var query = _context.OplogEntries
            .Find(o => (o.TimestampPhysicalTime > timestamp.PhysicalTime) ||
                       (o.TimestampPhysicalTime == timestamp.PhysicalTime && o.TimestampLogicalCounter > timestamp.LogicalCounter));
        if (collections != null)
        {
            var collectionSet = new HashSet<string>(collections);
            query = query.Where(o => collectionSet.Contains(o.Collection));
        }
        return query
            .OrderBy(o => o.TimestampPhysicalTime)
            .ThenBy(o => o.TimestampLogicalCounter)
            .ToDomain()
            .ToList();
    }

    /// <inheritdoc />
    public override async Task<IEnumerable<OplogEntry>> GetOplogForNodeAfterAsync(string nodeId, HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default)
    {
        var query = _context.OplogEntries.AsQueryable()
            .Where(o => o.TimestampNodeId == nodeId &&
                        ((o.TimestampPhysicalTime > since.PhysicalTime) ||
                         (o.TimestampPhysicalTime == since.PhysicalTime && o.TimestampLogicalCounter > since.LogicalCounter)));
        if (collections != null)
        {
            var collectionSet = new HashSet<string>(collections);
            query = query.Where(o => collectionSet.Contains(o.Collection));
        }
        return query
            .OrderBy(o => o.TimestampPhysicalTime)
            .ThenBy(o => o.TimestampLogicalCounter)
            .ToDomain()
            .ToList();
    }

    /// <inheritdoc />
    public override async Task ImportAsync(IEnumerable<OplogEntry> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            await _context.OplogEntries.InsertAsync(item.ToEntity());
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task MergeAsync(IEnumerable<OplogEntry> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            // Hash is now a regular indexed property, not the Key
            var existing = _context.OplogEntries.Find(o => o.Hash == item.Hash).FirstOrDefault();
            if (existing == null)
            {
                await _context.OplogEntries.InsertAsync(item.ToEntity());
            }
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task PruneOplogAsync(HlcTimestamp cutoff, CancellationToken cancellationToken = default)
    {
        // Use Id (technical key) for deletion, not Hash (business key) - consistent with DropAsync.
        var toDelete = _context.OplogEntries.AsQueryable()
            .Where(o => (o.TimestampPhysicalTime < cutoff.PhysicalTime) ||
                        (o.TimestampPhysicalTime == cutoff.PhysicalTime && o.TimestampLogicalCounter <= cutoff.LogicalCounter))
            .Select(o => o.Id)
            .ToList();
        await _context.OplogEntries.DeleteBulkAsync(toDelete);
        // Persist the bulk delete like every other mutating operation in this store.
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    protected override void InitializeVectorClock()
    {
        if (_vectorClock.IsInitialized) return;
        // Early check: if context or OplogEntries is null, skip initialization
        if (_context?.OplogEntries == null)
        {
            _vectorClock.IsInitialized = true;
            return;
        }
        // Step 1: Load from SnapshotMetadata FIRST (base state after prune)
        if (_snapshotMetadataStore != null)
        {
            try
            {
                var snapshots = _snapshotMetadataStore.GetAllSnapshotMetadataAsync().GetAwaiter().GetResult();
                foreach (var snapshot in snapshots)
                {
                    _vectorClock.UpdateNode(
                        snapshot.NodeId,
                        new HlcTimestamp(snapshot.TimestampPhysicalTime, snapshot.TimestampLogicalCounter, snapshot.NodeId),
                        snapshot.Hash ?? "");
                }
            }
            catch
            {
                // Ignore errors during initialization - oplog data will be used as fallback
            }
        }
        // Step 2: Load from Oplog (Latest State - Overrides Snapshot if newer)
        var latestPerNode = _context.OplogEntries.AsQueryable()
            .GroupBy(o => o.TimestampNodeId)
            .Select(g => new
            {
                NodeId = g.Key,
                MaxEntry = g.OrderByDescending(o => o.TimestampPhysicalTime)
                            .ThenByDescending(o => o.TimestampLogicalCounter)
                            .FirstOrDefault()
            })
            .ToList()
            .Where(x => x.MaxEntry != null)
            .ToList();
        foreach (var node in latestPerNode)
        {
            if (node.MaxEntry != null)
            {
                _vectorClock.UpdateNode(
                    node.NodeId,
                    new HlcTimestamp(node.MaxEntry.TimestampPhysicalTime, node.MaxEntry.TimestampLogicalCounter, node.MaxEntry.TimestampNodeId),
                    node.MaxEntry.Hash ?? "");
            }
        }
        _vectorClock.IsInitialized = true;
    }

    /// <inheritdoc />
    protected override async Task InsertOplogEntryAsync(OplogEntry entry, CancellationToken cancellationToken = default)
    {
        await _context.OplogEntries.InsertAsync(entry.ToEntity());
    }

    /// <inheritdoc />
    protected override async Task<string?> QueryLastHashForNodeAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        var lastEntry = _context.OplogEntries.AsQueryable()
            .Where(o => o.TimestampNodeId == nodeId)
            .OrderByDescending(o => o.TimestampPhysicalTime)
            .ThenByDescending(o => o.TimestampLogicalCounter)
            .FirstOrDefault();
        return lastEntry?.Hash;
    }

    /// <inheritdoc />
    protected override async Task<(long Wall, int Logic)?> QueryLastHashTimestampFromOplogAsync(string hash, CancellationToken cancellationToken = default)
    {
        // Hash is now a regular indexed property, not the Key
        var entry = _context.OplogEntries.Find(o => o.Hash == hash).FirstOrDefault();
        if (entry == null) return null;
        return (entry.TimestampPhysicalTime, entry.TimestampLogicalCounter);
    }
}

View File

@@ -0,0 +1,114 @@
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
/// <summary>
/// Persists remote peer configurations through a <typeparamref name="TDbContext"/> BLite document context.
/// </summary>
/// <remarks>Supports saving, querying, and removing remote peer configurations. The peer's NodeId is an
/// indexed business property; the entity Id remains the technical key used for deletion.</remarks>
/// <typeparam name="TDbContext">The type of the document database context used for accessing and managing peer configurations. Must inherit from
/// CBDDCDocumentDbContext.</typeparam>
public class BLitePeerConfigurationStore<TDbContext> : PeerConfigurationStore where TDbContext : CBDDCDocumentDbContext
{
    /// <summary>
    /// The BLite document context used for all persistence operations.
    /// </summary>
    protected readonly TDbContext _context;

    /// <summary>
    /// The logger used for diagnostic output.
    /// </summary>
    protected readonly ILogger<BLitePeerConfigurationStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BLitePeerConfigurationStore{TDbContext}"/> class.
    /// </summary>
    /// <param name="context">The database context holding peer configuration data. Cannot be null.</param>
    /// <param name="logger">An optional logger; a no-op logger is used when null.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="context"/> is null.</exception>
    public BLitePeerConfigurationStore(TDbContext context, ILogger<BLitePeerConfigurationStore<TDbContext>>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<BLitePeerConfigurationStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        _logger.LogWarning("Dropping peer configuration store - all remote peer configurations will be permanently deleted!");
        // Deletion uses Id (technical key), never NodeId (business key).
        var ids = await Task.Run(() => _context.RemotePeerConfigurations.FindAll().Select(p => p.Id).ToList(), cancellationToken);
        await _context.RemotePeerConfigurations.DeleteBulkAsync(ids);
        await _context.SaveChangesAsync(cancellationToken);
        _logger.LogInformation("Peer configuration store dropped successfully.");
    }

    /// <inheritdoc />
    public override async Task<IEnumerable<RemotePeerConfiguration>> ExportAsync(CancellationToken cancellationToken = default)
        => await Task.Run(() => _context.RemotePeerConfigurations.FindAll().ToDomain().ToList(), cancellationToken);

    /// <inheritdoc />
    public override async Task<RemotePeerConfiguration?> GetRemotePeerAsync(string nodeId, CancellationToken cancellationToken)
        => await Task.Run(
            () => _context.RemotePeerConfigurations.Find(p => p.NodeId == nodeId).FirstOrDefault()?.ToDomain(),
            cancellationToken);

    /// <inheritdoc />
    public override async Task<IEnumerable<RemotePeerConfiguration>> GetRemotePeersAsync(CancellationToken cancellationToken = default)
        => await Task.Run(() => _context.RemotePeerConfigurations.FindAll().ToDomain().ToList(), cancellationToken);

    /// <inheritdoc />
    public override async Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        // NodeId is an indexed business property; locate the row first, then delete by its technical Id.
        var match = await Task.Run(() => _context.RemotePeerConfigurations.Find(p => p.NodeId == nodeId).FirstOrDefault(), cancellationToken);
        if (match == null)
        {
            _logger.LogWarning("Attempted to remove non-existent remote peer: {NodeId}", nodeId);
            return;
        }
        await _context.RemotePeerConfigurations.DeleteAsync(match.Id);
        await _context.SaveChangesAsync(cancellationToken);
        _logger.LogInformation("Removed remote peer configuration: {NodeId}", nodeId);
    }

    /// <inheritdoc />
    public override async Task SaveRemotePeerAsync(RemotePeerConfiguration peer, CancellationToken cancellationToken = default)
    {
        // Upsert keyed on the indexed NodeId business property.
        var current = await Task.Run(() => _context.RemotePeerConfigurations.Find(p => p.NodeId == peer.NodeId).FirstOrDefault(), cancellationToken);
        if (current == null)
        {
            await _context.RemotePeerConfigurations.InsertAsync(peer.ToEntity());
        }
        else
        {
            current.NodeId = peer.NodeId;
            current.Address = peer.Address;
            current.Type = (int)peer.Type;
            current.IsEnabled = peer.IsEnabled;
            current.InterestsJson = peer.InterestingCollections.Count > 0
                ? System.Text.Json.JsonSerializer.Serialize(peer.InterestingCollections)
                : "";
            await _context.RemotePeerConfigurations.UpdateAsync(current);
        }
        await _context.SaveChangesAsync(cancellationToken);
        _logger.LogInformation("Saved remote peer configuration: {NodeId} ({Type})", peer.NodeId, peer.Type);
    }
}

View File

@@ -0,0 +1,321 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
/// <summary>
/// BLite-backed peer oplog confirmation store. Tracks, per (peer, source node) pair,
/// the newest HLC point a peer has confirmed, plus an activation flag for soft removal.
/// </summary>
/// <typeparam name="TDbContext">The BLite context type.</typeparam>
public class BLitePeerOplogConfirmationStore<TDbContext> : PeerOplogConfirmationStore where TDbContext : CBDDCDocumentDbContext
{
    // Sentinel SourceNodeId for "registration" rows: they only record that a peer is being
    // tracked (and whether tracking is active) and are excluded from confirmation queries.
    internal const string RegistrationSourceNodeId = "__peer_registration__";
    private readonly TDbContext _context;
    private readonly ILogger<BLitePeerOplogConfirmationStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BLitePeerOplogConfirmationStore{TDbContext}"/> class.
    /// </summary>
    /// <param name="context">The BLite context.</param>
    /// <param name="logger">An optional logger.</param>
    public BLitePeerOplogConfirmationStore(
        TDbContext context,
        ILogger<BLitePeerOplogConfirmationStore<TDbContext>>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<BLitePeerOplogConfirmationStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    public override async Task EnsurePeerRegisteredAsync(
        string peerNodeId,
        string address,
        PeerType type,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }
        // Look for the sentinel registration row for this peer.
        var existing = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId && c.SourceNodeId == RegistrationSourceNodeId)
            .FirstOrDefault();
        if (existing == null)
        {
            // First registration: create a zeroed confirmation row carrying the sentinel source id.
            await _context.PeerOplogConfirmations.InsertAsync(new PeerOplogConfirmationEntity
            {
                Id = Guid.NewGuid().ToString(),
                PeerNodeId = peerNodeId,
                SourceNodeId = RegistrationSourceNodeId,
                ConfirmedWall = 0,
                ConfirmedLogic = 0,
                ConfirmedHash = "",
                LastConfirmedUtcMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
                IsActive = true
            });
            await _context.SaveChangesAsync(cancellationToken);
            _logger.LogDebug("Registered peer confirmation tracking for {PeerNodeId} ({Address}, {Type}).", peerNodeId, address, type);
            return;
        }
        // Re-registering a previously removed peer reactivates its tracking row.
        if (!existing.IsActive)
        {
            existing.IsActive = true;
            existing.LastConfirmedUtcMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
            await _context.PeerOplogConfirmations.UpdateAsync(existing);
            await _context.SaveChangesAsync(cancellationToken);
        }
    }

    /// <inheritdoc />
    public override async Task UpdateConfirmationAsync(
        string peerNodeId,
        string sourceNodeId,
        HlcTimestamp timestamp,
        string hash,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }
        if (string.IsNullOrWhiteSpace(sourceNodeId))
        {
            throw new ArgumentException("Source node id is required.", nameof(sourceNodeId));
        }
        var existing = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId && c.SourceNodeId == sourceNodeId)
            .FirstOrDefault();
        var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        if (existing == null)
        {
            // No row yet for this (peer, source) pair: insert the confirmation as-is.
            await _context.PeerOplogConfirmations.InsertAsync(new PeerOplogConfirmationEntity
            {
                Id = Guid.NewGuid().ToString(),
                PeerNodeId = peerNodeId,
                SourceNodeId = sourceNodeId,
                ConfirmedWall = timestamp.PhysicalTime,
                ConfirmedLogic = timestamp.LogicalCounter,
                ConfirmedHash = hash ?? "",
                LastConfirmedUtcMs = nowMs,
                IsActive = true
            });
            await _context.SaveChangesAsync(cancellationToken);
            return;
        }
        // Apply the update only when the incoming HLC point is strictly newer, or when the
        // hash differs at the exact same HLC point, or when the row needs reactivation.
        var isNewer = IsIncomingTimestampNewer(timestamp, existing);
        var samePointHashChanged = timestamp.PhysicalTime == existing.ConfirmedWall &&
                                   timestamp.LogicalCounter == existing.ConfirmedLogic &&
                                   !string.Equals(existing.ConfirmedHash, hash, StringComparison.Ordinal);
        if (!isNewer && !samePointHashChanged && existing.IsActive)
        {
            return;
        }
        existing.ConfirmedWall = timestamp.PhysicalTime;
        existing.ConfirmedLogic = timestamp.LogicalCounter;
        existing.ConfirmedHash = hash ?? "";
        existing.LastConfirmedUtcMs = nowMs;
        existing.IsActive = true;
        await _context.PeerOplogConfirmations.UpdateAsync(existing);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsAsync(CancellationToken cancellationToken = default)
    {
        // Registration sentinel rows are not real confirmations; filter them out.
        var confirmations = _context.PeerOplogConfirmations
            .Find(c => c.SourceNodeId != RegistrationSourceNodeId)
            .ToDomain()
            .ToList();
        return Task.FromResult<IEnumerable<PeerOplogConfirmation>>(confirmations);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsForPeerAsync(
        string peerNodeId,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }
        // Registration sentinel rows are not real confirmations; filter them out.
        var confirmations = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId && c.SourceNodeId != RegistrationSourceNodeId)
            .ToDomain()
            .ToList();
        return Task.FromResult<IEnumerable<PeerOplogConfirmation>>(confirmations);
    }

    /// <inheritdoc />
    public override async Task RemovePeerTrackingAsync(string peerNodeId, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }
        var matches = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId)
            .ToList();
        if (matches.Count == 0)
        {
            return;
        }
        var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        // Soft delete: rows are deactivated rather than deleted, so they can be
        // reactivated by EnsurePeerRegisteredAsync / UpdateConfirmationAsync later.
        foreach (var match in matches)
        {
            if (!match.IsActive)
            {
                continue;
            }
            match.IsActive = false;
            match.LastConfirmedUtcMs = nowMs;
            await _context.PeerOplogConfirmations.UpdateAsync(match);
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<string>> GetActiveTrackedPeersAsync(CancellationToken cancellationToken = default)
    {
        var peers = _context.PeerOplogConfirmations
            .Find(c => c.IsActive)
            .Select(c => c.PeerNodeId)
            .Distinct(StringComparer.Ordinal)
            .ToList();
        return Task.FromResult<IEnumerable<string>>(peers);
    }

    /// <inheritdoc />
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        // Hard delete of every row (by technical Id), including registration sentinels.
        var allIds = _context.PeerOplogConfirmations.FindAll().Select(c => c.Id).ToList();
        await _context.PeerOplogConfirmations.DeleteBulkAsync(allIds);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<PeerOplogConfirmation>> ExportAsync(CancellationToken cancellationToken = default)
    {
        // Export uses FindAll (no filter), so registration sentinel rows are included.
        var exported = _context.PeerOplogConfirmations
            .FindAll()
            .ToDomain()
            .ToList();
        return Task.FromResult<IEnumerable<PeerOplogConfirmation>>(exported);
    }

    /// <inheritdoc />
    public override async Task ImportAsync(IEnumerable<PeerOplogConfirmation> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            var existing = _context.PeerOplogConfirmations
                .Find(c => c.PeerNodeId == item.PeerNodeId && c.SourceNodeId == item.SourceNodeId)
                .FirstOrDefault();
            if (existing == null)
            {
                await _context.PeerOplogConfirmations.InsertAsync(item.ToEntity());
                continue;
            }
            // Import overwrites existing rows unconditionally (unlike MergeAsync below).
            existing.ConfirmedWall = item.ConfirmedWall;
            existing.ConfirmedLogic = item.ConfirmedLogic;
            existing.ConfirmedHash = item.ConfirmedHash;
            existing.LastConfirmedUtcMs = item.LastConfirmedUtc.ToUnixTimeMilliseconds();
            existing.IsActive = item.IsActive;
            await _context.PeerOplogConfirmations.UpdateAsync(existing);
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task MergeAsync(IEnumerable<PeerOplogConfirmation> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            var existing = _context.PeerOplogConfirmations
                .Find(c => c.PeerNodeId == item.PeerNodeId && c.SourceNodeId == item.SourceNodeId)
                .FirstOrDefault();
            if (existing == null)
            {
                await _context.PeerOplogConfirmations.InsertAsync(item.ToEntity());
                continue;
            }
            var changed = false;
            // Compare confirmation points as HLC timestamps; take the incoming one only when newer.
            var incomingTimestamp = new HlcTimestamp(item.ConfirmedWall, item.ConfirmedLogic, item.SourceNodeId);
            var existingTimestamp = new HlcTimestamp(existing.ConfirmedWall, existing.ConfirmedLogic, existing.SourceNodeId);
            if (incomingTimestamp > existingTimestamp)
            {
                existing.ConfirmedWall = item.ConfirmedWall;
                existing.ConfirmedLogic = item.ConfirmedLogic;
                existing.ConfirmedHash = item.ConfirmedHash;
                changed = true;
            }
            // Keep the most recent wall-clock confirmation time.
            var incomingLastConfirmedMs = item.LastConfirmedUtc.ToUnixTimeMilliseconds();
            if (incomingLastConfirmedMs > existing.LastConfirmedUtcMs)
            {
                existing.LastConfirmedUtcMs = incomingLastConfirmedMs;
                changed = true;
            }
            // The active flag always follows the incoming row when it differs.
            if (existing.IsActive != item.IsActive)
            {
                existing.IsActive = item.IsActive;
                changed = true;
            }
            if (changed)
            {
                await _context.PeerOplogConfirmations.UpdateAsync(existing);
            }
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <summary>
    /// Returns true when <paramref name="incomingTimestamp"/> is strictly newer than the
    /// confirmation point stored on <paramref name="existing"/> (physical time first,
    /// logical counter as tiebreaker).
    /// </summary>
    private static bool IsIncomingTimestampNewer(HlcTimestamp incomingTimestamp, PeerOplogConfirmationEntity existing)
    {
        if (incomingTimestamp.PhysicalTime > existing.ConfirmedWall)
        {
            return true;
        }
        if (incomingTimestamp.PhysicalTime == existing.ConfirmedWall &&
            incomingTimestamp.LogicalCounter > existing.ConfirmedLogic)
        {
            return true;
        }
        return false;
    }
}

View File

@@ -0,0 +1,145 @@
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
/// <summary>
/// Provides a snapshot metadata store implementation that uses a specified CBDDCDocumentDbContext for persistence
/// operations.
/// </summary>
/// <remarks>This class enables storage, retrieval, and management of snapshot metadata using the provided
/// database context. It is typically used in scenarios where snapshot metadata needs to be persisted in a document
/// database. The class supports bulk operations and incremental updates, and can be extended for custom database
/// contexts. Thread safety depends on the underlying context implementation.</remarks>
/// <typeparam name="TDbContext">The type of the document database context used for accessing and managing snapshot metadata. Must inherit from
/// CBDDCDocumentDbContext.</typeparam>
public class BLiteSnapshotMetadataStore<TDbContext> : SnapshotMetadataStore where TDbContext : CBDDCDocumentDbContext
{
/// <summary>
/// Represents the database context used for data access operations within the derived class.
/// </summary>
/// <remarks>Intended for use by derived classes to interact with the underlying database. The context
/// should be properly disposed of according to the application's lifetime management strategy.</remarks>
protected readonly TDbContext _context;
/// <summary>
/// Provides logging capabilities for the BLiteSnapshotMetadataStore operations.
/// </summary>
/// <remarks>Intended for use by derived classes to record diagnostic and operational information. The
/// logger instance is specific to the BLiteSnapshotMetadataStore<TDbContext> type.</remarks>
protected readonly ILogger<BLiteSnapshotMetadataStore<TDbContext>> _logger;
/// <summary>
/// Initializes a new instance of the BLiteSnapshotMetadataStore class using the specified database context and
/// optional logger.
/// </summary>
/// <param name="context">The database context to be used for accessing snapshot metadata. Cannot be null.</param>
/// <param name="logger">An optional logger for logging diagnostic messages. If null, a no-op logger is used.</param>
/// <exception cref="ArgumentNullException">Thrown if the context parameter is null.</exception>
public BLiteSnapshotMetadataStore(TDbContext context, ILogger<BLiteSnapshotMetadataStore<TDbContext>>? logger = null)
{
_context = context ?? throw new ArgumentNullException(nameof(context));
_logger = logger ?? NullLogger<BLiteSnapshotMetadataStore<TDbContext>>.Instance;
}
/// <inheritdoc />
public override async Task DropAsync(CancellationToken cancellationToken = default)
{
// Use Id (technical key) for deletion, not NodeId (business key)
var allIds = await Task.Run(() => _context.SnapshotMetadatas.FindAll().Select(s => s.Id).ToList(), cancellationToken);
await _context.SnapshotMetadatas.DeleteBulkAsync(allIds);
await _context.SaveChangesAsync(cancellationToken);
}
/// <inheritdoc />
public override async Task<IEnumerable<SnapshotMetadata>> ExportAsync(CancellationToken cancellationToken = default)
{
return await Task.Run(() => _context.SnapshotMetadatas.FindAll().ToDomain().ToList(), cancellationToken);
}
/// <inheritdoc />
public override async Task<string?> GetSnapshotHashAsync(string nodeId, CancellationToken cancellationToken = default)
{
// NodeId is now a regular indexed property, not the Key
var snapshot = await Task.Run(() => _context.SnapshotMetadatas.Find(s => s.NodeId == nodeId).FirstOrDefault(), cancellationToken);
return snapshot?.Hash;
}
/// <inheritdoc />
public override async Task ImportAsync(IEnumerable<SnapshotMetadata> items, CancellationToken cancellationToken = default)
{
foreach (var metadata in items)
{
await _context.SnapshotMetadatas.InsertAsync(metadata.ToEntity());
}
await _context.SaveChangesAsync(cancellationToken);
}
/// <inheritdoc />
public override async Task InsertSnapshotMetadataAsync(SnapshotMetadata metadata, CancellationToken cancellationToken = default)
{
await _context.SnapshotMetadatas.InsertAsync(metadata.ToEntity());
await _context.SaveChangesAsync(cancellationToken);
}
/// <inheritdoc />
/// <remarks>
/// Upserts each incoming record keyed by NodeId with last-write-wins semantics:
/// an existing record is overwritten only when the incoming HLC timestamp
/// (physical time first, logical counter as tiebreaker) is strictly newer.
/// </remarks>
public override async Task MergeAsync(IEnumerable<SnapshotMetadata> items, CancellationToken cancellationToken = default)
{
    foreach (var metadata in items)
    {
        // Honor cancellation between items; previously the token only gated
        // whether each Task.Run started, so a long merge could not be stopped.
        cancellationToken.ThrowIfCancellationRequested();

        // NodeId is a regular indexed property (not the key), so match by predicate.
        var existing = await Task.Run(
            () => _context.SnapshotMetadatas.Find(s => s.NodeId == metadata.NodeId).FirstOrDefault(),
            cancellationToken);

        if (existing == null)
        {
            await _context.SnapshotMetadatas.InsertAsync(metadata.ToEntity());
            continue;
        }

        // Last-write-wins: skip unless the incoming HLC timestamp is strictly newer.
        var incomingIsNewer =
            metadata.TimestampPhysicalTime > existing.TimestampPhysicalTime ||
            (metadata.TimestampPhysicalTime == existing.TimestampPhysicalTime &&
             metadata.TimestampLogicalCounter > existing.TimestampLogicalCounter);
        if (!incomingIsNewer)
        {
            continue;
        }

        // NodeId is left untouched: it already equals metadata.NodeId because it
        // was the lookup predicate (the previous self-assignment was a no-op).
        existing.TimestampPhysicalTime = metadata.TimestampPhysicalTime;
        existing.TimestampLogicalCounter = metadata.TimestampLogicalCounter;
        existing.Hash = metadata.Hash;
        await _context.SnapshotMetadatas.UpdateAsync(existing);
    }
    await _context.SaveChangesAsync(cancellationToken);
}
/// <inheritdoc />
public override async Task UpdateSnapshotMetadataAsync(SnapshotMetadata existingMeta, CancellationToken cancellationToken)
{
    // Look the stored record up by NodeId (indexed business key, not the technical key).
    var stored = await Task.Run(
        () => _context.SnapshotMetadatas.Find(s => s.NodeId == existingMeta.NodeId).FirstOrDefault(),
        cancellationToken);
    if (stored == null)
    {
        // No matching record: nothing is written (matches the original contract).
        return;
    }

    stored.NodeId = existingMeta.NodeId;
    stored.TimestampPhysicalTime = existingMeta.TimestampPhysicalTime;
    stored.TimestampLogicalCounter = existingMeta.TimestampLogicalCounter;
    stored.Hash = existingMeta.Hash;
    await _context.SnapshotMetadatas.UpdateAsync(stored);
    await _context.SaveChangesAsync(cancellationToken);
}
/// <inheritdoc />
public override async Task<SnapshotMetadata?> GetSnapshotMetadataAsync(string nodeId, CancellationToken cancellationToken = default)
{
    // NodeId is an indexed (non-key) property; map the hit to a domain model, if any.
    return await Task.Run(
        () =>
        {
            var entity = _context.SnapshotMetadatas.Find(s => s.NodeId == nodeId).FirstOrDefault();
            return entity?.ToDomain();
        },
        cancellationToken);
}
/// <inheritdoc />
public override async Task<IEnumerable<SnapshotMetadata>> GetAllSnapshotMetadataAsync(CancellationToken cancellationToken = default)
{
    // Materialize every stored record as a domain model.
    return await Task.Run(
        () => _context.SnapshotMetadatas
            .FindAll()
            .ToDomain()
            .ToList(),
        cancellationToken);
}
}

View File

@@ -0,0 +1,102 @@
using ZB.MOM.WW.CBDDC.Core.Storage;
using ZB.MOM.WW.CBDDC.Core.Sync;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
/// <summary>
/// Extension methods for configuring BLite persistence for ZB.MOM.WW.CBDDC.
/// </summary>
public static class CBDDCBLiteExtensions
{
    /// <summary>
    /// Adds BLite persistence to CBDDC using a custom DbContext and DocumentStore implementation.
    /// </summary>
    /// <typeparam name="TDbContext">The type of the BLite document database context. Must inherit from CBDDCDocumentDbContext.</typeparam>
    /// <typeparam name="TDocumentStore">The type of the document store implementation. Must implement IDocumentStore.</typeparam>
    /// <param name="services">The service collection to add the services to.</param>
    /// <param name="contextFactory">A factory function that creates the DbContext instance.</param>
    /// <returns>The service collection for chaining.</returns>
    public static IServiceCollection AddCBDDCBLite<TDbContext, TDocumentStore>(
        this IServiceCollection services,
        Func<IServiceProvider, TDbContext> contextFactory)
        where TDbContext : CBDDCDocumentDbContext
        where TDocumentStore : class, IDocumentStore
    {
        AddCommonBLiteServices(services, contextFactory);
        // Register the DocumentStore implementation
        services.TryAddSingleton<IDocumentStore, TDocumentStore>();
        return services;
    }

    /// <summary>
    /// Adds BLite persistence to CBDDC using a custom DbContext (without explicit DocumentStore type).
    /// </summary>
    /// <typeparam name="TDbContext">The type of the BLite document database context. Must inherit from CBDDCDocumentDbContext.</typeparam>
    /// <param name="services">The service collection to add the services to.</param>
    /// <param name="contextFactory">A factory function that creates the DbContext instance.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <remarks>You must manually register IDocumentStore after calling this method.</remarks>
    public static IServiceCollection AddCBDDCBLite<TDbContext>(
        this IServiceCollection services,
        Func<IServiceProvider, TDbContext> contextFactory)
        where TDbContext : CBDDCDocumentDbContext
    {
        AddCommonBLiteServices(services, contextFactory);
        return services;
    }

    /// <summary>
    /// Registers the services shared by both AddCBDDCBLite overloads.
    /// Fix: IVectorClockService is now registered here for BOTH overloads —
    /// previously only the two-generic overload added it, leaving the
    /// TDbContext-only overload without the clock service that BLiteOplogStore
    /// and the DocumentStore share.
    /// </summary>
    /// <typeparam name="TDbContext">The BLite document database context type.</typeparam>
    /// <param name="services">The service collection to add the services to.</param>
    /// <param name="contextFactory">A factory function that creates the DbContext instance.</param>
    private static void AddCommonBLiteServices<TDbContext>(
        IServiceCollection services,
        Func<IServiceProvider, TDbContext> contextFactory)
        where TDbContext : CBDDCDocumentDbContext
    {
        if (services == null) throw new ArgumentNullException(nameof(services));
        if (contextFactory == null) throw new ArgumentNullException(nameof(contextFactory));
        // Register the DbContext as singleton (must match store lifetime)
        services.TryAddSingleton<TDbContext>(contextFactory);
        services.TryAddSingleton<CBDDCDocumentDbContext>(sp => sp.GetRequiredService<TDbContext>());
        // Default Conflict Resolver (Last Write Wins) if none is provided
        services.TryAddSingleton<IConflictResolver, LastWriteWinsConflictResolver>();
        // Vector Clock Service (shared between DocumentStore and OplogStore)
        services.TryAddSingleton<IVectorClockService, VectorClockService>();
        // Register BLite Stores (all Singleton)
        services.TryAddSingleton<IOplogStore, BLiteOplogStore<TDbContext>>();
        services.TryAddSingleton<IPeerConfigurationStore, BLitePeerConfigurationStore<TDbContext>>();
        services.TryAddSingleton<IPeerOplogConfirmationStore, BLitePeerOplogConfirmationStore<TDbContext>>();
        services.TryAddSingleton<ISnapshotMetadataStore, BLiteSnapshotMetadataStore<TDbContext>>();
        services.TryAddSingleton<IDocumentMetadataStore, BLiteDocumentMetadataStore<TDbContext>>();
        // Register the SnapshotService (uses the generic SnapshotStore from ZB.MOM.WW.CBDDC.Persistence)
        services.TryAddSingleton<ISnapshotService, SnapshotStore>();
    }
}
/// <summary>
/// Configuration options for BLite persistence.
/// </summary>
public class BLiteOptions
{
    /// <summary>
    /// Gets or sets the file-system path of the BLite database file.
    /// Defaults to an empty string (unset).
    /// </summary>
    public string DatabasePath { get; set; } = string.Empty;
}

View File

@@ -0,0 +1,105 @@
using BLite.Core;
using BLite.Core.Collections;
using BLite.Core.Metadata;
using BLite.Core.Storage;
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
public partial class CBDDCDocumentDbContext : DocumentDbContext
{
    /// <summary>
    /// Gets the collection of operation log (oplog) entries.
    /// </summary>
    /// <remarks>Only the property setter is private; store implementations insert,
    /// update, and delete entries through the collection's own APIs.
    /// NOTE(review): initialized to <c>null!</c> — presumably populated by the base
    /// DocumentDbContext during model creation; confirm against BLite docs.</remarks>
    public DocumentCollection<string, OplogEntity> OplogEntries { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of snapshot metadata records (one per node, keyed by NodeId
    /// as a unique index; see OnModelCreating).
    /// </summary>
    public DocumentCollection<string, SnapshotMetadataEntity> SnapshotMetadatas { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of remote peer configurations.
    /// </summary>
    /// <remarks>Only the property setter is private; peer configuration stores
    /// modify the contents through the collection's own APIs.</remarks>
    public DocumentCollection<string, RemotePeerEntity> RemotePeerConfigurations { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of document metadata for sync tracking.
    /// </summary>
    /// <remarks>Stores HLC timestamps and deleted state for each document without
    /// modifying application entities. Used to track document versions for
    /// incremental sync instead of full snapshots.</remarks>
    public DocumentCollection<string, DocumentMetadataEntity> DocumentMetadatas { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of peer oplog confirmation records for pruning safety tracking.
    /// </summary>
    public DocumentCollection<string, PeerOplogConfirmationEntity> PeerOplogConfirmations { get; private set; } = null!;

    /// <summary>
    /// Initializes a new instance of the CBDDCDocumentDbContext class using the specified database file path.
    /// </summary>
    /// <param name="databasePath">The file system path to the database file to be used by the context. Cannot be null or empty.</param>
    public CBDDCDocumentDbContext(string databasePath) : base(databasePath)
    {
    }

    /// <summary>
    /// Initializes a new instance of the CBDDCDocumentDbContext class using the specified
    /// database path and page file configuration.
    /// </summary>
    /// <param name="databasePath">The file system path to the database file. This value cannot be null or empty.</param>
    /// <param name="config">The configuration settings for the page file. Specifies options that control how the database pages are managed.</param>
    public CBDDCDocumentDbContext(string databasePath, PageFileConfig config) : base(databasePath, config)
    {
    }

    /// <inheritdoc />
    /// <remarks>
    /// Every collection follows the same pattern: an auto-generated GUID string (Id)
    /// is the technical key, and the domain-meaningful identifier is enforced as a
    /// unique secondary index instead. This keeps key handling uniform while still
    /// guaranteeing business-key uniqueness.
    /// </remarks>
    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
        base.OnModelCreating(modelBuilder);

        // OplogEntries: Id is the technical key; Hash is the unique business key.
        // The HLC-timestamp composite index supports ordered/range scans of the log.
        modelBuilder.Entity<OplogEntity>()
            .ToCollection("OplogEntries")
            .HasKey(e => e.Id)
            .HasIndex(e => e.Hash, unique: true) // Hash is unique business key
            .HasIndex(e => new { e.TimestampPhysicalTime, e.TimestampLogicalCounter, e.TimestampNodeId })
            .HasIndex(e => e.Collection);

        // SnapshotMetadatas: Id is the technical key; NodeId is the unique business key.
        modelBuilder.Entity<SnapshotMetadataEntity>()
            .ToCollection("SnapshotMetadatas")
            .HasKey(e => e.Id)
            .HasIndex(e => e.NodeId, unique: true) // NodeId is unique business key
            .HasIndex(e => new { e.TimestampPhysicalTime, e.TimestampLogicalCounter });

        // RemotePeerConfigurations: Id is the technical key; NodeId is the unique business key.
        modelBuilder.Entity<RemotePeerEntity>()
            .ToCollection("RemotePeerConfigurations")
            .HasKey(e => e.Id)
            .HasIndex(e => e.NodeId, unique: true) // NodeId is unique business key
            .HasIndex(e => e.IsEnabled);

        // DocumentMetadatas: Id is the technical key; (Collection, Key) is the unique
        // composite business key identifying one tracked document.
        modelBuilder.Entity<DocumentMetadataEntity>()
            .ToCollection("DocumentMetadatas")
            .HasKey(e => e.Id)
            .HasIndex(e => new { e.Collection, e.Key }, unique: true) // Composite business key
            .HasIndex(e => new { e.HlcPhysicalTime, e.HlcLogicalCounter, e.HlcNodeId })
            .HasIndex(e => e.Collection);

        // PeerOplogConfirmations: Id is the technical key; (PeerNodeId, SourceNodeId)
        // is the unique business key for one peer's watermark on one source.
        modelBuilder.Entity<PeerOplogConfirmationEntity>()
            .ToCollection("PeerOplogConfirmations")
            .HasKey(e => e.Id)
            .HasIndex(e => new { e.PeerNodeId, e.SourceNodeId }, unique: true)
            .HasIndex(e => e.IsActive)
            .HasIndex(e => new { e.SourceNodeId, e.ConfirmedWall, e.ConfirmedLogic });
    }
}

View File

@@ -0,0 +1,47 @@
using System.ComponentModel.DataAnnotations;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
/// <summary>
/// BLite entity that records per-document sync-tracking metadata: the document's
/// HLC timestamp and its tombstone (deleted) flag. Kept separate so application
/// entities never need modification.
/// </summary>
public class DocumentMetadataEntity
{
    /// <summary>Technical primary key; an auto-generated GUID string.</summary>
    [Key]
    public string Id { get; set; } = string.Empty;

    /// <summary>Collection name — first half of the composite business key.</summary>
    public string Collection { get; set; } = string.Empty;

    /// <summary>Document key within the collection — second half of the composite business key.</summary>
    public string Key { get; set; } = string.Empty;

    /// <summary>Physical (wall-clock) component of the HLC timestamp.</summary>
    public long HlcPhysicalTime { get; set; }

    /// <summary>Logical counter component of the HLC timestamp.</summary>
    public int HlcLogicalCounter { get; set; }

    /// <summary>Identifier of the node that last modified the document.</summary>
    public string HlcNodeId { get; set; } = string.Empty;

    /// <summary>True when the document is a tombstone (marked deleted).</summary>
    public bool IsDeleted { get; set; }
}

View File

@@ -0,0 +1,243 @@
using System.Text.Json;
using ZB.MOM.WW.CBDDC.Core;
using ZB.MOM.WW.CBDDC.Core.Network;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
/// <summary>
/// Extension methods that translate between BLite persistence entities and
/// their CBDDC domain-model counterparts.
/// </summary>
public static class EntityMappers
{
    #region OplogEntity Mappers

    /// <summary>
    /// Maps an <see cref="OplogEntry"/> domain model onto a fresh persistence entity.
    /// </summary>
    /// <param name="entry">The oplog entry to convert.</param>
    public static OplogEntity ToEntity(this OplogEntry entry) => new()
    {
        // The technical key is always freshly generated; Hash stays the business key.
        Id = Guid.NewGuid().ToString(),
        Collection = entry.Collection,
        Key = entry.Key,
        Operation = (int)entry.Operation,
        // A null payload (Delete) is stored as "" to avoid BLite BSON
        // serialization issues with null values.
        PayloadJson = entry.Payload?.GetRawText() ?? "",
        TimestampPhysicalTime = entry.Timestamp.PhysicalTime,
        TimestampLogicalCounter = entry.Timestamp.LogicalCounter,
        TimestampNodeId = entry.Timestamp.NodeId,
        Hash = entry.Hash,
        PreviousHash = entry.PreviousHash
    };

    /// <summary>
    /// Maps a persisted <see cref="OplogEntity"/> back to its domain model.
    /// </summary>
    /// <param name="entity">The persisted oplog entity to convert.</param>
    public static OplogEntry ToDomain(this OplogEntity entity)
    {
        // "" round-trips back to a null payload (Delete operations).
        JsonElement? payload = string.IsNullOrEmpty(entity.PayloadJson)
            ? null
            : JsonSerializer.Deserialize<JsonElement>(entity.PayloadJson);
        var timestamp = new HlcTimestamp(
            entity.TimestampPhysicalTime,
            entity.TimestampLogicalCounter,
            entity.TimestampNodeId);
        return new OplogEntry(
            entity.Collection,
            entity.Key,
            (OperationType)entity.Operation,
            payload,
            timestamp,
            entity.PreviousHash,
            entity.Hash);
    }

    /// <summary>
    /// Maps a sequence of oplog entities to domain models (lazily).
    /// </summary>
    /// <param name="entities">The oplog entities to convert.</param>
    public static IEnumerable<OplogEntry> ToDomain(this IEnumerable<OplogEntity> entities) =>
        entities.Select(static e => e.ToDomain());

    #endregion

    #region SnapshotMetadataEntity Mappers

    /// <summary>
    /// Maps a <see cref="SnapshotMetadata"/> domain model onto a fresh persistence entity.
    /// </summary>
    /// <param name="metadata">The snapshot metadata to convert.</param>
    public static SnapshotMetadataEntity ToEntity(this SnapshotMetadata metadata) => new()
    {
        // Technical key; NodeId remains the unique business key.
        Id = Guid.NewGuid().ToString(),
        NodeId = metadata.NodeId,
        TimestampPhysicalTime = metadata.TimestampPhysicalTime,
        TimestampLogicalCounter = metadata.TimestampLogicalCounter,
        Hash = metadata.Hash
    };

    /// <summary>
    /// Maps a persisted <see cref="SnapshotMetadataEntity"/> back to its domain model.
    /// </summary>
    /// <param name="entity">The persisted snapshot metadata entity to convert.</param>
    public static SnapshotMetadata ToDomain(this SnapshotMetadataEntity entity) => new()
    {
        NodeId = entity.NodeId,
        TimestampPhysicalTime = entity.TimestampPhysicalTime,
        TimestampLogicalCounter = entity.TimestampLogicalCounter,
        Hash = entity.Hash
    };

    /// <summary>
    /// Maps a sequence of snapshot metadata entities to domain models (lazily).
    /// </summary>
    /// <param name="entities">The snapshot metadata entities to convert.</param>
    public static IEnumerable<SnapshotMetadata> ToDomain(this IEnumerable<SnapshotMetadataEntity> entities) =>
        entities.Select(static e => e.ToDomain());

    #endregion

    #region RemotePeerEntity Mappers

    /// <summary>
    /// Maps a <see cref="RemotePeerConfiguration"/> domain model onto a fresh persistence entity.
    /// </summary>
    /// <param name="config">The remote peer configuration to convert.</param>
    public static RemotePeerEntity ToEntity(this RemotePeerConfiguration config)
    {
        var interests = config.InterestingCollections;
        return new RemotePeerEntity
        {
            Id = Guid.NewGuid().ToString(), // technical key; NodeId is the business key
            NodeId = config.NodeId,
            Address = config.Address,
            Type = (int)config.Type,
            IsEnabled = config.IsEnabled,
            // An empty interest list is persisted as "" rather than null.
            InterestsJson = interests.Count > 0 ? JsonSerializer.Serialize(interests) : ""
        };
    }

    /// <summary>
    /// Maps a persisted <see cref="RemotePeerEntity"/> back to its domain model.
    /// </summary>
    /// <param name="entity">The persisted remote peer entity to convert.</param>
    public static RemotePeerConfiguration ToDomain(this RemotePeerEntity entity)
    {
        var config = new RemotePeerConfiguration
        {
            NodeId = entity.NodeId,
            Address = entity.Address,
            Type = (PeerType)entity.Type,
            IsEnabled = entity.IsEnabled
        };
        // "" means "no interests recorded": keep the domain default in that case.
        if (!string.IsNullOrEmpty(entity.InterestsJson))
        {
            config.InterestingCollections =
                JsonSerializer.Deserialize<List<string>>(entity.InterestsJson) ?? [];
        }
        return config;
    }

    /// <summary>
    /// Maps a sequence of remote peer entities to domain models (lazily).
    /// </summary>
    /// <param name="entities">The remote peer entities to convert.</param>
    public static IEnumerable<RemotePeerConfiguration> ToDomain(this IEnumerable<RemotePeerEntity> entities) =>
        entities.Select(static e => e.ToDomain());

    #endregion

    #region PeerOplogConfirmationEntity Mappers

    /// <summary>
    /// Maps a <see cref="PeerOplogConfirmation"/> domain model onto a fresh persistence entity.
    /// </summary>
    /// <param name="confirmation">The confirmation to convert.</param>
    public static PeerOplogConfirmationEntity ToEntity(this PeerOplogConfirmation confirmation) => new()
    {
        Id = Guid.NewGuid().ToString(),
        PeerNodeId = confirmation.PeerNodeId,
        SourceNodeId = confirmation.SourceNodeId,
        ConfirmedWall = confirmation.ConfirmedWall,
        ConfirmedLogic = confirmation.ConfirmedLogic,
        ConfirmedHash = confirmation.ConfirmedHash,
        // DateTimeOffset is flattened to unix milliseconds for storage.
        LastConfirmedUtcMs = confirmation.LastConfirmedUtc.ToUnixTimeMilliseconds(),
        IsActive = confirmation.IsActive
    };

    /// <summary>
    /// Maps a persisted <see cref="PeerOplogConfirmationEntity"/> back to its domain model.
    /// </summary>
    /// <param name="entity">The entity to convert.</param>
    public static PeerOplogConfirmation ToDomain(this PeerOplogConfirmationEntity entity) => new()
    {
        PeerNodeId = entity.PeerNodeId,
        SourceNodeId = entity.SourceNodeId,
        ConfirmedWall = entity.ConfirmedWall,
        ConfirmedLogic = entity.ConfirmedLogic,
        ConfirmedHash = entity.ConfirmedHash,
        LastConfirmedUtc = DateTimeOffset.FromUnixTimeMilliseconds(entity.LastConfirmedUtcMs),
        IsActive = entity.IsActive
    };

    /// <summary>
    /// Maps a sequence of confirmation entities to domain models (lazily).
    /// </summary>
    /// <param name="entities">The entities to convert.</param>
    public static IEnumerable<PeerOplogConfirmation> ToDomain(this IEnumerable<PeerOplogConfirmationEntity> entities) =>
        entities.Select(static e => e.ToDomain());

    #endregion

    #region DocumentMetadataEntity Helpers

    /// <summary>
    /// Builds a <see cref="DocumentMetadataEntity"/> for sync-state tracking from a
    /// collection/key pair, an HLC timestamp, and a tombstone flag.
    /// </summary>
    /// <param name="collection">The collection name that owns the document.</param>
    /// <param name="key">The document key within the collection.</param>
    /// <param name="timestamp">The hybrid logical clock timestamp for the document state.</param>
    /// <param name="isDeleted">Indicates whether the document is marked as deleted.</param>
    public static DocumentMetadataEntity CreateDocumentMetadata(string collection, string key, HlcTimestamp timestamp, bool isDeleted = false) => new()
    {
        Id = Guid.NewGuid().ToString(),
        Collection = collection,
        Key = key,
        HlcPhysicalTime = timestamp.PhysicalTime,
        HlcLogicalCounter = timestamp.LogicalCounter,
        HlcNodeId = timestamp.NodeId,
        IsDeleted = isDeleted
    };

    #endregion
}

View File

@@ -0,0 +1,61 @@
using System.ComponentModel.DataAnnotations;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
/// <summary>
/// BLite persistence entity for a single operation log (oplog) entry.
/// </summary>
public class OplogEntity
{
    /// <summary>Technical primary key; an auto-generated GUID string.</summary>
    [Key]
    public string Id { get; set; } = string.Empty;

    /// <summary>Name of the collection the operation targets.</summary>
    public string Collection { get; set; } = string.Empty;

    /// <summary>Key of the document the operation targets.</summary>
    public string Key { get; set; } = string.Empty;

    /// <summary>Operation type code (0 = Put, 1 = Delete).</summary>
    public int Operation { get; set; }

    /// <summary>Serialized payload JSON; empty string for Delete operations.</summary>
    public string PayloadJson { get; set; } = string.Empty;

    /// <summary>Physical (wall-clock) component of the HLC timestamp.</summary>
    public long TimestampPhysicalTime { get; set; }

    /// <summary>Logical counter component of the HLC timestamp.</summary>
    public int TimestampLogicalCounter { get; set; }

    /// <summary>Node identifier component of the HLC timestamp.</summary>
    public string TimestampNodeId { get; set; } = string.Empty;

    /// <summary>Cryptographic hash of this entry; serves as the unique business key.</summary>
    public string Hash { get; set; } = string.Empty;

    /// <summary>Hash of the preceding entry in the oplog chain.</summary>
    public string PreviousHash { get; set; } = string.Empty;
}

View File

@@ -0,0 +1,50 @@
using System.ComponentModel.DataAnnotations;
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
/// <summary>
/// BLite persistence entity for a peer oplog confirmation watermark.
/// </summary>
public class PeerOplogConfirmationEntity
{
    /// <summary>Technical primary key for this entity.</summary>
    [Key]
    public string Id { get; set; } = string.Empty;

    /// <summary>Identifier of the peer node being tracked.</summary>
    public string PeerNodeId { get; set; } = string.Empty;

    /// <summary>Identifier of the source node this confirmation refers to.</summary>
    public string SourceNodeId { get; set; } = string.Empty;

    /// <summary>Physical wall-clock component of the confirmed HLC timestamp.</summary>
    public long ConfirmedWall { get; set; }

    /// <summary>Logical component of the confirmed HLC timestamp.</summary>
    public int ConfirmedLogic { get; set; }

    /// <summary>The confirmed hash value.</summary>
    public string ConfirmedHash { get; set; } = string.Empty;

    /// <summary>UTC instant of the last update, as unix epoch milliseconds.</summary>
    public long LastConfirmedUtcMs { get; set; }

    /// <summary>Whether the tracked peer is still considered active. Defaults to true.</summary>
    public bool IsActive { get; set; } = true;
}

Some files were not shown because too many files have changed in this diff Show More