Initial import of the CBDDC codebase with docs and tests. Add a .NET-focused gitignore to keep generated artifacts out of source control.
Some checks failed
CI / verify (push) Has been cancelled
Some checks failed
CI / verify (push) Has been cancelled
This commit is contained in:
179
src/ZB.MOM.WW.CBDDC.Core/Cache/DocumentCache.cs
Executable file
179
src/ZB.MOM.WW.CBDDC.Core/Cache/DocumentCache.cs
Executable file
@@ -0,0 +1,179 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Cache;
|
||||
|
||||
/// <summary>
/// Pairs a cached document with the linked-list node that records its
/// position in the LRU recency order.
/// </summary>
internal class CacheEntry
{
    /// <summary>
    /// Initializes a new instance of the <see cref="CacheEntry"/> class.
    /// </summary>
    /// <param name="document">The cached document.</param>
    /// <param name="node">The linked-list node used for LRU tracking.</param>
    public CacheEntry(Document document, LinkedListNode<string> node)
    {
        Document = document;
        Node = node;
    }

    /// <summary>
    /// Gets the cached document.
    /// </summary>
    public Document Document { get; }

    /// <summary>
    /// Gets the linked-list node that tracks this entry's recency.
    /// </summary>
    public LinkedListNode<string> Node { get; }
}
|
||||
|
||||
/// <summary>
/// In-memory LRU cache for documents. All operations are serialized on a
/// single lock; the least recently used entry is evicted when the cache
/// reaches the configured maximum size.
/// </summary>
public class DocumentCache : IDocumentCache
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
    private readonly Dictionary<string, CacheEntry> _cache = new();
    private readonly LinkedList<string> _lru = new();
    private readonly ILogger<DocumentCache> _logger;
    private readonly object _lock = new();

    // Statistics
    private long _hits = 0;
    private long _misses = 0;

    /// <summary>
    /// Initializes a new instance of the <see cref="DocumentCache"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The configuration provider used for cache size limits.</param>
    /// <param name="logger">The logger instance, or <see langword="null"/> for a no-op logger.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="peerNodeConfigurationProvider"/> is <see langword="null"/>.</exception>
    public DocumentCache(IPeerNodeConfigurationProvider peerNodeConfigurationProvider, ILogger<DocumentCache>? logger = null)
    {
        _peerNodeConfigurationProvider = peerNodeConfigurationProvider
            ?? throw new ArgumentNullException(nameof(peerNodeConfigurationProvider));
        _logger = logger ?? NullLogger<DocumentCache>.Instance;
    }

    /// <summary>
    /// Builds the composite cache key for a collection/key pair.
    /// </summary>
    private static string MakeKey(string collection, string key) => $"{collection}:{key}";

    /// <summary>
    /// Gets a document from cache, marking it as most recently used on a hit.
    /// </summary>
    /// <param name="collection">The document collection name.</param>
    /// <param name="key">The document key.</param>
    /// <returns>A task whose result is the cached document, or <see langword="null"/> if not found.</returns>
    public Task<Document?> Get(string collection, string key)
    {
        // Note: this method never awaits, so it is implemented synchronously
        // and wraps the result with Task.FromResult (avoids CS1998 and the
        // async state-machine overhead).
        lock (_lock)
        {
            var cacheKey = MakeKey(collection, key);

            if (_cache.TryGetValue(cacheKey, out var entry))
            {
                // Move to front (most recently used)
                _lru.Remove(entry.Node);
                _lru.AddFirst(entry.Node);

                _hits++;
                _logger.LogTrace("Cache hit for {Key}", cacheKey);
                return Task.FromResult<Document?>(entry.Document);
            }

            _misses++;
            _logger.LogTrace("Cache miss for {Key}", cacheKey);
            return Task.FromResult<Document?>(null);
        }
    }

    /// <summary>
    /// Adds or updates a document in cache, evicting the least recently used
    /// entry when the configured size limit is reached.
    /// </summary>
    /// <param name="collection">The document collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="document">The document to cache.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task Set(string collection, string key, Document document)
    {
        // The configuration read may be asynchronous, so it happens before
        // the lock is taken (never await while holding a lock).
        var peerConfig = await _peerNodeConfigurationProvider.GetConfiguration();

        lock (_lock)
        {
            var cacheKey = MakeKey(collection, key);

            // If already exists, update and move to front
            if (_cache.TryGetValue(cacheKey, out var existingEntry))
            {
                _lru.Remove(existingEntry.Node);
                var newNode = _lru.AddFirst(cacheKey);
                _cache[cacheKey] = new CacheEntry(document, newNode);
                _logger.LogTrace("Updated cache for {Key}", cacheKey);
                return;
            }

            // Evict the least recently used entry if the cache is full.
            // The null check guards against a non-positive size limit, where
            // the LRU list may be empty even though Count >= limit.
            if (_cache.Count >= peerConfig.MaxDocumentCacheSize && _lru.Last is not null)
            {
                var oldest = _lru.Last.Value;
                _lru.RemoveLast();
                _cache.Remove(oldest);
                _logger.LogTrace("Evicted oldest cache entry {Key}", oldest);
            }

            var node = _lru.AddFirst(cacheKey);
            _cache[cacheKey] = new CacheEntry(document, node);
            _logger.LogTrace("Added to cache: {Key}", cacheKey);
        }
    }

    /// <summary>
    /// Removes a document from cache if present.
    /// </summary>
    /// <param name="collection">The document collection name.</param>
    /// <param name="key">The document key.</param>
    public void Remove(string collection, string key)
    {
        lock (_lock)
        {
            var cacheKey = MakeKey(collection, key);

            if (_cache.TryGetValue(cacheKey, out var entry))
            {
                _lru.Remove(entry.Node);
                _cache.Remove(cacheKey);
                _logger.LogTrace("Removed from cache: {Key}", cacheKey);
            }
        }
    }

    /// <summary>
    /// Clears all cached documents. Hit/miss statistics are preserved.
    /// </summary>
    public void Clear()
    {
        lock (_lock)
        {
            var count = _cache.Count;
            _cache.Clear();
            _lru.Clear();
            _logger.LogInformation("Cleared cache ({Count} entries)", count);
        }
    }

    /// <summary>
    /// Gets cache statistics: cumulative hits and misses, the current entry
    /// count, and the hit rate (0 when no lookups have occurred).
    /// </summary>
    public (long Hits, long Misses, int Size, double HitRate) GetStatistics()
    {
        lock (_lock)
        {
            var total = _hits + _misses;
            var hitRate = total > 0 ? (double)_hits / total : 0;
            return (_hits, _misses, _cache.Count, hitRate);
        }
    }
}
|
||||
45
src/ZB.MOM.WW.CBDDC.Core/Cache/IDocumentCache.cs
Executable file
45
src/ZB.MOM.WW.CBDDC.Core/Cache/IDocumentCache.cs
Executable file
@@ -0,0 +1,45 @@
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Cache
{
    /// <summary>
    /// Defines operations for caching documents by collection and key.
    /// </summary>
    public interface IDocumentCache
    {
        /// <summary>
        /// Clears all cached documents.
        /// </summary>
        void Clear();

        /// <summary>
        /// Gets a cached document by collection and key.
        /// </summary>
        /// <param name="collection">The collection name.</param>
        /// <param name="key">The document key.</param>
        /// <returns>The cached document, or <see langword="null"/> if not found.</returns>
        Task<Document?> Get(string collection, string key);

        /// <summary>
        /// Gets cache hit/miss statistics.
        /// </summary>
        /// <returns>A tuple containing hits, misses, current size, and hit rate.</returns>
        (long Hits, long Misses, int Size, double HitRate) GetStatistics();

        /// <summary>
        /// Removes a cached document by collection and key. No-op when the
        /// entry is not present.
        /// </summary>
        /// <param name="collection">The collection name.</param>
        /// <param name="key">The document key.</param>
        void Remove(string collection, string key);

        /// <summary>
        /// Adds or updates a cached document.
        /// </summary>
        /// <param name="collection">The collection name.</param>
        /// <param name="key">The document key.</param>
        /// <param name="document">The document to cache.</param>
        /// <returns>A task that represents the asynchronous operation.</returns>
        Task Set(string collection, string key, Document document);
    }
}
|
||||
24
src/ZB.MOM.WW.CBDDC.Core/ChangesAppliedEventArgs.cs
Executable file
24
src/ZB.MOM.WW.CBDDC.Core/ChangesAppliedEventArgs.cs
Executable file
@@ -0,0 +1,24 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Event arguments raised after a batch of changes has been applied to the
/// peer store.
/// </summary>
public class ChangesAppliedEventArgs : EventArgs
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ChangesAppliedEventArgs"/> class.
    /// </summary>
    /// <param name="changes">The changes that were applied.</param>
    public ChangesAppliedEventArgs(IEnumerable<OplogEntry> changes) => Changes = changes;

    /// <summary>
    /// Gets the changes that were applied.
    /// </summary>
    public IEnumerable<OplogEntry> Changes { get; }
}
|
||||
82
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/CBDDCHealthCheck.cs
Executable file
82
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/CBDDCHealthCheck.cs
Executable file
@@ -0,0 +1,82 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics;
|
||||
|
||||
/// <summary>
/// Provides health check functionality by probing the oplog store and
/// summarizing the sync tracker's current status.
/// </summary>
public class CBDDCHealthCheck : ICBDDCHealthCheck
{
    private readonly IOplogStore _store;
    private readonly ISyncStatusTracker _syncTracker;
    private readonly ILogger<CBDDCHealthCheck> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCHealthCheck"/> class.
    /// </summary>
    /// <param name="store">The oplog store used for database health checks.</param>
    /// <param name="syncTracker">The tracker that provides synchronization status.</param>
    /// <param name="logger">The logger instance, or <see langword="null"/> for a no-op logger.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="store"/> or <paramref name="syncTracker"/> is <see langword="null"/>.</exception>
    public CBDDCHealthCheck(
        IOplogStore store,
        ISyncStatusTracker syncTracker,
        ILogger<CBDDCHealthCheck>? logger = null)
    {
        _store = store ?? throw new ArgumentNullException(nameof(store));
        _syncTracker = syncTracker ?? throw new ArgumentNullException(nameof(syncTracker));
        _logger = logger ?? NullLogger<CBDDCHealthCheck>.Instance;
    }

    /// <summary>
    /// Performs a comprehensive health check covering database reachability,
    /// network/peer status, and recent sync errors.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the health check.</param>
    /// <returns>The populated <see cref="HealthStatus"/>.</returns>
    /// <exception cref="OperationCanceledException">Thrown when <paramref name="cancellationToken"/> is cancelled.</exception>
    public async Task<HealthStatus> CheckAsync(CancellationToken cancellationToken = default)
    {
        var status = new HealthStatus();

        // Check database health
        try
        {
            // Try to get latest timestamp (simple database operation)
            var timestamp = await _store.GetLatestTimestampAsync(cancellationToken);
            status.DatabaseHealthy = true;
            _logger.LogDebug("Database health check passed (latest timestamp: {Timestamp})", timestamp);
        }
        catch (OperationCanceledException)
        {
            // A cancelled check is not a database failure — propagate it
            // instead of reporting the database as unhealthy.
            throw;
        }
        catch (Exception ex)
        {
            status.DatabaseHealthy = false;
            status.Errors.Add($"Database check failed: {ex.Message}");
            _logger.LogError(ex, "Database health check failed");
        }

        // Get sync status
        var syncStatus = _syncTracker.GetStatus();
        status.NetworkHealthy = syncStatus.IsOnline;
        status.ConnectedPeers = syncStatus.ActivePeers.Count(p => p.IsConnected);
        status.LastSyncTime = syncStatus.LastSyncTime;

        // Report the 5 most recent errors. The tracker's error history is
        // ordered oldest-first, so TakeLast (not Take) yields the latest ones.
        foreach (var error in syncStatus.SyncErrors.TakeLast(5))
        {
            status.Errors.Add($"{error.Timestamp:yyyy-MM-dd HH:mm:ss} - {error.Message}");
        }

        // Add metadata
        status.Metadata["TotalDocumentsSynced"] = syncStatus.TotalDocumentsSynced;
        status.Metadata["TotalBytesTransferred"] = syncStatus.TotalBytesTransferred;
        status.Metadata["ActivePeers"] = syncStatus.ActivePeers.Count;

        _logger.LogInformation("Health check completed: Database={DbHealth}, Network={NetHealth}, Peers={Peers}",
            status.DatabaseHealthy, status.NetworkHealthy, status.ConnectedPeers);

        return status;
    }
}
|
||||
148
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/DiagnosticsModels.cs
Executable file
148
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/DiagnosticsModels.cs
Executable file
@@ -0,0 +1,148 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics;
|
||||
|
||||
/// <summary>
/// Represents the health status of a CBDDC instance.
/// </summary>
public class HealthStatus
{
    /// <summary>
    /// Overall health: database and network are both healthy and no errors
    /// have been collected.
    /// </summary>
    public bool IsHealthy => DatabaseHealthy && NetworkHealthy && Errors.Count == 0;

    /// <summary>
    /// Indicates if the database is healthy.
    /// </summary>
    public bool DatabaseHealthy { get; set; }

    /// <summary>
    /// Indicates if network connectivity is available.
    /// </summary>
    public bool NetworkHealthy { get; set; }

    /// <summary>
    /// Number of currently connected peers.
    /// </summary>
    public int ConnectedPeers { get; set; }

    /// <summary>
    /// Timestamp of the last successful sync operation, if any.
    /// </summary>
    public DateTime? LastSyncTime { get; set; }

    /// <summary>
    /// List of recent errors.
    /// </summary>
    public List<string> Errors { get; set; } = new();

    /// <summary>
    /// Additional diagnostic information.
    /// </summary>
    public Dictionary<string, object> Metadata { get; set; } = new();
}
|
||||
|
||||
/// <summary>
/// Snapshot of the current synchronization state of the local node.
/// </summary>
public class SyncStatus
{
    /// <summary>
    /// Indicates if the node is currently online.
    /// </summary>
    public bool IsOnline { get; set; }

    /// <summary>
    /// Timestamp of the last sync operation, if any.
    /// </summary>
    public DateTime? LastSyncTime { get; set; }

    /// <summary>
    /// Number of pending operations in the offline queue.
    /// </summary>
    public int PendingOperations { get; set; }

    /// <summary>
    /// Total number of documents synced.
    /// </summary>
    public long TotalDocumentsSynced { get; set; }

    /// <summary>
    /// Total bytes transferred.
    /// </summary>
    public long TotalBytesTransferred { get; set; }

    /// <summary>
    /// List of active peer nodes.
    /// </summary>
    public List<PeerInfo> ActivePeers { get; set; } = new();

    /// <summary>
    /// Recent sync errors.
    /// </summary>
    public List<SyncError> SyncErrors { get; set; } = new();
}
|
||||
|
||||
/// <summary>
/// Connectivity and sync-history information about a peer node.
/// </summary>
public class PeerInfo
{
    /// <summary>
    /// Unique identifier of the peer.
    /// </summary>
    public string NodeId { get; set; } = "";

    /// <summary>
    /// Network address of the peer.
    /// </summary>
    public string Address { get; set; } = "";

    /// <summary>
    /// Indicates if the peer is currently connected.
    /// </summary>
    public bool IsConnected { get; set; }

    /// <summary>
    /// Last time the peer was seen.
    /// </summary>
    public DateTime LastSeen { get; set; }

    /// <summary>
    /// Number of successful syncs with this peer.
    /// </summary>
    public int SuccessfulSyncs { get; set; }

    /// <summary>
    /// Number of failed syncs with this peer.
    /// </summary>
    public int FailedSyncs { get; set; }
}
|
||||
|
||||
/// <summary>
/// A single synchronization error, with optional peer and code context.
/// </summary>
public class SyncError
{
    /// <summary>
    /// Timestamp when the error occurred.
    /// </summary>
    public DateTime Timestamp { get; set; }

    /// <summary>
    /// Error message.
    /// </summary>
    public string Message { get; set; } = "";

    /// <summary>
    /// Peer node ID if applicable.
    /// </summary>
    public string? PeerNodeId { get; set; }

    /// <summary>
    /// Error code.
    /// </summary>
    public string? ErrorCode { get; set; }
}
|
||||
15
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/ICBDDCHealthCheck.cs
Executable file
15
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/ICBDDCHealthCheck.cs
Executable file
@@ -0,0 +1,15 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics
{
    /// <summary>
    /// Defines a health check that reports the status of a CBDDC component.
    /// </summary>
    public interface ICBDDCHealthCheck
    {
        /// <summary>
        /// Performs a health check for the implementing component.
        /// </summary>
        /// <param name="cancellationToken">Cancellation token.</param>
        /// <returns>The resulting health status.</returns>
        Task<HealthStatus> CheckAsync(CancellationToken cancellationToken = default);
    }
}
|
||||
63
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/ISyncStatusTracker.cs
Executable file
63
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/ISyncStatusTracker.cs
Executable file
@@ -0,0 +1,63 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics
{
    /// <summary>
    /// Tracks synchronization status and peer health metrics.
    /// </summary>
    public interface ISyncStatusTracker
    {
        /// <summary>
        /// Removes peer entries that have been inactive longer than the specified threshold.
        /// </summary>
        /// <param name="inactiveThreshold">The inactivity threshold used to prune peers.</param>
        void CleanupInactivePeers(TimeSpan inactiveThreshold);

        /// <summary>
        /// Gets the current synchronization status snapshot.
        /// </summary>
        /// <returns>The current <see cref="SyncStatus"/>.</returns>
        SyncStatus GetStatus();

        /// <summary>
        /// Records an error encountered during synchronization.
        /// </summary>
        /// <param name="message">The error message.</param>
        /// <param name="peerNodeId">The related peer node identifier, if available.</param>
        /// <param name="errorCode">An optional error code.</param>
        void RecordError(string message, string? peerNodeId = null, string? errorCode = null);

        /// <summary>
        /// Records a failed operation for the specified peer.
        /// </summary>
        /// <param name="nodeId">The peer node identifier.</param>
        void RecordPeerFailure(string nodeId);

        /// <summary>
        /// Records a successful operation for the specified peer.
        /// </summary>
        /// <param name="nodeId">The peer node identifier.</param>
        void RecordPeerSuccess(string nodeId);

        /// <summary>
        /// Records synchronization throughput metrics.
        /// </summary>
        /// <param name="documentCount">The number of synchronized documents.</param>
        /// <param name="bytesTransferred">The number of bytes transferred.</param>
        void RecordSync(int documentCount, long bytesTransferred);

        /// <summary>
        /// Sets whether the local node is currently online.
        /// </summary>
        /// <param name="isOnline">A value indicating whether the node is online.</param>
        void SetOnlineStatus(bool isOnline);

        /// <summary>
        /// Updates peer connectivity details, registering the peer if it is
        /// not yet known.
        /// </summary>
        /// <param name="nodeId">The peer node identifier.</param>
        /// <param name="address">The peer network address.</param>
        /// <param name="isConnected">A value indicating whether the peer is connected.</param>
        void UpdatePeer(string nodeId, string address, bool isConnected);
    }
}
|
||||
198
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/SyncStatusTracker.cs
Executable file
198
src/ZB.MOM.WW.CBDDC.Core/Diagnostics/SyncStatusTracker.cs
Executable file
@@ -0,0 +1,198 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Diagnostics;
|
||||
|
||||
/// <summary>
/// Tracks synchronization status and provides diagnostics. All state is
/// guarded by a single lock; snapshots are shallow copies of internal lists.
/// </summary>
public class SyncStatusTracker : ISyncStatusTracker
{
    // Upper bound on the rolling error history.
    private const int MaxErrorHistory = 50;

    private readonly ILogger<SyncStatusTracker> _logger;
    private readonly object _gate = new();

    private readonly List<PeerInfo> _peers = new();
    private readonly Queue<SyncError> _errorHistory = new();  // oldest-first
    private bool _isOnline;
    private DateTime? _lastSyncTime;
    private long _totalDocumentsSynced;
    private long _totalBytesTransferred;

    /// <summary>
    /// Initializes a new instance of the <see cref="SyncStatusTracker"/> class.
    /// </summary>
    /// <param name="logger">Optional logger instance.</param>
    public SyncStatusTracker(ILogger<SyncStatusTracker>? logger = null)
        => _logger = logger ?? NullLogger<SyncStatusTracker>.Instance;

    /// <summary>
    /// Updates online status, logging only on an actual transition.
    /// </summary>
    /// <param name="isOnline">Whether the node is currently online.</param>
    public void SetOnlineStatus(bool isOnline)
    {
        lock (_gate)
        {
            if (_isOnline == isOnline)
            {
                return; // No change; avoid noisy logging.
            }

            _isOnline = isOnline;
            _logger.LogInformation("Status changed to {Status}", isOnline ? "Online" : "Offline");
        }
    }

    /// <summary>
    /// Records a successful sync operation and its throughput.
    /// </summary>
    /// <param name="documentCount">The number of documents synchronized.</param>
    /// <param name="bytesTransferred">The number of bytes transferred.</param>
    public void RecordSync(int documentCount, long bytesTransferred)
    {
        lock (_gate)
        {
            _lastSyncTime = DateTime.UtcNow;
            _totalDocumentsSynced += documentCount;
            _totalBytesTransferred += bytesTransferred;

            _logger.LogDebug("Synced {Count} documents ({Bytes} bytes)", documentCount, bytesTransferred);
        }
    }

    /// <summary>
    /// Records a sync error, trimming the history to the most recent
    /// <see cref="MaxErrorHistory"/> entries.
    /// </summary>
    /// <param name="message">The error message.</param>
    /// <param name="peerNodeId">The related peer node identifier, if available.</param>
    /// <param name="errorCode">The error code, if available.</param>
    public void RecordError(string message, string? peerNodeId = null, string? errorCode = null)
    {
        lock (_gate)
        {
            _errorHistory.Enqueue(new SyncError
            {
                Timestamp = DateTime.UtcNow,
                Message = message,
                PeerNodeId = peerNodeId,
                ErrorCode = errorCode
            });

            // Drop the oldest entries once the cap is exceeded.
            while (_errorHistory.Count > MaxErrorHistory)
            {
                _errorHistory.Dequeue();
            }

            _logger.LogWarning("Sync error recorded: {Message} (Peer: {Peer})", message, peerNodeId ?? "N/A");
        }
    }

    /// <summary>
    /// Updates peer information, registering the peer on first sight.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    /// <param name="address">The peer address.</param>
    /// <param name="isConnected">Whether the peer is currently connected.</param>
    public void UpdatePeer(string nodeId, string address, bool isConnected)
    {
        lock (_gate)
        {
            var existing = _peers.Find(p => p.NodeId == nodeId);

            if (existing is null)
            {
                _peers.Add(new PeerInfo
                {
                    NodeId = nodeId,
                    Address = address,
                    IsConnected = isConnected,
                    LastSeen = DateTime.UtcNow
                });
                _logger.LogInformation("New peer discovered: {NodeId} at {Address}", nodeId, address);
                return;
            }

            existing.Address = address;
            existing.IsConnected = isConnected;
            existing.LastSeen = DateTime.UtcNow;
        }
    }

    /// <summary>
    /// Records a successful sync with a peer; no-op for unknown peers.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    public void RecordPeerSuccess(string nodeId)
    {
        lock (_gate)
        {
            if (_peers.Find(p => p.NodeId == nodeId) is { } peer)
            {
                peer.SuccessfulSyncs++;
            }
        }
    }

    /// <summary>
    /// Records a failed sync with a peer; no-op for unknown peers.
    /// </summary>
    /// <param name="nodeId">The peer node identifier.</param>
    public void RecordPeerFailure(string nodeId)
    {
        lock (_gate)
        {
            if (_peers.Find(p => p.NodeId == nodeId) is { } peer)
            {
                peer.FailedSyncs++;
            }
        }
    }

    /// <summary>
    /// Gets a snapshot of the current sync status. The returned lists are
    /// copies, but the <see cref="PeerInfo"/> elements are shared instances.
    /// </summary>
    public SyncStatus GetStatus()
    {
        lock (_gate)
        {
            return new SyncStatus
            {
                IsOnline = _isOnline,
                LastSyncTime = _lastSyncTime,
                PendingOperations = 0, // Will be set by caller if offline queue is available
                ActivePeers = _peers.ToList(),
                SyncErrors = _errorHistory.ToList(),
                TotalDocumentsSynced = _totalDocumentsSynced,
                TotalBytesTransferred = _totalBytesTransferred
            };
        }
    }

    /// <summary>
    /// Removes peers whose <see cref="PeerInfo.LastSeen"/> is older than the
    /// given threshold.
    /// </summary>
    /// <param name="inactiveThreshold">The inactivity threshold used to remove peers.</param>
    public void CleanupInactivePeers(TimeSpan inactiveThreshold)
    {
        lock (_gate)
        {
            var cutoff = DateTime.UtcNow - inactiveThreshold;
            var removedCount = _peers.RemoveAll(p => p.LastSeen < cutoff);

            if (removedCount > 0)
            {
                _logger.LogInformation("Removed {Count} inactive peers", removedCount);
            }
        }
    }
}
|
||||
83
src/ZB.MOM.WW.CBDDC.Core/Document.cs
Executable file
83
src/ZB.MOM.WW.CBDDC.Core/Document.cs
Executable file
@@ -0,0 +1,83 @@
|
||||
using ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Represents a stored document and its synchronization metadata.
/// </summary>
public class Document
{
    /// <summary>
    /// Gets the collection that contains the document.
    /// </summary>
    public string Collection { get; private set; }

    /// <summary>
    /// Gets the document key.
    /// </summary>
    public string Key { get; private set; }

    /// <summary>
    /// Gets the document content.
    /// </summary>
    public JsonElement Content { get; private set; }

    /// <summary>
    /// Gets the timestamp of the latest applied update.
    /// </summary>
    public HlcTimestamp UpdatedAt { get; private set; }

    /// <summary>
    /// Gets a value indicating whether the document is deleted.
    /// </summary>
    public bool IsDeleted { get; private set; }

    /// <summary>
    /// Initializes a new instance of the <see cref="Document"/> class.
    /// </summary>
    /// <param name="collection">The collection that contains the document.</param>
    /// <param name="key">The document key.</param>
    /// <param name="content">The document content.</param>
    /// <param name="updatedAt">The timestamp of the latest applied update.</param>
    /// <param name="isDeleted">Whether the document is marked as deleted.</param>
    public Document(string collection, string key, JsonElement content, HlcTimestamp updatedAt, bool isDeleted)
    {
        Collection = collection;
        Key = key;
        Content = content;
        UpdatedAt = updatedAt;
        IsDeleted = isDeleted;
    }

    /// <summary>
    /// Merges a remote operation into the current document. With no resolver,
    /// last-write-wins applies (timestamp ties favor the incoming entry);
    /// otherwise the resolver decides whether and what to apply.
    /// </summary>
    /// <param name="oplogEntry">The remote operation to merge.</param>
    /// <param name="resolver">An optional conflict resolver for custom merge behavior.</param>
    public void Merge(OplogEntry oplogEntry, IConflictResolver? resolver = null)
    {
        // Ignore null entries and entries addressed to a different document.
        if (oplogEntry == null || oplogEntry.Collection != Collection || oplogEntry.Key != Key)
        {
            return;
        }

        if (resolver != null)
        {
            // Delegate the decision to the custom resolver.
            var resolution = resolver.Resolve(this, oplogEntry);
            if (resolution.ShouldApply && resolution.MergedDocument != null)
            {
                Content = resolution.MergedDocument.Content;
                UpdatedAt = resolution.MergedDocument.UpdatedAt;
                IsDeleted = resolution.MergedDocument.IsDeleted;
            }
            return;
        }

        // Default policy: last-write-wins.
        if (UpdatedAt <= oplogEntry.Timestamp)
        {
            Content = oplogEntry.Payload ?? default;
            UpdatedAt = oplogEntry.Timestamp;
            IsDeleted = oplogEntry.Operation == OperationType.Delete;
        }
    }
}
|
||||
189
src/ZB.MOM.WW.CBDDC.Core/Exceptions/CBDDCExceptions.cs
Executable file
189
src/ZB.MOM.WW.CBDDC.Core/Exceptions/CBDDCExceptions.cs
Executable file
@@ -0,0 +1,189 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Exceptions;
|
||||
|
||||
/// <summary>
/// Base exception for all CBDDC-related errors, carrying a stable error code.
/// </summary>
public class CBDDCException : Exception
{
    /// <summary>
    /// Error code for programmatic error handling.
    /// </summary>
    public string ErrorCode { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCException"/> class.
    /// </summary>
    /// <param name="errorCode">The application-specific error code.</param>
    /// <param name="message">The exception message.</param>
    public CBDDCException(string errorCode, string message)
        : base(message) => ErrorCode = errorCode;

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCException"/> class.
    /// </summary>
    /// <param name="errorCode">The application-specific error code.</param>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public CBDDCException(string errorCode, string message, Exception innerException)
        : base(message, innerException) => ErrorCode = errorCode;
}
|
||||
|
||||
/// <summary>
/// Exception thrown when network operations fail. Always carries the
/// "NETWORK_ERROR" error code.
/// </summary>
public class NetworkException : CBDDCException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="NetworkException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public NetworkException(string message)
        : base("NETWORK_ERROR", message)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="NetworkException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public NetworkException(string message, Exception innerException)
        : base("NETWORK_ERROR", message, innerException)
    {
    }
}
|
||||
|
||||
/// <summary>
/// Exception thrown when persistence operations fail. Always carries the
/// "PERSISTENCE_ERROR" error code.
/// </summary>
public class PersistenceException : CBDDCException
{
    // Single source of truth for this exception's error code.
    private const string Code = "PERSISTENCE_ERROR";

    /// <summary>
    /// Initializes a new instance of the <see cref="PersistenceException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public PersistenceException(string message)
        : base(Code, message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="PersistenceException"/> class that wraps a causing exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public PersistenceException(string message, Exception innerException)
        : base(Code, message, innerException) { }
}
|
||||
|
||||
/// <summary>
/// Exception thrown when synchronization operations fail. Always carries the
/// "SYNC_ERROR" error code.
/// </summary>
public class SyncException : CBDDCException
{
    // Single source of truth for this exception's error code.
    private const string Code = "SYNC_ERROR";

    /// <summary>
    /// Initializes a new instance of the <see cref="SyncException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public SyncException(string message)
        : base(Code, message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="SyncException"/> class that wraps a causing exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    public SyncException(string message, Exception innerException)
        : base(Code, message, innerException) { }
}
|
||||
|
||||
/// <summary>
/// Exception thrown when configuration is invalid. Carries the "CONFIG_ERROR" error code.
/// </summary>
public class ConfigurationException : CBDDCException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ConfigurationException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public ConfigurationException(string message)
        : base("CONFIG_ERROR", message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="ConfigurationException"/> class that wraps a causing exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    // Added for consistency: every other CBDDC exception offers an inner-exception
    // overload, and configuration failures frequently originate from parse/IO errors.
    public ConfigurationException(string message, Exception innerException)
        : base("CONFIG_ERROR", message, innerException) { }
}
|
||||
|
||||
/// <summary>
/// Exception thrown when database corruption is detected.
/// Specialization of <see cref="PersistenceException"/> (same "PERSISTENCE_ERROR" code);
/// the distinct type lets callers catch corruption separately from ordinary persistence failures.
/// </summary>
public class DatabaseCorruptionException : PersistenceException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseCorruptionException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public DatabaseCorruptionException(string message)
        : base(message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="DatabaseCorruptionException"/> class that wraps a causing exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception (e.g. the underlying storage error).</param>
    public DatabaseCorruptionException(string message, Exception innerException)
        : base(message, innerException) { }
}
|
||||
|
||||
/// <summary>
/// Exception thrown when a timeout occurs. Carries the "TIMEOUT_ERROR" error code.
/// </summary>
/// <remarks>
/// NOTE(review): this type shadows <see cref="System.TimeoutException"/>; code that has
/// both namespaces in scope must disambiguate. Renaming would be a breaking change,
/// so the name is kept.
/// </remarks>
public class TimeoutException : CBDDCException
{
    /// <summary>
    /// Gets the name of the operation that timed out.
    /// </summary>
    public string Operation { get; }

    /// <summary>
    /// Gets the timeout in milliseconds.
    /// </summary>
    public int TimeoutMs { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="TimeoutException"/> class.
    /// </summary>
    /// <param name="operation">The operation that timed out.</param>
    /// <param name="timeoutMs">The timeout in milliseconds.</param>
    public TimeoutException(string operation, int timeoutMs)
        : base("TIMEOUT_ERROR", $"Operation '{operation}' timed out after {timeoutMs}ms")
    {
        // Exposed as properties so handlers can react programmatically (retry with a
        // longer timeout, etc.) instead of parsing the message — consistent with
        // DocumentNotFoundException exposing Key/Collection.
        Operation = operation;
        TimeoutMs = timeoutMs;
    }
}
|
||||
|
||||
|
||||
/// <summary>
/// Exception thrown when a document lookup misses: no document with the requested
/// key exists in the target collection.
/// </summary>
public class DocumentNotFoundException : PersistenceException
{
    /// <summary>
    /// Gets the collection where the document was searched.
    /// </summary>
    public string Collection { get; }

    /// <summary>
    /// Gets the document key that was not found.
    /// </summary>
    public string Key { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="DocumentNotFoundException"/> class.
    /// </summary>
    /// <param name="collection">The collection where the document was searched.</param>
    /// <param name="key">The document key that was not found.</param>
    public DocumentNotFoundException(string collection, string key)
        : base($"Document with key '{key}' not found in collection '{collection}'.")
    {
        Key = key;
        Collection = collection;
    }
}
|
||||
|
||||
/// <summary>
/// Exception thrown when a concurrency conflict occurs during persistence operations.
/// </summary>
public class CBDDCConcurrencyException : PersistenceException
{
    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCConcurrencyException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public CBDDCConcurrencyException(string message) : base(message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCConcurrencyException"/> class that wraps a causing exception.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The exception that caused the current exception.</param>
    // Added for consistency with its sibling DatabaseCorruptionException: concurrency
    // conflicts are often surfaced by an underlying store exception worth preserving.
    public CBDDCConcurrencyException(string message, Exception innerException)
        : base(message, innerException) { }
}
|
||||
137
src/ZB.MOM.WW.CBDDC.Core/HlcTimestamp.cs
Executable file
137
src/ZB.MOM.WW.CBDDC.Core/HlcTimestamp.cs
Executable file
@@ -0,0 +1,137 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Represents a Hybrid Logical Clock timestamp.
/// Provides a Total Ordering of events in a distributed system.
/// Implements value semantics and comparable interfaces.
/// </summary>
public readonly struct HlcTimestamp : IComparable<HlcTimestamp>, IComparable, IEquatable<HlcTimestamp>
{
    /// <summary>
    /// Gets the physical time component of the timestamp.
    /// </summary>
    public long PhysicalTime { get; }

    /// <summary>
    /// Gets the logical counter component used to order events with equal physical time.
    /// </summary>
    public int LogicalCounter { get; }

    /// <summary>
    /// Gets the node identifier that produced this timestamp.
    /// </summary>
    public string NodeId { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="HlcTimestamp"/> struct.
    /// </summary>
    /// <param name="physicalTime">The physical time component.</param>
    /// <param name="logicalCounter">The logical counter component.</param>
    /// <param name="nodeId">The node identifier.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="nodeId"/> is null.</exception>
    public HlcTimestamp(long physicalTime, int logicalCounter, string nodeId)
    {
        PhysicalTime = physicalTime;
        LogicalCounter = logicalCounter;
        NodeId = nodeId ?? throw new ArgumentNullException(nameof(nodeId));
    }

    /// <summary>
    /// Compares two timestamps to establish a total order.
    /// Order: PhysicalTime -> LogicalCounter -> NodeId (lexicographical tie-breaker).
    /// </summary>
    /// <param name="other">The other timestamp to compare with this instance.</param>
    /// <returns>
    /// A value less than zero if this instance is earlier than <paramref name="other"/>, zero if they are equal,
    /// or greater than zero if this instance is later than <paramref name="other"/>.
    /// </returns>
    public int CompareTo(HlcTimestamp other)
    {
        int timeComparison = PhysicalTime.CompareTo(other.PhysicalTime);
        if (timeComparison != 0) return timeComparison;

        int counterComparison = LogicalCounter.CompareTo(other.LogicalCounter);
        if (counterComparison != 0) return counterComparison;

        // Use Ordinal comparison for consistent tie-breaking across cultures/platforms
        return string.Compare(NodeId, other.NodeId, StringComparison.Ordinal);
    }

    /// <summary>
    /// Compares this instance with another object.
    /// </summary>
    /// <param name="obj">The object to compare with this instance.</param>
    /// <returns>
    /// A value less than zero if this instance is earlier than <paramref name="obj"/>, zero if equal, or greater
    /// than zero if later.
    /// </returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="obj"/> is not an <see cref="HlcTimestamp"/>.</exception>
    public int CompareTo(object? obj)
    {
        if (obj is null) return 1;
        if (obj is HlcTimestamp other) return CompareTo(other);
        throw new ArgumentException($"Object must be of type {nameof(HlcTimestamp)}");
    }

    /// <summary>
    /// Determines whether this instance and another timestamp are equal.
    /// </summary>
    /// <param name="other">The other timestamp to compare.</param>
    /// <returns><see langword="true"/> if the timestamps are equal; otherwise, <see langword="false"/>.</returns>
    public bool Equals(HlcTimestamp other)
    {
        return PhysicalTime == other.PhysicalTime &&
               LogicalCounter == other.LogicalCounter &&
               string.Equals(NodeId, other.NodeId, StringComparison.Ordinal);
    }

    /// <inheritdoc />
    public override bool Equals(object? obj)
    {
        return obj is HlcTimestamp other && Equals(other);
    }

    /// <inheritdoc />
    public override int GetHashCode()
    {
        unchecked
        {
            var hashCode = PhysicalTime.GetHashCode();
            hashCode = (hashCode * 397) ^ LogicalCounter;
            // Ensure HashCode uses the same comparison logic as Equals/CompareTo
            // Handle null NodeId gracefully (possible via default(HlcTimestamp))
            hashCode = (hashCode * 397) ^ (NodeId != null ? StringComparer.Ordinal.GetHashCode(NodeId) : 0);
            return hashCode;
        }
    }

    /// <summary>Determines whether two timestamps are equal.</summary>
    public static bool operator ==(HlcTimestamp left, HlcTimestamp right) => left.Equals(right);

    /// <summary>Determines whether two timestamps are not equal.</summary>
    public static bool operator !=(HlcTimestamp left, HlcTimestamp right) => !left.Equals(right);

    // Standard comparison operators making usage in SyncOrchestrator cleaner (e.g., remote > local)

    /// <summary>Determines whether <paramref name="left"/> is strictly earlier than <paramref name="right"/>.</summary>
    public static bool operator <(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) < 0;

    /// <summary>Determines whether <paramref name="left"/> is earlier than or equal to <paramref name="right"/>.</summary>
    public static bool operator <=(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) <= 0;

    /// <summary>Determines whether <paramref name="left"/> is strictly later than <paramref name="right"/>.</summary>
    public static bool operator >(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) > 0;

    /// <summary>Determines whether <paramref name="left"/> is later than or equal to <paramref name="right"/>.</summary>
    public static bool operator >=(HlcTimestamp left, HlcTimestamp right) => left.CompareTo(right) >= 0;

    /// <inheritdoc />
    public override string ToString() => FormattableString.Invariant($"{PhysicalTime}:{LogicalCounter}:{NodeId}");

    /// <summary>
    /// Parses a timestamp string produced by <see cref="ToString"/>.
    /// </summary>
    /// <param name="s">The string to parse, in the format "PhysicalTime:LogicalCounter:NodeId".</param>
    /// <returns>The parsed <see cref="HlcTimestamp"/>.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="s"/> is null or empty.</exception>
    /// <exception cref="FormatException">Thrown when <paramref name="s"/> is not in the expected format.</exception>
    public static HlcTimestamp Parse(string s)
    {
        if (string.IsNullOrEmpty(s)) throw new ArgumentNullException(nameof(s));
        // Split into at most 3 parts so a NodeId that itself contains ':' (e.g.
        // "host:9000") survives the ToString -> Parse round-trip; a plain Split(':')
        // would reject such strings with "Invalid HlcTimestamp format".
        var parts = s.Split(':', 3);
        if (parts.Length != 3) throw new FormatException("Invalid HlcTimestamp format. Expected 'PhysicalTime:LogicalCounter:NodeId'.");
        // Parse with the invariant culture to mirror the invariant formatting in ToString.
        if (!long.TryParse(parts[0], System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var physicalTime))
            throw new FormatException("Invalid PhysicalTime component in HlcTimestamp.");
        if (!int.TryParse(parts[1], System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var logicalCounter))
            throw new FormatException("Invalid LogicalCounter component in HlcTimestamp.");
        var nodeId = parts[2];
        return new HlcTimestamp(physicalTime, logicalCounter, nodeId);
    }
}
|
||||
60
src/ZB.MOM.WW.CBDDC.Core/Management/IPeerManagementService.cs
Executable file
60
src/ZB.MOM.WW.CBDDC.Core/Management/IPeerManagementService.cs
Executable file
@@ -0,0 +1,60 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Management;
|
||||
|
||||
/// <summary>
/// Service for managing remote peer configurations.
/// Provides CRUD operations for adding, removing, enabling/disabling remote cloud nodes.
/// </summary>
public interface IPeerManagementService
{
    /// <summary>
    /// Adds a static remote peer with simple authentication.
    /// </summary>
    /// <param name="nodeId">Unique identifier for the remote peer.</param>
    /// <param name="address">Network address (hostname:port) of the remote peer.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task AddStaticPeerAsync(string nodeId, string address, CancellationToken cancellationToken = default);

    /// <summary>
    /// Removes a remote peer configuration.
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to remove.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Removes confirmation tracking for a peer and optionally removes static remote configuration.
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to untrack.</param>
    /// <param name="removeRemoteConfig">When true, also removes static remote peer configuration.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task RemovePeerTrackingAsync(
        string nodeId,
        bool removeRemoteConfig = true,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Retrieves all configured remote peers.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Collection of remote peer configurations.</returns>
    Task<IEnumerable<RemotePeerConfiguration>> GetAllRemotePeersAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Enables synchronization with a remote peer.
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to enable.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task EnablePeerAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Disables synchronization with a remote peer (keeps configuration).
    /// </summary>
    /// <param name="nodeId">Unique identifier of the peer to disable.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task DisablePeerAsync(string nodeId, CancellationToken cancellationToken = default);
}
|
||||
185
src/ZB.MOM.WW.CBDDC.Core/Management/PeerManagementService.cs
Executable file
185
src/ZB.MOM.WW.CBDDC.Core/Management/PeerManagementService.cs
Executable file
@@ -0,0 +1,185 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Management;
|
||||
|
||||
/// <summary>
/// Implementation of peer management service.
/// Provides CRUD operations for managing remote peer configurations.
///
/// Remote peer configurations are stored in a synchronized collection and automatically
/// replicated across all nodes in the cluster. Any change made on one node will be
/// synchronized to all other nodes through the normal CBDDC sync process.
/// </summary>
public class PeerManagementService : IPeerManagementService
{
    private readonly IPeerConfigurationStore _store;
    private readonly IPeerOplogConfirmationStore _peerOplogConfirmationStore;
    private readonly ILogger<PeerManagementService> _logger;

    /// <summary>
    /// Initializes a new instance of the PeerManagementService class.
    /// </summary>
    /// <param name="store">Database instance for accessing the synchronized collection.</param>
    /// <param name="peerOplogConfirmationStore">Peer confirmation tracking store.</param>
    /// <param name="logger">Logger instance; a no-op logger is used when null.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="store"/> or <paramref name="peerOplogConfirmationStore"/> is null.</exception>
    public PeerManagementService(
        IPeerConfigurationStore store,
        IPeerOplogConfirmationStore peerOplogConfirmationStore,
        ILogger<PeerManagementService>? logger = null)
    {
        _store = store ?? throw new ArgumentNullException(nameof(store));
        _peerOplogConfirmationStore = peerOplogConfirmationStore ?? throw new ArgumentNullException(nameof(peerOplogConfirmationStore));
        _logger = logger ?? NullLogger<PeerManagementService>.Instance;
    }

    /// <summary>
    /// Adds or updates a static remote peer configuration.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer.</param>
    /// <param name="address">The peer network address in host:port format.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="nodeId"/> or <paramref name="address"/> is invalid.</exception>
    public async Task AddStaticPeerAsync(string nodeId, string address, CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);
        ValidateAddress(address);

        var config = new RemotePeerConfiguration
        {
            NodeId = nodeId,
            Address = address,
            Type = PeerType.StaticRemote,
            IsEnabled = true
        };

        // Library code: avoid capturing a caller's synchronization context.
        await _store.SaveRemotePeerAsync(config, cancellationToken).ConfigureAwait(false);
        _logger.LogInformation("Added static remote peer: {NodeId} at {Address} (will sync to all cluster nodes)", nodeId, address);
    }

    /// <summary>
    /// Removes a remote peer configuration.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to remove.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    public async Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        // Delegates to the general tracking removal with config removal enabled.
        await RemovePeerTrackingAsync(nodeId, removeRemoteConfig: true, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Removes peer tracking and optionally removes remote peer configuration.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to untrack.</param>
    /// <param name="removeRemoteConfig">When true, also removes static remote peer configuration.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="nodeId"/> is null or whitespace.</exception>
    public async Task RemovePeerTrackingAsync(
        string nodeId,
        bool removeRemoteConfig = true,
        CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);

        await _peerOplogConfirmationStore.RemovePeerTrackingAsync(nodeId, cancellationToken).ConfigureAwait(false);

        if (removeRemoteConfig)
        {
            await _store.RemoveRemotePeerAsync(nodeId, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Removed remote peer and tracking: {NodeId} (will sync to all cluster nodes)", nodeId);
            return;
        }

        _logger.LogInformation("Removed peer tracking only: {NodeId}", nodeId);
    }

    /// <summary>
    /// Gets all configured remote peers.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains remote peer configurations.</returns>
    public async Task<IEnumerable<RemotePeerConfiguration>> GetAllRemotePeersAsync(CancellationToken cancellationToken = default)
    {
        return await _store.GetRemotePeersAsync(cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Enables a configured remote peer.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to enable.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="nodeId"/> is null or whitespace.</exception>
    public async Task EnablePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);

        var peer = await _store.GetRemotePeerAsync(nodeId, cancellationToken).ConfigureAwait(false);

        if (peer == null)
        {
            return; // Peer not found, nothing to enable
        }

        // Write back only on an actual state change to avoid redundant sync traffic.
        if (!peer.IsEnabled)
        {
            peer.IsEnabled = true;
            await _store.SaveRemotePeerAsync(peer, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Enabled remote peer: {NodeId} (will sync to all cluster nodes)", nodeId);
        }
    }

    /// <summary>
    /// Disables a configured remote peer.
    /// </summary>
    /// <param name="nodeId">The unique node identifier of the peer to disable.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    /// <exception cref="ArgumentException">Thrown when <paramref name="nodeId"/> is null or whitespace.</exception>
    public async Task DisablePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        ValidateNodeId(nodeId);

        var peer = await _store.GetRemotePeerAsync(nodeId, cancellationToken).ConfigureAwait(false);

        if (peer == null)
        {
            return; // Peer not found, nothing to disable
        }

        // Write back only on an actual state change to avoid redundant sync traffic.
        if (peer.IsEnabled)
        {
            peer.IsEnabled = false;
            await _store.SaveRemotePeerAsync(peer, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Disabled remote peer: {NodeId} (will sync to all cluster nodes)", nodeId);
        }
    }

    // Guard: nodeId must be a non-empty, non-whitespace identifier.
    private static void ValidateNodeId(string nodeId)
    {
        if (string.IsNullOrWhiteSpace(nodeId))
        {
            throw new ArgumentException("NodeId cannot be null or empty", nameof(nodeId));
        }
    }

    // Guard: address must be non-empty and contain a host:port separator.
    private static void ValidateAddress(string address)
    {
        if (string.IsNullOrWhiteSpace(address))
        {
            throw new ArgumentException("Address cannot be null or empty", nameof(address));
        }

        // Basic format validation (should contain host:port)
        if (!address.Contains(':'))
        {
            throw new ArgumentException("Address must be in format 'host:port'", nameof(address));
        }
    }
}
|
||||
38
src/ZB.MOM.WW.CBDDC.Core/Network/IPeerNodeConfigurationProvider.cs
Executable file
38
src/ZB.MOM.WW.CBDDC.Core/Network/IPeerNodeConfigurationProvider.cs
Executable file
@@ -0,0 +1,38 @@
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
/// <summary>
/// Represents a method that handles peer node configuration change notifications.
/// </summary>
/// <param name="sender">The source of the event.</param>
/// <param name="newConfig">The updated peer node configuration.</param>
public delegate void PeerNodeConfigurationChangedEventHandler(object? sender, PeerNodeConfiguration newConfig);

/// <summary>
/// Defines a contract for retrieving and monitoring configuration settings for a peer node.
/// </summary>
/// <remarks>Implementations of this interface provide access to the current configuration and notify subscribers
/// when configuration changes occur. This interface is typically used by components that require up-to-date
/// configuration information for peer-to-peer networking scenarios.</remarks>
public interface IPeerNodeConfigurationProvider
{
    /// <summary>
    /// Asynchronously retrieves the current configuration settings for the peer node.
    /// </summary>
    /// <returns>
    /// A task that represents the asynchronous operation. The task result contains the current
    /// <see cref="PeerNodeConfiguration"/>.
    /// </returns>
    // NOTE(review): by .NET convention a Task-returning method would carry an "Async"
    // suffix; renaming is a breaking change for implementers, so the name is kept.
    // Redundant 'public' modifiers removed — interface members are public by default.
    Task<PeerNodeConfiguration> GetConfiguration();

    /// <summary>
    /// Occurs when the configuration of the peer node changes.
    /// </summary>
    /// <remarks>Subscribe to this event to be notified when any configuration settings for the peer node are
    /// modified. Event handlers can use this notification to update dependent components or respond to configuration
    /// changes as needed.</remarks>
    event PeerNodeConfigurationChangedEventHandler? ConfigurationChanged;
}
|
||||
20
src/ZB.MOM.WW.CBDDC.Core/Network/NodeRole.cs
Executable file
20
src/ZB.MOM.WW.CBDDC.Core/Network/NodeRole.cs
Executable file
@@ -0,0 +1,20 @@
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
/// <summary>
/// Defines the role of a node in the distributed network cluster.
/// </summary>
public enum NodeRole
{
    /// <summary>
    /// Standard member node that synchronizes only within the local area network.
    /// Does not connect to cloud remote nodes.
    /// This is the default role (value 0) for newly discovered nodes.
    /// </summary>
    Member = 0,

    /// <summary>
    /// Leader node that acts as a gateway to cloud remote nodes.
    /// Elected via the Bully algorithm (lexicographically smallest NodeId).
    /// Responsible for synchronizing local cluster changes with cloud nodes.
    /// </summary>
    CloudGateway = 1
}
|
||||
76
src/ZB.MOM.WW.CBDDC.Core/Network/PeerNode.cs
Executable file
76
src/ZB.MOM.WW.CBDDC.Core/Network/PeerNode.cs
Executable file
@@ -0,0 +1,76 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
/// <summary>
/// Represents a peer node in a distributed network, including its unique identifier, network address, and last seen
/// timestamp.
/// </summary>
public class PeerNode
{
    /// <summary>
    /// Gets the unique identifier for the node.
    /// </summary>
    public string NodeId { get; }

    /// <summary>
    /// Gets the address associated with the current instance.
    /// </summary>
    public string Address { get; }

    /// <summary>
    /// Gets the date and time when the entity was last observed or updated.
    /// </summary>
    public DateTimeOffset LastSeen { get; }

    /// <summary>
    /// Gets the configuration settings for the peer node.
    /// </summary>
    public PeerNodeConfiguration? Configuration { get; }

    /// <summary>
    /// Gets the type of the peer node (LanDiscovered, StaticRemote, or CloudRemote).
    /// </summary>
    public PeerType Type { get; }

    /// <summary>
    /// Gets the role assigned to this node within the cluster.
    /// </summary>
    public NodeRole Role { get; }

    /// <summary>
    /// Gets the list of collections this peer is interested in.
    /// </summary>
    public IReadOnlyList<string> InterestingCollections { get; }

    /// <summary>
    /// Initializes a new instance of the PeerNode class with the specified node identifier, network address, and last
    /// seen timestamp.
    /// </summary>
    /// <param name="nodeId">The unique identifier for the peer node. Cannot be null or empty.</param>
    /// <param name="address">The network address of the peer node. Cannot be null or empty.</param>
    /// <param name="lastSeen">The date and time when the peer node was last seen, expressed as a DateTimeOffset.</param>
    /// <param name="type">The type of the peer node. Defaults to LanDiscovered.</param>
    /// <param name="role">The role of the peer node. Defaults to Member.</param>
    /// <param name="configuration">The peer node configuration</param>
    /// <param name="interestingCollections">The list of collections this peer is interested in.</param>
    /// <exception cref="ArgumentException">Thrown when <paramref name="nodeId"/> or <paramref name="address"/> is null or empty.</exception>
    public PeerNode(
        string nodeId,
        string address,
        DateTimeOffset lastSeen,
        PeerType type = PeerType.LanDiscovered,
        NodeRole role = NodeRole.Member,
        PeerNodeConfiguration? configuration = null,
        IEnumerable<string>? interestingCollections = null)
    {
        // Enforce the documented "Cannot be null or empty" contract instead of
        // silently accepting invalid identities.
        if (string.IsNullOrEmpty(nodeId))
            throw new ArgumentException("NodeId cannot be null or empty", nameof(nodeId));
        if (string.IsNullOrEmpty(address))
            throw new ArgumentException("Address cannot be null or empty", nameof(address));

        NodeId = nodeId;
        Address = address;
        LastSeen = lastSeen;
        Type = type;
        Role = role;
        Configuration = configuration;
        // Defensive copy so later mutation of the caller's sequence cannot alter this node.
        InterestingCollections = new List<string>(interestingCollections ?? []).AsReadOnly();
    }
}
|
||||
96
src/ZB.MOM.WW.CBDDC.Core/Network/PeerNodeConfiguration.cs
Executable file
96
src/ZB.MOM.WW.CBDDC.Core/Network/PeerNodeConfiguration.cs
Executable file
@@ -0,0 +1,96 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
/// <summary>
/// Represents the configuration settings for a peer node in a distributed network.
/// </summary>
/// <remarks>Use this class to specify identification, network port, and authentication details required for a
/// peer node to participate in a cluster or peer-to-peer environment. The <see cref="Default"/> property provides a
/// basic configuration suitable for development or testing scenarios.</remarks>
public class PeerNodeConfiguration
{
    /// <summary>
    /// Gets or sets the unique identifier for the node.
    /// </summary>
    public string NodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the TCP port number used for network communication.
    /// </summary>
    public int TcpPort { get; set; }

    /// <summary>
    /// Gets or sets the authentication token used to authorize API requests.
    /// </summary>
    public string AuthToken { get; set; } = string.Empty;

    /// <summary>
    /// Maximum number of items held by the document cache. Default: 100.
    /// </summary>
    // NOTE: the previous doc comment said "Default: 10", contradicting the initializer below.
    public int MaxDocumentCacheSize { get; set; } = 100;

    /// <summary>
    /// Maximum size of offline queue. Default: 1000.
    /// </summary>
    public int MaxQueueSize { get; set; } = 1000;

    /// <summary>
    /// Number of retry attempts for failed network operations. Default: 3.
    /// </summary>
    public int RetryAttempts { get; set; } = 3;

    /// <summary>
    /// Delay between retry attempts in milliseconds. Default: 1000ms.
    /// </summary>
    public int RetryDelayMs { get; set; } = 1000;

    /// <summary>
    /// Interval between periodic maintenance operations (Oplog pruning) in minutes. Default: 60 minutes.
    /// </summary>
    public int MaintenanceIntervalMinutes { get; set; } = 60;

    /// <summary>
    /// Oplog retention period in hours. Entries older than this will be pruned. Default: 24 hours.
    /// </summary>
    public int OplogRetentionHours { get; set; } = 24;

    /// <summary>
    /// Gets or sets a list of known peers to connect to directly, bypassing discovery.
    /// </summary>
    public System.Collections.Generic.List<KnownPeerConfiguration> KnownPeers { get; set; } = new();

    /// <summary>
    /// Gets the default configuration settings for a peer node.
    /// </summary>
    /// <remarks>Each access returns a new instance of the configuration with a unique node identifier. The
    /// default settings use TCP port 9000 and a generated authentication token. Modify the returned instance as needed
    /// before use.</remarks>
    public static PeerNodeConfiguration Default => new PeerNodeConfiguration
    {
        NodeId = Guid.NewGuid().ToString(),
        TcpPort = 9000,
        AuthToken = Guid.NewGuid().ToString("N")
    };
}
|
||||
|
||||
/// <summary>
/// Configuration for a known peer node.
/// </summary>
public class KnownPeerConfiguration
{
    /// <summary>
    /// The unique identifier of the peer node.
    /// </summary>
    public string NodeId { get; set; } = string.Empty;

    /// <summary>
    /// The hostname or IP address of the peer.
    /// </summary>
    public string Host { get; set; } = string.Empty;

    /// <summary>
    /// The TCP port of the peer.
    /// </summary>
    public int Port { get; set; }
}
|
||||
26
src/ZB.MOM.WW.CBDDC.Core/Network/PeerType.cs
Executable file
26
src/ZB.MOM.WW.CBDDC.Core/Network/PeerType.cs
Executable file
@@ -0,0 +1,26 @@
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
/// <summary>
/// Defines the type of peer node in the distributed network.
/// </summary>
public enum PeerType
{
    /// <summary>
    /// Peer discovered via UDP broadcast on the local area network.
    /// These peers are ephemeral and removed after timeout when no longer broadcasting.
    /// </summary>
    LanDiscovered = 0,

    /// <summary>
    /// Peer manually configured with a static address.
    /// These peers are persistent across restarts and stored in the database.
    /// </summary>
    StaticRemote = 1,

    /// <summary>
    /// Cloud remote node. Always active if internet connectivity is available;
    /// synchronized only by the elected leader node to reduce overhead.
    /// </summary>
    CloudRemote = 2
}
|
||||
38
src/ZB.MOM.WW.CBDDC.Core/Network/RemotePeerConfiguration.cs
Executable file
38
src/ZB.MOM.WW.CBDDC.Core/Network/RemotePeerConfiguration.cs
Executable file
@@ -0,0 +1,38 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
/// <summary>
/// Configuration for a remote peer node that is persistent across restarts.
/// This collection is automatically synchronized across all nodes in the cluster.
/// </summary>
public class RemotePeerConfiguration
{
    /// <summary>
    /// Gets or sets the unique identifier for the remote peer node.
    /// </summary>
    [Key]
    public string NodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the network address of the remote peer (hostname:port).
    /// </summary>
    public string Address { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the type of the peer (StaticRemote or CloudRemote).
    /// </summary>
    public PeerType Type { get; set; }

    /// <summary>
    /// Gets or sets whether this peer is enabled for synchronization.
    /// Disabled peers are stored but not used for sync.
    /// </summary>
    public bool IsEnabled { get; set; } = true;

    /// <summary>
    /// Gets or sets the list of collections this peer is interested in.
    /// If empty, the peer is interested in all collections.
    /// </summary>
    public System.Collections.Generic.List<string> InterestingCollections { get; set; } = new();
}
|
||||
59
src/ZB.MOM.WW.CBDDC.Core/Network/StaticPeerNodeConfigurationProvider.cs
Executable file
59
src/ZB.MOM.WW.CBDDC.Core/Network/StaticPeerNodeConfigurationProvider.cs
Executable file
@@ -0,0 +1,59 @@
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
/// <summary>
/// Provides peer node configuration from an in-memory static source.
/// </summary>
public class StaticPeerNodeConfigurationProvider : IPeerNodeConfigurationProvider
{
    private PeerNodeConfiguration _configuration = new();

    /// <summary>
    /// Gets or sets the current peer node configuration.
    /// Assigning a different instance raises <see cref="ConfigurationChanged"/>.
    /// </summary>
    public PeerNodeConfiguration Configuration
    {
        get => _configuration;
        set
        {
            // Reference comparison: mutating the current instance in place does not raise the event.
            if (_configuration != value)
            {
                _configuration = value;
                OnConfigurationChanged(_configuration);
            }
        }
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="StaticPeerNodeConfigurationProvider"/> class.
    /// </summary>
    /// <param name="configuration">The initial peer node configuration.</param>
    public StaticPeerNodeConfigurationProvider(PeerNodeConfiguration configuration)
    {
        // Assign the field directly instead of going through the Configuration property:
        // the setter invokes the virtual OnConfigurationChanged, and virtual dispatch from a
        // constructor would run a derived override before the derived constructor has executed.
        // No subscriber can be attached this early, so no notification is lost.
        _configuration = configuration;
    }

    /// <summary>
    /// Occurs when the peer node configuration changes.
    /// </summary>
    public event PeerNodeConfigurationChangedEventHandler? ConfigurationChanged;

    /// <summary>
    /// Gets the current peer node configuration.
    /// </summary>
    /// <returns>A task whose result is the current configuration.</returns>
    public Task<PeerNodeConfiguration> GetConfiguration()
    {
        return Task.FromResult(Configuration);
    }

    /// <summary>
    /// Raises the <see cref="ConfigurationChanged"/> event.
    /// </summary>
    /// <param name="newConfig">The new peer node configuration.</param>
    protected virtual void OnConfigurationChanged(PeerNodeConfiguration newConfig)
    {
        ConfigurationChanged?.Invoke(this, newConfig);
    }
}
|
||||
107
src/ZB.MOM.WW.CBDDC.Core/OplogEntry.cs
Executable file
107
src/ZB.MOM.WW.CBDDC.Core/OplogEntry.cs
Executable file
@@ -0,0 +1,107 @@
|
||||
using System;
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Identifies the kind of mutation recorded by an oplog entry.
/// </summary>
public enum OperationType
{
    /// <summary>Insert or update a document.</summary>
    Put = 0,

    /// <summary>Remove a document.</summary>
    Delete = 1
}
|
||||
|
||||
/// <summary>
/// Extension helpers for <see cref="OplogEntry"/>.
/// </summary>
public static class OplogEntryExtensions
{
    /// <summary>
    /// Computes a deterministic hash for the specified oplog entry.
    /// </summary>
    /// <remarks>
    /// The hash input is "Collection|Key|Operation||Timestamp|PreviousHash" (the empty field
    /// between the two adjacent separators is where the Payload used to go; it is excluded).
    /// The exact field order and separators determine every persisted hash chain — do not
    /// change them without a data migration.
    /// </remarks>
    /// <param name="entry">The oplog entry to hash.</param>
    /// <returns>The lowercase hexadecimal SHA-256 hash of the entry.</returns>
    public static string ComputeHash(this OplogEntry entry)
    {
        using var sha256 = System.Security.Cryptography.SHA256.Create();
        var sb = new System.Text.StringBuilder();

        sb.Append(entry.Collection);
        sb.Append('|');
        sb.Append(entry.Key);
        sb.Append('|');
        // Ensure stable string representation for Enum (integer value)
        sb.Append(((int)entry.Operation).ToString(System.Globalization.CultureInfo.InvariantCulture));
        sb.Append('|');
        // Payload excluded from hash to avoid serialization non-determinism
        // sb.Append(entry.Payload...);
        sb.Append('|');
        // Timestamp.ToString() is now Invariant — assumed culture-stable; TODO confirm HlcTimestamp.ToString
        sb.Append(entry.Timestamp.ToString());
        sb.Append('|');
        sb.Append(entry.PreviousHash);

        var bytes = System.Text.Encoding.UTF8.GetBytes(sb.ToString());
        var hashBytes = sha256.ComputeHash(bytes);

        // Convert to hex string (lowercase, no separators)
        return BitConverter.ToString(hashBytes).Replace("-", "").ToLowerInvariant();
    }
}
|
||||
|
||||
/// <summary>
/// Immutable record of a single mutation, forming one link of a hash-chained oplog.
/// </summary>
public class OplogEntry
{
    /// <summary>Gets the collection name associated with this entry.</summary>
    public string Collection { get; }

    /// <summary>Gets the document key associated with this entry.</summary>
    public string Key { get; }

    /// <summary>Gets the operation represented by this entry.</summary>
    public OperationType Operation { get; }

    /// <summary>Gets the serialized payload for the operation.</summary>
    public JsonElement? Payload { get; }

    /// <summary>Gets the logical timestamp for this entry.</summary>
    public HlcTimestamp Timestamp { get; }

    /// <summary>Gets the hash of this entry.</summary>
    public string Hash { get; }

    /// <summary>Gets the hash of the previous entry in the chain.</summary>
    public string PreviousHash { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="OplogEntry"/> class.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="operation">The operation type.</param>
    /// <param name="payload">The serialized payload.</param>
    /// <param name="timestamp">The logical timestamp.</param>
    /// <param name="previousHash">The previous entry hash.</param>
    /// <param name="hash">The current entry hash. If null, it is computed.</param>
    public OplogEntry(string collection, string key, OperationType operation, JsonElement? payload, HlcTimestamp timestamp, string previousHash, string? hash = null)
    {
        Collection = collection;
        Key = key;
        Operation = operation;
        Payload = payload;
        Timestamp = timestamp;
        // PreviousHash must be set before the hash is computed, because ComputeHash reads it.
        PreviousHash = previousHash ?? string.Empty;
        Hash = hash ?? this.ComputeHash();
    }

    /// <summary>
    /// Verifies if the stored Hash matches the content.
    /// </summary>
    public bool IsValid() => Hash == this.ComputeHash();
}
|
||||
44
src/ZB.MOM.WW.CBDDC.Core/PeerOplogConfirmation.cs
Normal file
44
src/ZB.MOM.WW.CBDDC.Core/PeerOplogConfirmation.cs
Normal file
@@ -0,0 +1,44 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Represents a persisted confirmation watermark for a tracked peer and source node.
/// </summary>
public class PeerOplogConfirmation
{
    /// <summary>
    /// Gets or sets the tracked peer node identifier.
    /// </summary>
    public string PeerNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the source node identifier this confirmation applies to.
    /// </summary>
    public string SourceNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the physical wall-clock component of the confirmed HLC timestamp.
    /// </summary>
    public long ConfirmedWall { get; set; }

    /// <summary>
    /// Gets or sets the logical counter component of the confirmed HLC timestamp.
    /// </summary>
    public int ConfirmedLogic { get; set; }

    /// <summary>
    /// Gets or sets the confirmed hash at the watermark.
    /// </summary>
    public string ConfirmedHash { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets when this confirmation record was last updated in UTC.
    /// </summary>
    public DateTimeOffset LastConfirmedUtc { get; set; } = DateTimeOffset.UtcNow;

    /// <summary>
    /// Gets or sets whether this tracked peer is active for pruning/sync gating.
    /// </summary>
    public bool IsActive { get; set; } = true;
}
|
||||
225
src/ZB.MOM.WW.CBDDC.Core/QueryNode.cs
Executable file
225
src/ZB.MOM.WW.CBDDC.Core/QueryNode.cs
Executable file
@@ -0,0 +1,225 @@
|
||||
using System.Text.Json;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Base type for all query expression nodes.
/// </summary>
public abstract class QueryNode { }

/// <summary>
/// Equality comparison node.
/// </summary>
public class Eq : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the value to compare against.</summary>
    public object Value { get; }

    /// <summary>Initializes a new equality query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The value to compare against.</param>
    public Eq(string field, object value) => (Field, Value) = (field, value);
}

/// <summary>
/// Greater-than comparison node.
/// </summary>
public class Gt : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the threshold value.</summary>
    public object Value { get; }

    /// <summary>Initializes a new greater-than query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Gt(string field, object value) => (Field, Value) = (field, value);
}

/// <summary>
/// Less-than comparison node.
/// </summary>
public class Lt : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the threshold value.</summary>
    public object Value { get; }

    /// <summary>Initializes a new less-than query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Lt(string field, object value) => (Field, Value) = (field, value);
}

/// <summary>
/// Greater-than-or-equal comparison node.
/// </summary>
public class Gte : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the threshold value.</summary>
    public object Value { get; }

    /// <summary>Initializes a new greater-than-or-equal query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Gte(string field, object value) => (Field, Value) = (field, value);
}

/// <summary>
/// Less-than-or-equal comparison node.
/// </summary>
public class Lte : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the threshold value.</summary>
    public object Value { get; }

    /// <summary>Initializes a new less-than-or-equal query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The threshold value.</param>
    public Lte(string field, object value) => (Field, Value) = (field, value);
}

/// <summary>
/// Not-equal comparison node.
/// </summary>
public class Neq : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the value to compare against.</summary>
    public object Value { get; }

    /// <summary>Initializes a new not-equal query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The value to compare against.</param>
    public Neq(string field, object value) => (Field, Value) = (field, value);
}

/// <summary>
/// Membership (in-list) comparison node.
/// </summary>
public class In : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the set of values to compare against.</summary>
    public object[] Values { get; }

    /// <summary>Initializes a new in-list query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="values">The set of values to compare against.</param>
    public In(string field, object[] values) => (Field, Values) = (field, values);
}

/// <summary>
/// Substring-match node.
/// </summary>
public class Contains : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the substring value to search for.</summary>
    public string Value { get; }

    /// <summary>Initializes a new contains query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The substring value to search for.</param>
    public Contains(string field, string value) => (Field, Value) = (field, value);
}

/// <summary>
/// Negated substring-match node.
/// </summary>
public class NotContains : QueryNode
{
    /// <summary>Gets the field name to compare.</summary>
    public string Field { get; }

    /// <summary>Gets the substring value to exclude.</summary>
    public string Value { get; }

    /// <summary>Initializes a new not-contains query node.</summary>
    /// <param name="field">The field name to compare.</param>
    /// <param name="value">The substring value to exclude.</param>
    public NotContains(string field, string value) => (Field, Value) = (field, value);
}

/// <summary>
/// Logical conjunction of two query nodes.
/// </summary>
public class And : QueryNode
{
    /// <summary>Gets the left side of the logical operation.</summary>
    public QueryNode Left { get; }

    /// <summary>Gets the right side of the logical operation.</summary>
    public QueryNode Right { get; }

    /// <summary>Initializes a new logical AND query node.</summary>
    /// <param name="left">The left query node.</param>
    /// <param name="right">The right query node.</param>
    public And(QueryNode left, QueryNode right) => (Left, Right) = (left, right);
}

/// <summary>
/// Logical disjunction of two query nodes.
/// </summary>
public class Or : QueryNode
{
    /// <summary>Gets the left side of the logical operation.</summary>
    public QueryNode Left { get; }

    /// <summary>Gets the right side of the logical operation.</summary>
    public QueryNode Right { get; }

    /// <summary>Initializes a new logical OR query node.</summary>
    /// <param name="left">The left query node.</param>
    /// <param name="right">The right query node.</param>
    public Or(QueryNode left, QueryNode right) => (Left, Right) = (left, right);
}
|
||||
105
src/ZB.MOM.WW.CBDDC.Core/README.md
Executable file
105
src/ZB.MOM.WW.CBDDC.Core/README.md
Executable file
@@ -0,0 +1,105 @@
|
||||
# ZB.MOM.WW.CBDDC.Core
|
||||
|
||||
Core abstractions and logic for **CBDDC**, a peer-to-peer data synchronization middleware for .NET.
|
||||
|
||||
## What Is CBDDC?
|
||||
|
||||
CBDDC is **not** a database — it's a sync layer that plugs into your existing data store (BLite) and enables automatic P2P replication across nodes in a mesh network. Your application reads and writes to its database as usual; CBDDC handles synchronization in the background.
|
||||
|
||||
## What's In This Package
|
||||
|
||||
- **Interfaces**: `IDocumentStore`, `IOplogStore`, `IVectorClockService`, `IConflictResolver`
|
||||
- **Models**: `OplogEntry`, `Document`, `HlcTimestamp`, `VectorClock`
|
||||
- **Conflict Resolution**: `LastWriteWinsConflictResolver`, `RecursiveNodeMergeConflictResolver`
|
||||
- **Production Features**: Document caching (LRU), offline queue, health monitoring, retry policies
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
# Pick a persistence provider
|
||||
dotnet add package ZB.MOM.WW.CBDDC.Persistence # Embedded document DB
|
||||
|
||||
# Add networking
|
||||
dotnet add package ZB.MOM.WW.CBDDC.Network
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```csharp
|
||||
// 1. Define your DbContext
|
||||
public class MyDbContext : CBDDCDocumentDbContext
|
||||
{
|
||||
public DocumentCollection<string, User> Users { get; private set; }
|
||||
public MyDbContext(string path) : base(path) { }
|
||||
}
|
||||
|
||||
// 2. Create your DocumentStore (the sync bridge)
|
||||
public class MyDocumentStore : BLiteDocumentStore<MyDbContext>
|
||||
{
|
||||
public MyDocumentStore(MyDbContext ctx, IPeerNodeConfigurationProvider cfg,
|
||||
IVectorClockService vc, ILogger<MyDocumentStore>? log = null)
|
||||
: base(ctx, cfg, vc, logger: log)
|
||||
{
|
||||
WatchCollection("Users", ctx.Users, u => u.Id);
|
||||
}
|
||||
|
||||
protected override async Task ApplyContentToEntityAsync(
|
||||
string collection, string key, JsonElement content, CancellationToken ct)
|
||||
{
|
||||
var user = content.Deserialize<User>()!;
|
||||
user.Id = key;
|
||||
var existing = _context.Users.Find(u => u.Id == key).FirstOrDefault();
|
||||
if (existing != null) _context.Users.Update(user);
|
||||
else _context.Users.Insert(user);
|
||||
await _context.SaveChangesAsync(ct);
|
||||
}
|
||||
// ... implement other abstract methods
|
||||
}
|
||||
|
||||
// 3. Register and use
|
||||
builder.Services.AddCBDDCCore()
|
||||
.AddCBDDCBLite<MyDbContext, MyDocumentStore>(
|
||||
sp => new MyDbContext("data.blite"))
|
||||
.AddCBDDCNetwork<StaticPeerNodeConfigurationProvider>();
|
||||
```
|
||||
|
||||
## Key Concepts
|
||||
|
||||
| Concept | Description |
|
||||
|---------|-------------|
|
||||
| **CDC** | Change Data Capture — watches collections registered via `WatchCollection()` |
|
||||
| **Oplog** | Append-only hash-chained journal of changes per node |
|
||||
| **VectorClock** | Tracks causal ordering across the mesh |
|
||||
| **DocumentStore** | Your bridge between entities and the sync engine |
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Your App → DbContext.SaveChangesAsync()
        │
        ▼ CDC Trigger
DocumentStore.CreateOplogEntryAsync()
        │
        ├── OplogEntry (hash-chained, HLC timestamped)
        └── VectorClockService.Update()
        │
        ▼
SyncOrchestrator (background)
        ├── Push to peers
        └── Pull from peers → ApplyBatchAsync
|
||||
```
|
||||
|
||||
## Related Packages
|
||||
|
||||
- **ZB.MOM.WW.CBDDC.Persistence** — BLite embedded provider (.NET 10+)
|
||||
- **ZB.MOM.WW.CBDDC.Network** — P2P networking (UDP discovery, TCP sync, Gossip)
|
||||
|
||||
## Documentation
|
||||
|
||||
- **[Complete Documentation](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net)**
|
||||
- **[Sample Application](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/tree/main/samples/ZB.MOM.WW.CBDDC.Sample.Console)**
|
||||
- **[Integration Guide](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net#integrating-with-your-database)**
|
||||
|
||||
## License
|
||||
|
||||
MIT — see [LICENSE](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/LICENSE)
|
||||
27
src/ZB.MOM.WW.CBDDC.Core/Resilience/IRetryPolicy.cs
Executable file
27
src/ZB.MOM.WW.CBDDC.Core/Resilience/IRetryPolicy.cs
Executable file
@@ -0,0 +1,27 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Resilience
|
||||
{
|
||||
    /// <summary>
    /// Defines a policy that executes asynchronous operations with retry handling.
    /// </summary>
    public interface IRetryPolicy
    {
        /// <summary>
        /// Executes an asynchronous operation with retry handling.
        /// </summary>
        /// <param name="operation">The operation to execute.</param>
        /// <param name="operationName">The operation name used for diagnostics.</param>
        /// <param name="cancellationToken">A token used to cancel the operation.</param>
        /// <returns>A task that represents the asynchronous execution.</returns>
        Task ExecuteAsync(Func<Task> operation, string operationName, CancellationToken cancellationToken = default);

        /// <summary>
        /// Executes an asynchronous operation with retry handling and returns a result.
        /// </summary>
        /// <typeparam name="T">The result type.</typeparam>
        /// <param name="operation">The operation to execute.</param>
        /// <param name="operationName">The operation name used for diagnostics.</param>
        /// <param name="cancellationToken">A token used to cancel the operation.</param>
        /// <returns>A task that represents the asynchronous execution and yields the operation result.</returns>
        Task<T> ExecuteAsync<T>(Func<Task<T>> operation, string operationName, CancellationToken cancellationToken = default);
    }
|
||||
}
|
||||
116
src/ZB.MOM.WW.CBDDC.Core/Resilience/RetryPolicy.cs
Executable file
116
src/ZB.MOM.WW.CBDDC.Core/Resilience/RetryPolicy.cs
Executable file
@@ -0,0 +1,116 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Exceptions;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Resilience;
|
||||
|
||||
/// <summary>
/// Provides retry logic for transient failures.
/// </summary>
public class RetryPolicy : IRetryPolicy
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
    private readonly ILogger<RetryPolicy> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="RetryPolicy"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The provider for retry configuration values.</param>
    /// <param name="logger">The logger instance; a no-op logger is used when null.</param>
    public RetryPolicy(IPeerNodeConfigurationProvider peerNodeConfigurationProvider, ILogger<RetryPolicy>? logger = null)
    {
        _logger = logger ?? NullLogger<RetryPolicy>.Instance;
        _peerNodeConfigurationProvider = peerNodeConfigurationProvider
            ?? throw new ArgumentNullException(nameof(peerNodeConfigurationProvider));
    }

    /// <summary>
    /// Executes an operation with retry logic.
    /// </summary>
    /// <typeparam name="T">The result type returned by the operation.</typeparam>
    /// <param name="operation">The asynchronous operation to execute.</param>
    /// <param name="operationName">The operation name used for logging.</param>
    /// <param name="cancellationToken">A token used to cancel retry delays.</param>
    /// <remarks>
    /// Non-transient exceptions, and a transient failure on the final attempt, propagate
    /// unchanged to the caller (the catch filter below excludes them).
    /// </remarks>
    public async Task<T> ExecuteAsync<T>(
        Func<Task<T>> operation,
        string operationName,
        CancellationToken cancellationToken = default)
    {
        // Configuration is read once per call; changes made while retrying are not observed.
        var config = await _peerNodeConfigurationProvider.GetConfiguration();
        Exception? lastException = null;

        for (int attempt = 1; attempt <= config.RetryAttempts; attempt++)
        {
            try
            {
                _logger.LogDebug("Executing {Operation} (attempt {Attempt}/{Max})",
                    operationName, attempt, config.RetryAttempts);

                return await operation();
            }
            catch (Exception ex) when (attempt < config.RetryAttempts && IsTransient(ex))
            {
                lastException = ex;

                // Linear backoff: the delay grows proportionally with the attempt number.
                // (The previous comment said "exponential", which the code never was.)
                var delay = config.RetryDelayMs * attempt;

                _logger.LogWarning(ex,
                    "Operation {Operation} failed (attempt {Attempt}/{Max}). Retrying in {Delay}ms...",
                    operationName, attempt, config.RetryAttempts, delay);

                // A cancelled token aborts the wait (and the retry loop) immediately.
                await Task.Delay(delay, cancellationToken);
            }
        }

        // Reachable only when the loop never returned or rethrew — in practice when
        // RetryAttempts < 1. ILogger accepts a null exception argument, so the previous
        // duplicated if/else logging collapses to a single call.
        _logger.LogError(lastException,
            "Operation {Operation} failed after {Attempts} attempts",
            operationName, config.RetryAttempts);

        throw new CBDDCException("RETRY_EXHAUSTED",
            $"Operation '{operationName}' failed after {config.RetryAttempts} attempts",
            lastException!);
    }

    /// <summary>
    /// Executes an operation with retry logic (void return).
    /// </summary>
    /// <param name="operation">The asynchronous operation to execute.</param>
    /// <param name="operationName">The operation name used for logging.</param>
    /// <param name="cancellationToken">A token used to cancel retry delays.</param>
    public async Task ExecuteAsync(
        Func<Task> operation,
        string operationName,
        CancellationToken cancellationToken = default)
    {
        // Adapt to the generic overload with a dummy result.
        await ExecuteAsync(async () =>
        {
            await operation();
            return true;
        }, operationName, cancellationToken);
    }

    /// <summary>
    /// Determines whether an exception is likely transient and worth retrying.
    /// </summary>
    private static bool IsTransient(Exception ex)
    {
        // Network errors are typically transient
        if (ex is NetworkException or System.Net.Sockets.SocketException or System.IO.IOException)
            return true;

        // Timeout errors are transient. Note: OperationCanceledException is included, so
        // timeout-style cancellations are retried; cancellation of the caller's token still
        // surfaces promptly because Task.Delay above observes the token.
        if (ex is Exceptions.TimeoutException or OperationCanceledException)
            return true;

        return false;
    }
}
|
||||
21
src/ZB.MOM.WW.CBDDC.Core/SnapshotMetadata.cs
Executable file
21
src/ZB.MOM.WW.CBDDC.Core/SnapshotMetadata.cs
Executable file
@@ -0,0 +1,21 @@
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Metadata describing a snapshot: its origin node, HLC timestamp components, and hash.
/// </summary>
public class SnapshotMetadata
{
    /// <summary>
    /// Gets or sets the node identifier associated with the snapshot.
    /// </summary>
    public string NodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the physical time component of the snapshot timestamp.
    /// </summary>
    public long TimestampPhysicalTime { get; set; }

    /// <summary>
    /// Gets or sets the logical counter component of the snapshot timestamp.
    /// </summary>
    public int TimestampLogicalCounter { get; set; }

    /// <summary>
    /// Gets or sets the snapshot hash.
    /// </summary>
    public string Hash { get; set; } = string.Empty;
}
|
||||
16
src/ZB.MOM.WW.CBDDC.Core/Storage/CorruptDatabaseException.cs
Executable file
16
src/ZB.MOM.WW.CBDDC.Core/Storage/CorruptDatabaseException.cs
Executable file
@@ -0,0 +1,16 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
|
||||
/// <summary>
/// Represents an error that occurs when a database is found to be corrupt.
/// </summary>
public class CorruptDatabaseException : Exception
{
    /// <summary>
    /// Initializes a new instance of the <see cref="CorruptDatabaseException"/> class.
    /// </summary>
    // Standard exception constructors (CA1032) so the type is usable without an inner exception.
    public CorruptDatabaseException() { }

    /// <summary>
    /// Initializes a new instance of the <see cref="CorruptDatabaseException"/> class with a message.
    /// </summary>
    /// <param name="message">The exception message.</param>
    public CorruptDatabaseException(string message) : base(message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="CorruptDatabaseException"/> class.
    /// </summary>
    /// <param name="message">The exception message.</param>
    /// <param name="innerException">The underlying exception that caused this error.</param>
    public CorruptDatabaseException(string message, Exception innerException) : base(message, innerException) { }
}
|
||||
108
src/ZB.MOM.WW.CBDDC.Core/Storage/IDocumentMetadataStore.cs
Executable file
108
src/ZB.MOM.WW.CBDDC.Core/Storage/IDocumentMetadataStore.cs
Executable file
@@ -0,0 +1,108 @@
|
||||
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace ZB.MOM.WW.CBDDC.Core.Storage;

/// <summary>
/// Defines the contract for storing and retrieving document metadata for sync tracking.
/// Document metadata stores HLC timestamps and deleted state without modifying application entities.
/// </summary>
public interface IDocumentMetadataStore : ISnapshotable<DocumentMetadata>
{
    /// <summary>
    /// Gets the metadata for a specific document.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>The document metadata if found; otherwise null.</returns>
    Task<DocumentMetadata?> GetMetadataAsync(string collection, string key, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets metadata for all documents in a collection.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Enumerable of document metadata for the collection.</returns>
    Task<IEnumerable<DocumentMetadata>> GetMetadataByCollectionAsync(string collection, CancellationToken cancellationToken = default);

    /// <summary>
    /// Upserts (inserts or updates) metadata for a document.
    /// </summary>
    /// <param name="metadata">The metadata to upsert.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    Task UpsertMetadataAsync(DocumentMetadata metadata, CancellationToken cancellationToken = default);

    /// <summary>
    /// Upserts metadata for multiple documents in batch.
    /// </summary>
    /// <param name="metadatas">The metadata items to upsert.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    Task UpsertMetadataBatchAsync(IEnumerable<DocumentMetadata> metadatas, CancellationToken cancellationToken = default);

    /// <summary>
    /// Marks a document as deleted (tombstone) by setting IsDeleted=true and updating the timestamp.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="timestamp">The HLC timestamp of the deletion.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    Task MarkDeletedAsync(string collection, string key, HlcTimestamp timestamp, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all document metadata with timestamps after the specified timestamp.
    /// Used for incremental sync to find documents modified since the last sync.
    /// </summary>
    /// <param name="since">The timestamp to compare against (exclusive lower bound).</param>
    /// <param name="collections">Optional collection filter; when null, all collections are considered.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Documents modified after the specified timestamp.</returns>
    Task<IEnumerable<DocumentMetadata>> GetMetadataAfterAsync(HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default);
}
|
||||
|
||||
/// <summary>
/// Sync-tracking metadata for a single document: its collection, key,
/// last-modified HLC timestamp, and tombstone flag.
/// </summary>
public class DocumentMetadata
{
    /// <summary>
    /// Gets or sets the collection name.
    /// </summary>
    public string Collection { get; set; } = "";

    /// <summary>
    /// Gets or sets the document key.
    /// </summary>
    public string Key { get; set; } = "";

    /// <summary>
    /// Gets or sets the HLC timestamp of the last modification.
    /// </summary>
    public HlcTimestamp UpdatedAt { get; set; }

    /// <summary>
    /// Gets or sets whether this document is marked as deleted (tombstone).
    /// </summary>
    public bool IsDeleted { get; set; }

    /// <summary>
    /// Initializes an empty <see cref="DocumentMetadata"/> instance.
    /// </summary>
    public DocumentMetadata() { }

    /// <summary>
    /// Initializes a fully-populated <see cref="DocumentMetadata"/> instance.
    /// </summary>
    /// <param name="collection">The collection name.</param>
    /// <param name="key">The document key.</param>
    /// <param name="updatedAt">The last update timestamp.</param>
    /// <param name="isDeleted">Whether the document is marked as deleted.</param>
    public DocumentMetadata(string collection, string key, HlcTimestamp updatedAt, bool isDeleted = false)
    {
        Collection = collection;
        Key = key;
        UpdatedAt = updatedAt;
        IsDeleted = isDeleted;
    }
}
|
||||
103
src/ZB.MOM.WW.CBDDC.Core/Storage/IDocumentStore.cs
Executable file
103
src/ZB.MOM.WW.CBDDC.Core/Storage/IDocumentStore.cs
Executable file
@@ -0,0 +1,103 @@
|
||||
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace ZB.MOM.WW.CBDDC.Core.Storage;

/// <summary>
/// Handles basic CRUD operations for documents.
/// </summary>
public interface IDocumentStore : ISnapshotable<Document>
{
    /// <summary>
    /// Gets the collections this store is interested in.
    /// </summary>
    IEnumerable<string> InterestedCollection { get; }

    /// <summary>
    /// Asynchronously retrieves a document from the specified collection by its key.
    /// </summary>
    /// <param name="collection">The name of the collection containing the document to retrieve. Cannot be null or empty.</param>
    /// <param name="key">The unique key identifying the document within the collection. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains the document if found; otherwise, null.</returns>
    Task<Document?> GetDocumentAsync(string collection, string key, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves all documents belonging to the specified collection.
    /// </summary>
    /// <param name="collection">The name of the collection from which to retrieve documents. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains an enumerable collection of
    /// documents in the specified collection. The collection is empty if no documents are found.</returns>
    Task<IEnumerable<Document>> GetDocumentsByCollectionAsync(string collection, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously inserts a batch of documents into the data store.
    /// </summary>
    /// <param name="documents">The collection of documents to insert. Cannot be null or contain null elements.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result is <see langword="true"/> if all documents
    /// were inserted successfully; otherwise, <see langword="false"/>.</returns>
    Task<bool> InsertBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously updates the specified document in the data store.
    /// </summary>
    /// <param name="document">The document to update. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the update operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result is <see langword="true"/> if the document was
    /// successfully updated; otherwise, <see langword="false"/>.</returns>
    Task<bool> PutDocumentAsync(Document document, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously updates a batch of documents in the data store.
    /// </summary>
    /// <param name="documents">The collection of documents to update. Cannot be null or contain null elements.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation. The task result is <see langword="true"/> if all documents
    /// were updated successfully; otherwise, <see langword="false"/>.</returns>
    Task<bool> UpdateBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously deletes a document identified by the specified key from the given collection.
    /// </summary>
    /// <param name="collection">The name of the collection containing the document to delete. Cannot be null or empty.</param>
    /// <param name="key">The unique key identifying the document to delete. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the delete operation.</param>
    /// <returns>A task that represents the asynchronous delete operation. The task result is <see langword="true"/> if the
    /// document was successfully deleted; otherwise, <see langword="false"/>.</returns>
    Task<bool> DeleteDocumentAsync(string collection, string key, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously deletes a batch of documents identified by their keys.
    /// </summary>
    /// <remarks>
    /// If any of the specified documents cannot be deleted, the method returns <see langword="false"/> but does not
    /// throw an exception. The operation is performed asynchronously and may complete partially if cancellation is requested.
    /// </remarks>
    /// <param name="documentKeys">A collection of document keys that specify the documents to delete. Cannot be null or contain null or empty
    /// values.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the delete operation.</param>
    /// <returns>A task that represents the asynchronous delete operation. The task result is <see langword="true"/> if all
    /// specified documents were successfully deleted; otherwise, <see langword="false"/>.</returns>
    Task<bool> DeleteBatchDocumentsAsync(IEnumerable<string> documentKeys, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously merges the specified incoming document with existing data and returns the updated document.
    /// </summary>
    /// <param name="incoming">The incoming document to merge. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the merge operation.</param>
    /// <returns>A task that represents the asynchronous merge operation. The task result contains the merged document.</returns>
    Task<Document> MergeAsync(Document incoming, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves documents identified by the specified collection and key pairs.
    /// </summary>
    /// <param name="documentKeys">A list of tuples, each containing the collection name and the document key that uniquely identify the documents
    /// to retrieve. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous retrieval operation.</returns>
    Task<IEnumerable<Document>> GetDocumentsAsync(List<(string Collection, string Key)> documentKeys, CancellationToken cancellationToken);
}
|
||||
101
src/ZB.MOM.WW.CBDDC.Core/Storage/IOplogStore.cs
Executable file
101
src/ZB.MOM.WW.CBDDC.Core/Storage/IOplogStore.cs
Executable file
@@ -0,0 +1,101 @@
|
||||
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace ZB.MOM.WW.CBDDC.Core.Storage;

/// <summary>
/// Handles operations related to the Operation Log (Oplog), synchronization, and logical clocks.
/// </summary>
public interface IOplogStore : ISnapshotable<OplogEntry>
{
    /// <summary>
    /// Occurs when changes are applied to the store from external sources (sync).
    /// </summary>
    event EventHandler<ChangesAppliedEventArgs> ChangesApplied;

    /// <summary>
    /// Appends a new entry to the operation log asynchronously.
    /// </summary>
    /// <param name="entry">The operation log entry to append. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the append operation.</param>
    /// <returns>A task that represents the asynchronous append operation.</returns>
    Task AppendOplogEntryAsync(OplogEntry entry, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves all oplog entries that occurred after the specified timestamp.
    /// </summary>
    /// <param name="timestamp">The timestamp after which oplog entries should be returned (exclusive lower bound).</param>
    /// <param name="collections">An optional collection of collection names to filter the results; when null, no filter is applied.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing matching oplog entries.</returns>
    Task<IEnumerable<OplogEntry>> GetOplogAfterAsync(HlcTimestamp timestamp, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the latest observed hybrid logical clock (HLC) timestamp.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation containing the latest HLC timestamp.</returns>
    Task<HlcTimestamp> GetLatestTimestampAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the current vector clock representing the state of distributed events.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing the current vector clock.</returns>
    Task<VectorClock> GetVectorClockAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Retrieves a collection of oplog entries for the specified node that occurred after the given timestamp.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to retrieve oplog entries. Cannot be null or empty.</param>
    /// <param name="since">The timestamp after which oplog entries should be returned (exclusive lower bound).</param>
    /// <param name="collections">An optional collection of collection names to filter the oplog entries.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing oplog entries for the specified node.</returns>
    Task<IEnumerable<OplogEntry>> GetOplogForNodeAfterAsync(string nodeId, HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the hash of the most recent entry for the specified node.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to retrieve the last entry hash. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous operation containing the hash string of the last entry or null.</returns>
    Task<string?> GetLastEntryHashAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves a sequence of oplog entries representing the chain between the specified start and end hashes.
    /// </summary>
    /// <param name="startHash">The hash of the first entry in the chain range. Cannot be null or empty.</param>
    /// <param name="endHash">The hash of the last entry in the chain range. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous operation containing OplogEntry objects in chain order.</returns>
    Task<IEnumerable<OplogEntry>> GetChainRangeAsync(string startHash, string endHash, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the oplog entry associated with the specified hash value.
    /// </summary>
    /// <param name="hash">The hash string identifying the oplog entry to retrieve. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task representing the asynchronous operation containing the OplogEntry if found, otherwise null.</returns>
    Task<OplogEntry?> GetEntryByHashAsync(string hash, CancellationToken cancellationToken = default);

    /// <summary>
    /// Applies a batch of oplog entries asynchronously to the target data store.
    /// </summary>
    /// <param name="oplogEntries">A collection of OplogEntry objects representing the operations to apply. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the batch operation.</param>
    /// <returns>A task that represents the asynchronous batch apply operation.</returns>
    Task ApplyBatchAsync(IEnumerable<OplogEntry> oplogEntries, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously removes entries from the oplog that are older than the specified cutoff timestamp.
    /// </summary>
    /// <param name="cutoff">The timestamp that defines the upper bound for entries to be pruned.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the prune operation.</param>
    /// <returns>A task that represents the asynchronous prune operation.</returns>
    Task PruneOplogAsync(HlcTimestamp cutoff, CancellationToken cancellationToken = default);

}
|
||||
41
src/ZB.MOM.WW.CBDDC.Core/Storage/IPeerConfigurationStore.cs
Executable file
41
src/ZB.MOM.WW.CBDDC.Core/Storage/IPeerConfigurationStore.cs
Executable file
@@ -0,0 +1,41 @@
|
||||
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;

namespace ZB.MOM.WW.CBDDC.Core.Storage;

/// <summary>
/// Handles storage and retrieval of remote peer configurations.
/// </summary>
public interface IPeerConfigurationStore : ISnapshotable<RemotePeerConfiguration>
{
    /// <summary>
    /// Saves or updates a remote peer configuration in the persistent store.
    /// </summary>
    /// <param name="peer">The remote peer configuration to save.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task SaveRemotePeerAsync(RemotePeerConfiguration peer, CancellationToken cancellationToken = default);

    /// <summary>
    /// Retrieves all remote peer configurations from the persistent store.
    /// </summary>
    /// <param name="cancellationToken">Cancellation token.</param>
    /// <returns>Collection of remote peer configurations.</returns>
    Task<IEnumerable<RemotePeerConfiguration>> GetRemotePeersAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the configuration for a remote peer identified by the specified node ID.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the remote peer whose configuration is to be retrieved.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task containing the RemotePeerConfiguration if found; otherwise, null.</returns>
    Task<RemotePeerConfiguration?> GetRemotePeerAsync(string nodeId, CancellationToken cancellationToken);

    /// <summary>
    /// Removes a remote peer configuration from the persistent store.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the peer to remove.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default);
}
|
||||
@@ -0,0 +1,71 @@
|
||||
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
using ZB.MOM.WW.CBDDC.Core.Network;

namespace ZB.MOM.WW.CBDDC.Core.Storage;

/// <summary>
/// Defines persistence operations for peer oplog confirmation tracking.
/// </summary>
public interface IPeerOplogConfirmationStore : ISnapshotable<PeerOplogConfirmation>
{
    /// <summary>
    /// Ensures the specified peer is tracked for confirmation-based pruning.
    /// </summary>
    /// <param name="peerNodeId">The peer node identifier.</param>
    /// <param name="address">The peer network address.</param>
    /// <param name="type">The peer type.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    Task EnsurePeerRegisteredAsync(
        string peerNodeId,
        string address,
        PeerType type,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Updates the confirmation watermark for a tracked peer and source node.
    /// </summary>
    /// <param name="peerNodeId">The tracked peer node identifier.</param>
    /// <param name="sourceNodeId">The source node identifier of the confirmed oplog stream.</param>
    /// <param name="timestamp">The confirmed HLC timestamp.</param>
    /// <param name="hash">The confirmed hash at the specified timestamp.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    Task UpdateConfirmationAsync(
        string peerNodeId,
        string sourceNodeId,
        HlcTimestamp timestamp,
        string hash,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all persisted peer confirmations.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>All peer confirmations.</returns>
    Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets persisted confirmations for a specific tracked peer.
    /// </summary>
    /// <param name="peerNodeId">The peer node identifier.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Peer confirmations for the requested peer.</returns>
    Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsForPeerAsync(
        string peerNodeId,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Deactivates tracking for the specified peer.
    /// </summary>
    /// <param name="peerNodeId">The peer node identifier.</param>
    /// <param name="cancellationToken">A cancellation token.</param>
    Task RemovePeerTrackingAsync(string peerNodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all active tracked peer identifiers.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>Distinct active tracked peer identifiers.</returns>
    Task<IEnumerable<string>> GetActiveTrackedPeersAsync(CancellationToken cancellationToken = default);
}
|
||||
48
src/ZB.MOM.WW.CBDDC.Core/Storage/ISnapshotMetadataStore.cs
Executable file
48
src/ZB.MOM.WW.CBDDC.Core/Storage/ISnapshotMetadataStore.cs
Executable file
@@ -0,0 +1,48 @@
|
||||
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace ZB.MOM.WW.CBDDC.Core.Storage;

/// <summary>
/// Persists and retrieves <see cref="SnapshotMetadata"/> entries keyed by node identifier.
/// </summary>
public interface ISnapshotMetadataStore : ISnapshotable<SnapshotMetadata>
{
    /// <summary>
    /// Asynchronously retrieves the snapshot metadata associated with the specified node identifier.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to retrieve snapshot metadata. Cannot be null or empty.</param>
    /// <param name="cancellationToken">A token to monitor for cancellation requests.</param>
    /// <returns>A task that represents the asynchronous operation. The task result contains the <see cref="SnapshotMetadata"/>
    /// for the specified node if found; otherwise, <see langword="null"/>.</returns>
    Task<SnapshotMetadata?> GetSnapshotMetadataAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously inserts the specified snapshot metadata into the data store.
    /// </summary>
    /// <param name="metadata">The snapshot metadata to insert. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous insert operation.</returns>
    Task InsertSnapshotMetadataAsync(SnapshotMetadata metadata, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously updates the metadata for an existing snapshot.
    /// </summary>
    /// <param name="existingMeta">The metadata object representing the snapshot to update. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the asynchronous operation.</param>
    /// <returns>A task that represents the asynchronous update operation.</returns>
    Task UpdateSnapshotMetadataAsync(SnapshotMetadata existingMeta, CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously retrieves the hash of the current snapshot for the specified node.
    /// </summary>
    /// <param name="nodeId">The unique identifier of the node for which to obtain the snapshot hash.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task containing the snapshot hash as a string, or null if no snapshot is available.</returns>
    Task<string?> GetSnapshotHashAsync(string nodeId, CancellationToken cancellationToken = default);

    /// <summary>
    /// Gets all snapshot metadata entries. Used for initializing the VectorClock cache.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token.</param>
    /// <returns>All snapshot metadata entries.</returns>
    Task<IEnumerable<SnapshotMetadata>> GetAllSnapshotMetadataAsync(CancellationToken cancellationToken = default);
}
|
||||
35
src/ZB.MOM.WW.CBDDC.Core/Storage/ISnapshotService.cs
Executable file
35
src/ZB.MOM.WW.CBDDC.Core/Storage/ISnapshotService.cs
Executable file
@@ -0,0 +1,35 @@
|
||||
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace ZB.MOM.WW.CBDDC.Core.Storage;

/// <summary>
/// Handles full database lifecycle operations such as snapshots, replacement, and clearing data.
/// </summary>
public interface ISnapshotService
{
    /// <summary>
    /// Asynchronously creates a snapshot of the current state and writes it to the specified destination stream.
    /// </summary>
    /// <param name="destination">The stream to which the snapshot data will be written.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the snapshot creation operation.</param>
    /// <returns>A task that represents the asynchronous snapshot creation operation.</returns>
    Task CreateSnapshotAsync(Stream destination, CancellationToken cancellationToken = default);

    /// <summary>
    /// Replaces the existing database with the contents provided in the specified stream asynchronously.
    /// </summary>
    /// <param name="databaseStream">A stream containing the new database data to be used for replacement.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the operation.</param>
    /// <returns>A task that represents the asynchronous database replacement operation.</returns>
    Task ReplaceDatabaseAsync(Stream databaseStream, CancellationToken cancellationToken = default);

    /// <summary>
    /// Merges the provided snapshot stream into the current data store asynchronously.
    /// </summary>
    /// <param name="snapshotStream">A stream containing the snapshot data to be merged.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the merge operation.</param>
    /// <returns>A task that represents the asynchronous merge operation.</returns>
    Task MergeSnapshotAsync(Stream snapshotStream, CancellationToken cancellationToken = default);
}
|
||||
44
src/ZB.MOM.WW.CBDDC.Core/Storage/ISnapshotable.cs
Executable file
44
src/ZB.MOM.WW.CBDDC.Core/Storage/ISnapshotable.cs
Executable file
@@ -0,0 +1,44 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
|
||||
/// <summary>
/// Provides bulk snapshot operations (drop, export, import, merge) over a data store
/// of items of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">The type of item held by the data store.</typeparam>
public interface ISnapshotable<T>
{
    /// <summary>
    /// Asynchronously deletes the underlying data store and all of its contents.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the drop operation.</param>
    /// <remarks>After calling this method, the data store and all stored data will be permanently removed.
    /// This operation cannot be undone. Any further operations on the data store may result in errors.</remarks>
    /// <returns>A task that represents the asynchronous drop operation.</returns>
    Task DropAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Asynchronously exports a collection of items of type T.
    /// </summary>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the export operation.</param>
    /// <returns>A task that represents the asynchronous export operation. The task result contains an enumerable collection of
    /// exported items of type T.</returns>
    Task<IEnumerable<T>> ExportAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Imports the specified collection of items asynchronously.
    /// </summary>
    /// <param name="items">The collection of items to import. Cannot be null. Each item will be processed in sequence.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the import operation.</param>
    /// <returns>A task that represents the asynchronous import operation.</returns>
    Task ImportAsync(IEnumerable<T> items, CancellationToken cancellationToken = default);

    /// <summary>
    /// Merges the specified collection of items into the target data store asynchronously.
    /// </summary>
    /// <remarks>If the operation is canceled via the provided cancellation token, the returned task will be
    /// in a canceled state. The merge operation may update existing items or add new items, depending on the
    /// implementation.</remarks>
    /// <param name="items">The collection of items to merge into the data store. Cannot be null.</param>
    /// <param name="cancellationToken">A cancellation token that can be used to cancel the merge operation.</param>
    /// <returns>A task that represents the asynchronous merge operation.</returns>
    Task MergeAsync(IEnumerable<T> items, CancellationToken cancellationToken = default);
}
|
||||
58
src/ZB.MOM.WW.CBDDC.Core/Storage/IVectorClockService.cs
Executable file
58
src/ZB.MOM.WW.CBDDC.Core/Storage/IVectorClockService.cs
Executable file
@@ -0,0 +1,58 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
|
||||
/// <summary>
/// Manages Vector Clock state for the local node.
/// Tracks the latest timestamp and hash per node for sync coordination.
/// </summary>
public interface IVectorClockService
{
    /// <summary>
    /// Indicates whether the cache has been populated with initial data.
    /// Reset to false by <see cref="Invalidate"/>.
    /// </summary>
    bool IsInitialized { get; set; }

    /// <summary>
    /// Updates the cache with a new OplogEntry's timestamp and hash.
    /// Called by both DocumentStore (local CDC) and OplogStore (remote sync).
    /// </summary>
    /// <param name="entry">The oplog entry containing timestamp and hash data.</param>
    void Update(OplogEntry entry);

    /// <summary>
    /// Returns the current Vector Clock built from cached node timestamps.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task whose result is the current vector clock.</returns>
    Task<VectorClock> GetVectorClockAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns the latest known timestamp across all nodes.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the operation.</param>
    /// <returns>A task whose result is the latest known timestamp.</returns>
    Task<HlcTimestamp> GetLatestTimestampAsync(CancellationToken cancellationToken = default);

    /// <summary>
    /// Returns the last known hash for the specified node.
    /// Returns null if the node is unknown.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <returns>The last known hash, or <see langword="null"/> if the node is unknown.</returns>
    string? GetLastHash(string nodeId);

    /// <summary>
    /// Updates the cache with a specific node's timestamp and hash.
    /// Used for snapshot metadata fallback.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <param name="timestamp">The timestamp to store for the node.</param>
    /// <param name="hash">The hash to store for the node.</param>
    void UpdateNode(string nodeId, HlcTimestamp timestamp, string hash);

    /// <summary>
    /// Clears the cache and resets <see cref="IsInitialized"/> to false,
    /// forcing re-initialization on next access.
    /// </summary>
    void Invalidate();
}
|
||||
50
src/ZB.MOM.WW.CBDDC.Core/Sync/IConflictResolver.cs
Executable file
50
src/ZB.MOM.WW.CBDDC.Core/Sync/IConflictResolver.cs
Executable file
@@ -0,0 +1,50 @@
|
||||
using System.Text.Json;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
|
||||
/// <summary>
/// Describes the outcome of conflict resolution: whether the remote change should
/// be applied and, if so, the merged document to write.
/// </summary>
public class ConflictResolutionResult
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ConflictResolutionResult"/> class.
    /// </summary>
    /// <param name="shouldApply">Indicates whether the change should be applied.</param>
    /// <param name="mergedDocument">The merged document produced by resolution, if any.</param>
    public ConflictResolutionResult(bool shouldApply, Document? mergedDocument)
        => (ShouldApply, MergedDocument) = (shouldApply, mergedDocument);

    /// <summary>
    /// Gets a value indicating whether the remote change should be applied.
    /// </summary>
    public bool ShouldApply { get; }

    /// <summary>
    /// Gets the merged document to apply when conflict resolution produced one.
    /// </summary>
    public Document? MergedDocument { get; }

    /// <summary>
    /// Creates a result indicating that the resolved document should be applied.
    /// </summary>
    /// <param name="document">The merged document to apply.</param>
    /// <returns>A resolution result that applies the provided document.</returns>
    public static ConflictResolutionResult Apply(Document document)
    {
        return new ConflictResolutionResult(true, document);
    }

    /// <summary>
    /// Creates a result indicating that the remote change should be ignored.
    /// </summary>
    /// <returns>A resolution result that skips applying the remote change.</returns>
    public static ConflictResolutionResult Ignore()
    {
        return new ConflictResolutionResult(false, null);
    }
}
|
||||
|
||||
/// <summary>
/// Resolves conflicts between local document state and incoming remote oplog entries.
/// </summary>
public interface IConflictResolver
{
    /// <summary>
    /// Resolves a conflict between local state and a remote oplog entry.
    /// </summary>
    /// <param name="local">The local document state, if present.</param>
    /// <param name="remote">The incoming remote oplog entry.</param>
    /// <returns>The resolution outcome indicating whether and how to apply changes.</returns>
    ConflictResolutionResult Resolve(Document? local, OplogEntry remote);
}
|
||||
40
src/ZB.MOM.WW.CBDDC.Core/Sync/IOfflineQueue.cs
Executable file
40
src/ZB.MOM.WW.CBDDC.Core/Sync/IOfflineQueue.cs
Executable file
@@ -0,0 +1,40 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Sync
|
||||
{
|
||||
/// <summary>
/// Represents a queue for operations that should be executed when connectivity is restored.
/// </summary>
public interface IOfflineQueue
{
    /// <summary>
    /// Gets the number of pending operations in the queue.
    /// </summary>
    int Count { get; }

    /// <summary>
    /// Clears all pending operations from the queue.
    /// </summary>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task Clear();

    /// <summary>
    /// Enqueues a pending operation.
    /// </summary>
    /// <param name="operation">The operation to enqueue.</param>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task Enqueue(PendingOperation operation);

    /// <summary>
    /// Flushes the queue by executing each pending operation.
    /// </summary>
    /// <param name="executor">The delegate used to execute each operation.</param>
    /// <param name="cancellationToken">A token used to cancel the flush operation.</param>
    /// <returns>
    /// A task that returns a tuple containing the number of successful and failed operations.
    /// </returns>
    Task<(int Successful, int Failed)> FlushAsync(Func<PendingOperation, Task> executor, CancellationToken cancellationToken = default);
}
|
||||
}
|
||||
37
src/ZB.MOM.WW.CBDDC.Core/Sync/LastWriteWinsConflictResolver.cs
Executable file
37
src/ZB.MOM.WW.CBDDC.Core/Sync/LastWriteWinsConflictResolver.cs
Executable file
@@ -0,0 +1,37 @@
|
||||
using System.Text.Json;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
|
||||
/// <summary>
/// Resolves document conflicts with a last-write-wins strategy: the side with the
/// later timestamp wins; ties keep the local document.
/// </summary>
public class LastWriteWinsConflictResolver : IConflictResolver
{
    /// <summary>
    /// Resolves document conflicts by preferring the entry with the latest timestamp.
    /// </summary>
    /// <param name="local">The local document, if available.</param>
    /// <param name="remote">The incoming remote oplog entry.</param>
    /// <returns>The conflict resolution result indicating whether to apply or ignore the remote change.</returns>
    public ConflictResolutionResult Resolve(Document? local, OplogEntry remote)
    {
        // Apply the remote change when no local document exists, or when the remote
        // timestamp is strictly newer (last write wins). Both cases previously
        // duplicated the same document construction; they are folded into one branch.
        if (local == null || remote.Timestamp.CompareTo(local.UpdatedAt) > 0)
        {
            // Construct the replacement document from the oplog entry; a Delete
            // operation is recorded as a tombstone (deleted flag set).
            var content = remote.Payload ?? default;
            var newDoc = new Document(remote.Collection, remote.Key, content, remote.Timestamp, remote.Operation == OperationType.Delete);
            return ConflictResolutionResult.Apply(newDoc);
        }

        // Local is newer or equal: keep local, ignore the remote change.
        return ConflictResolutionResult.Ignore();
    }
}
|
||||
130
src/ZB.MOM.WW.CBDDC.Core/Sync/OfflineQueue.cs
Executable file
130
src/ZB.MOM.WW.CBDDC.Core/Sync/OfflineQueue.cs
Executable file
@@ -0,0 +1,130 @@
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
|
||||
/// <summary>
/// Queue for operations performed while offline.
/// Thread-safe: all queue access is serialized on a private lock.
/// </summary>
public class OfflineQueue : IOfflineQueue
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
    private readonly Queue<PendingOperation> _queue = new();
    private readonly ILogger<OfflineQueue> _logger;
    private readonly object _lock = new();

    /// <summary>
    /// Initializes a new instance of the <see cref="OfflineQueue"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The configuration provider used for queue limits.</param>
    /// <param name="logger">The logger instance; a null logger is substituted when omitted.</param>
    public OfflineQueue(IPeerNodeConfigurationProvider peerNodeConfigurationProvider, ILogger<OfflineQueue>? logger = null)
    {
        _peerNodeConfigurationProvider = peerNodeConfigurationProvider;
        _logger = logger ?? NullLogger<OfflineQueue>.Instance;
    }

    /// <summary>
    /// Gets the number of pending operations.
    /// </summary>
    public int Count
    {
        get
        {
            lock (_lock)
            {
                return _queue.Count;
            }
        }
    }

    /// <summary>
    /// Enqueues an operation for later execution. When the queue has reached the
    /// configured maximum size, the oldest operation is dropped to make room.
    /// </summary>
    /// <param name="operation">The pending operation to enqueue.</param>
    /// <returns>A task that represents the asynchronous enqueue operation.</returns>
    public async Task Enqueue(PendingOperation operation)
    {
        // Fetch the size limit before taking the lock: awaiting inside a lock
        // statement is not allowed, and the limit read does not need the lock.
        var config = await _peerNodeConfigurationProvider.GetConfiguration();
        lock (_lock)
        {
            if (_queue.Count >= config.MaxQueueSize)
            {
                var dropped = _queue.Dequeue();
                _logger.LogWarning("Queue full, dropped oldest operation: {Type} {Collection}:{Key}",
                    dropped.Type, dropped.Collection, dropped.Key);
            }

            _queue.Enqueue(operation);
            _logger.LogDebug("Queued {Type} operation for {Collection}:{Key}",
                operation.Type, operation.Collection, operation.Key);
        }
    }

    /// <summary>
    /// Flushes all pending operations. The queue is drained up front, then each
    /// operation is executed in order; a failing operation is counted as failed
    /// and the flush continues with the remaining operations (best-effort).
    /// </summary>
    /// <param name="executor">The delegate that executes each pending operation.</param>
    /// <param name="cancellationToken">A token used to cancel the operation.
    /// NOTE(review): the token is not observed by the loop itself — confirm whether
    /// executors are expected to honor it.</param>
    /// <returns>A task whose result contains the number of successful and failed operations.</returns>
    public async Task<(int Successful, int Failed)> FlushAsync(Func<PendingOperation, Task> executor, CancellationToken cancellationToken = default)
    {
        List<PendingOperation> operations;

        // Drain under the lock so concurrent Enqueue calls land in a fresh queue.
        lock (_lock)
        {
            operations = _queue.ToList();
            _queue.Clear();
        }

        if (operations.Count == 0)
        {
            _logger.LogDebug("No pending operations to flush");
            return (0, 0);
        }

        _logger.LogInformation("Flushing {Count} pending operations", operations.Count);

        int successful = 0;
        int failed = 0;

        foreach (var op in operations)
        {
            try
            {
                await executor(op);
                successful++;
            }
            catch (Exception ex)
            {
                // Best-effort flush: log the failure and continue with the next operation.
                failed++;
                _logger.LogError(ex, "Failed to execute pending {Type} operation for {Collection}:{Key}",
                    op.Type, op.Collection, op.Key);
            }
        }

        _logger.LogInformation("Flush completed: {Successful} successful, {Failed} failed",
            successful, failed);

        return (successful, failed);
    }

    /// <summary>
    /// Clears all pending operations.
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Clear()
    {
        // Fix: the original declared this method 'async' with no 'await' (CS1998),
        // paying for a state machine it never used. The work is synchronous, so
        // run it inline and return a completed task.
        lock (_lock)
        {
            var count = _queue.Count;
            _queue.Clear();
            _logger.LogInformation("Cleared {Count} pending operations", count);
        }
        return Task.CompletedTask;
    }
}
|
||||
32
src/ZB.MOM.WW.CBDDC.Core/Sync/PendingOperation.cs
Executable file
32
src/ZB.MOM.WW.CBDDC.Core/Sync/PendingOperation.cs
Executable file
@@ -0,0 +1,32 @@
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
|
||||
/// <summary>
/// Represents a pending operation to be executed when connection is restored.
/// Mutable DTO populated by the offline queue machinery.
/// </summary>
public class PendingOperation
{
    /// <summary>
    /// Gets or sets the operation type.
    /// </summary>
    public string Type { get; set; } = "";

    /// <summary>
    /// Gets or sets the collection targeted by the operation.
    /// </summary>
    public string Collection { get; set; } = "";

    /// <summary>
    /// Gets or sets the document key targeted by the operation.
    /// </summary>
    public string Key { get; set; } = "";

    /// <summary>
    /// Gets or sets the payload associated with the operation.
    /// </summary>
    public object? Data { get; set; }

    /// <summary>
    /// Gets or sets the UTC time when the operation was queued.
    /// </summary>
    public DateTime QueuedAt { get; set; }
}
|
||||
254
src/ZB.MOM.WW.CBDDC.Core/Sync/RecursiveNodeMergeConflictResolver.cs
Executable file
254
src/ZB.MOM.WW.CBDDC.Core/Sync/RecursiveNodeMergeConflictResolver.cs
Executable file
@@ -0,0 +1,254 @@
|
||||
using System;
|
||||
using System.Buffers;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
|
||||
/// <summary>
/// Resolves merge conflicts by recursively merging object and array nodes.
/// Primitives and mismatched node kinds fall back to last-write-wins on timestamps.
/// </summary>
public class RecursiveNodeMergeConflictResolver : IConflictResolver
{
    /// <summary>
    /// Resolves a conflict between a local document and a remote operation.
    /// </summary>
    /// <param name="local">The local document, or <see langword="null"/> if none exists.</param>
    /// <param name="remote">The remote operation to apply.</param>
    /// <returns>The conflict resolution result indicating whether and what to apply.</returns>
    public ConflictResolutionResult Resolve(Document? local, OplogEntry remote)
    {
        // No local state: always apply the remote change as-is.
        if (local == null)
        {
            var content = remote.Payload ?? default;
            var newDoc = new Document(remote.Collection, remote.Key, content, remote.Timestamp, remote.Operation == OperationType.Delete);
            return ConflictResolutionResult.Apply(newDoc);
        }

        // Deletes are not merged: last-write-wins on the timestamp, producing a tombstone.
        if (remote.Operation == OperationType.Delete)
        {
            if (remote.Timestamp.CompareTo(local.UpdatedAt) > 0)
            {
                var newDoc = new Document(remote.Collection, remote.Key, default, remote.Timestamp, true);
                return ConflictResolutionResult.Apply(newDoc);
            }
            return ConflictResolutionResult.Ignore();
        }

        var localJson = local.Content;
        var remoteJson = remote.Payload ?? default;
        var localTs = local.UpdatedAt;
        var remoteTs = remote.Timestamp;

        // Undefined content on either side short-circuits the merge.
        if (localJson.ValueKind == JsonValueKind.Undefined) return ConflictResolutionResult.Apply(new Document(remote.Collection, remote.Key, remoteJson, remoteTs, false));
        if (remoteJson.ValueKind == JsonValueKind.Undefined) return ConflictResolutionResult.Ignore();

        // Optimization: Use ArrayBufferWriter (Net6.0) or MemoryStream (NS2.0).
        // Utf8JsonWriter works with both, but ArrayBufferWriter is more efficient for high throughput.

        JsonElement mergedDocJson;

#if NET6_0_OR_GREATER
        var bufferWriter = new ArrayBufferWriter<byte>();
        using (var writer = new Utf8JsonWriter(bufferWriter))
        {
            MergeJson(writer, localJson, localTs, remoteJson, remoteTs);
        }
        // Fix: dispose the JsonDocument so its pooled buffers are returned to the
        // array pool; Clone() detaches the root element so it stays valid afterwards.
        using (var doc = JsonDocument.Parse(bufferWriter.WrittenMemory))
        {
            mergedDocJson = doc.RootElement.Clone();
        }
#else
        using (var ms = new MemoryStream())
        {
            using (var writer = new Utf8JsonWriter(ms))
            {
                MergeJson(writer, localJson, localTs, remoteJson, remoteTs);
            }
            // Parse expects ReadOnlyMemory or a byte array; dispose + Clone as above.
            using (var doc = JsonDocument.Parse(ms.ToArray()))
            {
                mergedDocJson = doc.RootElement.Clone();
            }
        }
#endif

        // The merged document carries the maximum of the two timestamps.
        var maxTimestamp = remoteTs.CompareTo(localTs) > 0 ? remoteTs : localTs;
        var mergedDoc = new Document(remote.Collection, remote.Key, mergedDocJson, maxTimestamp, false);
        return ConflictResolutionResult.Apply(mergedDoc);
    }

    // Recursively merges two JSON elements into the writer.
    // Elements of different kinds are resolved by last-write-wins on the timestamps.
    private void MergeJson(Utf8JsonWriter writer, JsonElement local, HlcTimestamp localTs, JsonElement remote, HlcTimestamp remoteTs)
    {
        if (local.ValueKind != remote.ValueKind)
        {
            // Winner writes
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }

        switch (local.ValueKind)
        {
            case JsonValueKind.Object:
                MergeObjects(writer, local, localTs, remote, remoteTs);
                break;
            case JsonValueKind.Array:
                MergeArrays(writer, local, localTs, remote, remoteTs);
                break;
            default:
                // Primitives: identical raw text is written as-is; otherwise LWW.
                if (local.GetRawText() == remote.GetRawText())
                {
                    local.WriteTo(writer);
                }
                else
                {
                    if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
                    else local.WriteTo(writer);
                }
                break;
        }
    }

    // Merges two JSON objects property-by-property: shared keys are merged
    // recursively; keys present on only one side are copied through unchanged.
    private void MergeObjects(Utf8JsonWriter writer, JsonElement local, HlcTimestamp localTs, JsonElement remote, HlcTimestamp remoteTs)
    {
        writer.WriteStartObject();

        // Iterate local first (merging collisions), tracking handled keys, then
        // append remote-only properties — avoids building a full union dictionary.
        var processedKeys = new HashSet<string>();

        foreach (var prop in local.EnumerateObject())
        {
            var key = prop.Name;
            processedKeys.Add(key); // Mark as processed

            writer.WritePropertyName(key);

            if (remote.TryGetProperty(key, out var remoteVal))
            {
                // Collision -> Merge
                MergeJson(writer, prop.Value, localTs, remoteVal, remoteTs);
            }
            else
            {
                // Only local
                prop.Value.WriteTo(writer);
            }
        }

        foreach (var prop in remote.EnumerateObject())
        {
            if (!processedKeys.Contains(prop.Name))
            {
                // New from remote
                writer.WritePropertyName(prop.Name);
                prop.Value.WriteTo(writer);
            }
        }

        writer.WriteEndObject();
    }

    // Merges two JSON arrays. Arrays of objects are merged element-wise by an
    // "id"/"_id" key; anything else falls back to last-write-wins on timestamps.
    private void MergeArrays(Utf8JsonWriter writer, JsonElement local, HlcTimestamp localTs, JsonElement remote, HlcTimestamp remoteTs)
    {
        // Heuristic check: inspect only the first element of each array.
        bool localIsObj = HasObjects(local);
        bool remoteIsObj = HasObjects(remote);

        if (!localIsObj && !remoteIsObj)
        {
            // Primitive LWW
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }

        if (localIsObj != remoteIsObj)
        {
            // Mixed mismatch LWW
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }

        // Both object arrays - merge by ID (JsonElement is a struct, cheap to hold).
        var localMap = MapById(local);
        var remoteMap = MapById(remote);

        if (localMap == null || remoteMap == null)
        {
            // Arrays without usable IDs (missing/duplicate/mixed): fallback LWW.
            if (remoteTs.CompareTo(localTs) > 0) remote.WriteTo(writer);
            else local.WriteTo(writer);
            return;
        }

        writer.WriteStartArray();

        // Write the union of items by ID: local order first for determinism,
        // then remote-only items appended.
        var processedIds = new HashSet<string>();

        // 1. Process Local Items (Merge or Write)
        foreach (var kvp in localMap)
        {
            var id = kvp.Key;
            var localItem = kvp.Value;
            processedIds.Add(id);

            if (remoteMap.TryGetValue(id, out var remoteItem))
            {
                // Merge recursively
                MergeJson(writer, localItem, localTs, remoteItem, remoteTs);
            }
            else
            {
                // Keep local item
                localItem.WriteTo(writer);
            }
        }

        // 2. Process New Remote Items
        foreach (var kvp in remoteMap)
        {
            if (!processedIds.Contains(kvp.Key))
            {
                kvp.Value.WriteTo(writer);
            }
        }

        writer.WriteEndArray();
    }

    // Heuristic: treats the array as an "object array" when its first element is
    // an object. Empty arrays count as non-object.
    private bool HasObjects(JsonElement array)
    {
        if (array.GetArrayLength() == 0) return false;
        // Check first item as heuristic
        return array[0].ValueKind == JsonValueKind.Object;
    }

    // Builds an id -> element map from an array of objects using "id" or "_id".
    // Returns null (caller falls back to LWW) on mixed kinds, missing, or duplicate IDs.
    private Dictionary<string, JsonElement>? MapById(JsonElement array)
    {
        var map = new Dictionary<string, JsonElement>();
        foreach (var item in array.EnumerateArray())
        {
            if (item.ValueKind != JsonValueKind.Object) return null; // Abort mixed

            string? id = null;
            if (item.TryGetProperty("id", out var p)) id = p.ToString();
            else if (item.TryGetProperty("_id", out var p2)) id = p2.ToString();

            if (id == null) return null; // Missing ID
            if (map.ContainsKey(id)) return null; // Duplicate ID

            map[id] = item;
        }
        return map;
    }
}
|
||||
196
src/ZB.MOM.WW.CBDDC.Core/VectorClock.cs
Executable file
196
src/ZB.MOM.WW.CBDDC.Core/VectorClock.cs
Executable file
@@ -0,0 +1,196 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
/// <summary>
/// Represents a Vector Clock for tracking causality in a distributed system.
/// Maps NodeId -> HlcTimestamp to track the latest known state of each node.
/// </summary>
public class VectorClock
{
    private readonly Dictionary<string, HlcTimestamp> _clock;

    /// <summary>
    /// Initializes a new empty vector clock.
    /// </summary>
    public VectorClock()
    {
        _clock = new Dictionary<string, HlcTimestamp>(StringComparer.Ordinal);
    }

    /// <summary>
    /// Initializes a new vector clock from an existing clock state.
    /// </summary>
    /// <param name="clock">The clock state to copy.</param>
    public VectorClock(Dictionary<string, HlcTimestamp> clock)
    {
        _clock = new Dictionary<string, HlcTimestamp>(clock, StringComparer.Ordinal);
    }

    /// <summary>
    /// Gets all node IDs in this vector clock.
    /// </summary>
    public IEnumerable<string> NodeIds => _clock.Keys;

    /// <summary>
    /// Gets the timestamp for a specific node, or default if not present.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <returns>The node's timestamp, or <c>default</c> when the node is unknown.</returns>
    public HlcTimestamp GetTimestamp(string nodeId)
    {
        return _clock.TryGetValue(nodeId, out var ts) ? ts : default;
    }

    /// <summary>
    /// Sets or updates the timestamp for a specific node.
    /// </summary>
    /// <param name="nodeId">The node identifier.</param>
    /// <param name="timestamp">The timestamp to set.</param>
    public void SetTimestamp(string nodeId, HlcTimestamp timestamp)
    {
        _clock[nodeId] = timestamp;
    }

    /// <summary>
    /// Merges another vector clock into this one, taking the maximum timestamp for each node.
    /// </summary>
    /// <param name="other">The vector clock to merge from.</param>
    public void Merge(VectorClock other)
    {
        foreach (var nodeId in other.NodeIds)
        {
            var otherTs = other.GetTimestamp(nodeId);
            if (!_clock.TryGetValue(nodeId, out var currentTs) || otherTs.CompareTo(currentTs) > 0)
            {
                _clock[nodeId] = otherTs;
            }
        }
    }

    /// <summary>
    /// Compares this vector clock with another to determine causality.
    /// </summary>
    /// <param name="other">The vector clock to compare with.</param>
    /// <returns>
    /// <see cref="CausalityRelation.StrictlyAhead"/> when this clock dominates,
    /// <see cref="CausalityRelation.StrictlyBehind"/> when the other dominates,
    /// <see cref="CausalityRelation.Concurrent"/> when neither dominates, and
    /// <see cref="CausalityRelation.Equal"/> when both are identical.
    /// </returns>
    public CausalityRelation CompareTo(VectorClock other)
    {
        bool thisAhead = false;
        bool otherAhead = false;

        var allNodes = new HashSet<string>(_clock.Keys.Union(other._clock.Keys), StringComparer.Ordinal);

        foreach (var nodeId in allNodes)
        {
            var thisTs = GetTimestamp(nodeId);
            var otherTs = other.GetTimestamp(nodeId);

            int cmp = thisTs.CompareTo(otherTs);

            if (cmp > 0)
            {
                thisAhead = true;
            }
            else if (cmp < 0)
            {
                otherAhead = true;
            }

            // Early exit if concurrent
            if (thisAhead && otherAhead)
            {
                return CausalityRelation.Concurrent;
            }
        }

        if (thisAhead && !otherAhead)
            return CausalityRelation.StrictlyAhead;
        if (otherAhead && !thisAhead)
            return CausalityRelation.StrictlyBehind;

        return CausalityRelation.Equal;
    }

    /// <summary>
    /// Determines which nodes have updates that this vector clock doesn't have.
    /// Returns node IDs where the other vector clock is ahead.
    /// </summary>
    /// <param name="other">The vector clock to compare against.</param>
    /// <returns>The node IDs for which <paramref name="other"/> is strictly ahead.</returns>
    public IEnumerable<string> GetNodesWithUpdates(VectorClock other)
    {
        // Consistency: build the key union the same way as GetNodesToPush
        // (previously this method populated the set with a manual loop).
        var allNodes = new HashSet<string>(_clock.Keys.Union(other._clock.Keys), StringComparer.Ordinal);

        foreach (var nodeId in allNodes)
        {
            var thisTs = GetTimestamp(nodeId);
            var otherTs = other.GetTimestamp(nodeId);

            if (otherTs.CompareTo(thisTs) > 0)
            {
                yield return nodeId;
            }
        }
    }

    /// <summary>
    /// Determines which nodes have updates that the other vector clock doesn't have.
    /// Returns node IDs where this vector clock is ahead.
    /// </summary>
    /// <param name="other">The vector clock to compare against.</param>
    /// <returns>The node IDs for which this clock is strictly ahead.</returns>
    public IEnumerable<string> GetNodesToPush(VectorClock other)
    {
        var allNodes = new HashSet<string>(_clock.Keys.Union(other._clock.Keys), StringComparer.Ordinal);

        foreach (var nodeId in allNodes)
        {
            var thisTs = GetTimestamp(nodeId);
            var otherTs = other.GetTimestamp(nodeId);

            if (thisTs.CompareTo(otherTs) > 0)
            {
                yield return nodeId;
            }
        }
    }

    /// <summary>
    /// Creates a copy of this vector clock.
    /// </summary>
    /// <returns>A deep copy of the clock state (timestamps are value types).</returns>
    public VectorClock Clone()
    {
        return new VectorClock(new Dictionary<string, HlcTimestamp>(_clock, StringComparer.Ordinal));
    }

    /// <inheritdoc />
    public override string ToString()
    {
        if (_clock.Count == 0)
            return "{}";

        var entries = _clock.Select(kvp => $"{kvp.Key}:{kvp.Value}");
        return "{" + string.Join(", ", entries) + "}";
    }
}
|
||||
|
||||
/// <summary>
/// Represents the causality relationship between two vector clocks.
/// </summary>
public enum CausalityRelation
{
    /// <summary>Both vector clocks are equal.</summary>
    Equal,

    /// <summary>This vector clock is strictly ahead (dominates).</summary>
    StrictlyAhead,

    /// <summary>This vector clock is strictly behind (dominated).</summary>
    StrictlyBehind,

    /// <summary>Vector clocks are concurrent (neither dominates).</summary>
    Concurrent
}
|
||||
33
src/ZB.MOM.WW.CBDDC.Core/ZB.MOM.WW.CBDDC.Core.csproj
Executable file
33
src/ZB.MOM.WW.CBDDC.Core/ZB.MOM.WW.CBDDC.Core.csproj
Executable file
@@ -0,0 +1,33 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<AssemblyName>ZB.MOM.WW.CBDDC.Core</AssemblyName>
|
||||
<RootNamespace>ZB.MOM.WW.CBDDC.Core</RootNamespace>
|
||||
<PackageId>ZB.MOM.WW.CBDDC.Core</PackageId>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<LangVersion>latest</LangVersion>
|
||||
<Nullable>enable</Nullable>
|
||||
<Version>1.0.3</Version>
|
||||
<Authors>MrDevRobot</Authors>
|
||||
<Description>Core abstractions and logic for CBDDC, a lightweight P2P mesh database.</Description>
|
||||
<PackageLicenseExpression>MIT</PackageLicenseExpression>
|
||||
<PackageTags>p2p;mesh;database;gossip;cbddc;lan;offline-first;distributed</PackageTags>
|
||||
<PackageProjectUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</RepositoryUrl>
|
||||
<RepositoryType>git</RepositoryType>
|
||||
<PackageReadmeFile>README.md</PackageReadmeFile>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Include="README.md" Pack="true" PackagePath="\" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Folder Include="Storage\Events\" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
92
src/ZB.MOM.WW.CBDDC.Hosting/CBDDCHostingExtensions.cs
Executable file
92
src/ZB.MOM.WW.CBDDC.Hosting/CBDDCHostingExtensions.cs
Executable file
@@ -0,0 +1,92 @@
|
||||
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
|
||||
using ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
|
||||
using ZB.MOM.WW.CBDDC.Hosting.HostedServices;
|
||||
using ZB.MOM.WW.CBDDC.Hosting.Services;
|
||||
using ZB.MOM.WW.CBDDC.Network;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting;
|
||||
|
||||
/// <summary>
/// Extension methods for configuring CBDDC in ASP.NET Core applications.
/// </summary>
public static class CBDDCHostingExtensions
{
    /// <summary>
    /// Adds CBDDC ASP.NET integration with the specified configuration.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configure">Action to configure CBDDC options.</param>
    /// <returns>The <paramref name="services"/> collection, to allow chaining.</returns>
    public static IServiceCollection AddCBDDCHosting(
        this IServiceCollection services,
        Action<CBDDCHostingOptions> configure)
    {
        if (services is null) throw new ArgumentNullException(nameof(services));
        if (configure is null) throw new ArgumentNullException(nameof(configure));

        // Materialize the caller's configuration.
        var hostingOptions = new CBDDCHostingOptions();
        configure(hostingOptions);

        // Expose the resolved options to the rest of the container.
        services.TryAddSingleton(hostingOptions);

        // Cluster-scoped services (discovery, sync orchestration, hosted services).
        RegisterSingleClusterServices(services, hostingOptions.Cluster);

        // Services shared by every deployment mode (health checks).
        RegisterCommonServices(services, hostingOptions);

        return services;
    }

    /// <summary>
    /// Adds CBDDC ASP.NET integration for single-cluster mode.
    /// </summary>
    /// <param name="services">The service collection.</param>
    /// <param name="configure">Action to configure single-cluster options.</param>
    /// <returns>The <paramref name="services"/> collection, to allow chaining.</returns>
    public static IServiceCollection AddCBDDCHostingSingleCluster(
        this IServiceCollection services,
        Action<ClusterOptions>? configure = null)
    {
        // Delegate to the general overload, forwarding only the cluster section.
        return services.AddCBDDCHosting(options => configure?.Invoke(options.Cluster));
    }

    // Registers services whose lifetime is tied to a single cluster node.
    private static void RegisterSingleClusterServices(
        IServiceCollection services,
        ClusterOptions options)
    {
        // Discovery service (no-op in server mode - no UDP broadcast).
        services.TryAddSingleton<IDiscoveryService, NoOpDiscoveryService>();

        // Sync orchestrator - use the real orchestrator so changes propagate between peers.
        // Cloud nodes must act as propagators for:
        // 1. Services connected to the cloud that modify data.
        // 2. Separate LAN clusters that connect through the cloud.
        services.TryAddSingleton<ISyncOrchestrator, SyncOrchestrator>();

        // Hosted services that drive the TCP sync server and discovery lifecycles.
        services.TryAddEnumerable(ServiceDescriptor.Singleton<IHostedService, TcpSyncServerHostedService>());
        services.TryAddEnumerable(ServiceDescriptor.Singleton<IHostedService, DiscoveryServiceHostedService>());
    }

    // Registers services used regardless of deployment mode.
    private static void RegisterCommonServices(
        IServiceCollection services,
        CBDDCHostingOptions options)
    {
        if (!options.EnableHealthChecks)
        {
            return;
        }

        services.AddHealthChecks()
            .AddCheck<CBDDCHealthCheck>(
                "cbddc",
                failureStatus: HealthStatus.Unhealthy,
                tags: new[] { "db", "ready" });
    }
}
|
||||
18
src/ZB.MOM.WW.CBDDC.Hosting/Configuration/CBDDCHostingOptions.cs
Executable file
18
src/ZB.MOM.WW.CBDDC.Hosting/Configuration/CBDDCHostingOptions.cs
Executable file
@@ -0,0 +1,18 @@
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.Configuration;
|
||||
|
||||
/// <summary>
/// Configuration options for CBDDC ASP.NET integration.
/// </summary>
public class CBDDCHostingOptions
{
    /// <summary>
    /// Gets or sets the cluster configuration.
    /// Initialized to a new <see cref="ClusterOptions"/> instance with its defaults.
    /// </summary>
    public ClusterOptions Cluster { get; set; } = new();

    /// <summary>
    /// Gets or sets whether to enable health checks.
    /// When true, the hosting extensions register the CBDDC health check.
    /// Default: true
    /// </summary>
    public bool EnableHealthChecks { get; set; } = true;
}
|
||||
40
src/ZB.MOM.WW.CBDDC.Hosting/Configuration/ClusterOptions.cs
Executable file
40
src/ZB.MOM.WW.CBDDC.Hosting/Configuration/ClusterOptions.cs
Executable file
@@ -0,0 +1,40 @@
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.Configuration;
|
||||
|
||||
/// <summary>
/// Configuration options for cluster mode.
/// </summary>
public class ClusterOptions
{
    /// <summary>
    /// Gets or sets the node identifier for this instance.
    /// Defaults to the machine name; set a unique value per instance in production.
    /// </summary>
    public string NodeId { get; set; } = Environment.MachineName;

    /// <summary>
    /// Gets or sets the TCP port for sync operations.
    /// Default: 5001
    /// </summary>
    public int TcpPort { get; set; } = 5001;

    /// <summary>
    /// Gets or sets whether to enable UDP discovery.
    /// Default: false (disabled in server mode)
    /// </summary>
    public bool EnableUdpDiscovery { get; set; } = false;

    /// <summary>
    /// Gets or sets the lag threshold (in milliseconds) used to determine when a tracked peer is considered lagging.
    /// Peers above this threshold degrade health status (consumed by the hosting health check).
    /// Default: 30,000 ms.
    /// </summary>
    public long PeerConfirmationLagThresholdMs { get; set; } = 30_000;

    /// <summary>
    /// Gets or sets the critical lag threshold (in milliseconds) used to determine when a tracked peer causes unhealthy status.
    /// Peers above this threshold mark health as unhealthy (consumed by the hosting health check).
    /// Default: 120,000 ms.
    /// </summary>
    public long PeerConfirmationCriticalLagThresholdMs { get; set; } = 120_000;
}
|
||||
135
src/ZB.MOM.WW.CBDDC.Hosting/HealthChecks/CBDDCHealthCheck.cs
Executable file
135
src/ZB.MOM.WW.CBDDC.Hosting/HealthChecks/CBDDCHealthCheck.cs
Executable file
@@ -0,0 +1,135 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Hosting.Configuration;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.HealthChecks;
|
||||
|
||||
/// <summary>
/// Health check for CBDDC persistence layer.
/// Verifies that the database connection is healthy and reports peer confirmation lag.
/// </summary>
public class CBDDCHealthCheck : IHealthCheck
{
    private readonly IOplogStore _oplogStore;
    private readonly IPeerOplogConfirmationStore _peerOplogConfirmationStore;
    private readonly CBDDCHostingOptions _options;

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCHealthCheck"/> class.
    /// </summary>
    /// <param name="oplogStore">The oplog store used to verify persistence health.</param>
    /// <param name="peerOplogConfirmationStore">The peer confirmation store used for confirmation lag health checks.</param>
    /// <param name="options">Hosting options containing health lag thresholds.</param>
    /// <exception cref="ArgumentNullException">Thrown when any dependency is null.</exception>
    public CBDDCHealthCheck(
        IOplogStore oplogStore,
        IPeerOplogConfirmationStore peerOplogConfirmationStore,
        CBDDCHostingOptions options)
    {
        _oplogStore = oplogStore ?? throw new ArgumentNullException(nameof(oplogStore));
        _peerOplogConfirmationStore = peerOplogConfirmationStore ?? throw new ArgumentNullException(nameof(peerOplogConfirmationStore));
        _options = options ?? throw new ArgumentNullException(nameof(options));
    }

    /// <summary>
    /// Performs a health check against the CBDDC persistence layer.
    /// </summary>
    /// <param name="context">The health check execution context.</param>
    /// <param name="cancellationToken">A token used to cancel the health check.</param>
    /// <returns>A <see cref="HealthCheckResult"/> describing the health status.</returns>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context,
        CancellationToken cancellationToken = default)
    {
        try
        {
            var localHead = await _oplogStore.GetLatestTimestampAsync(cancellationToken);

            // Normalize the tracked-peer list: drop blanks, dedupe, order deterministically.
            var trackedPeers = (await _peerOplogConfirmationStore.GetActiveTrackedPeersAsync(cancellationToken))
                .Where(peerNodeId => !string.IsNullOrWhiteSpace(peerNodeId))
                .Distinct(StringComparer.Ordinal)
                .OrderBy(peerNodeId => peerNodeId, StringComparer.Ordinal)
                .ToList();

            var peersWithNoConfirmation = new List<string>();
            var laggingPeers = new List<string>();
            var criticalLaggingPeers = new List<string>();
            var lastSuccessfulConfirmationUpdateByPeer = new Dictionary<string, DateTimeOffset?>(StringComparer.Ordinal);
            var maxLagMs = 0L;

            // Clamp thresholds so a misconfigured negative value cannot flip comparisons,
            // and the critical threshold is never below the degraded threshold.
            var lagThresholdMs = Math.Max(0, _options.Cluster.PeerConfirmationLagThresholdMs);
            var criticalLagThresholdMs = Math.Max(lagThresholdMs, _options.Cluster.PeerConfirmationCriticalLagThresholdMs);

            foreach (var peerNodeId in trackedPeers)
            {
                var confirmations = (await _peerOplogConfirmationStore.GetConfirmationsForPeerAsync(peerNodeId, cancellationToken))
                    .Where(confirmation => confirmation.IsActive)
                    .ToList();

                if (confirmations.Count == 0)
                {
                    // A tracked peer with no active confirmation yet only degrades health.
                    peersWithNoConfirmation.Add(peerNodeId);
                    lastSuccessfulConfirmationUpdateByPeer[peerNodeId] = null;
                    continue;
                }

                // Report worst-case peer lag across source streams.
                var oldestConfirmation = confirmations
                    .OrderBy(confirmation => confirmation.ConfirmedWall)
                    .ThenBy(confirmation => confirmation.ConfirmedLogic)
                    .First();

                var lagMs = Math.Max(0, localHead.PhysicalTime - oldestConfirmation.ConfirmedWall);
                maxLagMs = Math.Max(maxLagMs, lagMs);
                lastSuccessfulConfirmationUpdateByPeer[peerNodeId] = confirmations.Max(confirmation => confirmation.LastConfirmedUtc);

                if (lagMs > lagThresholdMs)
                {
                    laggingPeers.Add(peerNodeId);
                }

                if (lagMs > criticalLagThresholdMs)
                {
                    criticalLaggingPeers.Add(peerNodeId);
                }
            }

            var payload = new Dictionary<string, object>
            {
                ["trackedPeerCount"] = trackedPeers.Count,
                ["peersWithNoConfirmation"] = peersWithNoConfirmation,
                ["maxLagMs"] = maxLagMs,
                ["laggingPeers"] = laggingPeers,
                // FIX: the payload previously omitted the peers that actually drive the
                // Unhealthy result, and the thresholds needed to interpret the lag values.
                ["criticalLaggingPeers"] = criticalLaggingPeers,
                ["lagThresholdMs"] = lagThresholdMs,
                ["criticalLagThresholdMs"] = criticalLagThresholdMs,
                ["lastSuccessfulConfirmationUpdateByPeer"] = lastSuccessfulConfirmationUpdateByPeer
            };

            if (criticalLaggingPeers.Count > 0)
            {
                return HealthCheckResult.Unhealthy(
                    $"CBDDC is unhealthy. Critical lag detected for {criticalLaggingPeers.Count} tracked peer(s).",
                    data: payload);
            }

            if (peersWithNoConfirmation.Count > 0 || laggingPeers.Count > 0)
            {
                return HealthCheckResult.Degraded(
                    $"CBDDC is degraded. Lagging peers: {laggingPeers.Count}, unconfirmed peers: {peersWithNoConfirmation.Count}.",
                    data: payload);
            }

            return HealthCheckResult.Healthy(
                $"CBDDC is healthy. Latest timestamp: {localHead.PhysicalTime}.",
                payload);
        }
        catch (Exception ex)
        {
            // Any storage failure maps to Unhealthy with the exception attached for diagnostics.
            return HealthCheckResult.Unhealthy(
                "CBDDC persistence layer is unavailable",
                exception: ex);
        }
    }
}
|
||||
62
src/ZB.MOM.WW.CBDDC.Hosting/HostedServices/DiscoveryServiceHostedService.cs
Executable file
62
src/ZB.MOM.WW.CBDDC.Hosting/HostedServices/DiscoveryServiceHostedService.cs
Executable file
@@ -0,0 +1,62 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Serilog.Context;
|
||||
using ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.HostedServices;
|
||||
|
||||
/// <summary>
/// Hosted service that manages the lifecycle of the discovery service.
/// </summary>
public class DiscoveryServiceHostedService : IHostedService
{
    private readonly IDiscoveryService _discoveryService;
    private readonly ILogger<DiscoveryServiceHostedService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="DiscoveryServiceHostedService"/> class.
    /// </summary>
    /// <param name="discoveryService">The discovery service to manage.</param>
    /// <param name="logger">The logger used for service lifecycle events.</param>
    public DiscoveryServiceHostedService(
        IDiscoveryService discoveryService,
        ILogger<DiscoveryServiceHostedService> logger)
    {
        _discoveryService = discoveryService;
        _logger = logger;
    }

    /// <summary>
    /// Starts the discovery service.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the startup operation.</param>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        // Enrich every log event in this operation with the service name,
        // a fresh correlation id, and the lifecycle action being performed.
        using var svcScope = LogContext.PushProperty("Service", nameof(DiscoveryServiceHostedService));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");

        _logger.LogInformation("Starting Discovery Service...");
        await _discoveryService.Start();
        _logger.LogInformation("Discovery Service started");
    }

    /// <summary>
    /// Stops the discovery service.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel the shutdown operation.</param>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        // Same structured-logging enrichment as StartAsync, but tagged "Stop".
        using var svcScope = LogContext.PushProperty("Service", nameof(DiscoveryServiceHostedService));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");

        _logger.LogInformation("Stopping Discovery Service...");
        await _discoveryService.Stop();
        _logger.LogInformation("Discovery Service stopped");
    }
}
|
||||
60
src/ZB.MOM.WW.CBDDC.Hosting/HostedServices/TcpSyncServerHostedService.cs
Executable file
60
src/ZB.MOM.WW.CBDDC.Hosting/HostedServices/TcpSyncServerHostedService.cs
Executable file
@@ -0,0 +1,60 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Serilog.Context;
|
||||
using ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.HostedServices;
|
||||
|
||||
/// <summary>
/// Hosted service that manages the lifecycle of the TCP sync server.
/// </summary>
public class TcpSyncServerHostedService : IHostedService
{
    private readonly ISyncServer _syncServer;
    private readonly ILogger<TcpSyncServerHostedService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="TcpSyncServerHostedService"/> class.
    /// </summary>
    /// <param name="syncServer">The sync server to start and stop.</param>
    /// <param name="logger">The logger instance.</param>
    public TcpSyncServerHostedService(
        ISyncServer syncServer,
        ILogger<TcpSyncServerHostedService> logger)
    {
        _syncServer = syncServer;
        _logger = logger;
    }

    /// <summary>
    /// Starts the TCP sync server.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel startup.</param>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        // Tag all log events of this operation with service, correlation id and action.
        using var svcScope = LogContext.PushProperty("Service", nameof(TcpSyncServerHostedService));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");

        _logger.LogInformation("Starting TCP Sync Server...");
        await _syncServer.Start();
        _logger.LogInformation("TCP Sync Server started successfully");
    }

    /// <summary>
    /// Stops the TCP sync server.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel shutdown.</param>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        // Same structured-logging enrichment as StartAsync, but tagged "Stop".
        using var svcScope = LogContext.PushProperty("Service", nameof(TcpSyncServerHostedService));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");

        _logger.LogInformation("Stopping TCP Sync Server...");
        await _syncServer.Stop();
        _logger.LogInformation("TCP Sync Server stopped");
    }
}
|
||||
90
src/ZB.MOM.WW.CBDDC.Hosting/README.md
Executable file
90
src/ZB.MOM.WW.CBDDC.Hosting/README.md
Executable file
@@ -0,0 +1,90 @@
|
||||
# ZB.MOM.WW.CBDDC.Hosting
|
||||
|
||||
ASP.NET Core integration for CBDDC with health checks and hosted services.
|
||||
|
||||
## Features
|
||||
|
||||
- Cluster mode
|
||||
- Built-in health endpoint integration
|
||||
- Hosted services for sync server lifecycle
|
||||
- Respond-only server operation
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
dotnet add package ZB.MOM.WW.CBDDC.Hosting
|
||||
```
|
||||
|
||||
## Quick Start - Cluster
|
||||
|
||||
```csharp
|
||||
var builder = WebApplication.CreateBuilder(args);
|
||||
|
||||
// Add CBDDC core + BLite persistence (custom DbContext + DocumentStore required)
|
||||
builder.Services.AddCBDDCCore()
|
||||
.AddCBDDCBLite<MyDbContext, MyDocumentStore>(
|
||||
sp => new MyDbContext("/var/lib/cbddc/data.blite"));
|
||||
|
||||
// Add ASP.NET integration (cluster mode)
|
||||
builder.Services.AddCBDDCHosting(options =>
|
||||
{
|
||||
options.Cluster.NodeId = "server-01";
|
||||
options.Cluster.TcpPort = 5001;
|
||||
});
|
||||
|
||||
var app = builder.Build();
|
||||
|
||||
app.MapHealthChecks("/health");
|
||||
app.Run();
|
||||
```
|
||||
|
||||
## Health Checks
|
||||
|
||||
CBDDC registers health checks that verify:
|
||||
- Database connectivity
|
||||
- Latest timestamp retrieval
|
||||
|
||||
```bash
|
||||
curl http://localhost:5000/health
|
||||
```
|
||||
|
||||
## Deployment Mode
|
||||
|
||||
### Cluster
|
||||
|
||||
Best for:
|
||||
- Dedicated database servers
|
||||
- Simple deployments
|
||||
- Development/testing environments
|
||||
|
||||
## Server Behavior
|
||||
|
||||
CBDDC servers operate in respond-only mode:
|
||||
- Accept incoming sync connections
|
||||
- Respond to sync requests
|
||||
- Do not initiate outbound sync
|
||||
- Do not perform UDP discovery
|
||||
|
||||
## Configuration Options
|
||||
|
||||
### ClusterOptions
|
||||
|
||||
| Property | Type | Default | Description |
|----------|------|---------|-------------|
| NodeId | string | MachineName | Unique node identifier |
| TcpPort | int | 5001 | TCP port for sync |
| EnableUdpDiscovery | bool | false | Enable UDP discovery |
| PeerConfirmationLagThresholdMs | long | 30000 | Peer confirmation lag (ms) before health is reported as degraded |
| PeerConfirmationCriticalLagThresholdMs | long | 120000 | Peer confirmation lag (ms) before health is reported as unhealthy |
|
||||
|
||||
## Production Checklist
|
||||
|
||||
- Store BLite database files on durable storage in production
|
||||
- Configure health checks for load balancer
|
||||
- Set up proper logging and monitoring
|
||||
- Configure backup/restore for BLite database files
|
||||
- Configure proper firewall rules for TCP port
|
||||
- Set unique NodeId per instance
|
||||
- Test failover scenarios
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
72
src/ZB.MOM.WW.CBDDC.Hosting/Services/NoOpDiscoveryService.cs
Executable file
72
src/ZB.MOM.WW.CBDDC.Hosting/Services/NoOpDiscoveryService.cs
Executable file
@@ -0,0 +1,72 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Serilog.Context;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.Services;
|
||||
|
||||
/// <summary>
/// No-op implementation of IDiscoveryService for server scenarios.
/// Does not perform UDP broadcast discovery - relies on explicit peer configuration.
/// </summary>
public class NoOpDiscoveryService : IDiscoveryService
{
    private readonly ILogger<NoOpDiscoveryService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="NoOpDiscoveryService"/> class.
    /// </summary>
    /// <param name="logger">The logger instance to use, or <see langword="null"/> to use a no-op logger.</param>
    public NoOpDiscoveryService(ILogger<NoOpDiscoveryService>? logger = null)
        => _logger = logger ?? NullLogger<NoOpDiscoveryService>.Instance;

    /// <summary>
    /// Gets the currently active peers.
    /// </summary>
    /// <returns>An empty sequence because discovery is disabled.</returns>
    public IEnumerable<PeerNode> GetActivePeers() => Array.Empty<PeerNode>();

    /// <summary>
    /// Starts the discovery service (logs only; nothing is actually started).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Start()
    {
        // Enrich the startup log event with structured context.
        using var svcScope = LogContext.PushProperty("Service", nameof(NoOpDiscoveryService));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");

        _logger.LogInformation("NoOpDiscoveryService started (passive mode - no UDP discovery)");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops the discovery service (logs only; nothing is actually stopped).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Stop()
    {
        // Enrich the shutdown log event with structured context.
        using var svcScope = LogContext.PushProperty("Service", nameof(NoOpDiscoveryService));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");

        _logger.LogInformation("NoOpDiscoveryService stopped");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases resources used by this instance (nothing to release; logs only).
    /// </summary>
    public void Dispose() => _logger.LogDebug("NoOpDiscoveryService disposed");
}
|
||||
61
src/ZB.MOM.WW.CBDDC.Hosting/Services/NoOpSyncOrchestrator.cs
Executable file
61
src/ZB.MOM.WW.CBDDC.Hosting/Services/NoOpSyncOrchestrator.cs
Executable file
@@ -0,0 +1,61 @@
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Serilog.Context;
|
||||
using ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Hosting.Services;
|
||||
|
||||
/// <summary>
/// No-op implementation of ISyncOrchestrator for server scenarios.
/// Does not initiate outbound sync - only responds to incoming sync requests.
/// </summary>
public class NoOpSyncOrchestrator : ISyncOrchestrator
{
    private readonly ILogger<NoOpSyncOrchestrator> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="NoOpSyncOrchestrator"/> class.
    /// </summary>
    /// <param name="logger">The logger instance to use, or <see langword="null"/> for a no-op logger.</param>
    public NoOpSyncOrchestrator(ILogger<NoOpSyncOrchestrator>? logger = null)
        => _logger = logger ?? NullLogger<NoOpSyncOrchestrator>.Instance;

    /// <summary>
    /// Starts the orchestrator lifecycle (logs only; no sync work is scheduled).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Start()
    {
        // Enrich the startup log event with structured context.
        using var svcScope = LogContext.PushProperty("Service", nameof(NoOpSyncOrchestrator));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Start");

        _logger.LogInformation("NoOpSyncOrchestrator started (respond-only mode - no outbound sync)");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Stops the orchestrator lifecycle (logs only; nothing to tear down).
    /// </summary>
    /// <returns>A completed task.</returns>
    public Task Stop()
    {
        // Enrich the shutdown log event with structured context.
        using var svcScope = LogContext.PushProperty("Service", nameof(NoOpSyncOrchestrator));
        using var opScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var actionScope = LogContext.PushProperty("Action", "Stop");

        _logger.LogInformation("NoOpSyncOrchestrator stopped");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases resources used by the orchestrator (nothing to release; logs only).
    /// </summary>
    public void Dispose() => _logger.LogDebug("NoOpSyncOrchestrator disposed");
}
|
||||
37
src/ZB.MOM.WW.CBDDC.Hosting/ZB.MOM.WW.CBDDC.Hosting.csproj
Executable file
37
src/ZB.MOM.WW.CBDDC.Hosting/ZB.MOM.WW.CBDDC.Hosting.csproj
Executable file
@@ -0,0 +1,37 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.CBDDC.Network\ZB.MOM.WW.CBDDC.Network.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.Extensions.Diagnostics.HealthChecks" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
|
||||
<PackageReference Include="Serilog" Version="4.2.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<PropertyGroup>
|
||||
<AssemblyName>ZB.MOM.WW.CBDDC.Hosting</AssemblyName>
|
||||
<RootNamespace>ZB.MOM.WW.CBDDC.Hosting</RootNamespace>
|
||||
<PackageId>ZB.MOM.WW.CBDDC.Hosting</PackageId>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<LangVersion>latest</LangVersion>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<Version>1.0.3</Version>
|
||||
<Authors>MrDevRobot</Authors>
|
||||
<Description>ASP.NET Core integration for CBDDC with health checks and hosted services.</Description>
|
||||
<PackageLicenseExpression>MIT</PackageLicenseExpression>
|
||||
<PackageTags>p2p;database;aspnetcore;healthcheck;hosting;cluster</PackageTags>
|
||||
<PackageProjectUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</RepositoryUrl>
|
||||
<RepositoryType>git</RepositoryType>
|
||||
<PackageReadmeFile>README.md</PackageReadmeFile>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Include="README.md" Pack="true" PackagePath="\" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
160
src/ZB.MOM.WW.CBDDC.Network/CBDDCNode.cs
Executable file
160
src/ZB.MOM.WW.CBDDC.Network/CBDDCNode.cs
Executable file
@@ -0,0 +1,160 @@
|
||||
using System;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Represents a single CBDDC Peer Node.
/// Acts as a facade to orchestrate the lifecycle of Networking, Discovery, and Synchronization components.
/// </summary>
public class CBDDCNode : ICBDDCNode
{
    private readonly ILogger<CBDDCNode> _logger;

    /// <summary>
    /// Gets the Sync Server instance.
    /// </summary>
    public ISyncServer Server { get; }

    /// <summary>
    /// Gets the Discovery Service instance.
    /// </summary>
    public IDiscoveryService Discovery { get; }

    /// <summary>
    /// Gets the Synchronization Orchestrator instance.
    /// </summary>
    public ISyncOrchestrator Orchestrator { get; }

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCNode"/> class.
    /// </summary>
    /// <param name="server">The TCP server for handling incoming sync requests.</param>
    /// <param name="discovery">The UDP service for peer discovery.</param>
    /// <param name="orchestrator">The orchestrator for managing outgoing sync operations.</param>
    /// <param name="logger">The logger instance.</param>
    public CBDDCNode(
        ISyncServer server,
        IDiscoveryService discovery,
        ISyncOrchestrator orchestrator,
        ILogger<CBDDCNode> logger)
    {
        Server = server;
        Discovery = discovery;
        Orchestrator = orchestrator;
        _logger = logger;
    }

    /// <summary>
    /// Starts all node components (Server, Discovery, Orchestrator).
    /// </summary>
    public async Task Start()
    {
        _logger.LogInformation("Starting CBDDC Node...");

        // Components are independent, so bring them up concurrently.
        var startups = new[] { Server.Start(), Discovery.Start(), Orchestrator.Start() };
        await Task.WhenAll(startups);

        _logger.LogInformation("CBDDC Node Started on {Address}", Address);
    }

    /// <summary>
    /// Stops all node components.
    /// </summary>
    public async Task Stop()
    {
        _logger.LogInformation("Stopping CBDDC Node...");

        // Shut down concurrently, listed in reverse order of startup.
        var shutdowns = new[] { Orchestrator.Stop(), Discovery.Stop(), Server.Stop() };
        await Task.WhenAll(shutdowns);

        _logger.LogInformation("CBDDC Node Stopped.");
    }

    /// <summary>
    /// Gets the address information of this node.
    /// </summary>
    public NodeAddress Address
    {
        get
        {
            var endpoint = Server.ListeningEndpoint;
            if (endpoint == null)
            {
                return new NodeAddress("Unknown", 0);
            }

            // A wildcard bind (0.0.0.0 / ::) is not a connectable address,
            // so advertise a concrete machine IP that peers can reach instead.
            var isWildcard = Equals(endpoint.Address, System.Net.IPAddress.Any)
                || Equals(endpoint.Address, System.Net.IPAddress.IPv6Any);

            return isWildcard
                ? new NodeAddress(GetLocalIpAddress(), endpoint.Port)
                : new NodeAddress(endpoint.Address.ToString(), endpoint.Port);
        }
    }

    // Resolves a best-effort advertisable address: the first unicast IPv4 on any
    // operational non-loopback interface, falling back to localhost on any failure.
    private string GetLocalIpAddress()
    {
        try
        {
            foreach (var nic in System.Net.NetworkInformation.NetworkInterface.GetAllNetworkInterfaces())
            {
                if (nic.OperationalStatus != System.Net.NetworkInformation.OperationalStatus.Up
                    || nic.NetworkInterfaceType == System.Net.NetworkInformation.NetworkInterfaceType.Loopback)
                {
                    continue;
                }

                var ipv4 = nic.GetIPProperties().UnicastAddresses
                    .FirstOrDefault(u => u.Address.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork); // Prefer IPv4

                if (ipv4 != null)
                {
                    return ipv4.Address.ToString();
                }
            }

            return "127.0.0.1";
        }
        catch (Exception ex)
        {
            _logger.LogWarning("Failed to resolve local IP: {Message}. Fallback to localhost.", ex.Message);
            return "127.0.0.1";
        }
    }
}
|
||||
|
||||
/// <summary>
/// Immutable host/port pair identifying a CBDDC node on the network.
/// </summary>
public class NodeAddress
{
    /// <summary>
    /// Initializes a new instance of the <see cref="NodeAddress"/> class.
    /// </summary>
    /// <param name="host">The host name or IP address.</param>
    /// <param name="port">The port number.</param>
    public NodeAddress(string host, int port) => (Host, Port) = (host, port);

    /// <summary>
    /// Gets the host name or IP address component.
    /// </summary>
    public string Host { get; }

    /// <summary>
    /// Gets the port number component.
    /// </summary>
    public int Port { get; }

    /// <inheritdoc />
    public override string ToString() => $"{Host}:{Port}";
}
|
||||
85
src/ZB.MOM.WW.CBDDC.Network/CBDDCNodeService.cs
Executable file
85
src/ZB.MOM.WW.CBDDC.Network/CBDDCNodeService.cs
Executable file
@@ -0,0 +1,85 @@
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Serilog.Context;
|
||||
using System;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Hosted service that automatically starts and stops the CBDDC node.
/// </summary>
public class CBDDCNodeService : IHostedService
{
    private readonly ICBDDCNode _node;
    private readonly ILogger<CBDDCNodeService> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="CBDDCNodeService"/> class.
    /// </summary>
    /// <param name="node">The CBDDC node to manage.</param>
    /// <param name="logger">The logger instance.</param>
    public CBDDCNodeService(ICBDDCNode node, ILogger<CBDDCNodeService> logger)
    {
        _node = node;
        _logger = logger;
    }

    /// <summary>
    /// Starts the managed CBDDC node.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel startup.</param>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task StartAsync(CancellationToken cancellationToken)
    {
        // Enrich every log entry in this call with service/operation/action context.
        using IDisposable serviceScope = LogContext.PushProperty("Service", nameof(CBDDCNodeService)),
            operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N")),
            actionScope = LogContext.PushProperty("Action", "Start");

        try
        {
            _logger.LogInformation("Starting CBDDC Node Service...");

            // Bail out early if the host already requested cancellation.
            cancellationToken.ThrowIfCancellationRequested();

            await _node.Start();
            _logger.LogInformation("CBDDC Node Service started successfully");
        }
        catch (OperationCanceledException)
        {
            _logger.LogWarning("CBDDC Node Service start was cancelled");
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to start CBDDC Node Service");
            throw;
        }
    }

    /// <summary>
    /// Stops the managed CBDDC node.
    /// </summary>
    /// <param name="cancellationToken">A token used to cancel shutdown.</param>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        using IDisposable serviceScope = LogContext.PushProperty("Service", nameof(CBDDCNodeService)),
            operationScope = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N")),
            actionContext = LogContext.PushProperty("Action", "Stop");

        try
        {
            _logger.LogInformation("Stopping CBDDC Node Service...");
            await _node.Stop();
            _logger.LogInformation("CBDDC Node Service stopped successfully");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Error occurred while stopping CBDDC Node Service");
            // Don't rethrow during shutdown to avoid breaking the shutdown process
        }
    }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using ZB.MOM.WW.CBDDC.Core.Cache;
|
||||
using ZB.MOM.WW.CBDDC.Core.Diagnostics;
|
||||
using ZB.MOM.WW.CBDDC.Core.Resilience;
|
||||
using ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Provides extension methods for registering core CBDDC services.
/// </summary>
public static class CBDDCServiceCollectionExtensions
{
    /// <summary>
    /// Registers core CBDDC service dependencies (document cache, offline queue,
    /// sync status tracker, retry policy, and health check) as singletons.
    /// </summary>
    /// <param name="services">The service collection to update.</param>
    /// <returns>The same <see cref="IServiceCollection"/> instance for chaining.</returns>
    public static IServiceCollection AddCBDDCCore(this IServiceCollection services)
    {
        ArgumentNullException.ThrowIfNull(services);

        // TryAdd* preserves any registration the host application made earlier,
        // so callers can override each service before calling AddCBDDCCore.
        services.TryAddSingleton<IDocumentCache, DocumentCache>();
        services.TryAddSingleton<IOfflineQueue, OfflineQueue>();
        services.TryAddSingleton<ISyncStatusTracker, SyncStatusTracker>();
        services.TryAddSingleton<IRetryPolicy, RetryPolicy>();
        services.TryAddSingleton<ICBDDCHealthCheck, CBDDCHealthCheck>();

        return services;
    }
}
|
||||
195
src/ZB.MOM.WW.CBDDC.Network/CompositeDiscoveryService.cs
Executable file
195
src/ZB.MOM.WW.CBDDC.Network/CompositeDiscoveryService.cs
Executable file
@@ -0,0 +1,195 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Composite discovery service that combines UDP LAN discovery with persistent remote peers from the peerConfigurationStore.
/// Periodically refreshes the remote peer list and merges with actively discovered LAN peers.
///
/// Remote peer configurations are stored in a synchronized collection that is automatically
/// replicated across all nodes in the cluster. Any node that adds a remote peer will have
/// it synchronized to all other nodes automatically.
/// </summary>
public class CompositeDiscoveryService : IDiscoveryService
{
    private readonly IDiscoveryService _udpDiscovery;
    private readonly IPeerConfigurationStore _peerConfigurationStore;
    private readonly ILogger<CompositeDiscoveryService> _logger;
    private readonly TimeSpan _refreshInterval;
    private const string RemotePeersCollectionName = "_system_remote_peers";

    // Non-null while the service is running; guarded by _startStopLock.
    private CancellationTokenSource? _cts;
    private readonly ConcurrentDictionary<string, PeerNode> _remotePeers = new();
    private readonly object _startStopLock = new object();

    /// <summary>
    /// Initializes a new instance of the CompositeDiscoveryService class.
    /// </summary>
    /// <param name="udpDiscovery">UDP-based LAN discovery service.</param>
    /// <param name="peerConfigurationStore">Database instance for accessing the synchronized remote peers collection.</param>
    /// <param name="logger">Logger instance.</param>
    /// <param name="refreshInterval">Interval for refreshing remote peers from peerConfigurationStore. Defaults to 5 minutes.</param>
    public CompositeDiscoveryService(
        IDiscoveryService udpDiscovery,
        IPeerConfigurationStore peerConfigurationStore,
        ILogger<CompositeDiscoveryService>? logger = null,
        TimeSpan? refreshInterval = null)
    {
        _udpDiscovery = udpDiscovery ?? throw new ArgumentNullException(nameof(udpDiscovery));
        _peerConfigurationStore = peerConfigurationStore ?? throw new ArgumentNullException(nameof(peerConfigurationStore));
        _logger = logger ?? NullLogger<CompositeDiscoveryService>.Instance;
        _refreshInterval = refreshInterval ?? TimeSpan.FromMinutes(5);
    }

    /// <summary>
    /// Gets the currently active peers from LAN discovery and configured remote peers.
    /// </summary>
    /// <returns>A sequence of active peer nodes.</returns>
    public IEnumerable<PeerNode> GetActivePeers()
    {
        // Merge LAN peers from UDP discovery with remote peers from peerConfigurationStore
        var lanPeers = _udpDiscovery.GetActivePeers();
        var remotePeers = _remotePeers.Values;

        return lanPeers.Concat(remotePeers);
    }

    /// <summary>
    /// Starts peer discovery and the remote peer refresh loop.
    /// </summary>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task Start()
    {
        CancellationTokenSource cts;
        lock (_startStopLock)
        {
            if (_cts != null)
            {
                _logger.LogWarning("Composite discovery service already started");
                return;
            }
            cts = new CancellationTokenSource();
            _cts = cts;
        }

        // Start UDP discovery
        await _udpDiscovery.Start();

        // Start remote peer refresh loop.
        // BUGFIX: use the CTS captured inside the lock; reading the _cts field
        // here raced with a concurrent Stop() that nulls and disposes it.
        var token = cts.Token;
        _ = Task.Run(async () =>
        {
            try
            {
                await RefreshLoopAsync(token);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Remote peer refresh loop failed");
            }
        }, token);

        // Initial load of remote peers
        await RefreshRemotePeersAsync();

        _logger.LogInformation("Composite discovery service started (UDP + Synchronized Remote Peers)");
    }

    /// <summary>
    /// Stops peer discovery and the remote peer refresh loop.
    /// </summary>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task Stop()
    {
        CancellationTokenSource? ctsToDispose = null;

        lock (_startStopLock)
        {
            if (_cts == null)
            {
                _logger.LogWarning("Composite discovery service already stopped or never started");
                return;
            }

            ctsToDispose = _cts;
            _cts = null;
        }

        try
        {
            ctsToDispose.Cancel();
        }
        catch (ObjectDisposedException)
        {
            // Already disposed, ignore
        }
        finally
        {
            ctsToDispose.Dispose();
        }

        await _udpDiscovery.Stop();

        _logger.LogInformation("Composite discovery service stopped");
    }

    /// <summary>
    /// Waits one refresh interval, then reloads remote peers, until cancelled.
    /// </summary>
    private async Task RefreshLoopAsync(CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(_refreshInterval, cancellationToken);
                await RefreshRemotePeersAsync();
            }
            catch (OperationCanceledException)
            {
                // Normal shutdown
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error during remote peer refresh");
            }
        }
    }

    /// <summary>
    /// Replaces the cached remote peer set with the enabled peers currently in the store.
    /// </summary>
    private async Task RefreshRemotePeersAsync()
    {
        try
        {
            var peers = await _peerConfigurationStore.GetRemotePeersAsync();
            var remoteConfigs = peers.Where(p => p.IsEnabled);
            var now = DateTimeOffset.UtcNow;

            // Update remote peers dictionary. NOTE: readers may briefly observe
            // an empty set between Clear() and repopulation.
            _remotePeers.Clear();

            foreach (var config in remoteConfigs)
            {
                var peerNode = new PeerNode(
                    config.NodeId,
                    config.Address,
                    now, // LastSeen is now for persistent peers (always considered active)
                    config.Type,
                    NodeRole.Member // Remote peers are always members, never gateways
                );

                _remotePeers[config.NodeId] = peerNode;
            }

            _logger.LogInformation("Refreshed remote peers: {Count} enabled peers loaded from synchronized collection", _remotePeers.Count);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to refresh remote peers from database");
        }
    }
}
|
||||
66
src/ZB.MOM.WW.CBDDC.Network/CompressionHelper.cs
Executable file
66
src/ZB.MOM.WW.CBDDC.Network/CompressionHelper.cs
Executable file
@@ -0,0 +1,66 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Helpers for Brotli-compressing payloads on target frameworks that support it.
/// Payloads below <see cref="THRESHOLD"/> are passed through unchanged.
/// </summary>
public static class CompressionHelper
{
    /// <summary>
    /// Minimum payload size, in bytes, before compression is attempted.
    /// </summary>
    public const int THRESHOLD = 1024; // 1KB

    /// <summary>
    /// Gets a value indicating whether Brotli compression is supported on the current target framework.
    /// </summary>
    public static bool IsBrotliSupported
    {
        get
        {
#if NET6_0_OR_GREATER
            return true;
#else
            return false;
#endif
        }
    }

    /// <summary>
    /// Compresses the specified data when Brotli is supported and the payload exceeds the threshold.
    /// </summary>
    /// <param name="data">The input data to compress.</param>
    /// <returns>The compressed payload, or the original payload if compression is skipped.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="data"/> is null.</exception>
    public static byte[] Compress(byte[] data)
    {
        // Classic null check: ArgumentNullException.ThrowIfNull is unavailable
        // on the pre-NET6 targets this file also compiles for.
        if (data == null) throw new ArgumentNullException(nameof(data));
        if (data.Length < THRESHOLD || !IsBrotliSupported) return data;

#if NET6_0_OR_GREATER
        using var output = new MemoryStream();
        using (var brotli = new BrotliStream(output, CompressionLevel.Fastest))
        {
            brotli.Write(data, 0, data.Length);
        }
        return output.ToArray();
#else
        return data;
#endif
    }

    /// <summary>
    /// Decompresses Brotli-compressed data.
    /// </summary>
    /// <param name="compressedData">The compressed payload.</param>
    /// <returns>The decompressed payload.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="compressedData"/> is null.</exception>
    /// <exception cref="NotSupportedException">Thrown when Brotli is unavailable on the target framework.</exception>
    public static byte[] Decompress(byte[] compressedData)
    {
        if (compressedData == null) throw new ArgumentNullException(nameof(compressedData));
#if NET6_0_OR_GREATER
        using var input = new MemoryStream(compressedData);
        using var output = new MemoryStream();
        using (var brotli = new BrotliStream(input, CompressionMode.Decompress))
        {
            brotli.CopyTo(output);
        }
        return output.ToArray();
#else
        throw new NotSupportedException("Brotli decompression not supported on this platform.");
#endif
    }
}
|
||||
35
src/ZB.MOM.WW.CBDDC.Network/ICBDDCNode.cs
Executable file
35
src/ZB.MOM.WW.CBDDC.Network/ICBDDCNode.cs
Executable file
@@ -0,0 +1,35 @@
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network
|
||||
{
|
||||
    /// <summary>
    /// A CBDDC node: exposes its sync server, peer discovery, and sync
    /// orchestrator components, plus lifecycle control.
    /// </summary>
    public interface ICBDDCNode
    {
        /// <summary>
        /// Gets the node address peers can use to reach this node.
        /// </summary>
        NodeAddress Address { get; }
        /// <summary>
        /// Gets the discovery service.
        /// </summary>
        IDiscoveryService Discovery { get; }
        /// <summary>
        /// Gets the synchronization orchestrator.
        /// </summary>
        ISyncOrchestrator Orchestrator { get; }
        /// <summary>
        /// Gets the synchronization server.
        /// </summary>
        ISyncServer Server { get; }

        /// <summary>
        /// Starts the node services.
        /// </summary>
        /// <returns>A task that represents the asynchronous start operation.</returns>
        Task Start();
        /// <summary>
        /// Stops the node services.
        /// </summary>
        /// <returns>A task that represents the asynchronous stop operation.</returns>
        Task Stop();
    }
|
||||
}
|
||||
30
src/ZB.MOM.WW.CBDDC.Network/IDiscoveryService.cs
Executable file
30
src/ZB.MOM.WW.CBDDC.Network/IDiscoveryService.cs
Executable file
@@ -0,0 +1,30 @@
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network
|
||||
{
|
||||
    /// <summary>
    /// Defines peer discovery operations.
    /// </summary>
    /// <remarks>
    /// Implementations in this assembly include UDP LAN discovery and
    /// <c>CompositeDiscoveryService</c>, which merges LAN peers with configured
    /// remote peers.
    /// </remarks>
    public interface IDiscoveryService
    {
        /// <summary>
        /// Gets the currently active peers.
        /// </summary>
        /// <returns>The active peer nodes.</returns>
        IEnumerable<PeerNode> GetActivePeers();

        /// <summary>
        /// Starts the discovery service.
        /// </summary>
        /// <returns>A task that represents the asynchronous operation.</returns>
        Task Start();

        /// <summary>
        /// Stops the discovery service.
        /// </summary>
        /// <returns>A task that represents the asynchronous operation.</returns>
        Task Stop();
    }
|
||||
}
|
||||
21
src/ZB.MOM.WW.CBDDC.Network/IOplogPruneCutoffCalculator.cs
Normal file
21
src/ZB.MOM.WW.CBDDC.Network/IOplogPruneCutoffCalculator.cs
Normal file
@@ -0,0 +1,21 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Calculates the effective oplog prune cutoff for maintenance.
/// </summary>
/// <remarks>
/// The decision combines the configured retention cutoff with, when available,
/// a peer-confirmation cutoff so entries are not pruned before peers confirm them.
/// </remarks>
public interface IOplogPruneCutoffCalculator
{
    /// <summary>
    /// Calculates the effective prune cutoff for the provided node configuration.
    /// </summary>
    /// <param name="configuration">The local node configuration.</param>
    /// <param name="cancellationToken">The cancellation token.</param>
    /// <returns>A prune cutoff decision describing whether prune is allowed and at which cutoff.</returns>
    Task<OplogPruneCutoffDecision> CalculateEffectiveCutoffAsync(
        PeerNodeConfiguration configuration,
        CancellationToken cancellationToken = default);
}
|
||||
22
src/ZB.MOM.WW.CBDDC.Network/ISyncOrchestrator.cs
Executable file
22
src/ZB.MOM.WW.CBDDC.Network/ISyncOrchestrator.cs
Executable file
@@ -0,0 +1,22 @@
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network
|
||||
{
|
||||
    /// <summary>
    /// Defines lifecycle operations for synchronization orchestration
    /// (outgoing sync to peers).
    /// </summary>
    public interface ISyncOrchestrator
    {
        /// <summary>
        /// Starts synchronization orchestration.
        /// </summary>
        /// <returns>A task that represents the asynchronous start operation.</returns>
        Task Start();

        /// <summary>
        /// Stops synchronization orchestration.
        /// </summary>
        /// <returns>A task that represents the asynchronous stop operation.</returns>
        Task Stop();
    }
|
||||
}
|
||||
31
src/ZB.MOM.WW.CBDDC.Network/ISyncServer.cs
Executable file
31
src/ZB.MOM.WW.CBDDC.Network/ISyncServer.cs
Executable file
@@ -0,0 +1,31 @@
|
||||
using System.Net;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Defines the contract for a server that supports starting, stopping, and reporting its listening network endpoint for
/// synchronization operations.
/// </summary>
/// <remarks>Implementations of this interface are expected to provide asynchronous methods for starting and
/// stopping the server. The listening endpoint may be null if the server is not currently active or has not been
/// started. The endpoint is also used to derive the advertised node address.</remarks>
public interface ISyncServer
{
    /// <summary>
    /// Starts the synchronization server.
    /// </summary>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task Start();

    /// <summary>
    /// Stops the synchronization server.
    /// </summary>
    /// <returns>A task that represents the asynchronous operation.</returns>
    Task Stop();

    /// <summary>
    /// Gets the network endpoint currently used by the server for listening.
    /// </summary>
    IPEndPoint? ListeningEndpoint { get; }
}
|
||||
166
src/ZB.MOM.WW.CBDDC.Network/Leadership/BullyLeaderElectionService.cs
Executable file
166
src/ZB.MOM.WW.CBDDC.Network/Leadership/BullyLeaderElectionService.cs
Executable file
@@ -0,0 +1,166 @@
|
||||
using System;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Leadership;
|
||||
|
||||
/// <summary>
/// Implements the Bully algorithm for leader election.
/// The node with the lexicographically smallest NodeId becomes the cloud gateway (leader).
/// Elections run periodically (every 5 seconds) to adapt to cluster changes.
/// </summary>
public class BullyLeaderElectionService : ILeaderElectionService
{
    private readonly IDiscoveryService _discoveryService;
    private readonly IPeerNodeConfigurationProvider _configProvider;
    private readonly ILogger<BullyLeaderElectionService> _logger;
    private readonly TimeSpan _electionInterval;

    // Non-null while the election loop is running.
    private CancellationTokenSource? _cts;
    private string? _localNodeId;
    private string? _currentGatewayNodeId;
    private bool _isCloudGateway;

    /// <summary>
    /// Gets a value indicating whether this node is currently the cloud gateway leader.
    /// </summary>
    public bool IsCloudGateway => _isCloudGateway;

    /// <summary>
    /// Gets the current gateway node identifier.
    /// </summary>
    public string? CurrentGatewayNodeId => _currentGatewayNodeId;

    /// <summary>
    /// Occurs when leadership changes.
    /// </summary>
    public event EventHandler<LeadershipChangedEventArgs>? LeadershipChanged;

    /// <summary>
    /// Initializes a new instance of the BullyLeaderElectionService class.
    /// </summary>
    /// <param name="discoveryService">Service providing active peer information.</param>
    /// <param name="configProvider">Provider for local node configuration.</param>
    /// <param name="logger">Logger instance.</param>
    /// <param name="electionInterval">Interval between elections. Defaults to 5 seconds.</param>
    public BullyLeaderElectionService(
        IDiscoveryService discoveryService,
        IPeerNodeConfigurationProvider configProvider,
        ILogger<BullyLeaderElectionService>? logger = null,
        TimeSpan? electionInterval = null)
    {
        _discoveryService = discoveryService ?? throw new ArgumentNullException(nameof(discoveryService));
        _configProvider = configProvider ?? throw new ArgumentNullException(nameof(configProvider));
        _logger = logger ?? NullLogger<BullyLeaderElectionService>.Instance;
        _electionInterval = electionInterval ?? TimeSpan.FromSeconds(5);
    }

    /// <summary>
    /// Starts the leader election loop.
    /// </summary>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task Start()
    {
        if (_cts != null)
        {
            _logger.LogWarning("Leader election service already started");
            return;
        }

        var config = await _configProvider.GetConfiguration();
        _localNodeId = config.NodeId;

        // BUGFIX: capture the CTS in a local before handing its token to the
        // background lambda. Reading the _cts field inside the lambda raced with
        // a concurrent Stop() that nulls and disposes it.
        var cts = new CancellationTokenSource();
        _cts = cts;
        _ = Task.Run(() => ElectionLoopAsync(cts.Token));

        _logger.LogInformation("Leader election service started for node {NodeId}", _localNodeId);
    }

    /// <summary>
    /// Stops the leader election loop.
    /// </summary>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public Task Stop()
    {
        // Clear the field before cancel/dispose so concurrent readers never
        // observe a disposed CancellationTokenSource.
        var cts = _cts;
        if (cts == null) return Task.CompletedTask;
        _cts = null;

        cts.Cancel();
        cts.Dispose();

        _logger.LogInformation("Leader election service stopped");
        return Task.CompletedTask;
    }

    /// <summary>
    /// Runs an election every interval until cancelled.
    /// </summary>
    private async Task ElectionLoopAsync(CancellationToken cancellationToken)
    {
        while (!cancellationToken.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(_electionInterval, cancellationToken);
                RunElection();
            }
            catch (OperationCanceledException)
            {
                // Normal shutdown
                break;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error during leader election");
            }
        }
    }

    /// <summary>
    /// Elects the node with the smallest NodeId among LAN peers (plus this node)
    /// and raises <see cref="LeadershipChanged"/> when the local role flips.
    /// </summary>
    private void RunElection()
    {
        if (_localNodeId == null) return;

        // Get all active LAN peers (excluding remote cloud nodes)
        var lanPeers = _discoveryService.GetActivePeers()
            .Where(p => p.Type == PeerType.LanDiscovered)
            .Select(p => p.NodeId)
            .ToList();

        // Add local node to the pool
        lanPeers.Add(_localNodeId);

        // Bully algorithm: smallest NodeId wins (lexicographic comparison)
        var newLeader = lanPeers.OrderBy(id => id, StringComparer.Ordinal).FirstOrDefault();

        if (newLeader == null)
        {
            // Defensive: the pool always contains the local node, but fall back anyway.
            newLeader = _localNodeId;
        }

        // Check if leadership changed
        if (newLeader != _currentGatewayNodeId)
        {
            var wasLeader = _isCloudGateway;
            _currentGatewayNodeId = newLeader;
            _isCloudGateway = newLeader == _localNodeId;

            if (wasLeader != _isCloudGateway)
            {
                if (_isCloudGateway)
                {
                    _logger.LogInformation("🔐 This node is now the CLOUD GATEWAY (Leader) - Will sync with remote cloud nodes");
                }
                else
                {
                    _logger.LogInformation("👤 This node is now a MEMBER - Cloud sync handled by gateway: {Gateway}", _currentGatewayNodeId);
                }

                // Raise event
                LeadershipChanged?.Invoke(this, new LeadershipChangedEventArgs(_currentGatewayNodeId, _isCloudGateway));
            }
        }
    }
}
|
||||
65
src/ZB.MOM.WW.CBDDC.Network/Leadership/ILeaderElectionService.cs
Executable file
65
src/ZB.MOM.WW.CBDDC.Network/Leadership/ILeaderElectionService.cs
Executable file
@@ -0,0 +1,65 @@
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Leadership;
|
||||
|
||||
/// <summary>
/// Event arguments for leadership change events.
/// </summary>
public class LeadershipChangedEventArgs : EventArgs
{
    /// <summary>
    /// Initializes a new instance of the LeadershipChangedEventArgs class.
    /// </summary>
    /// <param name="currentGatewayNodeId">The NodeId of the current gateway node, or <see langword="null"/> when none is elected.</param>
    /// <param name="isLocalNodeGateway">A value indicating whether the local node is the gateway.</param>
    public LeadershipChangedEventArgs(string? currentGatewayNodeId, bool isLocalNodeGateway)
        => (CurrentGatewayNodeId, IsLocalNodeGateway) = (currentGatewayNodeId, isLocalNodeGateway);

    /// <summary>
    /// Gets the NodeId of the current cloud gateway (leader), or null if no leader is elected.
    /// </summary>
    public string? CurrentGatewayNodeId { get; }

    /// <summary>
    /// Gets whether the local node is now the cloud gateway.
    /// </summary>
    public bool IsLocalNodeGateway { get; }
}
|
||||
|
||||
/// <summary>
/// Service for managing leader election in a distributed cluster.
/// Uses the Bully algorithm where the node with the lexicographically smallest NodeId becomes the leader.
/// Only the leader (Cloud Gateway) synchronizes with remote cloud nodes.
/// </summary>
public interface ILeaderElectionService
{
    /// <summary>
    /// Gets whether the local node is currently the cloud gateway (leader).
    /// </summary>
    bool IsCloudGateway { get; }

    /// <summary>
    /// Gets the NodeId of the current cloud gateway, or null if no gateway is elected.
    /// </summary>
    string? CurrentGatewayNodeId { get; }

    /// <summary>
    /// Event raised when leadership changes (i.e. the elected gateway differs
    /// from the previously known one).
    /// </summary>
    event EventHandler<LeadershipChangedEventArgs>? LeadershipChanged;

    /// <summary>
    /// Starts the leader election service.
    /// </summary>
    Task Start();

    /// <summary>
    /// Stops the leader election service.
    /// </summary>
    Task Stop();
}
|
||||
164
src/ZB.MOM.WW.CBDDC.Network/OplogPruneCutoffCalculator.cs
Normal file
164
src/ZB.MOM.WW.CBDDC.Network/OplogPruneCutoffCalculator.cs
Normal file
@@ -0,0 +1,164 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Default implementation for effective oplog prune cutoff calculation.
/// The effective cutoff is the more conservative (older) of the retention-based cutoff
/// and the minimum timestamp confirmed by every active tracked peer for every relevant source.
/// </summary>
public class OplogPruneCutoffCalculator : IOplogPruneCutoffCalculator
{
    private readonly IOplogStore _oplogStore;
    private readonly IPeerOplogConfirmationStore? _peerOplogConfirmationStore;

    /// <summary>
    /// Initializes a new instance of the <see cref="OplogPruneCutoffCalculator"/> class.
    /// </summary>
    /// <param name="oplogStore">The oplog store.</param>
    /// <param name="peerOplogConfirmationStore">The optional peer confirmation store.</param>
    public OplogPruneCutoffCalculator(
        IOplogStore oplogStore,
        IPeerOplogConfirmationStore? peerOplogConfirmationStore = null)
    {
        _oplogStore = oplogStore ?? throw new ArgumentNullException(nameof(oplogStore));
        _peerOplogConfirmationStore = peerOplogConfirmationStore;
    }

    /// <inheritdoc />
    public async Task<OplogPruneCutoffDecision> CalculateEffectiveCutoffAsync(
        PeerNodeConfiguration configuration,
        CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(configuration);

        var retentionCutoff = BuildRetentionCutoff(configuration);

        // Without confirmation tracking the retention policy alone decides the cutoff.
        if (_peerOplogConfirmationStore == null)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "Confirmation tracking is not configured.");
        }

        var relevantSources = await GetRelevantSourceNodesAsync(cancellationToken);
        if (relevantSources.Count == 0)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "No local non-default oplog/vector-clock sources were found.");
        }

        var activeTrackedPeers = (await _peerOplogConfirmationStore.GetActiveTrackedPeersAsync(cancellationToken))
            .Where(peerNodeId => !string.IsNullOrWhiteSpace(peerNodeId))
            .Distinct(StringComparer.Ordinal)
            .ToList();

        if (activeTrackedPeers.Count == 0)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "No active tracked peers found for confirmation gating.");
        }

        HlcTimestamp? confirmationCutoff = null;

        foreach (var peerNodeId in activeTrackedPeers)
        {
            // For each peer, keep only the newest active confirmation per source node.
            var confirmationsForPeer = (await _peerOplogConfirmationStore.GetConfirmationsForPeerAsync(peerNodeId, cancellationToken))
                .Where(confirmation => confirmation.IsActive)
                .Where(confirmation => !string.IsNullOrWhiteSpace(confirmation.SourceNodeId))
                .GroupBy(confirmation => confirmation.SourceNodeId, StringComparer.Ordinal)
                .ToDictionary(
                    group => group.Key,
                    group => group
                        .Select(ToTimestamp)
                        .Max(), // newest confirmed timestamp; Max() avoids the O(n log n) sort of OrderBy().Last()
                    StringComparer.Ordinal);

            foreach (var sourceNodeId in relevantSources)
            {
                // A single missing (or default-valued) confirmation blocks pruning entirely:
                // we cannot prove that peer has caught up with this source.
                if (!confirmationsForPeer.TryGetValue(sourceNodeId, out var confirmedTimestamp) || confirmedTimestamp == default)
                {
                    return OplogPruneCutoffDecision.NoCutoff(
                        retentionCutoff,
                        $"Active tracked peer '{peerNodeId}' is missing confirmation for source '{sourceNodeId}'.");
                }

                // Track the minimum confirmed timestamp across all (peer, source) pairs.
                if (!confirmationCutoff.HasValue || confirmedTimestamp < confirmationCutoff.Value)
                {
                    confirmationCutoff = confirmedTimestamp;
                }
            }
        }

        if (!confirmationCutoff.HasValue)
        {
            return OplogPruneCutoffDecision.WithCutoff(
                retentionCutoff,
                confirmationCutoff: null,
                effectiveCutoff: retentionCutoff,
                reason: "No confirmation cutoff could be determined.");
        }

        // Effective cutoff is the older of the two, so pruning never outruns either constraint.
        var effectiveCutoff = retentionCutoff <= confirmationCutoff.Value
            ? retentionCutoff
            : confirmationCutoff.Value;

        return OplogPruneCutoffDecision.WithCutoff(
            retentionCutoff,
            confirmationCutoff,
            effectiveCutoff);
    }

    /// <summary>
    /// Builds the retention-policy cutoff: now minus the configured retention window.
    /// </summary>
    private static HlcTimestamp BuildRetentionCutoff(PeerNodeConfiguration configuration)
    {
        // NOTE(review): uses the wall clock directly; inject a TimeProvider if this ever needs testing.
        var retentionTimestamp = DateTimeOffset.UtcNow
            .AddHours(-configuration.OplogRetentionHours)
            .ToUnixTimeMilliseconds();

        return new HlcTimestamp(retentionTimestamp, 0, configuration.NodeId ?? string.Empty);
    }

    /// <summary>
    /// Collects the source node ids present in the local vector clock with a non-default timestamp.
    /// </summary>
    private async Task<HashSet<string>> GetRelevantSourceNodesAsync(CancellationToken cancellationToken)
    {
        var localVectorClock = await _oplogStore.GetVectorClockAsync(cancellationToken);
        var sourceNodes = new HashSet<string>(StringComparer.Ordinal);
        foreach (var sourceNodeId in localVectorClock.NodeIds)
        {
            if (string.IsNullOrWhiteSpace(sourceNodeId))
            {
                continue;
            }

            var timestamp = localVectorClock.GetTimestamp(sourceNodeId);
            if (timestamp == default)
            {
                continue;
            }

            sourceNodes.Add(sourceNodeId);
        }

        return sourceNodes;
    }

    /// <summary>
    /// Maps a stored peer confirmation row to an <see cref="HlcTimestamp"/>.
    /// </summary>
    private static HlcTimestamp ToTimestamp(PeerOplogConfirmation confirmation)
    {
        return new HlcTimestamp(
            confirmation.ConfirmedWall,
            confirmation.ConfirmedLogic,
            confirmation.SourceNodeId ?? string.Empty);
    }
}
|
||||
84
src/ZB.MOM.WW.CBDDC.Network/OplogPruneCutoffDecision.cs
Normal file
84
src/ZB.MOM.WW.CBDDC.Network/OplogPruneCutoffDecision.cs
Normal file
@@ -0,0 +1,84 @@
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Represents the prune cutoff decision for an oplog maintenance cycle.
/// Instances are immutable and created only via <see cref="WithCutoff"/> or <see cref="NoCutoff"/>.
/// </summary>
public sealed class OplogPruneCutoffDecision
{
    /// <summary>
    /// Gets a value indicating whether pruning is allowed for this decision.
    /// </summary>
    public bool HasCutoff { get; }

    /// <summary>
    /// Gets the retention-based cutoff.
    /// </summary>
    public HlcTimestamp RetentionCutoff { get; }

    /// <summary>
    /// Gets the confirmation-based cutoff, when available.
    /// </summary>
    public HlcTimestamp? ConfirmationCutoff { get; }

    /// <summary>
    /// Gets the effective cutoff to use for pruning when <see cref="HasCutoff"/> is true.
    /// </summary>
    public HlcTimestamp? EffectiveCutoff { get; }

    /// <summary>
    /// Gets the explanatory reason for skip/special handling decisions.
    /// </summary>
    public string Reason { get; }

    private OplogPruneCutoffDecision(
        bool hasCutoff,
        HlcTimestamp retentionCutoff,
        HlcTimestamp? confirmationCutoff,
        HlcTimestamp? effectiveCutoff,
        string reason)
    {
        HasCutoff = hasCutoff;
        RetentionCutoff = retentionCutoff;
        ConfirmationCutoff = confirmationCutoff;
        EffectiveCutoff = effectiveCutoff;
        Reason = reason;
    }

    /// <summary>
    /// Creates a prune-allowed decision with the provided cutoffs.
    /// </summary>
    /// <param name="retentionCutoff">The cutoff derived from retention policy.</param>
    /// <param name="confirmationCutoff">The cutoff derived from peer confirmations, if available.</param>
    /// <param name="effectiveCutoff">The effective cutoff to apply for pruning.</param>
    /// <param name="reason">The explanatory reason associated with the decision.</param>
    public static OplogPruneCutoffDecision WithCutoff(
        HlcTimestamp retentionCutoff,
        HlcTimestamp? confirmationCutoff,
        HlcTimestamp effectiveCutoff,
        string reason = "") =>
        new(
            hasCutoff: true,
            retentionCutoff: retentionCutoff,
            confirmationCutoff: confirmationCutoff,
            effectiveCutoff: effectiveCutoff,
            reason: reason);

    /// <summary>
    /// Creates a prune-blocked decision.
    /// </summary>
    /// <param name="retentionCutoff">The cutoff derived from retention policy.</param>
    /// <param name="reason">The explanatory reason associated with the decision.</param>
    public static OplogPruneCutoffDecision NoCutoff(HlcTimestamp retentionCutoff, string reason) =>
        new(
            hasCutoff: false,
            retentionCutoff: retentionCutoff,
            confirmationCutoff: null,
            effectiveCutoff: null,
            reason: reason);
}
|
||||
57
src/ZB.MOM.WW.CBDDC.Network/PeerDbNetworkExtensions.cs
Executable file
57
src/ZB.MOM.WW.CBDDC.Network/PeerDbNetworkExtensions.cs
Executable file
@@ -0,0 +1,57 @@
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network; // For IMeshNetwork if we implement it
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Network.Security;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using System;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Dependency-injection registration helpers for the CBDDC networking layer.
/// </summary>
public static class CBDDCNetworkExtensions
{
    /// <summary>
    /// Adds CBDDC network services to the service collection.
    /// </summary>
    /// <typeparam name="TPeerNodeConfigurationProvider">The peer node configuration provider implementation type.</typeparam>
    /// <param name="services">The service collection to register services into.</param>
    /// <param name="useHostedService">If true, registers CBDDCNodeService as IHostedService to automatically start/stop the node.</param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    public static IServiceCollection AddCBDDCNetwork<TPeerNodeConfigurationProvider>(
        this IServiceCollection services,
        bool useHostedService = true)
        where TPeerNodeConfigurationProvider : class, IPeerNodeConfigurationProvider
    {
        // TryAdd* is used throughout so callers can pre-register their own implementations
        // before calling this method and have them take precedence.
        services.TryAddSingleton<IPeerNodeConfigurationProvider, TPeerNodeConfigurationProvider>();

        services.TryAddSingleton<IAuthenticator, ClusterKeyAuthenticator>();

        services.TryAddSingleton<IPeerHandshakeService, SecureHandshakeService>();

        services.TryAddSingleton<IDiscoveryService, UdpDiscoveryService>();

        // Telemetry persists its metrics next to the application binaries.
        services.TryAddSingleton<ZB.MOM.WW.CBDDC.Network.Telemetry.INetworkTelemetryService>(sp =>
        {
            var logger = sp.GetRequiredService<ILogger<ZB.MOM.WW.CBDDC.Network.Telemetry.NetworkTelemetryService>>();
            var path = System.IO.Path.Combine(System.AppContext.BaseDirectory, "cbddc_metrics.bin");
            return new ZB.MOM.WW.CBDDC.Network.Telemetry.NetworkTelemetryService(logger, path);
        });

        services.TryAddSingleton<ISyncServer, TcpSyncServer>();

        services.TryAddSingleton<IOplogPruneCutoffCalculator, OplogPruneCutoffCalculator>();

        services.TryAddSingleton<ISyncOrchestrator, SyncOrchestrator>();

        services.TryAddSingleton<ICBDDCNode, CBDDCNode>();

        // Optionally register hosted service for automatic node lifecycle management
        if (useHostedService)
        {
            services.AddHostedService<CBDDCNodeService>();
        }

        return services;
    }
}
|
||||
259
src/ZB.MOM.WW.CBDDC.Network/Protocol/ProtocolHandler.cs
Executable file
259
src/ZB.MOM.WW.CBDDC.Network/Protocol/ProtocolHandler.cs
Executable file
@@ -0,0 +1,259 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Google.Protobuf;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.CBDDC.Network.Proto;
|
||||
using ZB.MOM.WW.CBDDC.Network.Security;
|
||||
using ZB.MOM.WW.CBDDC.Network.Telemetry;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Protocol
|
||||
{
|
||||
/// <summary>
|
||||
/// Handles the low-level framing, compression, encryption, and serialization of CBDDC messages.
|
||||
/// Encapsulates the wire format: [Length (4)] [Type (1)] [Compression (1)] [Payload (N)]
|
||||
/// </summary>
|
||||
internal class ProtocolHandler
|
||||
{
|
||||
private readonly ILogger<ProtocolHandler> _logger;
|
||||
private readonly INetworkTelemetryService? _telemetry;
|
||||
private readonly SemaphoreSlim _writeLock = new SemaphoreSlim(1, 1);
|
||||
private readonly SemaphoreSlim _readLock = new SemaphoreSlim(1, 1);
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ProtocolHandler"/> class.
|
||||
/// </summary>
|
||||
/// <param name="logger">The logger used for protocol diagnostics.</param>
|
||||
/// <param name="telemetry">An optional telemetry service used to record network metrics.</param>
|
||||
public ProtocolHandler(ILogger<ProtocolHandler> logger, INetworkTelemetryService? telemetry = null)
|
||||
{
|
||||
_logger = logger;
|
||||
_telemetry = telemetry;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ProtocolHandler"/> class using a non-generic logger.
|
||||
/// </summary>
|
||||
/// <param name="logger">The logger used for protocol diagnostics.</param>
|
||||
/// <param name="telemetry">An optional telemetry service used to record network metrics.</param>
|
||||
internal ProtocolHandler(ILogger logger, INetworkTelemetryService? telemetry = null)
|
||||
: this(new ForwardingLogger(logger), telemetry)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Serializes and sends a protocol message to the provided stream.
|
||||
/// </summary>
|
||||
/// <param name="stream">The destination stream.</param>
|
||||
/// <param name="type">The protocol message type.</param>
|
||||
/// <param name="message">The message payload to serialize.</param>
|
||||
/// <param name="useCompression">Whether payload compression should be attempted.</param>
|
||||
/// <param name="cipherState">Optional cipher state used to encrypt outgoing payloads.</param>
|
||||
/// <param name="token">Cancellation token.</param>
|
||||
/// <returns>A task that represents the asynchronous send operation.</returns>
|
||||
public async Task SendMessageAsync(Stream stream, MessageType type, IMessage message, bool useCompression, CipherState? cipherState, CancellationToken token = default)
|
||||
{
|
||||
if (stream == null) throw new ArgumentNullException(nameof(stream));
|
||||
|
||||
// 1. Serialize
|
||||
byte[] payloadBytes = message.ToByteArray();
|
||||
int originalSize = payloadBytes.Length;
|
||||
byte compressionFlag = 0x00;
|
||||
|
||||
// 2. Compress (inner payload)
|
||||
if (useCompression && payloadBytes.Length > CompressionHelper.THRESHOLD && type != MessageType.SecureEnv)
|
||||
{
|
||||
// Measure Compression Time
|
||||
// using var _ = _telemetry?.StartMetric(MetricType.CompressionTime); // Oops, MetricType.CompressionTime not defined? Wait, user asked for "Compression Ratio".
|
||||
// User asked for "performance della compressione brotli (% media di compressione)".
|
||||
// That usually means ratio. But time is also good?
|
||||
// Plan said: "MetricType: CompressionRatio, EncryptionTime..."
|
||||
|
||||
// byte[] compressed; // Removed unused variable
|
||||
// using (_telemetry?.StartMetric(MetricType.CompressionTime)) // Let's stick to Time if relevant? NO, MetricType only has Ratio.
|
||||
// Ah I see MetricType enum: CompressionRatio, EncryptionTime, DecryptionTime, RoundTripTime.
|
||||
// So for compression we only record Ratio.
|
||||
|
||||
payloadBytes = CompressionHelper.Compress(payloadBytes);
|
||||
compressionFlag = 0x01; // Brotli
|
||||
|
||||
if (_telemetry != null && originalSize > 0)
|
||||
{
|
||||
double ratio = (double)payloadBytes.Length / originalSize;
|
||||
_telemetry.RecordValue(MetricType.CompressionRatio, ratio);
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Encrypt
|
||||
if (cipherState != null)
|
||||
{
|
||||
using (_telemetry?.StartMetric(MetricType.EncryptionTime))
|
||||
{
|
||||
// Inner data: [Type (1)] [Compression (1)] [Payload (N)]
|
||||
var dataToEncrypt = new byte[2 + payloadBytes.Length];
|
||||
dataToEncrypt[0] = (byte)type;
|
||||
dataToEncrypt[1] = compressionFlag;
|
||||
Buffer.BlockCopy(payloadBytes, 0, dataToEncrypt, 2, payloadBytes.Length);
|
||||
|
||||
var (ciphertext, iv, tag) = CryptoHelper.Encrypt(dataToEncrypt, cipherState.EncryptKey);
|
||||
|
||||
var env = new SecureEnvelope
|
||||
{
|
||||
Ciphertext = ByteString.CopyFrom(ciphertext),
|
||||
Nonce = ByteString.CopyFrom(iv),
|
||||
AuthTag = ByteString.CopyFrom(tag)
|
||||
};
|
||||
|
||||
payloadBytes = env.ToByteArray();
|
||||
type = MessageType.SecureEnv;
|
||||
compressionFlag = 0x00; // Outer envelope is not compressed
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Thread-Safe Write
|
||||
await _writeLock.WaitAsync(token);
|
||||
try
|
||||
{
|
||||
_logger.LogDebug("Sending Message {Type}, OrgSize: {Org}, WireSize: {Wire}", type, originalSize, payloadBytes.Length);
|
||||
|
||||
// Framing: [Length (4)] [Type (1)] [Compression (1)] [Payload (N)]
|
||||
var lengthBytes = BitConverter.GetBytes(payloadBytes.Length);
|
||||
await stream.WriteAsync(lengthBytes, 0, 4, token);
|
||||
stream.WriteByte((byte)type);
|
||||
stream.WriteByte(compressionFlag);
|
||||
await stream.WriteAsync(payloadBytes, 0, payloadBytes.Length, token);
|
||||
await stream.FlushAsync(token);
|
||||
}
|
||||
finally
|
||||
{
|
||||
_writeLock.Release();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads and decodes the next protocol message from the provided stream.
|
||||
/// </summary>
|
||||
/// <param name="stream">The source stream.</param>
|
||||
/// <param name="cipherState">Optional cipher state used to decrypt incoming payloads.</param>
|
||||
/// <param name="token">Cancellation token.</param>
|
||||
/// <returns>A tuple containing the decoded message type and payload bytes.</returns>
|
||||
public async Task<(MessageType, byte[])> ReadMessageAsync(Stream stream, CipherState? cipherState, CancellationToken token = default)
|
||||
{
|
||||
await _readLock.WaitAsync(token);
|
||||
try
|
||||
{
|
||||
var lenBuf = new byte[4];
|
||||
int read = await ReadExactAsync(stream, lenBuf, 0, 4, token);
|
||||
if (read == 0) return (MessageType.Unknown, null!);
|
||||
|
||||
int length = BitConverter.ToInt32(lenBuf, 0);
|
||||
|
||||
int typeByte = stream.ReadByte();
|
||||
if (typeByte == -1) throw new EndOfStreamException("Connection closed abruptly (type byte)");
|
||||
|
||||
int compByte = stream.ReadByte();
|
||||
if (compByte == -1) throw new EndOfStreamException("Connection closed abruptly (comp byte)");
|
||||
|
||||
var payload = new byte[length];
|
||||
await ReadExactAsync(stream, payload, 0, length, token);
|
||||
|
||||
var msgType = (MessageType)typeByte;
|
||||
|
||||
// Handle Secure Envelope
|
||||
if (msgType == MessageType.SecureEnv)
|
||||
{
|
||||
if (cipherState == null) throw new InvalidOperationException("Received encrypted message but no cipher state established");
|
||||
|
||||
byte[] decrypted;
|
||||
using (_telemetry?.StartMetric(MetricType.DecryptionTime))
|
||||
{
|
||||
var env = SecureEnvelope.Parser.ParseFrom(payload);
|
||||
decrypted = CryptoHelper.Decrypt(
|
||||
env.Ciphertext.ToByteArray(),
|
||||
env.Nonce.ToByteArray(),
|
||||
env.AuthTag.ToByteArray(),
|
||||
cipherState.DecryptKey);
|
||||
}
|
||||
|
||||
if (decrypted.Length < 2) throw new InvalidDataException("Decrypted payload too short");
|
||||
|
||||
msgType = (MessageType)decrypted[0];
|
||||
int innerComp = decrypted[1];
|
||||
|
||||
var innerPayload = new byte[decrypted.Length - 2];
|
||||
Buffer.BlockCopy(decrypted, 2, innerPayload, 0, innerPayload.Length);
|
||||
|
||||
if (innerComp == 0x01)
|
||||
{
|
||||
innerPayload = CompressionHelper.Decompress(innerPayload);
|
||||
}
|
||||
|
||||
return (msgType, innerPayload);
|
||||
}
|
||||
|
||||
// Handle Unencrypted Compression
|
||||
if (compByte == 0x01)
|
||||
{
|
||||
payload = CompressionHelper.Decompress(payload);
|
||||
}
|
||||
|
||||
_logger.LogDebug("Read Message {Type}, Size: {Size}", msgType, payload.Length);
|
||||
return (msgType, payload);
|
||||
}
|
||||
finally
|
||||
{
|
||||
_readLock.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<int> ReadExactAsync(Stream stream, byte[] buffer, int offset, int count, CancellationToken token)
|
||||
{
|
||||
int total = 0;
|
||||
while (total < count)
|
||||
{
|
||||
int read = await stream.ReadAsync(buffer, offset + total, count - total, token);
|
||||
if (read == 0) return 0; // EOF
|
||||
total += read;
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
private sealed class ForwardingLogger : ILogger<ProtocolHandler>
|
||||
{
|
||||
private readonly ILogger _inner;
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="ForwardingLogger"/> class.
|
||||
/// </summary>
|
||||
/// <param name="inner">The underlying logger instance.</param>
|
||||
public ForwardingLogger(ILogger inner)
|
||||
{
|
||||
_inner = inner ?? throw new ArgumentNullException(nameof(inner));
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IDisposable? BeginScope<TState>(TState state) where TState : notnull
|
||||
{
|
||||
return _inner.BeginScope(state);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public bool IsEnabled(LogLevel logLevel)
|
||||
{
|
||||
return _inner.IsEnabled(logLevel);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public void Log<TState>(
|
||||
LogLevel logLevel,
|
||||
EventId eventId,
|
||||
TState state,
|
||||
Exception? exception,
|
||||
Func<TState, Exception?, string> formatter)
|
||||
{
|
||||
_inner.Log(logLevel, eventId, state, exception, formatter);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
72
src/ZB.MOM.WW.CBDDC.Network/README.md
Executable file
72
src/ZB.MOM.WW.CBDDC.Network/README.md
Executable file
@@ -0,0 +1,72 @@
|
||||
# ZB.MOM.WW.CBDDC.Network
|
||||
|
||||
Networking layer for **CBDDC** - provides peer-to-peer mesh networking with automatic discovery and synchronization.
|
||||
|
||||
## What's Included
|
||||
|
||||
This package handles all networking for CBDDC:
|
||||
|
||||
- **UDP Discovery**: Automatic peer discovery on LAN via broadcast
|
||||
- **TCP Synchronization**: Reliable data sync between nodes
|
||||
- **Gossip Protocol**: Efficient update propagation
|
||||
- **Sync Orchestrator**: Manages peer connections and sync operations
|
||||
- **Anti-Entropy**: Automatic reconciliation between peers
|
||||
- **Resilience**: Retry policies, timeouts, error handling
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
dotnet add package ZB.MOM.WW.CBDDC.Core
|
||||
dotnet add package ZB.MOM.WW.CBDDC.Network
|
||||
dotnet add package ZB.MOM.WW.CBDDC.Persistence.Sqlite
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```csharp
|
||||
using ZB.MOM.WW.CBDDC.Network;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
|
||||
var services = new ServiceCollection();
|
||||
|
||||
// Register networking
|
||||
services.AddCBDDCNetwork(
|
||||
nodeId: "my-node",
|
||||
tcpPort: 5000,
|
||||
authToken: "shared-secret"
|
||||
);
|
||||
|
||||
var provider = services.BuildServiceProvider();
|
||||
|
||||
// Start network node
|
||||
var node = provider.GetRequiredService<CBDDCNode>();
|
||||
node.Start();
|
||||
|
||||
// Nodes on the same LAN will discover each other automatically!
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
### Automatic Discovery
|
||||
Nodes broadcast their presence via UDP and automatically connect to peers on the same network.
|
||||
|
||||
### Secure Synchronization
|
||||
All nodes must share the same authentication token to sync data.
|
||||
|
||||
### Scalable Gossip
|
||||
Updates propagate exponentially - each node tells multiple peers, ensuring fast network-wide propagation.
|
||||
|
||||
## Documentation
|
||||
|
||||
- **[Architecture](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/docs/architecture.md)**
|
||||
- **[LAN Deployment](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/docs/deployment-lan.md)**
|
||||
- **[Network Configuration](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/docs/production-hardening.md)**
|
||||
|
||||
## Related Packages
|
||||
|
||||
- **ZB.MOM.WW.CBDDC.Core** - Core database abstractions
|
||||
- **ZB.MOM.WW.CBDDC.Persistence.Sqlite** - SQLite storage provider
|
||||
|
||||
## License
|
||||
|
||||
MIT - see [LICENSE](https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net/blob/main/LICENSE)
|
||||
33
src/ZB.MOM.WW.CBDDC.Network/Security/ClusterKeyAuthenticator.cs
Executable file
33
src/ZB.MOM.WW.CBDDC.Network/Security/ClusterKeyAuthenticator.cs
Executable file
@@ -0,0 +1,33 @@
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Security;
|
||||
|
||||
/// <summary>
/// Authenticator implementation that uses a shared secret (pre-shared key) to validate nodes.
/// Both nodes must possess the same key to successfully handshake.
/// </summary>
public class ClusterKeyAuthenticator : IAuthenticator
{
    private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;

    /// <summary>
    /// Initializes a new instance of the <see cref="ClusterKeyAuthenticator"/> class.
    /// </summary>
    /// <param name="peerNodeConfigurationProvider">The provider for peer node configuration.</param>
    public ClusterKeyAuthenticator(IPeerNodeConfigurationProvider peerNodeConfigurationProvider)
    {
        _peerNodeConfigurationProvider = peerNodeConfigurationProvider;
    }

    /// <inheritdoc />
    public async Task<bool> ValidateAsync(string nodeId, string token)
    {
        var configuration = await _peerNodeConfigurationProvider.GetConfiguration();

        // Hashing both secrets first yields equal-length digests, so the comparison
        // below runs in constant time regardless of the presented token's length.
        byte[] expected = HashToken(configuration.AuthToken);
        byte[] presented = HashToken(token);

        return CryptographicOperations.FixedTimeEquals(expected, presented);
    }

    // Normalizes a possibly-null token into a fixed-length SHA-256 digest.
    private static byte[] HashToken(string? token) =>
        SHA256.HashData(Encoding.UTF8.GetBytes(token ?? string.Empty));
}
|
||||
81
src/ZB.MOM.WW.CBDDC.Network/Security/CryptoHelper.cs
Executable file
81
src/ZB.MOM.WW.CBDDC.Network/Security/CryptoHelper.cs
Executable file
@@ -0,0 +1,81 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Security.Cryptography;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Security;
|
||||
|
||||
/// <summary>
/// AES-CBC encryption with an HMAC-SHA256 authentication tag (encrypt-then-MAC over IV + ciphertext).
/// </summary>
public static class CryptoHelper
{
    private const int KeySize = 32; // 256 bits
    private const int BlockSize = 16; // 128 bits
    private const int MacSize = 32; // 256 bits (HMACSHA256)

    /// <summary>
    /// Encrypts plaintext and computes an authentication tag.
    /// </summary>
    /// <param name="plaintext">The plaintext bytes to encrypt.</param>
    /// <param name="key">The encryption and HMAC key.</param>
    /// <returns>The ciphertext, IV, and authentication tag.</returns>
    public static (byte[] ciphertext, byte[] iv, byte[] tag) Encrypt(byte[] plaintext, byte[] key)
    {
        byte[] iv;
        byte[] ciphertext;

        using (var aes = Aes.Create())
        {
            aes.Key = key;
            aes.GenerateIV();
            iv = aes.IV;

            using var encryptor = aes.CreateEncryptor();
            ciphertext = encryptor.TransformFinalBlock(plaintext, 0, plaintext.Length);
        }

        // Encrypt-then-MAC: the tag covers IV + ciphertext.
        byte[] tag = ComputeTag(key, iv, ciphertext);
        return (ciphertext, iv, tag);
    }

    /// <summary>
    /// Verifies and decrypts ciphertext.
    /// </summary>
    /// <param name="ciphertext">The encrypted bytes.</param>
    /// <param name="iv">The initialization vector used during encryption.</param>
    /// <param name="tag">The authentication tag for integrity verification.</param>
    /// <param name="key">The encryption and HMAC key.</param>
    /// <returns>The decrypted plaintext bytes.</returns>
    /// <exception cref="CryptographicException">The authentication tag does not match.</exception>
    public static byte[] Decrypt(byte[] ciphertext, byte[] iv, byte[] tag, byte[] key)
    {
        // Verify the tag before touching the cipher: never decrypt unauthenticated data.
        byte[] computedTag = ComputeTag(key, iv, ciphertext);
        if (!FixedTimeEquals(tag, computedTag))
        {
            throw new CryptographicException("Authentication failed (HMAC mismatch)");
        }

        using var aes = Aes.Create();
        aes.Key = key;
        aes.IV = iv;

        using var decryptor = aes.CreateDecryptor();
        return decryptor.TransformFinalBlock(ciphertext, 0, ciphertext.Length);
    }

    // HMAC-SHA256 over the concatenation IV || ciphertext.
    private static byte[] ComputeTag(byte[] key, byte[] iv, byte[] ciphertext)
    {
        var authenticated = new byte[iv.Length + ciphertext.Length];
        Buffer.BlockCopy(iv, 0, authenticated, 0, iv.Length);
        Buffer.BlockCopy(ciphertext, 0, authenticated, iv.Length, ciphertext.Length);

        using var hmac = new HMACSHA256(key);
        return hmac.ComputeHash(authenticated);
    }

    // Constant-time comparison; falls back to a manual XOR loop on pre-.NET 6 targets.
    private static bool FixedTimeEquals(byte[] left, byte[] right)
    {
#if NET6_0_OR_GREATER
        return CryptographicOperations.FixedTimeEquals(left, right);
#else
        if (left.Length != right.Length) return false;
        int res = 0;
        for (int i = 0; i < left.Length; i++) res |= left[i] ^ right[i];
        return res == 0;
#endif
    }
}
|
||||
14
src/ZB.MOM.WW.CBDDC.Network/Security/IAuthenticator.cs
Executable file
14
src/ZB.MOM.WW.CBDDC.Network/Security/IAuthenticator.cs
Executable file
@@ -0,0 +1,14 @@
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Security;
|
||||
|
||||
/// <summary>
/// Validates authentication tokens presented by peer nodes.
/// </summary>
public interface IAuthenticator
{
    /// <summary>
    /// Validates an authentication token for a node identifier.
    /// </summary>
    /// <param name="nodeId">The node identifier to validate.</param>
    /// <param name="token">The authentication token to validate.</param>
    /// <returns><see langword="true"/> if the token is valid for the node; otherwise <see langword="false"/>.</returns>
    Task<bool> ValidateAsync(string nodeId, string token);
}
|
||||
42
src/ZB.MOM.WW.CBDDC.Network/Security/IPeerHandshakeService.cs
Executable file
42
src/ZB.MOM.WW.CBDDC.Network/Security/IPeerHandshakeService.cs
Executable file
@@ -0,0 +1,42 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Security;
|
||||
|
||||
/// <summary>
/// Performs the connection handshake between two peers, optionally producing a cipher state
/// for an encrypted channel.
/// </summary>
public interface IPeerHandshakeService
{
    /// <summary>
    /// Performs a handshake to establish identity and an optional security context.
    /// </summary>
    /// <param name="stream">The transport stream used for handshake message exchange.</param>
    /// <param name="isInitiator">A value indicating whether the caller initiated the connection.</param>
    /// <param name="myNodeId">The local node identifier.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>A CipherState if encryption is established, or null if plaintext.</returns>
    Task<CipherState?> HandshakeAsync(System.IO.Stream stream, bool isInitiator, string myNodeId, CancellationToken token);
}
|
||||
|
||||
/// <summary>
/// Holds the directional symmetric key material negotiated during a peer handshake.
/// IV handling is delegated to the encryption helper; only the keys are stored here.
/// </summary>
public class CipherState
{
    /// <summary>
    /// Initializes a new instance of the <see cref="CipherState"/> class.
    /// </summary>
    /// <param name="encryptKey">The key used for encrypting outgoing payloads.</param>
    /// <param name="decryptKey">The key used for decrypting incoming payloads.</param>
    public CipherState(byte[] encryptKey, byte[] decryptKey)
    {
        EncryptKey = encryptKey;
        DecryptKey = decryptKey;
    }

    /// <summary>
    /// Gets the key used to encrypt outgoing messages.
    /// </summary>
    public byte[] EncryptKey { get; }

    /// <summary>
    /// Gets the key used to decrypt incoming messages.
    /// </summary>
    public byte[] DecryptKey { get; }
}
|
||||
29
src/ZB.MOM.WW.CBDDC.Network/Security/NoOpHandshakeService.cs
Executable file
29
src/ZB.MOM.WW.CBDDC.Network/Security/NoOpHandshakeService.cs
Executable file
@@ -0,0 +1,29 @@
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Security;
|
||||
|
||||
/// <summary>
/// No-operation <see cref="IPeerHandshakeService"/>: performs no message exchange and never
/// establishes a cipher state, so the connection proceeds in plaintext.
/// </summary>
/// <remarks>Useful when a handshake is not required, or as a stand-in for testing.</remarks>
public class NoOpHandshakeService : IPeerHandshakeService
{
    /// <summary>
    /// Completes immediately without reading from or writing to <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">The transport stream; untouched by this implementation.</param>
    /// <param name="isInitiator">Whether the local node initiated the connection; ignored.</param>
    /// <param name="myNodeId">The local node identifier; ignored.</param>
    /// <param name="token">Cancellation token; ignored since no work is performed.</param>
    /// <returns>A completed task whose result is always null (no encryption established).</returns>
    public Task<CipherState?> HandshakeAsync(Stream stream, bool isInitiator, string myNodeId, CancellationToken token) =>
        Task.FromResult<CipherState?>(null);
}
|
||||
112
src/ZB.MOM.WW.CBDDC.Network/Security/SecureHandshakeService.cs
Executable file
112
src/ZB.MOM.WW.CBDDC.Network/Security/SecureHandshakeService.cs
Executable file
@@ -0,0 +1,112 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Security.Cryptography;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Security;
|
||||
|
||||
/// <summary>
/// Negotiates per-session symmetric keys via an ephemeral 256-bit ECDH key exchange,
/// then derives one key per traffic direction from the shared secret.
/// </summary>
/// <remarks>
/// NOTE(review): the exchange is unauthenticated — neither side proves its identity here,
/// so this protects against passive eavesdropping but not an active man-in-the-middle.
/// Confirm that peer authentication is enforced elsewhere (e.g. an AuthToken check)
/// before relying on this alone.
/// </remarks>
public class SecureHandshakeService : IPeerHandshakeService
{
    private readonly ILogger<SecureHandshakeService>? _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="SecureHandshakeService"/> class.
    /// </summary>
    /// <param name="logger">The optional logger instance.</param>
    public SecureHandshakeService(ILogger<SecureHandshakeService>? logger = null)
    {
        _logger = logger;
    }

    // Simple protocol:
    // Initiator -> [Public Key Length (4) + Public Key]
    // Responder -> [Public Key Length (4) + Public Key]
    // Both derive shared secret -> Split into SendKey/RecvKey using HKDF

    /// <summary>
    /// Performs a secure key exchange handshake over the provided stream.
    /// </summary>
    /// <param name="stream">The transport stream used for the handshake.</param>
    /// <param name="isInitiator">A value indicating whether the local node initiated the handshake.</param>
    /// <param name="myNodeId">The local node identifier.</param>
    /// <param name="token">A token used to cancel the handshake.</param>
    /// <returns>
    /// A task that returns the negotiated <see cref="CipherState"/>, or <see langword="null"/> if unavailable.
    /// </returns>
    public async Task<CipherState?> HandshakeAsync(Stream stream, bool isInitiator, string myNodeId, CancellationToken token)
    {
#if NET6_0_OR_GREATER
        // A fresh key pair per handshake => ephemeral keys (forward secrecy per session).
        using var ecdh = ECDiffieHellman.Create();
        ecdh.KeySize = 256;

        // 1. ExportAsync & Send Public Key
        // Both sides write first and read second; this is deadlock-free only because the
        // key blobs are small enough to fit in the transport's send buffers.
        var myPublicKey = ecdh.ExportSubjectPublicKeyInfo();
        // NOTE(review): BitConverter uses the platform's native byte order for the 4-byte
        // length prefix; mixed-endian peers would disagree — confirm all targets are little-endian.
        var lenBytes = BitConverter.GetBytes(myPublicKey.Length);
        await stream.WriteAsync(lenBytes, 0, 4, token);
        await stream.WriteAsync(myPublicKey, 0, myPublicKey.Length, token);
        await stream.FlushAsync(token); // CRITICAL: Ensure data is sent immediately

        // 2. Receive Peer Public Key
        var peerLenBuf = new byte[4];
        await ReadExactAsync(stream, peerLenBuf, 0, 4, token);
        int peerLen = BitConverter.ToInt32(peerLenBuf, 0);

        // Validate peer key length to prevent DoS
        if (peerLen <= 0 || peerLen > 10000)
        {
            throw new InvalidOperationException($"Invalid peer key length: {peerLen}");
        }

        var peerKeyBytes = new byte[peerLen];
        await ReadExactAsync(stream, peerKeyBytes, 0, peerLen, token);

        // 3. Import Peer Key & Derive Shared Secret
        using var peerEcdh = ECDiffieHellman.Create();
        peerEcdh.ImportSubjectPublicKeyInfo(peerKeyBytes, out _);

        byte[] sharedSecret = ecdh.DeriveKeyMaterial(peerEcdh.PublicKey);

        // 4. Derive Session Keys (HKDF-like expansion)
        // Use SHA256 to split/expand secret into EncryptKey and DecryptKey
        // Simple approach: Hash(secret + "0") -> Key1, Hash(secret + "1") -> Key2

        using var sha = SHA256.Create();

        var k1Input = new byte[sharedSecret.Length + 1];
        Buffer.BlockCopy(sharedSecret, 0, k1Input, 0, sharedSecret.Length);
        k1Input[sharedSecret.Length] = 0; // "0"
        var key1 = sha.ComputeHash(k1Input);

        var k2Input = new byte[sharedSecret.Length + 1];
        Buffer.BlockCopy(sharedSecret, 0, k2Input, 0, sharedSecret.Length);
        k2Input[sharedSecret.Length] = 1; // "1"
        var key2 = sha.ComputeHash(k2Input);

        // If initiator: Encrypt with Key1, Decrypt with Key2
        // If responder: Encrypt with Key2, Decrypt with Key1
        // (The role swap is what makes each side's EncryptKey the other's DecryptKey.)

        var encryptKey = isInitiator ? key1 : key2;
        var decryptKey = isInitiator ? key2 : key1;

        return new CipherState(encryptKey, decryptKey);
#else
        // For netstandard2.0, standard ECDH import is broken/hard without external libs.
        // Returning null or throwing.
        throw new PlatformNotSupportedException("Secure handshake requires .NET 6.0+");
#endif
    }

    /// <summary>
    /// Reads exactly <paramref name="count"/> bytes into <paramref name="buffer"/>,
    /// looping over partial reads; throws <see cref="EndOfStreamException"/> if the
    /// stream ends early.
    /// </summary>
    private async Task<int> ReadExactAsync(Stream stream, byte[] buffer, int offset, int count, CancellationToken token)
    {
        int total = 0;
        while (total < count)
        {
            int read = await stream.ReadAsync(buffer, offset + total, count - total, token);
            if (read == 0) throw new EndOfStreamException();
            total += read;
        }
        return total;
    }
}
|
||||
919
src/ZB.MOM.WW.CBDDC.Network/SyncOrchestrator.cs
Executable file
919
src/ZB.MOM.WW.CBDDC.Network/SyncOrchestrator.cs
Executable file
@@ -0,0 +1,919 @@
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Network.Security;
|
||||
using ZB.MOM.WW.CBDDC.Network.Telemetry;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Serilog.Context;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
|
||||
/// Orchestrates the synchronization process between the local node and discovered peers.
|
||||
/// Manages anti-entropy sessions and data exchange.
|
||||
/// </summary>
|
||||
public class SyncOrchestrator : ISyncOrchestrator
|
||||
{
|
||||
private readonly IDiscoveryService _discovery;
|
||||
private readonly IOplogStore _oplogStore;
|
||||
private readonly IOplogPruneCutoffCalculator? _oplogPruneCutoffCalculator;
|
||||
private readonly IPeerOplogConfirmationStore? _peerOplogConfirmationStore;
|
||||
private readonly IDocumentStore _documentStore;
|
||||
private readonly ISnapshotMetadataStore _snapshotMetadataStore;
|
||||
private readonly ISnapshotService _snapshotService;
|
||||
private readonly IPeerNodeConfigurationProvider _peerNodeConfigurationProvider;
|
||||
private readonly ILogger<SyncOrchestrator> _logger;
|
||||
private readonly ILoggerFactory _loggerFactory;
|
||||
private CancellationTokenSource? _cts;
|
||||
private readonly Random _random = new Random();
|
||||
private readonly object _startStopLock = new object();
|
||||
|
||||
// Persistent clients pool
|
||||
private readonly ConcurrentDictionary<string, TcpPeerClient> _clients = new();
|
||||
private readonly ConcurrentDictionary<string, PeerStatus> _peerStates = new();
|
||||
|
||||
private readonly IPeerHandshakeService? _handshakeService;
|
||||
private readonly INetworkTelemetryService? _telemetry;
|
||||
/// <summary>
/// Tracks consecutive sync failures and the exponential-backoff retry window for a peer.
/// </summary>
private class PeerStatus
{
    /// <summary>
    /// Gets or sets the number of consecutive failures for the peer.
    /// </summary>
    public int FailureCount { get; set; }

    /// <summary>
    /// Gets or sets the next time a retry attempt is allowed.
    /// </summary>
    public DateTime NextRetryTime { get; set; }
}
|
||||
|
||||
private DateTime _lastMaintenanceTime = DateTime.MinValue;
|
||||
|
||||
/// <summary>
/// Initializes a new instance of the <see cref="SyncOrchestrator"/> class.
/// </summary>
/// <param name="discovery">The discovery service.</param>
/// <param name="oplogStore">The oplog store.</param>
/// <param name="documentStore">The document store.</param>
/// <param name="snapshotStore">The snapshot metadata store.</param>
/// <param name="snapshotService">The snapshot service.</param>
/// <param name="peerNodeConfigurationProvider">The peer configuration provider.</param>
/// <param name="loggerFactory">The logger factory.</param>
/// <param name="peerOplogConfirmationStore">The optional peer confirmation watermark store.</param>
/// <param name="handshakeService">The optional peer handshake service.</param>
/// <param name="telemetry">The optional network telemetry service.</param>
/// <param name="oplogPruneCutoffCalculator">The optional cutoff calculator for safe maintenance pruning.</param>
public SyncOrchestrator(
    IDiscoveryService discovery,
    IOplogStore oplogStore,
    IDocumentStore documentStore,
    ISnapshotMetadataStore snapshotStore,
    ISnapshotService snapshotService,
    IPeerNodeConfigurationProvider peerNodeConfigurationProvider,
    ILoggerFactory loggerFactory,
    IPeerOplogConfirmationStore? peerOplogConfirmationStore = null,
    IPeerHandshakeService? handshakeService = null,
    INetworkTelemetryService? telemetry = null,
    IOplogPruneCutoffCalculator? oplogPruneCutoffCalculator = null)
{
    // Optional collaborators (confirmation store, handshake, telemetry, prune calculator)
    // default to null; dependent features are skipped at runtime when they are absent.
    _discovery = discovery;
    _oplogStore = oplogStore;
    _oplogPruneCutoffCalculator = oplogPruneCutoffCalculator;
    _peerOplogConfirmationStore = peerOplogConfirmationStore;
    _documentStore = documentStore;
    _snapshotMetadataStore = snapshotStore;
    _snapshotService = snapshotService;
    _peerNodeConfigurationProvider = peerNodeConfigurationProvider;
    _loggerFactory = loggerFactory;
    _logger = loggerFactory.CreateLogger<SyncOrchestrator>();
    _handshakeService = handshakeService;
    _telemetry = telemetry;
}
|
||||
|
||||
/// <summary>
/// Starts the synchronization orchestrator loop. Safe to call repeatedly; subsequent
/// calls while running are logged and ignored.
/// </summary>
/// <returns>A completed task once startup has been triggered.</returns>
public async Task Start()
{
    CancellationTokenSource cts;

    lock (_startStopLock)
    {
        if (_cts != null)
        {
            _logger.LogWarning("Sync Orchestrator already started");
            return;
        }
        cts = new CancellationTokenSource();
        _cts = cts;
    }

    // BUGFIX: read the token from the local reference, not from _cts. Reading
    // _cts.Token outside the lock raced with a concurrent Stop(), which nulls and
    // disposes _cts (NullReferenceException / ObjectDisposedException).
    var token = cts.Token;

    // Fire-and-forget: the loop owns its own lifetime; failures are logged, not propagated.
    _ = Task.Run(async () =>
    {
        try
        {
            await SyncLoopAsync(token);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Sync Loop task failed");
        }
    }, token);

    await Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Stops the synchronization orchestrator loop and releases client resources.
/// Safe to call repeatedly; calls while stopped are logged and ignored.
/// </summary>
/// <returns>A completed task once shutdown has been triggered.</returns>
public async Task Stop()
{
    CancellationTokenSource? ctsToDispose = null;

    // Swap _cts to null under the lock so a concurrent Start observes a consistent state;
    // cancellation/disposal happen outside the lock.
    lock (_startStopLock)
    {
        if (_cts == null)
        {
            _logger.LogWarning("Sync Orchestrator already stopped or never started");
            return;
        }

        ctsToDispose = _cts;
        _cts = null;
    }

    try
    {
        ctsToDispose.Cancel();
    }
    catch (ObjectDisposedException)
    {
        // Already disposed, ignore
    }
    finally
    {
        ctsToDispose.Dispose();
    }

    // Cleanup clients
    foreach (var client in _clients.Values)
    {
        try
        {
            client.Dispose();
        }
        catch (Exception ex)
        {
            // Best-effort disposal: one faulty client must not block shutdown of the rest.
            _logger.LogWarning(ex, "Error disposing client during shutdown");
        }
    }
    _clients.Clear();

    await Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Main synchronization loop. Periodically selects random peers to gossip with.
/// Each cycle: merge discovered + configured peers, filter by backoff, pick up to
/// 3 targets (interest-sharing peers first), sync them in parallel, then run
/// maintenance if due.
/// </summary>
private async Task SyncLoopAsync(CancellationToken token)
{
    _logger.LogInformation("Sync Orchestrator Started (Parallel P2P)");
    while (!token.IsCancellationRequested)
    {
        // Configuration is re-read every cycle, so config changes take effect without restart.
        var config = await _peerNodeConfigurationProvider.GetConfiguration();
        try
        {
            var discoveredPeers = _discovery.GetActivePeers();

            // Statically configured peers are represented as PeerNodes alongside discovered ones.
            var knownPeers = config.KnownPeers.Select(k => new PeerNode(
                k.NodeId,
                $"{k.Host}:{k.Port}",
                DateTimeOffset.UtcNow,
                PeerType.StaticRemote));

            var allPeers = BuildMergedPeerList(discoveredPeers, knownPeers, config.NodeId);

            await EnsurePeersRegisteredAsync(allPeers, config.NodeId, token);

            // Filter peers based on backoff
            var now = DateTime.UtcNow;
            var eligiblePeers = allPeers.Where(p =>
            {
                if (_peerStates.TryGetValue(p.NodeId, out var status))
                {
                    return status.NextRetryTime <= now;
                }
                // No recorded state means the peer has never failed: always eligible.
                return true;
            }).ToList();

            // Interest-Aware Gossip: Prioritize peers sharing interests with us
            var localInterests = _documentStore.InterestedCollection.ToList();
            var targets = eligiblePeers
                .OrderByDescending(p => p.InterestingCollections.Any(ci => localInterests.Contains(ci)))
                .ThenBy(x => _random.Next()) // random tie-break spreads gossip across peers
                .Take(3)
                .ToList();

            // NetStandard 2.0 fallback: Use Task.WhenAll
            var tasks = targets.Select(peer => TrySyncWithPeer(peer, token));
            await Task.WhenAll(tasks);

            await RunMaintenanceIfDueAsync(config, now, token);
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Sync Loop Cancelled");
            break;
        }
        catch (Exception ex)
        {
            // Any other failure is logged and the loop continues next cycle.
            _logger.LogError(ex, "Sync Loop Error");
        }

        try
        {
            await Task.Delay(2000, token); // fixed 2-second gossip interval
        }
        catch (OperationCanceledException)
        {
            break;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Runs periodic maintenance (oplog pruning) when the configured interval has elapsed.
/// </summary>
/// <param name="config">The current peer node configuration.</param>
/// <param name="now">The current UTC time used for interval evaluation.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous maintenance operation.</returns>
internal async Task RunMaintenanceIfDueAsync(PeerNodeConfiguration config, DateTime now, CancellationToken token)
{
    var maintenanceInterval = TimeSpan.FromMinutes(config.MaintenanceIntervalMinutes);
    if ((now - _lastMaintenanceTime) < maintenanceInterval)
    {
        return;
    }

    _logger.LogInformation("Running periodic maintenance (Oplog pruning)...");
    try
    {
        var cutoffDecision = await CalculatePruneCutoffAsync(config, token);
        if (!cutoffDecision.HasCutoff || !cutoffDecision.EffectiveCutoff.HasValue)
        {
            // No safe cutoff this cycle; still advance the clock so we wait a full
            // interval before asking again.
            _lastMaintenanceTime = now;
            var reason = string.IsNullOrWhiteSpace(cutoffDecision.Reason)
                ? "No effective cutoff was produced."
                : cutoffDecision.Reason;
            _logger.LogInformation("Skipping oplog prune for this maintenance cycle: {Reason}", reason);
            return;
        }

        await _oplogStore.PruneOplogAsync(cutoffDecision.EffectiveCutoff.Value, token);
        _lastMaintenanceTime = now;

        if (cutoffDecision.ConfirmationCutoff.HasValue)
        {
            _logger.LogInformation(
                "Maintenance completed successfully (Retention: {RetentionHours}h, EffectiveCutoff: {EffectiveCutoff}, ConfirmationCutoff: {ConfirmationCutoff}).",
                config.OplogRetentionHours,
                cutoffDecision.EffectiveCutoff.Value,
                cutoffDecision.ConfirmationCutoff.Value);
        }
        else
        {
            _logger.LogInformation(
                "Maintenance completed successfully (Retention: {RetentionHours}h, EffectiveCutoff: {EffectiveCutoff}).",
                config.OplogRetentionHours,
                cutoffDecision.EffectiveCutoff.Value);
        }
    }
    catch (Exception maintenanceEx)
    {
        // NOTE(review): _lastMaintenanceTime is not advanced on failure, so a failed
        // prune is retried on the very next sync cycle — presumably intentional; confirm
        // this cannot cause a tight retry loop against a persistently failing store.
        _logger.LogError(maintenanceEx, "Maintenance failed.");
    }
}
|
||||
|
||||
/// <summary>
/// Computes the prune cutoff for maintenance: delegates to the configured calculator
/// when present, otherwise falls back to a purely retention-based cutoff.
/// </summary>
private async Task<OplogPruneCutoffDecision> CalculatePruneCutoffAsync(PeerNodeConfiguration config, CancellationToken token)
{
    // Preferred path: the calculator knows about peer confirmations and picks a safe cutoff.
    if (_oplogPruneCutoffCalculator != null)
    {
        return await _oplogPruneCutoffCalculator.CalculateEffectiveCutoffAsync(config, token);
    }

    // Fallback: anchor the cutoff at "now minus the retention window", stamped with our node id.
    var fallbackCutoff = new HlcTimestamp(
        DateTimeOffset.UtcNow.AddHours(-config.OplogRetentionHours).ToUnixTimeMilliseconds(),
        0,
        config.NodeId);

    return OplogPruneCutoffDecision.WithCutoff(
        fallbackCutoff,
        confirmationCutoff: null,
        effectiveCutoff: fallbackCutoff,
        reason: "Oplog prune cutoff calculator is not configured.");
}
|
||||
|
||||
/// <summary>
/// Attempts to synchronize with a specific peer.
/// Uses Vector Clock comparison to determine what to pull/push for each node.
/// Performs handshake, vector clock exchange, and data exchange (Push/Pull per node).
/// On failure the cached client is discarded and the peer enters exponential backoff.
/// </summary>
private async Task TrySyncWithPeer(PeerNode peer, CancellationToken token)
{
    // Enrich all log entries for this session with a correlation id and peer identity.
    using var operationContext = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
    using var peerContext = LogContext.PushProperty("PeerNodeId", peer.NodeId);
    using var peerAddressContext = LogContext.PushProperty("PeerAddress", peer.Address);

    TcpPeerClient? client = null;
    bool shouldRemoveClient = false;
    bool syncSuccessful = false;

    try
    {
        var config = await _peerNodeConfigurationProvider.GetConfiguration();

        // Get or create persistent client
        client = _clients.GetOrAdd(peer.NodeId, id => new TcpPeerClient(
            peer.Address,
            _loggerFactory.CreateLogger<TcpPeerClient>(),
            _handshakeService,
            _telemetry));

        // Reconnect if disconnected
        if (!client.IsConnected)
        {
            await client.ConnectAsync(token);
        }

        // Handshake (idempotent)
        if (!await client.HandshakeAsync(config.NodeId, config.AuthToken, _documentStore.InterestedCollection, token))
        {
            _logger.LogWarning("Handshake rejected by {NodeId}", peer.NodeId);
            shouldRemoveClient = true;
            // NOTE(review): consider a dedicated exception type instead of bare Exception.
            throw new Exception("Handshake rejected");
        }

        // 1. Exchange Vector Clocks
        var remoteVectorClock = await client.GetVectorClockAsync(token);
        var localVectorClock = await _oplogStore.GetVectorClockAsync(token);

        _logger.LogDebug("Vector Clock - Local: {Local}, Remote: {Remote}", localVectorClock, remoteVectorClock);

        // The remote clock tells us what the peer has already seen; advance watermarks first.
        await AdvanceConfirmationsFromVectorClockAsync(peer.NodeId, localVectorClock, remoteVectorClock, token);

        // 2. Determine causality relationship
        var causality = localVectorClock.CompareTo(remoteVectorClock);

        // 3. PULL: Identify nodes where remote is ahead
        var nodesToPull = localVectorClock.GetNodesWithUpdates(remoteVectorClock).ToList();
        var nodesToPush = localVectorClock.GetNodesToPush(remoteVectorClock).ToList();
        if (nodesToPull.Any())
        {
            _logger.LogInformation("Pulling changes from {PeerNodeId} for {Count} nodes: {Nodes}",
                peer.NodeId, nodesToPull.Count, string.Join(", ", nodesToPull));

            foreach (var nodeId in nodesToPull)
            {
                var localTs = localVectorClock.GetTimestamp(nodeId);
                var remoteTs = remoteVectorClock.GetTimestamp(nodeId);

                _logger.LogDebug("Pulling Node {NodeId}: Local={LocalTs}, Remote={RemoteTs}",
                    nodeId, localTs, remoteTs);

                // PASS LOCAL INTERESTS TO PULL
                var changes = await client.PullChangesFromNodeAsync(nodeId, localTs, _documentStore.InterestedCollection, token);
                if (changes != null && changes.Count > 0)
                {
                    var result = await ProcessInboundBatchAsync(client, peer.NodeId, changes, token);
                    if (result != SyncBatchResult.Success)
                    {
                        // A bad batch aborts the whole session; remaining pulls/pushes are skipped.
                        _logger.LogWarning("Inbound batch processing failed with status {Status}. Aborting sync for this session.", result);
                        RecordFailure(peer.NodeId);
                        return;
                    }
                }
            }
        }

        // 4. PUSH: Identify nodes where local is ahead
        if (nodesToPush.Any())
        {
            _logger.LogInformation("Pushing changes to {PeerNodeId} for {Count} nodes: {Nodes}",
                peer.NodeId, nodesToPush.Count, string.Join(", ", nodesToPush));

            foreach (var nodeId in nodesToPush)
            {
                var remoteTs = remoteVectorClock.GetTimestamp(nodeId);

                // PUSH FILTERING: Pass remote receiver's interests to oplogStore for efficient retrieval
                var remoteInterests = client.RemoteInterests;
                var changes = (await _oplogStore.GetOplogForNodeAfterAsync(nodeId, remoteTs, remoteInterests, token)).ToList();

                if (changes.Any())
                {
                    _logger.LogDebug("Pushing {Count} filtered changes for Node {NodeId}", changes.Count, nodeId);
                    await client.PushChangesAsync(changes, token);
                    // Record that the peer now holds everything up to the newest pushed entry.
                    await AdvanceConfirmationForPushedBatchAsync(peer.NodeId, nodeId, changes, token);
                }
            }
        }

        // 5. Handle Concurrent/Equal cases
        if (causality == CausalityRelation.Equal)
        {
            _logger.LogDebug("Vector clocks are equal with {PeerNodeId}. No sync needed.", peer.NodeId);
        }
        else if (causality == CausalityRelation.Concurrent && !nodesToPull.Any() && !nodesToPush.Any())
        {
            _logger.LogDebug("Vector clocks are concurrent with {PeerNodeId}, but no divergence detected.", peer.NodeId);
        }

        syncSuccessful = true;
        RecordSuccess(peer.NodeId);
    }

    catch (SnapshotRequiredException)
    {
        // The peer's oplog no longer covers our gap: fall back to a merge snapshot sync.
        _logger.LogWarning("Snapshot required for peer {NodeId}. Initiating merge sync.", peer.NodeId);
        if (client != null && client.IsConnected)
        {
            try
            {
                await PerformSnapshotSyncAsync(client, true, token);
                syncSuccessful = true;
                RecordSuccess(peer.NodeId);
            }
            catch
            {
                RecordFailure(peer.NodeId);
                shouldRemoveClient = true;
            }
        }
        else
        {
            RecordFailure(peer.NodeId);
            shouldRemoveClient = true;
        }
    }
    catch (CorruptDatabaseException cex)
    {
        _logger.LogCritical(cex, "Local database corruption detected during sync with {NodeId}. Initiating EMERGENCY SNAPSHOT RECOVERY.", peer.NodeId);
        if (client != null && client.IsConnected)
        {
            try
            {
                // EMERGENCY RECOVERY: Replace local DB with remote snapshot (mergeOnly: false)
                await PerformSnapshotSyncAsync(client, false, token);
                syncSuccessful = true;
                RecordSuccess(peer.NodeId);
                _logger.LogInformation("Emergency recovery successful. Local database replaced.");
            }
            catch (Exception recoveryEx)
            {
                _logger.LogCritical(recoveryEx, "Emergency recovery failed. App state is critical.");
                RecordFailure(peer.NodeId);
                shouldRemoveClient = true;
            }
        }
        else
        {
            RecordFailure(peer.NodeId);
            shouldRemoveClient = true;
        }
    }
    catch (TimeoutException tex)
    {
        _logger.LogWarning("Sync with {NodeId} timed out: {Message}. Will retry later.", peer.NodeId, tex.Message);
        shouldRemoveClient = true;
        RecordFailure(peer.NodeId);
    }
    catch (SocketException sex)
    {
        _logger.LogWarning("Network error syncing with {NodeId}: {Message}. Will retry later.", peer.NodeId, sex.Message);
        shouldRemoveClient = true;
        RecordFailure(peer.NodeId);
    }
    catch (Exception ex)
    {
        _logger.LogWarning("Sync failed with {NodeId}: {Message}. Resetting connection.", peer.NodeId, ex.Message);
        shouldRemoveClient = true;
        RecordFailure(peer.NodeId);
    }
    finally
    {
        // A failed session discards the cached client so the next attempt reconnects cleanly.
        if (shouldRemoveClient && client != null)
        {
            if (_clients.TryRemove(peer.NodeId, out var removedClient))
            {
                try { removedClient.Dispose(); } catch { /* Ignore disposal errors */ }
            }
        }

        // Log successful sync outcome (failures are already logged in catch blocks)
        if (syncSuccessful)
        {
            _logger.LogInformation("Sync with {NodeId} completed successfully.", peer.NodeId);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Clears the failure state for a peer so it is immediately eligible for sync again.
/// </summary>
private void RecordSuccess(string nodeId)
{
    _peerStates.AddOrUpdate(
        nodeId,
        _ => new PeerStatus { FailureCount = 0, NextRetryTime = DateTime.MinValue },
        (_, existing) =>
        {
            // Reset in place so any concurrent reader of this instance sees the reset too.
            existing.FailureCount = 0;
            existing.NextRetryTime = DateTime.MinValue;
            return existing;
        });
}
|
||||
|
||||
/// <summary>
/// Merges discovered and configured peers into a distinct list that excludes the local node.
/// When the same NodeId appears more than once, the first occurrence wins
/// (discovered peers take precedence over statically configured ones).
/// </summary>
/// <param name="discoveredPeers">The peers discovered dynamically.</param>
/// <param name="knownPeers">The peers configured statically.</param>
/// <param name="localNodeId">The local node identifier to exclude from results.</param>
/// <returns>A de-duplicated list of peers eligible for synchronization.</returns>
internal static IReadOnlyList<PeerNode> BuildMergedPeerList(
    IEnumerable<PeerNode> discoveredPeers,
    IEnumerable<PeerNode> knownPeers,
    string localNodeId)
{
    var merged = new List<PeerNode>();
    var seenNodeIds = new HashSet<string>(StringComparer.Ordinal);

    foreach (var peer in discoveredPeers.Concat(knownPeers))
    {
        // Never sync with ourselves.
        if (string.Equals(peer.NodeId, localNodeId, StringComparison.Ordinal))
        {
            continue;
        }

        // HashSet.Add returns false for duplicates, so only the first entry per id is kept.
        if (seenNodeIds.Add(peer.NodeId))
        {
            merged.Add(peer);
        }
    }

    return merged;
}
|
||||
|
||||
/// <summary>
/// Ensures peers are registered in the confirmation store when that store is available.
/// Registration is best-effort: individual failures are logged and skipped.
/// </summary>
/// <param name="peers">The peers to register.</param>
/// <param name="localNodeId">The local node identifier used to skip self-registration.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous registration operation.</returns>
internal async Task EnsurePeersRegisteredAsync(IEnumerable<PeerNode> peers, string localNodeId, CancellationToken token)
{
    // Confirmation tracking is optional; without a store there is nothing to register.
    if (_peerOplogConfirmationStore == null)
    {
        return;
    }

    foreach (var peer in peers)
    {
        if (string.Equals(peer.NodeId, localNodeId, StringComparison.Ordinal))
        {
            continue;
        }

        try
        {
            await _peerOplogConfirmationStore.EnsurePeerRegisteredAsync(peer.NodeId, peer.Address, peer.Type, token);
        }
        catch (OperationCanceledException) when (token.IsCancellationRequested)
        {
            // Genuine cancellation must propagate; only per-peer store errors are swallowed below.
            throw;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to register peer {PeerNodeId} in confirmation store", peer.NodeId);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Advances peer confirmation watermarks using local and remote vector clock state.
/// For each source node, the peer is credited up to the local watermark whenever its
/// remote clock is at or ahead of ours.
/// </summary>
/// <param name="peerNodeId">The peer node identifier whose confirmations are being updated.</param>
/// <param name="localVectorClock">The local vector clock.</param>
/// <param name="remoteVectorClock">The remote vector clock.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous confirmation update operation.</returns>
internal async Task AdvanceConfirmationsFromVectorClockAsync(
    string peerNodeId,
    VectorClock localVectorClock,
    VectorClock remoteVectorClock,
    CancellationToken token)
{
    if (_peerOplogConfirmationStore == null)
    {
        return;
    }

    // Union of every source node either side knows about.
    var nodeIds = new HashSet<string>(localVectorClock.NodeIds, StringComparer.Ordinal);
    foreach (var nodeId in remoteVectorClock.NodeIds)
    {
        nodeIds.Add(nodeId);
    }

    foreach (var sourceNodeId in nodeIds)
    {
        var localTimestamp = localVectorClock.GetTimestamp(sourceNodeId);
        if (localTimestamp == default)
        {
            // Nothing known locally for this source, so there is no watermark to advance.
            continue;
        }

        var remoteTimestamp = remoteVectorClock.GetTimestamp(sourceNodeId);
        if (remoteTimestamp < localTimestamp)
        {
            // Peer is behind us for this source; it has not yet confirmed our watermark.
            continue;
        }

        // Peer is at or ahead of our local watermark for this source: credit it fully.
        await UpdatePeerConfirmationAsync(peerNodeId, sourceNodeId, localTimestamp, token);
    }
}
|
||||
|
||||
/// <summary>
/// Advances the peer confirmation watermark after successfully pushing a batch of changes.
/// Watermark updates are best-effort: store failures are logged, not propagated.
/// </summary>
/// <param name="peerNodeId">The peer node identifier that received the changes.</param>
/// <param name="sourceNodeId">The source node identifier associated with the pushed changes.</param>
/// <param name="pushedChanges">The pushed oplog entries.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>A task that represents the asynchronous confirmation update operation.</returns>
internal async Task AdvanceConfirmationForPushedBatchAsync(
    string peerNodeId,
    string sourceNodeId,
    IReadOnlyCollection<OplogEntry> pushedChanges,
    CancellationToken token)
{
    if (_peerOplogConfirmationStore == null || pushedChanges.Count == 0)
    {
        return;
    }

    // PERF: single O(n) scan for the newest entry instead of sorting the whole batch
    // (previously OrderBy(...).Last(), O(n log n)). On a timestamp tie the later element
    // wins, matching the old stable-sort + Last() behavior.
    var maxPushed = pushedChanges.Aggregate(
        (best, candidate) => candidate.Timestamp < best.Timestamp ? best : candidate);

    try
    {
        await _peerOplogConfirmationStore.UpdateConfirmationAsync(
            peerNodeId,
            sourceNodeId,
            maxPushed.Timestamp,
            maxPushed.Hash ?? string.Empty,
            token);
    }
    catch (OperationCanceledException) when (token.IsCancellationRequested)
    {
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Failed to advance push confirmation watermark for peer {PeerNodeId} and source {SourceNodeId}",
            peerNodeId, sourceNodeId);
    }
}
|
||||
|
||||
/// <summary>
/// Persists a confirmation watermark for one (peer, source) pair.
/// Best-effort: store failures are logged and swallowed so sync is never blocked.
/// </summary>
private async Task UpdatePeerConfirmationAsync(
    string peerNodeId,
    string sourceNodeId,
    HlcTimestamp timestamp,
    CancellationToken token)
{
    if (_peerOplogConfirmationStore == null)
    {
        return;
    }

    try
    {
        // Best-effort hash lookup: IOplogStore exposes latest hash per source node.
        var hash = await _oplogStore.GetLastEntryHashAsync(sourceNodeId, token) ?? string.Empty;
        await _peerOplogConfirmationStore.UpdateConfirmationAsync(peerNodeId, sourceNodeId, timestamp, hash, token);
    }
    catch (OperationCanceledException) when (token.IsCancellationRequested)
    {
        // Genuine cancellation must propagate.
        throw;
    }
    catch (Exception ex)
    {
        _logger.LogWarning(ex,
            "Failed to advance confirmation watermark for peer {PeerNodeId} and source {SourceNodeId}",
            peerNodeId, sourceNodeId);
    }
}
|
||||
|
||||
private void RecordFailure(string nodeId)
|
||||
{
|
||||
_peerStates.AddOrUpdate(nodeId,
|
||||
new PeerStatus { FailureCount = 1, NextRetryTime = DateTime.UtcNow.AddSeconds(1) },
|
||||
(k, v) =>
|
||||
{
|
||||
v.FailureCount++;
|
||||
// Exponential backoff: 1s, 2s, 4s... max 60s
|
||||
var delaySeconds = Math.Min(Math.Pow(2, v.FailureCount), 60);
|
||||
v.NextRetryTime = DateTime.UtcNow.AddSeconds(delaySeconds);
|
||||
return v;
|
||||
});
|
||||
}
|
||||
|
||||
    /// <summary>
    /// Outcome of validating and applying one inbound batch of changes
    /// (returned by the batch-processing routine below).
    /// </summary>
    private enum SyncBatchResult
    {
        /// <summary>The batch was validated (possibly after gap recovery) and applied.</summary>
        Success,
        /// <summary>The batch did not connect to local state and recovery failed or produced an invalid chain.</summary>
        GapDetected,
        /// <summary>Integrity failure. NOTE(review): not returned by the visible processing code, which soft-accepts hash mismatches — confirm whether this value is still used.</summary>
        IntegrityError,
        /// <summary>Hash linkage was broken between consecutive entries inside the batch itself.</summary>
        ChainBroken
    }
|
||||
|
||||
    /// <summary>
    /// Validates an inbound batch of changes, checks for gaps, performs recovery if needed, and applies to oplogStore.
    /// Extracted to enforce Single Responsibility Principle.
    /// </summary>
    /// <param name="client">The peer client used for gap-recovery range requests.</param>
    /// <param name="peerNodeId">The peer that sent the batch (used for logging only; entries are attributed by their own timestamps).</param>
    /// <param name="changes">The inbound oplog entries; may contain entries authored by multiple nodes.</param>
    /// <param name="token">The cancellation token.</param>
    /// <returns>The batch outcome: Success, ChainBroken (intra-batch linkage failure), or GapDetected (recovery chain invalid).</returns>
    /// <exception cref="SnapshotRequiredException">Propagated from gap recovery to trigger a full snapshot sync.</exception>
    private async Task<SyncBatchResult> ProcessInboundBatchAsync(TcpPeerClient client, string peerNodeId, IList<OplogEntry> changes, CancellationToken token)
    {
        _logger.LogInformation("Received {Count} changes from {NodeId}", changes.Count, peerNodeId);

        // 1. Validate internal integrity of the batch (Hash check)
        foreach (var entry in changes)
        {
            if (!entry.IsValid())
            {
                // CHANGED: Log Critical Error but ACCEPT the entry to allow sync to progress (Soft Validation).
                // Throwing here would cause an unrecoverable state where this batch blocks sync forever.
                _logger.LogError("Integrity Check Failed for Entry {Hash} (Node: {NodeId}). Expected: {computedHash}. ACCEPTING payload despite mismatch to maintain availability.",
                    entry.Hash, entry.Timestamp.NodeId, entry.ComputeHash());
            }
        }

        // 2. Group changes by Author Node to validate Source Chains independently
        var changesByNode = changes.GroupBy(c => c.Timestamp.NodeId);

        foreach (var group in changesByNode)
        {
            var authorNodeId = group.Key;

            // FIX: Order by the full Timestamp (Physical + Logical), not just LogicalCounter.
            // LogicalCounter resets when PhysicalTime advances, so sorting by Counter alone breaks chronological order.
            var authorChain = group.OrderBy(c => c.Timestamp).ToList();

            // Check linkage within the batch: each entry must reference its predecessor's hash.
            for (int i = 1; i < authorChain.Count; i++)
            {
                if (authorChain[i].PreviousHash != authorChain[i - 1].Hash)
                {
                    _logger.LogError("Chain Broken in Batch for Node {AuthorId}", authorNodeId);
                    return SyncBatchResult.ChainBroken;
                }
            }

            // Check linkage with Local State
            var firstEntry = authorChain[0];
            var localHeadHash = await _oplogStore.GetLastEntryHashAsync(authorNodeId, token);

            _logger.LogDebug("Processing chain for Node {AuthorId}: FirstEntry.PrevHash={PrevHash}, FirstEntry.Hash={Hash}, LocalHeadHash={LocalHead}",
                authorNodeId, firstEntry.PreviousHash, firstEntry.Hash, localHeadHash ?? "(null)");

            if (localHeadHash != null && firstEntry.PreviousHash != localHeadHash)
            {
                // Check if entry starts from snapshot boundary (valid case after pruning)
                var snapshotHash = await _snapshotMetadataStore.GetSnapshotHashAsync(authorNodeId, token);

                if (snapshotHash != null && firstEntry.PreviousHash == snapshotHash)
                {
                    // Entry connects to snapshot boundary - this is expected after pruning/snapshot sync
                    // This prevents infinite snapshot request loops when old nodes reconnect
                    _logger.LogInformation(
                        "Entry for Node {AuthorId} connects to snapshot boundary (Hash: {SnapshotHash}). Accepting without gap recovery. Network convergence in effect.",
                        authorNodeId, snapshotHash);

                    // No gap recovery needed - this is a valid state
                }
                else
                {
                    // GAP DETECTED (not a snapshot boundary case)
                    _logger.LogWarning("Gap Detected for Node {AuthorId}. Local Head: {Local}, Remote Prev: {Prev}. Initiating Recovery.",
                        authorNodeId, localHeadHash, firstEntry.PreviousHash);

                    // Gap Recovery (Range Sync): ask the peer for the entries connecting our
                    // local head to the first entry's predecessor.
                    List<OplogEntry>? missingChain = null;
                    try
                    {
                        missingChain = await client.GetChainRangeAsync(localHeadHash, firstEntry.PreviousHash, token);
                    }
                    catch (SnapshotRequiredException)
                    {
                        throw; // Propagate up to trigger full sync
                    }
                    catch (Exception ex)
                    {
                        _logger.LogWarning(ex, "Gap Recovery failed.");
                        /* Fallthrough to decision logic */
                    }

                    if (missingChain != null && missingChain.Any())
                    {
                        _logger.LogInformation("Gap Recovery: Retrieved {Count} missing entries.", missingChain.Count);

                        // Validate Recovery Chain Linkage: it must start at our local head,
                        // be internally linked, and end exactly at the batch's first predecessor.
                        bool linkValid = true;
                        if (missingChain[0].PreviousHash != localHeadHash) linkValid = false;
                        for (int i = 1; i < missingChain.Count; i++)
                            if (missingChain[i].PreviousHash != missingChain[i - 1].Hash) linkValid = false;
                        if (missingChain.Last().Hash != firstEntry.PreviousHash) linkValid = false;

                        if (!linkValid)
                        {
                            _logger.LogError("Recovery Chain Invalid Linkage. Aborting Gap Recovery.");
                            return SyncBatchResult.GapDetected;
                        }

                        // Apply Missing Chain First
                        await _oplogStore.ApplyBatchAsync(missingChain, token);
                        _logger.LogInformation("Gap Recovery Applied Successfully.");
                    }
                    else
                    {
                        // Gap recovery failed. This can happen if:
                        // 1. This is actually our first contact with this node's history
                        // 2. The peer doesn't have the full history
                        // 3. There's a true gap that cannot be recovered

                        // DECISION: Accept the entries anyway but log a warning
                        // This allows forward progress even with partial history
                        _logger.LogWarning("Could not recover gap for Node {AuthorId}. Local Head: {Local}, Remote Prev: {Prev}. Accepting entries anyway (partial sync).",
                            authorNodeId, localHeadHash, firstEntry.PreviousHash);

                        // Optionally: Mark this as a partial sync in metadata
                        // For now, we proceed and let the chain continue from this point
                    }
                }
            }
            else if (localHeadHash == null && !string.IsNullOrEmpty(firstEntry.PreviousHash))
            {
                // Implicit Accept / Partial Sync warning
                _logger.LogWarning("First contact with Node {AuthorId} at explicit state (Not Genesis). Accepting.", authorNodeId);
            }

            // Apply original batch (grouped by node for clarity, but oplogStore usually handles bulk)
            await _oplogStore.ApplyBatchAsync(authorChain, token);
        }

        return SyncBatchResult.Success;
    }
|
||||
|
||||
private async Task PerformSnapshotSyncAsync(TcpPeerClient client, bool mergeOnly, CancellationToken token)
|
||||
{
|
||||
_logger.LogInformation(mergeOnly ? "Starting Snapshot Merge..." : "Starting Full Database Replacement...");
|
||||
|
||||
var tempFile = Path.GetTempFileName();
|
||||
try
|
||||
{
|
||||
_logger.LogInformation("Downloading snapshot to {TempFile}...", tempFile);
|
||||
using (var fs = File.Create(tempFile))
|
||||
{
|
||||
await client.GetSnapshotAsync(fs, token);
|
||||
}
|
||||
|
||||
_logger.LogInformation("Snapshot Downloaded. applying to store...");
|
||||
|
||||
using (var fs = File.OpenRead(tempFile))
|
||||
{
|
||||
if (mergeOnly)
|
||||
{
|
||||
await _snapshotService.MergeSnapshotAsync(fs, token);
|
||||
}
|
||||
else
|
||||
{
|
||||
await _snapshotService.ReplaceDatabaseAsync(fs, token);
|
||||
}
|
||||
}
|
||||
|
||||
_logger.LogInformation("Snapshot applied successfully.");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Failed to perform snapshot sync");
|
||||
throw;
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (File.Exists(tempFile))
|
||||
{
|
||||
try
|
||||
{
|
||||
File.Delete(tempFile);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Failed to delete temporary snapshot file {TempFile}", tempFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
502
src/ZB.MOM.WW.CBDDC.Network/TcpPeerClient.cs
Executable file
502
src/ZB.MOM.WW.CBDDC.Network/TcpPeerClient.cs
Executable file
@@ -0,0 +1,502 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Google.Protobuf;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Network.Proto;
|
||||
using ZB.MOM.WW.CBDDC.Network.Security;
|
||||
using ZB.MOM.WW.CBDDC.Network.Protocol;
|
||||
using ZB.MOM.WW.CBDDC.Network.Telemetry;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// Represents a TCP client connection to a remote peer for synchronization.
/// </summary>
public class TcpPeerClient : IDisposable
{
    private readonly TcpClient _client;
    private readonly string _peerAddress;
    private readonly ILogger<TcpPeerClient> _logger;
    private readonly IPeerHandshakeService? _handshakeService;
    private readonly INetworkTelemetryService? _telemetry;
    private readonly ProtocolHandler _protocol;
    private readonly object _connectionLock = new object();

    private NetworkStream? _stream;
    private CipherState? _cipherState;
    private bool _disposed = false;
    private bool _useCompression = false; // Negotiated after handshake
    private List<string> _remoteInterests = new();

    private const int ConnectionTimeoutMs = 5000;
    private const int OperationTimeoutMs = 30000;

    /// <summary>
    /// Gets a value indicating whether the client currently has an active connection.
    /// </summary>
    public bool IsConnected
    {
        get
        {
            lock (_connectionLock)
            {
                return _client != null && _client.Connected && _stream != null && !_disposed;
            }
        }
    }

    /// <summary>
    /// Gets a value indicating whether the handshake with the remote peer has completed successfully.
    /// </summary>
    public bool HasHandshaked { get; private set; }

    /// <summary>
    /// Gets the list of collections the remote peer is interested in.
    /// </summary>
    public System.Collections.Generic.IReadOnlyList<string> RemoteInterests => _remoteInterests.AsReadOnly();

    /// <summary>
    /// Initializes a new instance of the <see cref="TcpPeerClient"/> class.
    /// </summary>
    /// <param name="peerAddress">The remote peer address in <c>host:port</c> format.</param>
    /// <param name="logger">The logger used for connection and protocol events.</param>
    /// <param name="handshakeService">The optional handshake service used to establish secure sessions.</param>
    /// <param name="telemetry">The optional telemetry service for network metrics.</param>
    public TcpPeerClient(string peerAddress, ILogger<TcpPeerClient> logger, IPeerHandshakeService? handshakeService = null, INetworkTelemetryService? telemetry = null)
    {
        _client = new TcpClient();
        _peerAddress = peerAddress;
        _logger = logger;
        _handshakeService = handshakeService;
        _telemetry = telemetry;
        _protocol = new ProtocolHandler(logger, telemetry);
    }

    /// <summary>
    /// Connects to the configured remote peer.
    /// </summary>
    /// <param name="token">A token used to cancel the connection attempt.</param>
    /// <returns>A task that represents the asynchronous connect operation.</returns>
    /// <exception cref="ArgumentException">Thrown when the configured address is malformed.</exception>
    /// <exception cref="TimeoutException">Thrown when the connection attempt exceeds the connection timeout.</exception>
    /// <exception cref="ObjectDisposedException">Thrown when the client has been disposed.</exception>
    public async Task ConnectAsync(CancellationToken token)
    {
        lock (_connectionLock)
        {
            if (_disposed)
            {
                throw new ObjectDisposedException(nameof(TcpPeerClient));
            }

            if (IsConnected) return;
        }

        var parts = _peerAddress.Split(':');
        if (parts.Length != 2)
        {
            throw new ArgumentException($"Invalid address format: {_peerAddress}. Expected format: host:port");
        }

        if (!int.TryParse(parts[1], out int port) || port <= 0 || port > 65535)
        {
            throw new ArgumentException($"Invalid port number: {parts[1]}");
        }

        // Connect with timeout
        using var timeoutCts = new CancellationTokenSource(ConnectionTimeoutMs);
        using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(token, timeoutCts.Token);

        try
        {
            // FIX: the linked token was previously not passed to ConnectAsync, so neither the
            // caller's token nor the connection timeout could cancel the connect attempt and
            // the timeout catch filter below was unreachable.
            await _client.ConnectAsync(parts[0], port, linkedCts.Token);

            lock (_connectionLock)
            {
                if (_disposed)
                {
                    throw new ObjectDisposedException(nameof(TcpPeerClient));
                }

                _stream = _client.GetStream();

                // CRITICAL for Android: Disable Nagle's algorithm to prevent buffering delays
                // This ensures immediate packet transmission for handshake data
                _client.NoDelay = true;

                // Configure TCP keepalive
                _client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);

                // Set read/write timeouts
                _stream.ReadTimeout = OperationTimeoutMs;
                _stream.WriteTimeout = OperationTimeoutMs;
            }

            _logger.LogDebug("Connected to peer: {Address} (NoDelay=true for immediate send)", _peerAddress);
        }
        catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested)
        {
            throw new TimeoutException($"Connection to {_peerAddress} timed out after {ConnectionTimeoutMs}ms");
        }
    }

    /// <summary>
    /// Performs authentication handshake with the remote peer.
    /// </summary>
    /// <param name="myNodeId">The local node identifier.</param>
    /// <param name="authToken">The authentication token.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>True if handshake was accepted, false otherwise.</returns>
    public async Task<bool> HandshakeAsync(string myNodeId, string authToken, CancellationToken token)
    {
        return await HandshakeAsync(myNodeId, authToken, null, token);
    }

    /// <summary>
    /// Performs authentication handshake with the remote peer, including collection interests.
    /// Also performs the optional secure (cipher) handshake and negotiates compression.
    /// </summary>
    /// <param name="myNodeId">The local node identifier.</param>
    /// <param name="authToken">The authentication token.</param>
    /// <param name="interestingCollections">Optional collection names this node is interested in receiving.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns><see langword="true"/> if handshake was accepted; otherwise <see langword="false"/>.</returns>
    public async Task<bool> HandshakeAsync(string myNodeId, string authToken, IEnumerable<string>? interestingCollections, CancellationToken token)
    {
        if (HasHandshaked) return true;

        if (_handshakeService != null)
        {
            // Perform secure handshake if service is available
            // We assume we are initiator here
            _cipherState = await _handshakeService.HandshakeAsync(_stream!, true, myNodeId, token);
        }

        var req = new HandshakeRequest { NodeId = myNodeId, AuthToken = authToken ?? "" };

        if (interestingCollections != null)
        {
            foreach (var coll in interestingCollections)
            {
                req.InterestingCollections.Add(coll);
            }
        }

        if (CompressionHelper.IsBrotliSupported)
        {
            req.SupportedCompression.Add("brotli");
        }

        _logger.LogDebug("Sending HandshakeReq to {Address}", _peerAddress);
        await _protocol.SendMessageAsync(_stream!, MessageType.HandshakeReq, req, false, _cipherState, token);

        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        _logger.LogDebug("Received Handshake response type: {Type}", type);

        if (type != MessageType.HandshakeRes) return false;

        var res = HandshakeResponse.Parser.ParseFrom(payload);

        // Store remote interests
        _remoteInterests = res.InterestingCollections.ToList();

        // Negotiation Result
        if (res.SelectedCompression == "brotli")
        {
            _useCompression = true;
            _logger.LogInformation("Brotli compression negotiated.");
        }

        HasHandshaked = res.Accepted;
        return res.Accepted;
    }

    /// <summary>
    /// Retrieves the remote peer's latest HLC timestamp.
    /// </summary>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The latest remote hybrid logical clock timestamp.</returns>
    public async Task<HlcTimestamp> GetClockAsync(CancellationToken token)
    {
        using (_telemetry?.StartMetric(MetricType.RoundTripTime))
        {
            await _protocol.SendMessageAsync(_stream!, MessageType.GetClockReq, new GetClockRequest(), _useCompression, _cipherState, token);

            var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
            if (type != MessageType.ClockRes) throw new Exception("Unexpected response");

            var res = ClockResponse.Parser.ParseFrom(payload);
            return new HlcTimestamp(res.HlcWall, res.HlcLogic, res.HlcNode);
        }
    }

    /// <summary>
    /// Retrieves the remote peer's vector clock (latest timestamp per node).
    /// </summary>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The remote vector clock.</returns>
    public async Task<VectorClock> GetVectorClockAsync(CancellationToken token)
    {
        using (_telemetry?.StartMetric(MetricType.RoundTripTime))
        {
            await _protocol.SendMessageAsync(_stream!, MessageType.GetVectorClockReq, new GetVectorClockRequest(), _useCompression, _cipherState, token);

            var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
            if (type != MessageType.VectorClockRes) throw new Exception("Unexpected response");

            var res = VectorClockResponse.Parser.ParseFrom(payload);
            var vectorClock = new VectorClock();

            foreach (var entry in res.Entries)
            {
                vectorClock.SetTimestamp(entry.NodeId, new HlcTimestamp(entry.HlcWall, entry.HlcLogic, entry.NodeId));
            }

            return vectorClock;
        }
    }

    /// <summary>
    /// Pulls oplog changes from the remote peer since the specified timestamp.
    /// </summary>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    public async Task<List<OplogEntry>> PullChangesAsync(HlcTimestamp since, CancellationToken token)
    {
        return await PullChangesAsync(since, null, token);
    }

    /// <summary>
    /// Pulls oplog changes from the remote peer since the specified timestamp, filtered by collections.
    /// </summary>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="collections">Optional collection names used to filter the returned entries.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    public async Task<List<OplogEntry>> PullChangesAsync(HlcTimestamp since, IEnumerable<string>? collections, CancellationToken token)
    {
        var req = new PullChangesRequest
        {
            SinceWall = since.PhysicalTime,
            SinceLogic = since.LogicalCounter,
            // Empty SinceNode indicates a global pull (not source-node filtered).
            SinceNode = string.Empty
        };
        if (collections != null)
        {
            foreach (var coll in collections)
            {
                req.Collections.Add(coll);
            }
        }
        await _protocol.SendMessageAsync(_stream!, MessageType.PullChangesReq, req, _useCompression, _cipherState, token);

        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.ChangeSetRes) throw new Exception("Unexpected response");

        var res = ChangeSetResponse.Parser.ParseFrom(payload);

        return res.Entries.Select(MapEntry).ToList();
    }

    /// <summary>
    /// Pulls oplog changes for a specific node from the remote peer since the specified timestamp.
    /// </summary>
    /// <param name="nodeId">The node identifier to filter changes by.</param>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    public async Task<List<OplogEntry>> PullChangesFromNodeAsync(string nodeId, HlcTimestamp since, CancellationToken token)
    {
        return await PullChangesFromNodeAsync(nodeId, since, null, token);
    }

    /// <summary>
    /// Pulls oplog changes for a specific node from the remote peer since the specified timestamp, filtered by collections.
    /// </summary>
    /// <param name="nodeId">The node identifier to filter changes by.</param>
    /// <param name="since">The starting timestamp for requested changes.</param>
    /// <param name="collections">Optional collection names used to filter the returned entries.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The list of oplog entries returned by the remote peer.</returns>
    public async Task<List<OplogEntry>> PullChangesFromNodeAsync(string nodeId, HlcTimestamp since, IEnumerable<string>? collections, CancellationToken token)
    {
        var req = new PullChangesRequest
        {
            SinceNode = nodeId,
            SinceWall = since.PhysicalTime,
            SinceLogic = since.LogicalCounter
        };
        if (collections != null)
        {
            foreach (var coll in collections)
            {
                req.Collections.Add(coll);
            }
        }
        await _protocol.SendMessageAsync(_stream!, MessageType.PullChangesReq, req, _useCompression, _cipherState, token);

        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.ChangeSetRes) throw new Exception("Unexpected response");

        var res = ChangeSetResponse.Parser.ParseFrom(payload);

        return res.Entries.Select(MapEntry).ToList();
    }

    /// <summary>
    /// Retrieves a range of oplog entries connecting two hashes (Gap Recovery).
    /// </summary>
    /// <param name="startHash">The starting hash in the chain.</param>
    /// <param name="endHash">The ending hash in the chain.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>The chain entries connecting the requested hash range.</returns>
    /// <exception cref="SnapshotRequiredException">Thrown when the peer cannot serve the range and requires a full snapshot sync.</exception>
    public virtual async Task<List<OplogEntry>> GetChainRangeAsync(string startHash, string endHash, CancellationToken token)
    {
        var req = new GetChainRangeRequest { StartHash = startHash, EndHash = endHash };
        await _protocol.SendMessageAsync(_stream!, MessageType.GetChainRangeReq, req, _useCompression, _cipherState, token);

        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.ChainRangeRes) throw new Exception($"Unexpected response for ChainRange: {type}");

        var res = ChainRangeResponse.Parser.ParseFrom(payload);

        if (res.SnapshotRequired) throw new SnapshotRequiredException();

        return res.Entries.Select(MapEntry).ToList();
    }

    /// <summary>
    /// Pushes local oplog changes to the remote peer.
    /// </summary>
    /// <param name="entries">The oplog entries to push.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous push operation.</returns>
    /// <exception cref="SnapshotRequiredException">Thrown when the peer requires a full snapshot sync.</exception>
    public async Task PushChangesAsync(IEnumerable<OplogEntry> entries, CancellationToken token)
    {
        var req = new PushChangesRequest();
        var entryList = entries.ToList();
        if (entryList.Count == 0) return;

        foreach (var e in entryList)
        {
            req.Entries.Add(new ProtoOplogEntry
            {
                Collection = e.Collection,
                Key = e.Key,
                Operation = e.Operation.ToString(),
                JsonData = e.Payload?.GetRawText() ?? "",
                HlcWall = e.Timestamp.PhysicalTime,
                HlcLogic = e.Timestamp.LogicalCounter,
                HlcNode = e.Timestamp.NodeId,
                Hash = e.Hash,
                PreviousHash = e.PreviousHash
            });
        }

        await _protocol.SendMessageAsync(_stream!, MessageType.PushChangesReq, req, _useCompression, _cipherState, token);

        var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
        if (type != MessageType.AckRes) throw new Exception("Push failed");

        var res = AckResponse.Parser.ParseFrom(payload);
        if (res.SnapshotRequired) throw new SnapshotRequiredException();
        if (!res.Success) throw new Exception("Push failed");
    }

    /// <summary>
    /// Parses a wire-format operation name; unknown values default to Put.
    /// </summary>
    private static OperationType ParseOp(string op) => Enum.TryParse<OperationType>(op, out var val) ? val : OperationType.Put;

    /// <summary>
    /// Maps a wire-format oplog entry to a domain <see cref="OplogEntry"/>, preserving the
    /// received hash so the integrity reference survives the round-trip.
    /// FIX: extracted to remove the identical mapping triplicated across the pull and
    /// chain-range paths.
    /// </summary>
    /// <param name="e">The wire-format entry.</param>
    /// <returns>The equivalent domain oplog entry.</returns>
    private static OplogEntry MapEntry(ProtoOplogEntry e) => new OplogEntry(
        e.Collection,
        e.Key,
        ParseOp(e.Operation),
        string.IsNullOrEmpty(e.JsonData) ? default : System.Text.Json.JsonSerializer.Deserialize<System.Text.Json.JsonElement>(e.JsonData),
        new HlcTimestamp(e.HlcWall, e.HlcLogic, e.HlcNode),
        e.PreviousHash,
        e.Hash // Pass the received hash to preserve integrity reference
    );

    /// <summary>
    /// Downloads a full snapshot from the remote peer to the provided destination stream.
    /// </summary>
    /// <param name="destination">The stream that receives snapshot bytes.</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>A task that represents the asynchronous snapshot transfer operation.</returns>
    public async Task GetSnapshotAsync(Stream destination, CancellationToken token)
    {
        await _protocol.SendMessageAsync(_stream!, MessageType.GetSnapshotReq, new GetSnapshotRequest(), _useCompression, _cipherState, token);

        while (true)
        {
            var (type, payload) = await _protocol.ReadMessageAsync(_stream!, _cipherState, token);
            if (type != MessageType.SnapshotChunkMsg) throw new Exception($"Unexpected message type during snapshot: {type}");

            var chunk = SnapshotChunk.Parser.ParseFrom(payload);
            if (chunk.Data.Length > 0)
            {
                await destination.WriteAsync(chunk.Data.ToByteArray(), 0, chunk.Data.Length, token);
            }

            if (chunk.IsLast) break;
        }
    }

    /// <summary>
    /// Releases resources used by the peer client.
    /// </summary>
    public void Dispose()
    {
        lock (_connectionLock)
        {
            if (_disposed) return;
            _disposed = true;
        }

        try
        {
            _stream?.Dispose();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error disposing network stream");
        }

        try
        {
            _client?.Dispose();
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Error disposing TCP client");
        }

        _logger.LogDebug("Disposed connection to peer: {Address}", _peerAddress);
    }
}
|
||||
|
||||
/// <summary>
/// Exception thrown when a peer cannot serve the requested incremental history and a full
/// snapshot synchronization is required instead.
/// </summary>
public class SnapshotRequiredException : Exception
{
    /// <summary>
    /// Initializes a new instance of the <see cref="SnapshotRequiredException"/> class.
    /// </summary>
    public SnapshotRequiredException() : base("Peer requires a full snapshot sync.") { }

    /// <summary>
    /// Initializes a new instance of the <see cref="SnapshotRequiredException"/> class with a custom message.
    /// </summary>
    /// <param name="message">The error message describing why a snapshot is required.</param>
    public SnapshotRequiredException(string message) : base(message) { }

    /// <summary>
    /// Initializes a new instance of the <see cref="SnapshotRequiredException"/> class with a custom message and inner exception.
    /// </summary>
    /// <param name="message">The error message describing why a snapshot is required.</param>
    /// <param name="innerException">The exception that triggered the snapshot requirement.</param>
    public SnapshotRequiredException(string message, Exception innerException) : base(message, innerException) { }
}
|
||||
474
src/ZB.MOM.WW.CBDDC.Network/TcpSyncServer.cs
Executable file
474
src/ZB.MOM.WW.CBDDC.Network/TcpSyncServer.cs
Executable file
@@ -0,0 +1,474 @@
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Network.Proto;
|
||||
using ZB.MOM.WW.CBDDC.Network.Security;
|
||||
using ZB.MOM.WW.CBDDC.Network.Protocol;
|
||||
using ZB.MOM.WW.CBDDC.Network.Telemetry;
|
||||
using Google.Protobuf;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Serilog.Context;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
/// TCP server that handles incoming synchronization requests from remote peers.
/// </summary>
internal class TcpSyncServer : ISyncServer
{
    private readonly IOplogStore _oplogStore;
    private readonly IDocumentStore _documentStore;
    private readonly ISnapshotService _snapshotStore;
    private readonly ILogger<TcpSyncServer> _logger;
    private readonly IPeerNodeConfigurationProvider _configProvider;
    private CancellationTokenSource? _cts;
    private TcpListener? _listener;
    private readonly object _startStopLock = new object();
    private int _activeConnections = 0;

    // Maximum number of concurrently served clients; internal so tests can tune it.
    internal int MaxConnections = 100;
    private const int ClientOperationTimeoutMs = 60000;

    private readonly IAuthenticator _authenticator;
    private readonly IPeerHandshakeService _handshakeService;
    private readonly INetworkTelemetryService? _telemetry;

    /// <summary>
    /// Initializes a new instance of the TcpSyncServer class with the specified peer oplogStore, configuration provider,
    /// logger, and authenticator.
    /// </summary>
    /// <remarks>The server automatically restarts when the configuration provided by
    /// peerNodeConfigurationProvider changes. This ensures that configuration updates are applied without requiring
    /// manual intervention.</remarks>
    /// <param name="oplogStore">The peer oplogStore used to manage and persist peer information for the server.</param>
    /// <param name="documentStore">The document store used to read and apply synchronized documents.</param>
    /// <param name="snapshotStore">The snapshot store used to create and manage database snapshots for synchronization.</param>
    /// <param name="peerNodeConfigurationProvider">The provider that supplies configuration settings for the peer node and notifies the server of configuration
    /// changes.</param>
    /// <param name="logger">The logger used to record informational and error messages for the server instance.</param>
    /// <param name="authenticator">The authenticator responsible for validating peer connections to the server.</param>
    /// <param name="handshakeService">The service used to perform secure handshake (optional).</param>
    /// <param name="telemetry">The optional telemetry service used to record network performance metrics.</param>
    public TcpSyncServer(
        IOplogStore oplogStore,
        IDocumentStore documentStore,
        ISnapshotService snapshotStore,
        IPeerNodeConfigurationProvider peerNodeConfigurationProvider,
        ILogger<TcpSyncServer> logger,
        IAuthenticator authenticator,
        IPeerHandshakeService handshakeService,
        INetworkTelemetryService? telemetry = null)
    {
        _oplogStore = oplogStore;
        _documentStore = documentStore;
        _snapshotStore = snapshotStore;
        _logger = logger;
        _authenticator = authenticator;
        _handshakeService = handshakeService;
        _configProvider = peerNodeConfigurationProvider;
        _telemetry = telemetry;
        _configProvider.ConfigurationChanged += async (s, e) =>
        {
            // This lambda is effectively an async void event handler: an unhandled
            // exception here would be unobservable (and can crash the process), so
            // the restart is wrapped in its own try/catch.
            try
            {
                _logger.LogInformation("Configuration changed, restarting TCP Sync Server...");
                await Stop();
                await Start();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to restart TCP Sync Server after configuration change");
            }
        };
    }

    /// <summary>
    /// Starts the TCP synchronization server and begins listening for incoming connections asynchronously.
    /// </summary>
    /// <remarks>If the server is already running, this method returns immediately without starting a new
    /// listener. The server will listen on the TCP port specified in the current configuration.</remarks>
    /// <returns>A task that represents the asynchronous start operation.</returns>
    public async Task Start()
    {
        var config = await _configProvider.GetConfiguration();

        TcpListener listener;
        CancellationToken token;

        lock (_startStopLock)
        {
            if (_cts != null)
            {
                _logger.LogWarning("TCP Sync Server already started");
                return;
            }
            _cts = new CancellationTokenSource();
            // Capture the token and publish the listener while still holding the
            // lock: reading _cts.Token or assigning _listener after releasing it
            // races a concurrent Stop() that nulls both fields.
            token = _cts.Token;
            listener = new TcpListener(IPAddress.Any, config.TcpPort);
            _listener = listener;
        }

        listener.Start();

        _logger.LogInformation("TCP Sync Server Listening on port {Port}", config.TcpPort);

        _ = Task.Run(async () =>
        {
            try
            {
                await ListenAsync(token);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "TCP Listen task failed");
            }
        }, token);

        await Task.CompletedTask;
    }

    /// <summary>
    /// Stops the listener and cancels any pending operations.
    /// </summary>
    /// <remarks>After calling this method, the listener will no longer accept new connections or process
    /// requests. This method is safe to call multiple times; subsequent calls have no effect if the listener is already
    /// stopped.</remarks>
    /// <returns>A task that represents the asynchronous stop operation.</returns>
    public async Task Stop()
    {
        CancellationTokenSource? ctsToDispose = null;
        TcpListener? listenerToStop = null;

        lock (_startStopLock)
        {
            if (_cts == null)
            {
                _logger.LogWarning("TCP Sync Server already stopped or never started");
                return;
            }

            ctsToDispose = _cts;
            listenerToStop = _listener;
            _cts = null;
            _listener = null;
        }

        try
        {
            ctsToDispose.Cancel();
        }
        catch (ObjectDisposedException)
        {
            // Already disposed, ignore
        }
        finally
        {
            ctsToDispose.Dispose();
        }

        listenerToStop?.Stop();

        await Task.CompletedTask;
    }

    /// <summary>
    /// Gets the full local endpoint on which the server is listening.
    /// </summary>
    public IPEndPoint? ListeningEndpoint => _listener?.LocalEndpoint as IPEndPoint;

    /// <summary>
    /// Gets the port on which the server is listening.
    /// </summary>
    public int? ListeningPort => ListeningEndpoint?.Port;

    private async Task ListenAsync(CancellationToken token)
    {
        while (!token.IsCancellationRequested)
        {
            try
            {
                // Copy the field once: Stop() may null _listener between a
                // null-check on the field and a subsequent dereference.
                var listener = _listener;
                if (listener == null) break;
                var client = await listener.AcceptTcpClientAsync();

                if (_activeConnections >= MaxConnections)
                {
                    _logger.LogWarning("Max connections reached ({Max}). Rejecting client.", MaxConnections);
                    client.Close();
                    continue;
                }

                Interlocked.Increment(ref _activeConnections);

                _ = Task.Run(async () =>
                {
                    try
                    {
                        await HandleClientAsync(client, token);
                    }
                    finally
                    {
                        Interlocked.Decrement(ref _activeConnections);
                    }
                }, token);
            }
            catch (ObjectDisposedException) { break; } // Listener was stopped by Stop()
            catch (Exception ex)
            {
                _logger.LogError(ex, "TCP Accept Error");
            }
        }
    }

    private async Task HandleClientAsync(TcpClient client, CancellationToken token)
    {
        var remoteEp = client.Client.RemoteEndPoint;
        using var operationContext = LogContext.PushProperty("OperationId", Guid.NewGuid().ToString("N"));
        using var endpointContext = LogContext.PushProperty("RemoteEndpoint", remoteEp?.ToString() ?? "unknown");
        _logger.LogDebug("Client Connected: {Endpoint}", remoteEp);

        try
        {
            using (client)
            using (var stream = client.GetStream())
            {
                // CRITICAL for Android: Disable Nagle's algorithm for immediate packet send
                client.NoDelay = true;

                // Configure TCP keepalive
                client.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true);

                // Set stream timeouts
                stream.ReadTimeout = ClientOperationTimeoutMs;
                stream.WriteTimeout = ClientOperationTimeoutMs;

                var protocol = new ProtocolHandler(_logger, _telemetry);

                bool useCompression = false;
                CipherState? cipherState = null;
                List<string> remoteInterests = new();

                // Perform Secure Handshake (if service is available)
                var config = await _configProvider.GetConfiguration();
                if (_handshakeService != null)
                {
                    try
                    {
                        // We are NOT initiator
                        _logger.LogDebug("Starting Secure Handshake as Responder.");
                        cipherState = await _handshakeService.HandshakeAsync(stream, false, config.NodeId, token);
                        _logger.LogDebug("Secure Handshake Completed.");
                    }
                    catch (Exception ex)
                    {
                        _logger.LogError(ex, "Secure Handshake failed");
                        return;
                    }
                }

                while (client.Connected && !token.IsCancellationRequested)
                {
                    // Re-fetch config if needed, though usually stable
                    config = await _configProvider.GetConfiguration();

                    var (type, payload) = await protocol.ReadMessageAsync(stream, cipherState, token);
                    if (type == MessageType.Unknown) break; // EOF or Error

                    // Handshake Loop
                    if (type == MessageType.HandshakeReq)
                    {
                        var hReq = HandshakeRequest.Parser.ParseFrom(payload);
                        _logger.LogDebug("Received HandshakeReq from Node {NodeId}", hReq.NodeId);

                        // Track remote peer interests
                        remoteInterests = hReq.InterestingCollections.ToList();

                        bool valid = await _authenticator.ValidateAsync(hReq.NodeId, hReq.AuthToken);
                        if (!valid)
                        {
                            _logger.LogWarning("Authentication failed for Node {NodeId}", hReq.NodeId);
                            await protocol.SendMessageAsync(stream, MessageType.HandshakeRes, new HandshakeResponse { NodeId = config.NodeId, Accepted = false }, false, cipherState, token);
                            return;
                        }

                        var hRes = new HandshakeResponse { NodeId = config.NodeId, Accepted = true };

                        // Include local interests from IDocumentStore in response for push filtering
                        foreach (var coll in _documentStore.InterestedCollection)
                        {
                            hRes.InterestingCollections.Add(coll);
                        }

                        if (CompressionHelper.IsBrotliSupported && hReq.SupportedCompression.Contains("brotli"))
                        {
                            hRes.SelectedCompression = "brotli";
                            useCompression = true;
                        }

                        await protocol.SendMessageAsync(stream, MessageType.HandshakeRes, hRes, false, cipherState, token);
                        continue;
                    }

                    IMessage? response = null;
                    MessageType resType = MessageType.Unknown;

                    switch (type)
                    {
                        case MessageType.GetClockReq:
                            var clock = await _oplogStore.GetLatestTimestampAsync(token);
                            response = new ClockResponse
                            {
                                HlcWall = clock.PhysicalTime,
                                HlcLogic = clock.LogicalCounter,
                                HlcNode = clock.NodeId
                            };
                            resType = MessageType.ClockRes;
                            break;

                        case MessageType.GetVectorClockReq:
                            var vectorClock = await _oplogStore.GetVectorClockAsync(token);
                            var vcRes = new VectorClockResponse();
                            foreach (var nodeId in vectorClock.NodeIds)
                            {
                                var ts = vectorClock.GetTimestamp(nodeId);
                                vcRes.Entries.Add(new VectorClockEntry
                                {
                                    NodeId = nodeId,
                                    HlcWall = ts.PhysicalTime,
                                    HlcLogic = ts.LogicalCounter
                                });
                            }
                            response = vcRes;
                            resType = MessageType.VectorClockRes;
                            break;

                        case MessageType.PullChangesReq:
                            var pReq = PullChangesRequest.Parser.ParseFrom(payload);
                            var since = new HlcTimestamp(pReq.SinceWall, pReq.SinceLogic, pReq.SinceNode);

                            // Use collection filter from request
                            var filter = pReq.Collections.Any() ? pReq.Collections : null;
                            var oplog = string.IsNullOrWhiteSpace(pReq.SinceNode)
                                ? await _oplogStore.GetOplogAfterAsync(since, filter, token)
                                : await _oplogStore.GetOplogForNodeAfterAsync(pReq.SinceNode, since, filter, token);

                            var csRes = new ChangeSetResponse();
                            foreach (var e in oplog)
                            {
                                csRes.Entries.Add(new ProtoOplogEntry
                                {
                                    Collection = e.Collection,
                                    Key = e.Key,
                                    Operation = e.Operation.ToString(),
                                    JsonData = e.Payload?.GetRawText() ?? "",
                                    HlcWall = e.Timestamp.PhysicalTime,
                                    HlcLogic = e.Timestamp.LogicalCounter,
                                    HlcNode = e.Timestamp.NodeId,
                                    Hash = e.Hash,
                                    PreviousHash = e.PreviousHash
                                });
                            }
                            response = csRes;
                            resType = MessageType.ChangeSetRes;
                            break;

                        case MessageType.PushChangesReq:
                            var pushReq = PushChangesRequest.Parser.ParseFrom(payload);
                            var entries = pushReq.Entries.Select(e => new OplogEntry(
                                e.Collection,
                                e.Key,
                                (OperationType)Enum.Parse(typeof(OperationType), e.Operation),
                                string.IsNullOrEmpty(e.JsonData) ? (System.Text.Json.JsonElement?)null : System.Text.Json.JsonSerializer.Deserialize<System.Text.Json.JsonElement>(e.JsonData),
                                new HlcTimestamp(e.HlcWall, e.HlcLogic, e.HlcNode),
                                e.PreviousHash, // Restore PreviousHash
                                e.Hash // Restore Hash
                            ));

                            await _oplogStore.ApplyBatchAsync(entries, token);

                            response = new AckResponse { Success = true };
                            resType = MessageType.AckRes;
                            break;

                        case MessageType.GetChainRangeReq:
                            var rangeReq = GetChainRangeRequest.Parser.ParseFrom(payload);
                            var rangeEntries = await _oplogStore.GetChainRangeAsync(rangeReq.StartHash, rangeReq.EndHash, token);
                            var rangeRes = new ChainRangeResponse();

                            if (!rangeEntries.Any() && rangeReq.StartHash != rangeReq.EndHash)
                            {
                                // Gap cannot be filled (likely pruned or unknown branch)
                                rangeRes.SnapshotRequired = true;
                            }
                            else
                            {
                                foreach (var e in rangeEntries)
                                {
                                    rangeRes.Entries.Add(new ProtoOplogEntry
                                    {
                                        Collection = e.Collection,
                                        Key = e.Key,
                                        Operation = e.Operation.ToString(),
                                        JsonData = e.Payload?.GetRawText() ?? "",
                                        HlcWall = e.Timestamp.PhysicalTime,
                                        HlcLogic = e.Timestamp.LogicalCounter,
                                        HlcNode = e.Timestamp.NodeId,
                                        Hash = e.Hash,
                                        PreviousHash = e.PreviousHash
                                    });
                                }
                            }
                            response = rangeRes;
                            resType = MessageType.ChainRangeRes;
                            break;

                        case MessageType.GetSnapshotReq:
                            _logger.LogInformation("Processing GetSnapshotReq from {Endpoint}", remoteEp);
                            var tempFile = Path.GetTempFileName();
                            try
                            {
                                // Create backup
                                using (var fs = File.Create(tempFile))
                                {
                                    await _snapshotStore.CreateSnapshotAsync(fs, token);
                                }

                                using (var fs = File.OpenRead(tempFile))
                                {
                                    byte[] buffer = new byte[80 * 1024]; // 80KB chunks
                                    int bytesRead;
                                    while ((bytesRead = await fs.ReadAsync(buffer, 0, buffer.Length, token)) > 0)
                                    {
                                        var chunk = new SnapshotChunk
                                        {
                                            Data = ByteString.CopyFrom(buffer, 0, bytesRead),
                                            IsLast = false
                                        };
                                        await protocol.SendMessageAsync(stream, MessageType.SnapshotChunkMsg, chunk, false, cipherState, token);
                                    }

                                    // Send End of Snapshot
                                    await protocol.SendMessageAsync(stream, MessageType.SnapshotChunkMsg, new SnapshotChunk { IsLast = true }, false, cipherState, token);
                                }
                            }
                            finally
                            {
                                if (File.Exists(tempFile)) File.Delete(tempFile);
                            }
                            break;
                    }

                    if (response != null)
                    {
                        await protocol.SendMessageAsync(stream, resType, response, useCompression, cipherState, token);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Pass the exception object so the logger captures the stack trace,
            // not just the message text.
            _logger.LogWarning(ex, "Client Handler Error from {Endpoint}", remoteEp);
        }
        finally
        {
            _logger.LogDebug("Client Disconnected: {Endpoint}", remoteEp);
        }
    }
}
|
||||
60
src/ZB.MOM.WW.CBDDC.Network/Telemetry/INetworkTelemetryService.cs
Executable file
60
src/ZB.MOM.WW.CBDDC.Network/Telemetry/INetworkTelemetryService.cs
Executable file
@@ -0,0 +1,60 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Telemetry;
|
||||
|
||||
/// <summary>
/// Records and aggregates network performance metrics. Implementations accept
/// individual metric samples and expose rolling aggregates via <see cref="GetSnapshot"/>.
/// </summary>
public interface INetworkTelemetryService
{
    /// <summary>
    /// Records a metric value for the specified metric type.
    /// </summary>
    /// <param name="type">The metric type to record.</param>
    /// <param name="value">The metric value.</param>
    void RecordValue(MetricType type, double value);

    /// <summary>
    /// Starts timing a metric for the specified metric type.
    /// </summary>
    /// <param name="type">The metric type to time.</param>
    /// <returns>A timer that records elapsed time when disposed.</returns>
    MetricTimer StartMetric(MetricType type);

    /// <summary>
    /// Gets a snapshot of all recorded metric values.
    /// </summary>
    /// <returns>A dictionary of aggregated values keyed by metric type; the inner
    /// dictionary is keyed by aggregation window length in seconds.</returns>
    System.Collections.Generic.Dictionary<MetricType, System.Collections.Generic.Dictionary<int, double>> GetSnapshot();
}
|
||||
|
||||
/// <summary>
/// Disposable stopwatch that measures the elapsed time between construction and
/// <see cref="Dispose"/> and reports it, in milliseconds, to the owning
/// <see cref="INetworkTelemetryService"/>. Intended for use in a <c>using</c> scope.
/// </summary>
public readonly struct MetricTimer : IDisposable
{
    private readonly INetworkTelemetryService _service;
    private readonly MetricType _type;
    private readonly long _startTimestamp;

    /// <summary>
    /// Initializes a new metric timer.
    /// </summary>
    /// <param name="service">The telemetry service that receives the recorded value.</param>
    /// <param name="type">The metric type being timed.</param>
    public MetricTimer(INetworkTelemetryService service, MetricType type)
    {
        _service = service;
        _type = type;
        _startTimestamp = Stopwatch.GetTimestamp();
    }

    /// <summary>
    /// Stops timing and records the elapsed duration in milliseconds.
    /// </summary>
    public void Dispose()
    {
        // Guard against default(MetricTimer): a default-constructed struct has a
        // null service reference and would otherwise throw NullReferenceException.
        if (_service == null) return;

        var elapsed = Stopwatch.GetTimestamp() - _startTimestamp;
        // Stopwatch ticks are hardware-dependent; normalize via Stopwatch.Frequency
        // so the recorded value is always milliseconds.
        double ms = (double)elapsed * 1000 / Stopwatch.Frequency;
        _service.RecordValue(_type, ms);
    }
}
|
||||
9
src/ZB.MOM.WW.CBDDC.Network/Telemetry/MetricType.cs
Executable file
9
src/ZB.MOM.WW.CBDDC.Network/Telemetry/MetricType.cs
Executable file
@@ -0,0 +1,9 @@
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Telemetry;
|
||||
|
||||
/// <summary>
/// Categories of network metrics recorded through <see cref="INetworkTelemetryService"/>.
/// Values are contiguous from zero because they are used as array indices by the
/// aggregation buckets.
/// </summary>
public enum MetricType
{
    /// <summary>Compression ratio of transferred payloads.</summary>
    CompressionRatio = 0,
    /// <summary>Time spent encrypting outgoing data.</summary>
    EncryptionTime = 1,
    /// <summary>Time spent decrypting incoming data.</summary>
    DecryptionTime = 2,
    /// <summary>Round-trip latency of a request/response exchange.</summary>
    RoundTripTime = 3
}
|
||||
283
src/ZB.MOM.WW.CBDDC.Network/Telemetry/NetworkTelemetryService.cs
Executable file
283
src/ZB.MOM.WW.CBDDC.Network/Telemetry/NetworkTelemetryService.cs
Executable file
@@ -0,0 +1,283 @@
|
||||
using System;
|
||||
using System.Buffers;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Channels;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Extensions.Logging;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network.Telemetry;
|
||||
|
||||
/// <summary>
/// Default implementation of <see cref="INetworkTelemetryService"/>. Samples are
/// queued on a channel, aggregated into per-second buckets by a background loop,
/// and periodically persisted to a binary snapshot file.
/// </summary>
public class NetworkTelemetryService : INetworkTelemetryService, IDisposable
{
    private readonly Channel<(MetricType Type, double Value)> _metricChannel;
    private readonly CancellationTokenSource _cts;
    private readonly ILogger<NetworkTelemetryService> _logger;
    private readonly string _persistencePath;

    // Aggregation state: 30 minutes of history at 1-second resolution = 1800 buckets.
    private const int MaxHistorySeconds = 1800;

    private readonly object _lock = new object();
    private readonly MetricBucket[] _history;
    private int _headIndex = 0; // Points to the bucket for the current second
    private long _currentSecondTimestamp; // Unix timestamp of current bucket

    /// <summary>
    /// Initializes a new instance of the <see cref="NetworkTelemetryService"/> class.
    /// </summary>
    /// <param name="logger">The logger used to report telemetry processing and persistence errors.</param>
    /// <param name="persistencePath">The file path where persisted telemetry snapshots are written.</param>
    public NetworkTelemetryService(ILogger<NetworkTelemetryService> logger, string persistencePath)
    {
        _logger = logger;
        _persistencePath = persistencePath;
        _metricChannel = Channel.CreateUnbounded<(MetricType, double)>(new UnboundedChannelOptions
        {
            SingleReader = true,
            SingleWriter = false
        });
        _cts = new CancellationTokenSource();

        _history = new MetricBucket[MaxHistorySeconds];
        for (int i = 0; i < MaxHistorySeconds; i++) _history[i] = new MetricBucket();

        _currentSecondTimestamp = DateTimeOffset.UtcNow.ToUnixTimeSeconds();

        _ = Task.Run(ProcessMetricsLoop);
        _ = Task.Run(PersistenceLoop);
    }

    /// <summary>
    /// Records a metric value for the specified metric type.
    /// </summary>
    /// <param name="type">The metric category to update.</param>
    /// <param name="value">The metric value to record.</param>
    public void RecordValue(MetricType type, double value)
    {
        // TryWrite never fails on an unbounded channel unless the writer was completed.
        _metricChannel.Writer.TryWrite((type, value));
    }

    /// <summary>
    /// Starts a timer for the specified metric type.
    /// </summary>
    /// <param name="type">The metric category to time.</param>
    /// <returns>A metric timer that records elapsed time when disposed.</returns>
    public MetricTimer StartMetric(MetricType type)
    {
        return new MetricTimer(this, type);
    }

    /// <summary>
    /// Gets a point-in-time snapshot of rolling averages for each metric type.
    /// </summary>
    /// <returns>A dictionary keyed by metric type containing average values by window size in seconds.</returns>
    public Dictionary<MetricType, Dictionary<int, double>> GetSnapshot()
    {
        var snapshot = new Dictionary<MetricType, Dictionary<int, double>>();
        var windows = new[] { 60, 300, 600, 1800 };

        lock (_lock)
        {
            foreach (var type in Enum.GetValues(typeof(MetricType)).Cast<MetricType>())
            {
                var typeDict = new Dictionary<int, double>();
                foreach (var w in windows)
                {
                    typeDict[w] = CalculateAverage(type, w);
                }
                snapshot[type] = typeDict;
            }
        }
        return snapshot;
    }

    // Drains the metric channel and folds each sample into the current bucket.
    private async Task ProcessMetricsLoop()
    {
        var reader = _metricChannel.Reader;
        while (!_cts.IsCancellationRequested)
        {
            try
            {
                if (await reader.WaitToReadAsync(_cts.Token))
                {
                    while (reader.TryRead(out var item))
                    {
                        AddMetricToCurrentBucket(item.Type, item.Value);
                    }
                }
            }
            catch (OperationCanceledException) { break; }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error processing metrics");
            }
        }
    }

    private void AddMetricToCurrentBucket(MetricType type, double value)
    {
        long now = DateTimeOffset.UtcNow.ToUnixTimeSeconds();

        lock (_lock)
        {
            // Rotate bucket if second changed
            if (now > _currentSecondTimestamp)
            {
                long diff = now - _currentSecondTimestamp;
                // Move head forward, clearing buckets in between if gap > 1s
                for (int i = 0; i < diff && i < MaxHistorySeconds; i++)
                {
                    _headIndex = (_headIndex + 1) % MaxHistorySeconds;
                    _history[_headIndex].Reset();
                }
                _currentSecondTimestamp = now;
            }

            _history[_headIndex].Add(type, value);
        }
    }

    // Periodically snapshots aggregates to disk until cancelled.
    private async Task PersistenceLoop()
    {
        while (!_cts.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(TimeSpan.FromMinutes(1), _cts.Token);
                CalculateAndPersist();
            }
            catch (OperationCanceledException) { break; }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error persisting metrics");
            }
        }
    }

    private void CalculateAndPersist()
    {
        var windows = new[] { 60, 300, 600, 1800 }; // 1m, 5m, 10m, 30m
        var types = Enum.GetValues(typeof(MetricType)).Cast<MetricType>().ToArray();
        var averages = new double[types.Length, windows.Length];

        // Compute averages under the lock, but keep file IO outside it so a slow
        // disk cannot stall metric ingestion (AddMetricToCurrentBucket takes the
        // same lock on every sample).
        lock (_lock)
        {
            for (int t = 0; t < types.Length; t++)
            {
                for (int w = 0; w < windows.Length; w++)
                {
                    averages[t, w] = CalculateAverage(types[t], windows[w]);
                }
            }
        }

        using var fs = new FileStream(_persistencePath, FileMode.Create, FileAccess.Write);
        using var bw = new BinaryWriter(fs);

        // Header
        bw.Write((byte)1); // Version
        bw.Write(DateTimeOffset.UtcNow.ToUnixTimeSeconds()); // Timestamp

        for (int t = 0; t < types.Length; t++)
        {
            bw.Write((int)types[t]);
            for (int w = 0; w < windows.Length; w++)
            {
                bw.Write(windows[w]); // Window Seconds
                bw.Write(averages[t, w]); // Average Value
            }
        }
    }

    /// <summary>
    /// Forces immediate calculation and persistence of telemetry data.
    /// </summary>
    internal void ForcePersist()
    {
        CalculateAndPersist();
    }

    // Averages all samples of the given type over the trailing window.
    // Caller must hold _lock.
    private double CalculateAverage(MetricType type, int seconds)
    {
        // Go backwards from head
        double sum = 0;
        int count = 0;
        int scanned = 0;

        int idx = _headIndex;

        while (scanned < seconds && scanned < MaxHistorySeconds)
        {
            var bucket = _history[idx];
            sum += bucket.GetSum(type);
            count += bucket.GetCount(type);

            idx--;
            if (idx < 0) idx = MaxHistorySeconds - 1;
            scanned++;
        }

        return count == 0 ? 0 : sum / count;
    }

    /// <summary>
    /// Releases resources used by the telemetry service.
    /// </summary>
    public void Dispose()
    {
        // Complete the writer so a reader blocked in WaitToReadAsync observes
        // completion even if it raced the cancellation.
        _metricChannel.Writer.TryComplete();
        _cts.Cancel();
        _cts.Dispose();
    }
}
|
||||
|
||||
/// <summary>
/// One-second accumulator of metric samples: a running sum and sample count per
/// metric type, indexed by the enum's integer value. Thread safety is provided
/// by the caller (the telemetry service's global lock).
/// </summary>
internal class MetricBucket
{
    private readonly double[] _sums;
    private readonly int[] _counts;

    /// <summary>
    /// Initializes a new instance of the <see cref="MetricBucket"/> class with one
    /// slot per <see cref="MetricType"/> value.
    /// </summary>
    public MetricBucket()
    {
        int slots = Enum.GetValues(typeof(MetricType)).Length;
        _sums = new double[slots];
        _counts = new int[slots];
    }

    /// <summary>
    /// Clears every accumulated sum and count so the bucket can be reused.
    /// </summary>
    public void Reset()
    {
        Array.Clear(_sums, 0, _sums.Length);
        Array.Clear(_counts, 0, _counts.Length);
    }

    /// <summary>
    /// Accumulates a single sample for the given metric type.
    /// </summary>
    /// <param name="type">The metric category to update.</param>
    /// <param name="value">The value to accumulate.</param>
    public void Add(MetricType type, double value)
    {
        var slot = (int)type;
        _sums[slot] += value;
        _counts[slot] += 1;
    }

    /// <summary>
    /// Returns the accumulated sum for <paramref name="type"/>.
    /// </summary>
    /// <param name="type">The metric category to read.</param>
    /// <returns>The accumulated sum for the specified metric type.</returns>
    public double GetSum(MetricType type) => _sums[(int)type];

    /// <summary>
    /// Returns the number of samples recorded for <paramref name="type"/>.
    /// </summary>
    /// <param name="type">The metric category to read.</param>
    /// <returns>The accumulated sample count for the specified metric type.</returns>
    public int GetCount(MetricType type) => _counts[(int)type];
}
|
||||
317
src/ZB.MOM.WW.CBDDC.Network/UdpDiscoveryService.cs
Executable file
317
src/ZB.MOM.WW.CBDDC.Network/UdpDiscoveryService.cs
Executable file
@@ -0,0 +1,317 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Network;
|
||||
|
||||
/// <summary>
|
||||
/// Provides UDP-based peer discovery for the CBDDC network.
|
||||
/// Broadcasts presence beacons and listens for other nodes on the local network.
|
||||
/// </summary>
|
||||
internal class UdpDiscoveryService : IDiscoveryService
|
||||
{
|
||||
private const int DiscoveryPort = 25000;
|
||||
private readonly ILogger<UdpDiscoveryService> _logger;
|
||||
private readonly IPeerNodeConfigurationProvider _configProvider;
|
||||
private readonly IDocumentStore _documentStore;
|
||||
private CancellationTokenSource? _cts;
|
||||
private readonly ConcurrentDictionary<string, PeerNode> _activePeers = new();
|
||||
private readonly object _startStopLock = new object();
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="UdpDiscoveryService"/> class.
|
||||
/// </summary>
|
||||
/// <param name="peerNodeConfigurationProvider">Provider for peer node configuration.</param>
|
||||
/// <param name="documentStore">Document store used to obtain collection interests.</param>
|
||||
/// <param name="logger">Logger for discovery service events.</param>
|
||||
public UdpDiscoveryService(
|
||||
IPeerNodeConfigurationProvider peerNodeConfigurationProvider,
|
||||
IDocumentStore documentStore,
|
||||
ILogger<UdpDiscoveryService> logger)
|
||||
{
|
||||
_configProvider = peerNodeConfigurationProvider ?? throw new ArgumentNullException(nameof(peerNodeConfigurationProvider));
|
||||
_documentStore = documentStore ?? throw new ArgumentNullException(nameof(documentStore));
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Starts the discovery service, initiating listener, broadcaster, and cleanup tasks.
|
||||
/// </summary>
|
||||
public async Task Start()
|
||||
{
|
||||
lock (_startStopLock)
|
||||
{
|
||||
if (_cts != null)
|
||||
{
|
||||
_logger.LogWarning("UDP Discovery Service already started");
|
||||
return;
|
||||
}
|
||||
_cts = new CancellationTokenSource();
|
||||
}
|
||||
|
||||
var token = _cts.Token;
|
||||
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
await ListenAsync(token);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "UDP Listen task failed");
|
||||
}
|
||||
}, token);
|
||||
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
await BroadcastAsync(token);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "UDP Broadcast task failed");
|
||||
}
|
||||
}, token);
|
||||
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
try
|
||||
{
|
||||
await CleanupAsync(token);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "UDP Cleanup task failed");
|
||||
}
|
||||
}, token);
|
||||
|
||||
await Task.CompletedTask;
|
||||
}
|
||||
|
||||
// ... Stop ...
|
||||
|
||||
private async Task CleanupAsync(CancellationToken token)
|
||||
{
|
||||
while (!token.IsCancellationRequested)
|
||||
{
|
||||
try
|
||||
{
|
||||
await Task.Delay(10000, token); // Check every 10s
|
||||
var now = DateTimeOffset.UtcNow;
|
||||
var expired = new List<string>();
|
||||
|
||||
foreach (var pair in _activePeers)
|
||||
{
|
||||
// Expiry: 15 seconds (broadcast is every 5s, so 3 missed beats = dead)
|
||||
if ((now - pair.Value.LastSeen).TotalSeconds > 15)
|
||||
{
|
||||
expired.Add(pair.Key);
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var id in expired)
|
||||
{
|
||||
if (_activePeers.TryRemove(id, out var removed))
|
||||
{
|
||||
_logger.LogInformation("Peer Expired: {NodeId} at {Endpoint}", removed.NodeId, removed.Address);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException) { break; }
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogError(ex, "Cleanup Loop Error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ... Listen ...
|
||||
|
||||
/// <summary>
/// Records or refreshes a peer entry from a received discovery beacon.
/// </summary>
/// <param name="beacon">The decoded beacon payload.</param>
/// <param name="address">The IP address the beacon was received from.</param>
private void HandleBeacon(DiscoveryBeacon beacon, IPAddress address)
{
    var nodeId = beacon.NodeId;
    var tcpEndpoint = $"{address}:{beacon.TcpPort}";

    var discovered = new PeerNode(nodeId, tcpEndpoint, DateTimeOffset.UtcNow, interestingCollections: beacon.InterestingCollections);

    // Unconditional replace so LastSeen is refreshed on every beacon
    // (indexer assignment has the same effect as AddOrUpdate-with-replace).
    _activePeers[nodeId] = discovered;
}
|
||||
|
||||
/// <summary>
/// Stops the discovery service. Safe to call multiple times; subsequent
/// calls (or a call before Start) only log a warning.
/// </summary>
/// <returns>A task that completes when stop processing has finished.</returns>
public async Task Stop()
{
    CancellationTokenSource? ctsToDispose = null;

    // Swap _cts to null under the lock so a concurrent Start/Stop cannot
    // cancel or dispose the same CTS twice; the actual cancel/dispose
    // happens outside the lock to keep the critical section short.
    lock (_startStopLock)
    {
        if (_cts == null)
        {
            _logger.LogWarning("UDP Discovery Service already stopped or never started");
            return;
        }

        ctsToDispose = _cts;
        _cts = null;
    }

    try
    {
        // Signals ListenAsync/BroadcastAsync/CleanupAsync to exit their loops.
        ctsToDispose.Cancel();
    }
    catch (ObjectDisposedException)
    {
        // Already disposed, ignore
    }
    finally
    {
        ctsToDispose.Dispose();
    }

    // Kept for signature symmetry with Start; no further async work here.
    await Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Gets the currently active peers discovered on the network.
/// </summary>
/// <returns>The collection of active peers.</returns>
public IEnumerable<PeerNode> GetActivePeers()
{
    return _activePeers.Values;
}
|
||||
|
||||
/// <summary>
/// Listens for discovery beacons on the shared discovery port until cancelled.
/// Beacons from other clusters (mismatched cluster hash) and our own beacons
/// are silently ignored.
/// </summary>
/// <param name="token">Token that stops the listener when cancelled.</param>
private async Task ListenAsync(CancellationToken token)
{
    using var udp = new UdpClient();
    // ReuseAddress lets multiple nodes on the same host bind the discovery port.
    udp.Client.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, true);
    udp.Client.Bind(new IPEndPoint(IPAddress.Any, DiscoveryPort));

    _logger.LogInformation("UDP Discovery Listening on port {Port}", DiscoveryPort);

    while (!token.IsCancellationRequested)
    {
        try
        {
            // BUGFIX: pass the token so cancellation interrupts the blocking
            // receive. Previously ReceiveAsync() was awaited without the token,
            // so Stop() would not take effect until one more datagram arrived.
            var result = await udp.ReceiveAsync(token);
            var json = Encoding.UTF8.GetString(result.Buffer);

            try
            {
                var config = await _configProvider.GetConfiguration();
                var localNodeId = config.NodeId;
                var localClusterHash = ComputeClusterHash(config.AuthToken);

                var beacon = JsonSerializer.Deserialize<DiscoveryBeacon>(json);

                // Ignore our own broadcasts echoed back to us.
                if (beacon != null && beacon.NodeId != localNodeId)
                {
                    // Filter by ClusterHash to reduce congestion from different clusters
                    if (!string.Equals(beacon.ClusterHash, localClusterHash, StringComparison.Ordinal))
                    {
                        // Kept silent on purpose: logging here would flood during congestion.
                        continue;
                    }

                    HandleBeacon(beacon, result.RemoteEndPoint.Address);
                }
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex, "Failed to parse beacon from {Address}", result.RemoteEndPoint.Address);
            }
        }
        catch (OperationCanceledException) { break; } // normal shutdown path
        catch (ObjectDisposedException) { break; }
        catch (Exception ex)
        {
            _logger.LogError(ex, "UDP Listener Error");
        }
    }
}
|
||||
|
||||
/// <summary>
/// Broadcasts this node's discovery beacon every 5 seconds until cancelled.
/// </summary>
/// <param name="token">Token that stops the loop when cancelled.</param>
private async Task BroadcastAsync(CancellationToken token)
{
    using var udp = new UdpClient();
    udp.EnableBroadcast = true;

    var endpoint = new IPEndPoint(IPAddress.Broadcast, DiscoveryPort);

    while (!token.IsCancellationRequested)
    {
        try
        {
            // Re-fetch config each time in case it changes (though usually static)
            var conf = await _configProvider.GetConfiguration();

            var beacon = new DiscoveryBeacon
            {
                NodeId = conf.NodeId,
                TcpPort = conf.TcpPort,
                ClusterHash = ComputeClusterHash(conf.AuthToken),
                InterestingCollections = _documentStore.InterestedCollection.ToList()
            };

            var json = JsonSerializer.Serialize(beacon);
            var bytes = Encoding.UTF8.GetBytes(json);

            await udp.SendAsync(bytes, bytes.Length, endpoint);
        }
        // FIX: treat cancellation as the normal shutdown path instead of
        // letting it escape the loop and be logged as a task failure
        // (matches CleanupAsync's handling).
        catch (OperationCanceledException) { break; }
        catch (Exception ex)
        {
            _logger.LogError(ex, "UDP Broadcast Error");
        }

        try
        {
            await Task.Delay(5000, token);
        }
        catch (OperationCanceledException) { break; } // cancelled delay = shutdown, not an error
    }
}
|
||||
|
||||
/// <summary>
/// Derives a short cluster identifier from the auth token so nodes can filter
/// beacons by cluster without putting the raw token on the wire.
/// </summary>
/// <param name="authToken">The cluster auth token; may be null or empty.</param>
/// <returns>
/// The first 8 uppercase hex characters of SHA-256(token), or the empty
/// string when no token is configured.
/// </returns>
private static string ComputeClusterHash(string authToken)
{
    if (string.IsNullOrEmpty(authToken)) return "";

    // 4 bytes (8 hex chars) is enough for discovery filtering; this is a
    // congestion filter, not a security boundary.
    var hash = System.Security.Cryptography.SHA256.HashData(Encoding.UTF8.GetBytes(authToken));
    return Convert.ToHexString(hash)[..8];
}
|
||||
|
||||
|
||||
|
||||
/// <summary>
/// Wire format of the UDP discovery heartbeat, serialized as JSON.
/// Property names are pinned via JsonPropertyName so the wire contract
/// survives C# renames.
/// </summary>
private class DiscoveryBeacon
{
    /// <summary>
    /// Gets or sets the broadcasting node identifier.
    /// </summary>
    [System.Text.Json.Serialization.JsonPropertyName("node_id")]
    public string NodeId { get; set; } = "";

    /// <summary>
    /// Gets or sets the TCP port used by the broadcasting node.
    /// </summary>
    [System.Text.Json.Serialization.JsonPropertyName("tcp_port")]
    public int TcpPort { get; set; }

    /// <summary>
    /// Gets or sets the cluster hash used for discovery filtering.
    /// </summary>
    [System.Text.Json.Serialization.JsonPropertyName("cluster_hash")]
    public string ClusterHash { get; set; } = "";

    /// <summary>
    /// Gets or sets the collections the node is interested in.
    /// </summary>
    [System.Text.Json.Serialization.JsonPropertyName("interests")]
    public List<string> InterestingCollections { get; set; } = new();
}
|
||||
}
|
||||
52
src/ZB.MOM.WW.CBDDC.Network/ZB.MOM.WW.CBDDC.Network.csproj
Executable file
52
src/ZB.MOM.WW.CBDDC.Network/ZB.MOM.WW.CBDDC.Network.csproj
Executable file
@@ -0,0 +1,52 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ZB.MOM.WW.CBDDC.Core\ZB.MOM.WW.CBDDC.Core.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Google.Protobuf" Version="3.25.1" />
|
||||
<PackageReference Include="Grpc.Tools" Version="2.76.0">
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Hosting.Abstractions" Version="8.0.0" />
|
||||
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="8.0.0" />
|
||||
<PackageReference Include="Serilog" Version="4.2.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Protobuf Include="sync.proto" GrpcServices="None" />
|
||||
</ItemGroup>
|
||||
|
||||
<PropertyGroup>
|
||||
<AssemblyName>ZB.MOM.WW.CBDDC.Network</AssemblyName>
|
||||
<RootNamespace>ZB.MOM.WW.CBDDC.Network</RootNamespace>
|
||||
<PackageId>ZB.MOM.WW.CBDDC.Network</PackageId>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<LangVersion>latest</LangVersion>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<Version>1.0.3</Version>
|
||||
<Authors>MrDevRobot</Authors>
|
||||
<Description>Networking layer (TCP/UDP/Gossip) for CBDDC.</Description>
|
||||
<PackageLicenseExpression>MIT</PackageLicenseExpression>
|
||||
<PackageTags>p2p;mesh;network;gossip;lan;udp;tcp;discovery</PackageTags>
|
||||
<PackageProjectUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</PackageProjectUrl>
|
||||
<RepositoryUrl>https://github.com/CBDDC/ZB.MOM.WW.CBDDC.Net</RepositoryUrl>
|
||||
<RepositoryType>git</RepositoryType>
|
||||
<PackageReadmeFile>README.md</PackageReadmeFile>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Include="README.md" Pack="true" PackagePath="\" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo">
|
||||
<_Parameter1>ZB.MOM.WW.CBDDC.Network.Tests</_Parameter1>
|
||||
</AssemblyAttribute>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
117
src/ZB.MOM.WW.CBDDC.Network/sync.proto
Executable file
117
src/ZB.MOM.WW.CBDDC.Network/sync.proto
Executable file
@@ -0,0 +1,117 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package ZB.MOM.WW.CBDDC.Network.Proto;
|
||||
|
||||
option csharp_namespace = "ZB.MOM.WW.CBDDC.Network.Proto";
|
||||
|
||||
message HandshakeRequest {
|
||||
string node_id = 1;
|
||||
string auth_token = 2;
|
||||
repeated string supported_compression = 3; // v4
|
||||
repeated string interesting_collections = 4; // v5
|
||||
}
|
||||
|
||||
message HandshakeResponse {
|
||||
string node_id = 1;
|
||||
bool accepted = 2;
|
||||
string selected_compression = 3; // v4
|
||||
repeated string interesting_collections = 4; // v5
|
||||
}
|
||||
|
||||
message GetClockRequest {
|
||||
}
|
||||
|
||||
message ClockResponse {
|
||||
int64 hlc_wall = 1;
|
||||
int32 hlc_logic = 2;
|
||||
string hlc_node = 3;
|
||||
}
|
||||
|
||||
message GetVectorClockRequest {
|
||||
}
|
||||
|
||||
message VectorClockResponse {
|
||||
repeated VectorClockEntry entries = 1;
|
||||
}
|
||||
|
||||
message VectorClockEntry {
|
||||
string node_id = 1;
|
||||
int64 hlc_wall = 2;
|
||||
int32 hlc_logic = 3;
|
||||
}
|
||||
|
||||
message PullChangesRequest {
|
||||
int64 since_wall = 1;
|
||||
int32 since_logic = 2;
|
||||
string since_node = 3;
|
||||
repeated string collections = 4; // v5: Filter by collection
|
||||
}
|
||||
|
||||
message ChangeSetResponse {
|
||||
repeated ProtoOplogEntry entries = 1;
|
||||
}
|
||||
|
||||
message PushChangesRequest {
|
||||
repeated ProtoOplogEntry entries = 1;
|
||||
}
|
||||
|
||||
message GetChainRangeRequest {
|
||||
string start_hash = 1;
|
||||
string end_hash = 2;
|
||||
}
|
||||
|
||||
message ChainRangeResponse {
|
||||
repeated ProtoOplogEntry entries = 1;
|
||||
bool snapshot_required = 2;
|
||||
}
|
||||
|
||||
message AckResponse {
|
||||
bool success = 1;
|
||||
bool snapshot_required = 2;
|
||||
}
|
||||
|
||||
// One replicated oplog operation. The HLC triple (hlc_wall, hlc_logic,
// hlc_node) orders operations across nodes; hash/previous_hash chain
// consecutive entries (used by GetChainRangeRequest/ChainRangeResponse).
message ProtoOplogEntry {
  string collection = 1;    // logical collection name
  string key = 2;           // document key within the collection
  string operation = 3;     // "Put" or "Delete"
  string json_data = 4;     // document body as JSON; presumably empty for deletes — confirm against producer
  int64 hlc_wall = 5;       // HLC physical (wall-clock) component
  int32 hlc_logic = 6;      // HLC logical counter component
  string hlc_node = 7;      // id of the node that produced the entry
  string hash = 8;          // hash of this entry — NOTE(review): hashing scheme not visible here
  string previous_hash = 9; // hash of the preceding entry in the chain
}
|
||||
|
||||
message GetSnapshotRequest {
|
||||
}
|
||||
|
||||
message SnapshotChunk {
|
||||
bytes data = 1;
|
||||
bool is_last = 2;
|
||||
}
|
||||
|
||||
// Enum for wire framing (1 byte)
|
||||
enum MessageType {
|
||||
Unknown = 0;
|
||||
HandshakeReq = 1;
|
||||
HandshakeRes = 2;
|
||||
GetClockReq = 3;
|
||||
ClockRes = 4;
|
||||
PullChangesReq = 5;
|
||||
ChangeSetRes = 6;
|
||||
PushChangesReq = 7;
|
||||
AckRes = 8;
|
||||
SecureEnv = 9;
|
||||
GetChainRangeReq = 10;
|
||||
ChainRangeRes = 11;
|
||||
GetVectorClockReq = 12;
|
||||
VectorClockRes = 13;
|
||||
GetSnapshotReq = 14;
|
||||
SnapshotChunkMsg = 15;
|
||||
}
|
||||
|
||||
message SecureEnvelope {
|
||||
bytes ciphertext = 1; // Encrypted payload
|
||||
bytes nonce = 2; // IV or Nonce
|
||||
bytes auth_tag = 3; // HMAC or Auth Tag if using AEAD (optional if concatenated)
|
||||
}
|
||||
230
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteDocumentMetadataStore.cs
Executable file
230
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteDocumentMetadataStore.cs
Executable file
@@ -0,0 +1,230 @@
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
/// BLite implementation of document metadata storage for sync tracking.
/// Reads are synchronous against the BLite context and returned as completed
/// tasks; writes go through the context's async insert/update/save methods.
/// </summary>
/// <typeparam name="TDbContext">The type of CBDDCDocumentDbContext.</typeparam>
public class BLiteDocumentMetadataStore<TDbContext> : DocumentMetadataStore where TDbContext : CBDDCDocumentDbContext
{
    private readonly TDbContext _context;
    private readonly ILogger<BLiteDocumentMetadataStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BLiteDocumentMetadataStore{TDbContext}"/> class.
    /// </summary>
    /// <param name="context">The BLite document database context.</param>
    /// <param name="logger">The optional logger instance.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="context"/> is null.</exception>
    public BLiteDocumentMetadataStore(TDbContext context, ILogger<BLiteDocumentMetadataStore<TDbContext>>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<BLiteDocumentMetadataStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    public override Task<DocumentMetadata?> GetMetadataAsync(string collection, string key, CancellationToken cancellationToken = default)
    {
        // FIX: this was `async` with no awaits (CS1998); the BLite read is
        // synchronous, so return a completed task instead.
        var entity = _context.DocumentMetadatas
            .Find(m => m.Collection == collection && m.Key == key)
            .FirstOrDefault();

        return Task.FromResult<DocumentMetadata?>(entity != null ? ToDomain(entity) : null);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<DocumentMetadata>> GetMetadataByCollectionAsync(string collection, CancellationToken cancellationToken = default)
    {
        // FIX: `async` with no awaits (CS1998); read synchronously.
        IEnumerable<DocumentMetadata> result = _context.DocumentMetadatas
            .Find(m => m.Collection == collection)
            .Select(ToDomain)
            .ToList();

        return Task.FromResult(result);
    }

    /// <inheritdoc />
    public override async Task UpsertMetadataAsync(DocumentMetadata metadata, CancellationToken cancellationToken = default)
    {
        await UpsertCoreAsync(metadata);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task UpsertMetadataBatchAsync(IEnumerable<DocumentMetadata> metadatas, CancellationToken cancellationToken = default)
    {
        foreach (var metadata in metadatas)
        {
            await UpsertCoreAsync(metadata);
        }

        // One save for the whole batch.
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <summary>
    /// Inserts or updates a single metadata row without saving changes.
    /// Shared by the single and batch upsert paths.
    /// </summary>
    private async Task UpsertCoreAsync(DocumentMetadata metadata)
    {
        var existing = _context.DocumentMetadatas
            .Find(m => m.Collection == metadata.Collection && m.Key == metadata.Key)
            .FirstOrDefault();

        if (existing == null)
        {
            await _context.DocumentMetadatas.InsertAsync(ToEntity(metadata));
        }
        else
        {
            ApplyTimestamp(existing, metadata.UpdatedAt, metadata.IsDeleted);
            await _context.DocumentMetadatas.UpdateAsync(existing);
        }
    }

    /// <summary>
    /// Copies an HLC timestamp and tombstone flag onto an existing entity.
    /// </summary>
    private static void ApplyTimestamp(DocumentMetadataEntity entity, HlcTimestamp timestamp, bool isDeleted)
    {
        entity.HlcPhysicalTime = timestamp.PhysicalTime;
        entity.HlcLogicalCounter = timestamp.LogicalCounter;
        entity.HlcNodeId = timestamp.NodeId;
        entity.IsDeleted = isDeleted;
    }

    /// <inheritdoc />
    public override async Task MarkDeletedAsync(string collection, string key, HlcTimestamp timestamp, CancellationToken cancellationToken = default)
    {
        var existing = _context.DocumentMetadatas
            .Find(m => m.Collection == collection && m.Key == key)
            .FirstOrDefault();

        if (existing == null)
        {
            // No metadata row yet: create a tombstone so the deletion replicates.
            await _context.DocumentMetadatas.InsertAsync(new DocumentMetadataEntity
            {
                Id = Guid.NewGuid().ToString(),
                Collection = collection,
                Key = key,
                HlcPhysicalTime = timestamp.PhysicalTime,
                HlcLogicalCounter = timestamp.LogicalCounter,
                HlcNodeId = timestamp.NodeId,
                IsDeleted = true
            });
        }
        else
        {
            ApplyTimestamp(existing, timestamp, isDeleted: true);
            await _context.DocumentMetadatas.UpdateAsync(existing);
        }

        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<DocumentMetadata>> GetMetadataAfterAsync(HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default)
    {
        // HLC ordering: physical time first, logical counter as tiebreaker.
        var query = _context.DocumentMetadatas.AsQueryable()
            .Where(m => (m.HlcPhysicalTime > since.PhysicalTime) ||
                        (m.HlcPhysicalTime == since.PhysicalTime && m.HlcLogicalCounter > since.LogicalCounter));

        if (collections != null)
        {
            var collectionSet = new HashSet<string>(collections);
            query = query.Where(m => collectionSet.Contains(m.Collection));
        }

        // FIX: `async` with no awaits (CS1998); materialize and return directly.
        IEnumerable<DocumentMetadata> result = query
            .OrderBy(m => m.HlcPhysicalTime)
            .ThenBy(m => m.HlcLogicalCounter)
            .Select(ToDomain)
            .ToList();

        return Task.FromResult(result);
    }

    /// <inheritdoc />
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        var allIds = _context.DocumentMetadatas.FindAll().Select(m => m.Id).ToList();
        await _context.DocumentMetadatas.DeleteBulkAsync(allIds);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override Task<IEnumerable<DocumentMetadata>> ExportAsync(CancellationToken cancellationToken = default)
    {
        // FIX: `async` with no awaits (CS1998).
        IEnumerable<DocumentMetadata> result = _context.DocumentMetadatas.FindAll().Select(ToDomain).ToList();
        return Task.FromResult(result);
    }

    /// <inheritdoc />
    public override async Task ImportAsync(IEnumerable<DocumentMetadata> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            await _context.DocumentMetadatas.InsertAsync(ToEntity(item));
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task MergeAsync(IEnumerable<DocumentMetadata> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            var existing = _context.DocumentMetadatas
                .Find(m => m.Collection == item.Collection && m.Key == item.Key)
                .FirstOrDefault();

            if (existing == null)
            {
                await _context.DocumentMetadatas.InsertAsync(ToEntity(item));
            }
            else
            {
                // Last-write-wins: update only if the incoming timestamp is newer.
                var existingTs = new HlcTimestamp(existing.HlcPhysicalTime, existing.HlcLogicalCounter, existing.HlcNodeId);
                if (item.UpdatedAt.CompareTo(existingTs) > 0)
                {
                    ApplyTimestamp(existing, item.UpdatedAt, item.IsDeleted);
                    await _context.DocumentMetadatas.UpdateAsync(existing);
                }
            }
        }
        await _context.SaveChangesAsync(cancellationToken);
    }

    #region Mappers

    // Entity -> domain model.
    private static DocumentMetadata ToDomain(DocumentMetadataEntity entity)
    {
        return new DocumentMetadata(
            entity.Collection,
            entity.Key,
            new HlcTimestamp(entity.HlcPhysicalTime, entity.HlcLogicalCounter, entity.HlcNodeId),
            entity.IsDeleted
        );
    }

    // Domain model -> entity; each new entity gets a fresh GUID id.
    private static DocumentMetadataEntity ToEntity(DocumentMetadata metadata)
    {
        return new DocumentMetadataEntity
        {
            Id = Guid.NewGuid().ToString(),
            Collection = metadata.Collection,
            Key = metadata.Key,
            HlcPhysicalTime = metadata.UpdatedAt.PhysicalTime,
            HlcLogicalCounter = metadata.UpdatedAt.LogicalCounter,
            HlcNodeId = metadata.UpdatedAt.NodeId,
            IsDeleted = metadata.IsDeleted
        };
    }

    #endregion
}
|
||||
209
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteDocumentStore.README.md
Executable file
209
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteDocumentStore.README.md
Executable file
@@ -0,0 +1,209 @@
|
||||
# BLiteDocumentStore - Usage Guide
|
||||
|
||||
## Overview
|
||||
|
||||
`BLiteDocumentStore<TDbContext>` is an abstract base class that simplifies creating document stores for CBDDC with BLite persistence. It handles all Oplog management internally, so you only need to implement entity-to-JSON mapping methods.
|
||||
|
||||
## Key Features
|
||||
|
||||
- ✅ **Automatic Oplog Creation** - Local changes automatically create Oplog entries
- ✅ **Remote Sync Handling** - AsyncLocal flag suppresses Oplog during sync (prevents duplicates)
- ✅ **No CDC Events Needed** - Direct Oplog management eliminates event loops
- ✅ **Simple API** - Only 4 abstract methods to implement
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
User Code → SampleDocumentStore (extends BLiteDocumentStore)
                ↓
        BLiteDocumentStore
        ├── _context.Users / TodoLists (read/write entities)
        └── _context.OplogEntries (write oplog directly)

Remote Sync → OplogStore.ApplyBatchAsync()
                ↓
        BLiteDocumentStore.PutDocumentAsync(fromSync=true)
        ├── _context.Users / TodoLists (write only)
        └── _context.OplogEntries (skip - already exists)
|
||||
```
|
||||
|
||||
**Key Advantage**: No circular dependency! `BLiteDocumentStore` writes directly to `CBDDCDocumentDbContext.OplogEntries` collection.
|
||||
|
||||
## Implementation Example
|
||||
|
||||
```csharp
|
||||
public class SampleDocumentStore : BLiteDocumentStore<SampleDbContext>
|
||||
{
|
||||
public SampleDocumentStore(
|
||||
SampleDbContext context,
|
||||
IPeerNodeConfigurationProvider configProvider,
|
||||
ILogger<SampleDocumentStore>? logger = null)
|
||||
: base(context, configProvider, new LastWriteWinsConflictResolver(), logger)
|
||||
{
|
||||
}
|
||||
|
||||
public override IEnumerable<string> InterestedCollection => new[] { "Users", "TodoLists" };
|
||||
|
||||
protected override async Task ApplyContentToEntityAsync(
|
||||
string collection, string key, JsonElement content, CancellationToken ct)
|
||||
{
|
||||
switch (collection)
|
||||
{
|
||||
case "Users":
|
||||
var user = content.Deserialize<User>()!;
|
||||
user.Id = key;
|
||||
var existingUser = _context.Users.FindById(key);
|
||||
if (existingUser != null)
|
||||
await _context.Users.UpdateAsync(user);
|
||||
else
|
||||
await _context.Users.InsertAsync(user);
|
||||
await _context.SaveChangesAsync(ct);
|
||||
break;
|
||||
|
||||
case "TodoLists":
|
||||
var todoList = content.Deserialize<TodoList>()!;
|
||||
todoList.Id = key;
|
||||
var existingTodoList = _context.TodoLists.FindById(key);
|
||||
if (existingTodoList != null)
|
||||
await _context.TodoLists.UpdateAsync(todoList);
|
||||
else
|
||||
await _context.TodoLists.InsertAsync(todoList);
|
||||
await _context.SaveChangesAsync(ct);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new NotSupportedException($"Collection '{collection}' is not supported");
|
||||
}
|
||||
}
|
||||
|
||||
protected override Task<JsonElement?> GetEntityAsJsonAsync(
|
||||
string collection, string key, CancellationToken ct)
|
||||
{
|
||||
return Task.FromResult<JsonElement?>(collection switch
|
||||
{
|
||||
"Users" => SerializeEntity(_context.Users.FindById(key)),
|
||||
"TodoLists" => SerializeEntity(_context.TodoLists.FindById(key)),
|
||||
_ => null
|
||||
});
|
||||
}
|
||||
|
||||
protected override async Task RemoveEntityAsync(
|
||||
string collection, string key, CancellationToken ct)
|
||||
{
|
||||
switch (collection)
|
||||
{
|
||||
case "Users":
|
||||
await _context.Users.DeleteAsync(key);
|
||||
await _context.SaveChangesAsync(ct);
|
||||
break;
|
||||
|
||||
case "TodoLists":
|
||||
await _context.TodoLists.DeleteAsync(key);
|
||||
await _context.SaveChangesAsync(ct);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
protected override async Task<IEnumerable<(string Key, JsonElement Content)>> GetAllEntitiesAsJsonAsync(
|
||||
string collection, CancellationToken ct)
|
||||
{
|
||||
return await Task.Run(() => collection switch
|
||||
{
|
||||
"Users" => _context.Users.FindAll()
|
||||
.Select(u => (u.Id, SerializeEntity(u)!.Value)),
|
||||
|
||||
"TodoLists" => _context.TodoLists.FindAll()
|
||||
.Select(t => (t.Id, SerializeEntity(t)!.Value)),
|
||||
|
||||
_ => Enumerable.Empty<(string, JsonElement)>()
|
||||
}, ct);
|
||||
}
|
||||
|
||||
private static JsonElement? SerializeEntity<T>(T? entity) where T : class
|
||||
{
|
||||
if (entity == null) return null;
|
||||
return JsonSerializer.SerializeToElement(entity);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Usage in Application
|
||||
|
||||
### Setup (DI Container)
|
||||
|
||||
```csharp
|
||||
services.AddSingleton<SampleDbContext>(sp =>
|
||||
new SampleDbContext("data/sample.blite"));
|
||||
|
||||
// No OplogStore dependency needed!
|
||||
services.AddSingleton<IDocumentStore, SampleDocumentStore>();
|
||||
services.AddSingleton<IOplogStore, BLiteOplogStore<SampleDbContext>>();
|
||||
```
|
||||
|
||||
### Local Changes (User operations)
|
||||
|
||||
```csharp
|
||||
// User inserts a new user
|
||||
var user = new User { Id = "user-1", Name = "Alice" };
|
||||
await _context.Users.InsertAsync(user);
|
||||
await _context.SaveChangesAsync();
|
||||
|
||||
// The application then needs to notify the DocumentStore:
|
||||
var document = new Document(
|
||||
"Users",
|
||||
"user-1",
|
||||
JsonSerializer.SerializeToElement(user),
|
||||
new HlcTimestamp(0, 0, ""),
|
||||
false);
|
||||
|
||||
await documentStore.PutDocumentAsync(document);
|
||||
// ✅ This creates an OplogEntry automatically
|
||||
```
|
||||
|
||||
### Remote Sync (Automatic)
|
||||
|
||||
```csharp
|
||||
// When OplogStore.ApplyBatchAsync receives remote changes:
|
||||
await oplogStore.ApplyBatchAsync(remoteEntries, cancellationToken);
|
||||
|
||||
// Internally, this calls:
|
||||
using (documentStore.BeginRemoteSync()) // ← Suppresses Oplog creation
|
||||
{
|
||||
foreach (var entry in remoteEntries)
|
||||
{
|
||||
await documentStore.PutDocumentAsync(entryAsDocument);
|
||||
// ✅ Writes to DB only, no Oplog duplication
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Migration from Old CDC-based Approach
|
||||
|
||||
### Before (with CDC Events)
|
||||
```csharp
|
||||
// SampleDocumentStore subscribes to BLite CDC
|
||||
// CDC emits events → OplogCoordinator creates Oplog
// Problem: Remote sync also triggers CDC → duplicate Oplog entries
|
||||
```
|
||||
|
||||
### After (with BLiteDocumentStore)
|
||||
```csharp
|
||||
// Direct Oplog management in DocumentStore
|
||||
// AsyncLocal flag prevents duplicates during sync
|
||||
// No CDC events needed
|
||||
```
|
||||
|
||||
## Benefits
|
||||
|
||||
1. **No Event Loops** - Direct control over Oplog creation
|
||||
2. **Thread-Safe** - AsyncLocal handles concurrent operations
|
||||
3. **Simpler** - Only 4 methods to implement vs full CDC subscription
|
||||
4. **Transparent** - Oplog management is hidden from user code
|
||||
|
||||
## Next Steps
|
||||
|
||||
After implementing your DocumentStore:
|
||||
1. Remove CDC subscriptions from your code
|
||||
2. Remove `OplogCoordinator` from DI (no longer needed)
|
||||
3. Test local operations create Oplog entries
|
||||
4. Test remote sync doesn't create duplicate entries
|
||||
733
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteDocumentStore.cs
Executable file
733
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteDocumentStore.cs
Executable file
@@ -0,0 +1,733 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using BLite.Core.CDC;
|
||||
using BLite.Core.Collections;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
using BLiteOperationType = BLite.Core.Transactions.OperationType;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
|
||||
/// Abstract base class for BLite-based document stores.
|
||||
/// Handles Oplog creation internally - subclasses only implement entity mapping.
|
||||
/// </summary>
|
||||
/// <typeparam name="TDbContext">The BLite DbContext type.</typeparam>
|
||||
public abstract class BLiteDocumentStore<TDbContext> : IDocumentStore, IDisposable
|
||||
where TDbContext : CBDDCDocumentDbContext
|
||||
{
|
||||
protected readonly TDbContext _context;
|
||||
protected readonly IPeerNodeConfigurationProvider _configProvider;
|
||||
protected readonly IConflictResolver _conflictResolver;
|
||||
protected readonly IVectorClockService _vectorClock;
|
||||
protected readonly ILogger<BLiteDocumentStore<TDbContext>> _logger;
|
||||
|
||||
/// <summary>
|
||||
/// Semaphore used to suppress CDC-triggered OplogEntry creation during remote sync.
|
||||
/// CurrentCount == 0 → sync in progress, CDC must skip.
/// CurrentCount == 1 → no sync, CDC creates OplogEntry.
|
||||
/// </summary>
|
||||
private readonly SemaphoreSlim _remoteSyncGuard = new SemaphoreSlim(1, 1);
|
||||
|
||||
private readonly List<IDisposable> _cdcWatchers = new();
|
||||
private readonly HashSet<string> _registeredCollections = new();
|
||||
|
||||
// HLC state for generating timestamps for local changes
|
||||
private long _lastPhysicalTime;
|
||||
private int _logicalCounter;
|
||||
private readonly object _clockLock = new object();
|
||||
|
||||
/// <summary>
/// Initializes a new instance of the <see cref="BLiteDocumentStore{TDbContext}"/> class.
/// </summary>
/// <param name="context">The BLite database context.</param>
/// <param name="configProvider">The peer node configuration provider.</param>
/// <param name="vectorClockService">The vector clock service.</param>
/// <param name="conflictResolver">The conflict resolver to use for merges; defaults to last-write-wins.</param>
/// <param name="logger">The logger instance; a null logger is substituted when omitted.</param>
/// <exception cref="ArgumentNullException">Thrown when a required dependency is null.</exception>
protected BLiteDocumentStore(
    TDbContext context,
    IPeerNodeConfigurationProvider configProvider,
    IVectorClockService vectorClockService,
    IConflictResolver? conflictResolver = null,
    ILogger? logger = null)
{
    _context = context ?? throw new ArgumentNullException(nameof(context));
    _configProvider = configProvider ?? throw new ArgumentNullException(nameof(configProvider));
    _vectorClock = vectorClockService ?? throw new ArgumentNullException(nameof(vectorClockService));
    _conflictResolver = conflictResolver ?? new LastWriteWinsConflictResolver();
    _logger = CreateTypedLogger(logger);

    // Seed the local HLC with the current wall clock; the logical counter
    // disambiguates events generated within the same millisecond.
    _lastPhysicalTime = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    _logicalCounter = 0;
}
|
||||
|
||||
/// <summary>
/// Adapts whatever logger the subclass supplied to the typed logger interface,
/// wrapping untyped loggers in a forwarding shim.
/// </summary>
private static ILogger<BLiteDocumentStore<TDbContext>> CreateTypedLogger(ILogger? logger)
{
    return logger switch
    {
        null => NullLogger<BLiteDocumentStore<TDbContext>>.Instance,
        ILogger<BLiteDocumentStore<TDbContext>> typed => typed,
        _ => new ForwardingLogger(logger),
    };
}
|
||||
|
||||
/// <summary>
/// Adapter that exposes an untyped <see cref="ILogger"/> through the typed
/// <see cref="ILogger{T}"/> interface by delegating every call unchanged.
/// </summary>
private sealed class ForwardingLogger : ILogger<BLiteDocumentStore<TDbContext>>
{
    private readonly ILogger _inner;

    /// <summary>
    /// Initializes a new instance of the <see cref="ForwardingLogger"/> class.
    /// </summary>
    /// <param name="inner">The underlying logger instance.</param>
    public ForwardingLogger(ILogger inner) => _inner = inner;

    /// <inheritdoc />
    public IDisposable? BeginScope<TState>(TState state) where TState : notnull
        => _inner.BeginScope(state);

    /// <inheritdoc />
    public bool IsEnabled(LogLevel logLevel)
        => _inner.IsEnabled(logLevel);

    /// <inheritdoc />
    public void Log<TState>(
        LogLevel logLevel,
        EventId eventId,
        TState state,
        Exception? exception,
        Func<TState, Exception?, string> formatter)
        => _inner.Log(logLevel, eventId, state, exception, formatter);
}
|
||||
|
||||
#region CDC Registration
|
||||
|
||||
/// <summary>
/// Registers a BLite collection for CDC tracking.
/// Call in subclass constructor for each collection to sync.
/// </summary>
/// <typeparam name="TEntity">The entity type.</typeparam>
/// <param name="collectionName">The logical collection name used in Oplog.</param>
/// <param name="collection">The BLite DocumentCollection.</param>
/// <param name="keySelector">Function to extract the entity key.</param>
protected void WatchCollection<TEntity>(
    string collectionName,
    DocumentCollection<string, TEntity> collection,
    Func<TEntity, string> keySelector)
    where TEntity : class
{
    _registeredCollections.Add(collectionName);

    // Subscribe with payload capture so change events carry the full entity.
    var observer = new CdcObserver<TEntity>(collectionName, keySelector, this);
    var subscription = collection.Watch(capturePayload: true).Subscribe(observer);
    _cdcWatchers.Add(subscription);
}
|
||||
|
||||
/// <summary>
/// Generic CDC observer. Forwards BLite change events to OnLocalChangeDetectedAsync.
/// Automatically skips events when remote sync is in progress.
/// </summary>
private class CdcObserver<TEntity> : IObserver<ChangeStreamEvent<string, TEntity>>
    where TEntity : class
{
    private readonly string _collectionName;
    private readonly Func<TEntity, string> _keySelector;
    private readonly BLiteDocumentStore<TDbContext> _store;

    /// <summary>
    /// Initializes a new instance of the <see cref="CdcObserver{TEntity}"/> class.
    /// </summary>
    /// <param name="collectionName">The logical collection name.</param>
    /// <param name="keySelector">The key selector for observed entities.</param>
    /// <param name="store">The owning document store instance.</param>
    public CdcObserver(
        string collectionName,
        Func<TEntity, string> keySelector,
        BLiteDocumentStore<TDbContext> store)
    {
        _collectionName = collectionName;
        _keySelector = keySelector;
        _store = store;
    }

    /// <summary>
    /// Handles a change stream event from BLite CDC.
    /// </summary>
    /// <param name="changeEvent">The change event payload.</param>
    public void OnNext(ChangeStreamEvent<string, TEntity> changeEvent)
    {
        // Consistency: use the store's named guard property instead of re-reading
        // the semaphore count directly (identical semantics, single definition).
        if (_store.IsRemoteSyncInProgress) return;

        var entityId = changeEvent.DocumentId?.ToString() ?? "";

        // NOTE(review): IObserver.OnNext is synchronous, so the async oplog write is
        // blocked on via GetResult(); deadlock-free only while no synchronization
        // context is captured on this path — confirm for UI hosts.
        if (changeEvent.Type == BLiteOperationType.Delete)
        {
            _store.OnLocalChangeDetectedAsync(_collectionName, entityId, OperationType.Delete, null)
                .GetAwaiter().GetResult();
        }
        else if (changeEvent.Entity != null)
        {
            var content = JsonSerializer.SerializeToElement(changeEvent.Entity);
            var key = _keySelector(changeEvent.Entity);
            _store.OnLocalChangeDetectedAsync(_collectionName, key, OperationType.Put, content)
                .GetAwaiter().GetResult();
        }
    }

    /// <summary>
    /// Handles CDC observer errors. Previously swallowed silently; now logged so
    /// stream failures are visible without tearing down the store.
    /// </summary>
    /// <param name="error">The observed exception.</param>
    public void OnError(Exception error)
    {
        _store._logger.LogError(error, "CDC observer error for collection {Collection}", _collectionName);
    }

    /// <summary>
    /// Handles completion of the CDC stream; nothing to release here.
    /// </summary>
    public void OnCompleted() { }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Abstract Methods - Implemented by subclass

/// <summary>
/// Applies JSON content to a single entity (insert or update) and commits changes.
/// Called for single-document operations.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="content">The document content to apply.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task ApplyContentToEntityAsync(
    string collection, string key, JsonElement content, CancellationToken cancellationToken);

/// <summary>
/// Applies JSON content to multiple entities (insert or update) with a single commit.
/// Called for batch operations. Must commit all changes in a single SaveChanges.
/// </summary>
/// <param name="documents">The documents to apply in one batch.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task ApplyContentToEntitiesBatchAsync(
    IEnumerable<(string Collection, string Key, JsonElement Content)> documents, CancellationToken cancellationToken);

/// <summary>
/// Reads an entity from the DbContext and returns it as JsonElement.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The entity serialized as JSON, or <see langword="null"/> when not found.</returns>
protected abstract Task<JsonElement?> GetEntityAsJsonAsync(
    string collection, string key, CancellationToken cancellationToken);

/// <summary>
/// Removes a single entity from the DbContext and commits changes.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task RemoveEntityAsync(
    string collection, string key, CancellationToken cancellationToken);

/// <summary>
/// Removes multiple entities from the DbContext with a single commit.
/// </summary>
/// <param name="documents">The documents to remove in one batch.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected abstract Task RemoveEntitiesBatchAsync(
    IEnumerable<(string Collection, string Key)> documents, CancellationToken cancellationToken);

/// <summary>
/// Reads all entities from a collection as JsonElements.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>Key/content pairs for every entity in the collection.</returns>
protected abstract Task<IEnumerable<(string Key, JsonElement Content)>> GetAllEntitiesAsJsonAsync(
    string collection, CancellationToken cancellationToken);

#endregion
|
||||
|
||||
#region IDocumentStore Implementation
|
||||
|
||||
/// <summary>
/// Returns the collections registered via WatchCollection.
/// </summary>
// Exposes the live list, so collections registered after this call are also visible.
public IEnumerable<string> InterestedCollection => _registeredCollections;
|
||||
|
||||
/// <summary>
/// Gets a document by collection and key.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The matching document, or <see langword="null"/> when not found.</returns>
public async Task<Document?> GetDocumentAsync(string collection, string key, CancellationToken cancellationToken = default)
{
    var content = await GetEntityAsJsonAsync(collection, key, cancellationToken);
    if (content is not { } json)
    {
        return null;
    }

    // Zero placeholder; will be populated from metadata if needed.
    var placeholderStamp = new HlcTimestamp(0, 0, "");
    return new Document(collection, key, json, placeholderStamp, false);
}
|
||||
|
||||
/// <summary>
/// Gets all documents for a collection.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The documents in the specified collection.</returns>
public async Task<IEnumerable<Document>> GetDocumentsByCollectionAsync(string collection, CancellationToken cancellationToken = default)
{
    // Zero placeholder timestamp, as in GetDocumentAsync.
    var placeholderStamp = new HlcTimestamp(0, 0, "");
    var entities = await GetAllEntitiesAsJsonAsync(collection, cancellationToken);

    var result = new List<Document>();
    foreach (var (key, content) in entities)
    {
        result.Add(new Document(collection, key, content, placeholderStamp, false));
    }
    return result;
}
|
||||
|
||||
/// <summary>
/// Gets documents for the specified collection and key pairs.
/// </summary>
/// <param name="documentKeys">The collection and key pairs to resolve.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The documents that were found; missing keys are silently omitted.</returns>
public async Task<IEnumerable<Document>> GetDocumentsAsync(List<(string Collection, string Key)> documentKeys, CancellationToken cancellationToken)
{
    var found = new List<Document>();
    foreach (var (collection, key) in documentKeys)
    {
        if (await GetDocumentAsync(collection, key, cancellationToken) is { } doc)
        {
            found.Add(doc);
        }
    }
    return found;
}
|
||||
|
||||
/// <summary>
/// Inserts or updates a single document.
/// </summary>
/// <param name="document">The document to persist.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> PutDocumentAsync(Document document, CancellationToken cancellationToken = default)
{
    // Hold the guard so CDC events raised by this write do not create oplog entries.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await PutDocumentInternalAsync(document, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
|
||||
|
||||
// Core write path without guard handling; callers manage _remoteSyncGuard themselves.
private async Task PutDocumentInternalAsync(Document document, CancellationToken cancellationToken) =>
    await ApplyContentToEntityAsync(document.Collection, document.Key, document.Content, cancellationToken);
|
||||
|
||||
/// <summary>
/// Updates a batch of documents.
/// </summary>
/// <param name="documents">The documents to update.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> UpdateBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default)
{
    var batch = documents.Select(d => (d.Collection, d.Key, d.Content));

    // Hold the guard so CDC events raised by this batch do not create oplog entries.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await ApplyContentToEntitiesBatchAsync(batch, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Inserts a batch of documents.
/// </summary>
/// <param name="documents">The documents to insert.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
/// <remarks>
/// Insert and update share identical upsert semantics here (both funnel into
/// ApplyContentToEntitiesBatchAsync under the remote-sync guard), so this delegates
/// to <see cref="UpdateBatchDocumentsAsync"/> instead of duplicating its body.
/// </remarks>
public Task<bool> InsertBatchDocumentsAsync(IEnumerable<Document> documents, CancellationToken cancellationToken = default)
    => UpdateBatchDocumentsAsync(documents, cancellationToken);
|
||||
|
||||
/// <summary>
/// Deletes a single document.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> DeleteDocumentAsync(string collection, string key, CancellationToken cancellationToken = default)
{
    // Hold the guard so CDC events raised by this delete do not create oplog entries.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await DeleteDocumentInternalAsync(collection, key, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
|
||||
|
||||
// Core delete path without guard handling; callers manage _remoteSyncGuard themselves.
private async Task DeleteDocumentInternalAsync(string collection, string key, CancellationToken cancellationToken) =>
    await RemoveEntityAsync(collection, key, cancellationToken);
|
||||
|
||||
/// <summary>
/// Deletes a batch of documents by composite keys.
/// </summary>
/// <param name="documentKeys">The document keys in collection/key format.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns><see langword="true"/> when the operation succeeds.</returns>
public async Task<bool> DeleteBatchDocumentsAsync(IEnumerable<string> documentKeys, CancellationToken cancellationToken = default)
{
    // Keys arrive as "collection/key"; anything else is logged and skipped.
    var parsedKeys = new List<(string Collection, string Key)>();
    foreach (var compositeKey in documentKeys)
    {
        if (compositeKey.Split('/') is [var collection, var key])
        {
            parsedKeys.Add((collection, key));
        }
        else
        {
            _logger.LogWarning("Invalid document key format: {Key}", compositeKey);
        }
    }

    if (parsedKeys.Count == 0) return true;

    // Hold the guard so CDC events raised by the removals do not create oplog entries.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await RemoveEntitiesBatchAsync(parsedKeys, cancellationToken);
        return true;
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Merges an incoming document with the current stored document.
/// </summary>
/// <param name="incoming">The incoming document.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The stored document after merge resolution.</returns>
public async Task<Document> MergeAsync(Document incoming, CancellationToken cancellationToken = default)
{
    var existing = await GetDocumentAsync(incoming.Collection, incoming.Key, cancellationToken);

    // No local copy: just store the incoming document as-is.
    if (existing == null)
    {
        // Use internal method - guard not acquired yet in single-document merge
        await PutDocumentInternalAsync(incoming, cancellationToken);
        return incoming;
    }

    // Use conflict resolver to merge. The incoming document is wrapped as a Put
    // oplog entry so the resolver can weigh it against the stored document.
    // NOTE(review): GetDocumentAsync builds `existing` with a zero HLC timestamp,
    // so timestamp-based resolvers effectively compare against (0,0) — confirm intended.
    var resolution = _conflictResolver.Resolve(existing, new OplogEntry(
        incoming.Collection,
        incoming.Key,
        OperationType.Put,
        incoming.Content,
        incoming.UpdatedAt,
        ""));

    if (resolution.ShouldApply && resolution.MergedDocument != null)
    {
        await PutDocumentInternalAsync(resolution.MergedDocument, cancellationToken);
        return resolution.MergedDocument;
    }

    // Resolver kept the stored version; nothing is written.
    return existing;
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region ISnapshotable Implementation
|
||||
|
||||
/// <summary>
/// Removes all tracked documents from registered collections.
/// </summary>
/// <param name="cancellationToken">The cancellation token.</param>
/// <remarks>
/// Uses the batch removal path (documented as a single commit per collection)
/// instead of deleting entities one by one, which previously issued one commit
/// per entity. NOTE(review): this method does not acquire the remote-sync guard,
/// so CDC events raised by the removals may still create oplog entries — confirm intended.
/// </remarks>
public async Task DropAsync(CancellationToken cancellationToken = default)
{
    foreach (var collection in InterestedCollection)
    {
        var entities = await GetAllEntitiesAsJsonAsync(collection, cancellationToken);
        var keys = entities.Select(e => (Collection: collection, e.Key)).ToList();
        if (keys.Count > 0)
        {
            await RemoveEntitiesBatchAsync(keys, cancellationToken);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Exports all tracked documents from registered collections.
/// </summary>
/// <param name="cancellationToken">The cancellation token.</param>
/// <returns>The exported documents.</returns>
public async Task<IEnumerable<Document>> ExportAsync(CancellationToken cancellationToken = default)
{
    var exported = new List<Document>();
    foreach (var collection in InterestedCollection)
    {
        exported.AddRange(await GetDocumentsByCollectionAsync(collection, cancellationToken));
    }
    return exported;
}
|
||||
|
||||
/// <summary>
/// Imports a batch of documents.
/// </summary>
/// <param name="items">The documents to import.</param>
/// <param name="cancellationToken">The cancellation token.</param>
public async Task ImportAsync(IEnumerable<Document> items, CancellationToken cancellationToken = default)
{
    var batch = items.Select(d => (d.Collection, d.Key, d.Content));

    // Hold the guard so CDC events raised by the import do not create oplog entries.
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        await ApplyContentToEntitiesBatchAsync(batch, cancellationToken);
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Merges a batch of incoming documents.
/// </summary>
/// <param name="items">The incoming documents.</param>
/// <param name="cancellationToken">The cancellation token.</param>
public async Task MergeAsync(IEnumerable<Document> items, CancellationToken cancellationToken = default)
{
    // Acquire guard to prevent Oplog creation during merge
    await _remoteSyncGuard.WaitAsync(cancellationToken);
    try
    {
        // Sequential on purpose: single-document MergeAsync writes through the
        // internal (guard-free) path, so holding the guard here covers all writes.
        foreach (var document in items)
        {
            await MergeAsync(document, cancellationToken);
        }
    }
    finally
    {
        _remoteSyncGuard.Release();
    }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Oplog Management

/// <summary>
/// Returns true if a remote sync operation is in progress (guard acquired).
/// CDC listeners should check this before creating OplogEntry.
/// </summary>
// CurrentCount == 0 means some caller currently holds the semaphore.
protected bool IsRemoteSyncInProgress => _remoteSyncGuard.CurrentCount == 0;
|
||||
|
||||
/// <summary>
/// Called by subclass CDC listeners when a local change is detected.
/// Creates OplogEntry + DocumentMetadata only if no remote sync is in progress.
/// </summary>
/// <param name="collection">The logical collection name.</param>
/// <param name="key">The document key.</param>
/// <param name="operationType">The detected operation type.</param>
/// <param name="content">The document content when available.</param>
/// <param name="cancellationToken">The cancellation token.</param>
protected async Task OnLocalChangeDetectedAsync(
    string collection,
    string key,
    OperationType operationType,
    JsonElement? content,
    CancellationToken cancellationToken = default)
{
    // Changes applied while a remote sync holds the guard originate from a peer
    // and must not be re-recorded in the local oplog.
    if (!IsRemoteSyncInProgress)
    {
        await CreateOplogEntryAsync(collection, key, operationType, content, cancellationToken);
    }
}
|
||||
|
||||
// Produces the next hybrid-logical-clock timestamp for this node.
// Guarded by _clockLock so concurrent local changes get strictly increasing stamps.
private HlcTimestamp GenerateTimestamp(string nodeId)
{
    lock (_clockLock)
    {
        var now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        var clockAdvanced = now > _lastPhysicalTime;

        // Adopt the wall clock when it moved forward; otherwise disambiguate
        // same-millisecond (or regressed-clock) events via the logical counter.
        _logicalCounter = clockAdvanced ? 0 : _logicalCounter + 1;
        if (clockAdvanced)
        {
            _lastPhysicalTime = now;
        }

        return new HlcTimestamp(_lastPhysicalTime, _logicalCounter, nodeId);
    }
}
|
||||
|
||||
// Records a local change as an oplog entry plus document metadata, then notifies
// the vector clock. Statement order matters: the previous hash must be read before
// the new entry is inserted, and SaveChanges must precede the vector clock update.
// NOTE(review): reading the last hash and inserting the new entry are not covered
// by a single lock (_clockLock only guards GenerateTimestamp), so two concurrent
// local changes could chain off the same previous hash — confirm callers serialize.
private async Task CreateOplogEntryAsync(
    string collection,
    string key,
    OperationType operationType,
    JsonElement? content,
    CancellationToken cancellationToken)
{
    var config = await _configProvider.GetConfiguration();
    var nodeId = config.NodeId;

    // Get last hash from OplogEntries collection directly (latest entry for this node).
    var lastEntry = _context.OplogEntries
        .Find(e => e.TimestampNodeId == nodeId)
        .OrderByDescending(e => e.TimestampPhysicalTime)
        .ThenByDescending(e => e.TimestampLogicalCounter)
        .FirstOrDefault();

    // Empty hash means this is the first entry in this node's chain.
    var previousHash = lastEntry?.Hash ?? string.Empty;
    var timestamp = GenerateTimestamp(nodeId);

    var oplogEntry = new OplogEntry(
        collection,
        key,
        operationType,
        content,
        timestamp,
        previousHash);

    // Write directly to OplogEntries collection
    await _context.OplogEntries.InsertAsync(oplogEntry.ToEntity());

    // Write DocumentMetadata for sync tracking
    var docMetadata = EntityMappers.CreateDocumentMetadata(
        collection,
        key,
        timestamp,
        isDeleted: operationType == OperationType.Delete);

    // Upsert: update the row in place when metadata for this document already exists.
    var existingMetadata = _context.DocumentMetadatas
        .Find(m => m.Collection == collection && m.Key == key)
        .FirstOrDefault();

    if (existingMetadata != null)
    {
        // Update existing metadata with the new HLC components and tombstone flag.
        existingMetadata.HlcPhysicalTime = timestamp.PhysicalTime;
        existingMetadata.HlcLogicalCounter = timestamp.LogicalCounter;
        existingMetadata.HlcNodeId = timestamp.NodeId;
        existingMetadata.IsDeleted = operationType == OperationType.Delete;
        await _context.DocumentMetadatas.UpdateAsync(existingMetadata);
    }
    else
    {
        await _context.DocumentMetadatas.InsertAsync(docMetadata);
    }

    // Commit entry + metadata together.
    await _context.SaveChangesAsync(cancellationToken);

    // Notify VectorClockService so sync sees local changes
    _vectorClock.Update(oplogEntry);

    _logger.LogDebug(
        "Created Oplog entry: {Operation} {Collection}/{Key} at {Timestamp} (hash: {Hash})",
        operationType, collection, key, timestamp, oplogEntry.Hash);
}
|
||||
|
||||
/// <summary>
/// Marks the start of remote sync operations (suppresses CDC-triggered Oplog creation).
/// Use in using statement: using (store.BeginRemoteSync()) { ... }
/// </summary>
/// <returns>A scope that releases the guard when disposed.</returns>
public IDisposable BeginRemoteSync()
{
    // Synchronous wait: blocks the calling thread until the guard is free.
    _remoteSyncGuard.Wait();
    return new RemoteSyncScope(_remoteSyncGuard);
}
|
||||
|
||||
/// <summary>
/// Disposable scope returned by BeginRemoteSync; releases the guard on dispose.
/// NOTE(review): disposing twice releases twice — callers must dispose exactly once.
/// </summary>
private sealed class RemoteSyncScope : IDisposable
{
    private readonly SemaphoreSlim _guard;

    /// <summary>
    /// Initializes a new instance of the <see cref="RemoteSyncScope"/> class.
    /// </summary>
    /// <param name="guard">The semaphore guarding remote sync operations.</param>
    public RemoteSyncScope(SemaphoreSlim guard) => _guard = guard;

    /// <summary>
    /// Releases the remote sync guard.
    /// </summary>
    public void Dispose() => _guard.Release();
}
|
||||
|
||||
#endregion
|
||||
|
||||
/// <summary>
/// Releases managed resources used by this document store:
/// the CDC subscriptions and the remote-sync guard.
/// </summary>
public virtual void Dispose()
{
    foreach (var watcher in _cdcWatchers)
    {
        try
        {
            watcher.Dispose();
        }
        catch (Exception ex)
        {
            // Best-effort teardown: never throw from Dispose, but surface the
            // failure instead of silently swallowing it as before.
            _logger.LogDebug(ex, "Failed to dispose CDC watcher");
        }
    }
    _cdcWatchers.Clear();
    _remoteSyncGuard.Dispose();

    // CA1816: suppress finalization for subclasses that might add a finalizer.
    GC.SuppressFinalize(this);
}
|
||||
}
|
||||
249
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteOplogStore.cs
Executable file
249
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteOplogStore.cs
Executable file
@@ -0,0 +1,249 @@
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
/// BLite-backed <see cref="OplogStore"/> that persists oplog entries in the
/// context's OplogEntries collection, ordered by HLC (physical time, logical counter).
/// </summary>
/// <typeparam name="TDbContext">The BLite document DbContext type.</typeparam>
public class BLiteOplogStore<TDbContext> : OplogStore where TDbContext : CBDDCDocumentDbContext
{
    // BLite database context holding the OplogEntries collection.
    protected readonly TDbContext _context;
    // Never null: a null logger is substituted in the constructor.
    protected readonly ILogger<BLiteOplogStore<TDbContext>> _logger;
|
||||
|
||||
/// <summary>
/// Initializes a new instance of the <see cref="BLiteOplogStore{TDbContext}"/> class.
/// </summary>
/// <param name="dbContext">The BLite database context.</param>
/// <param name="documentStore">The document store used by the oplog store.</param>
/// <param name="conflictResolver">The conflict resolver used during merges.</param>
/// <param name="vectorClockService">The vector clock service used for timestamp coordination.</param>
/// <param name="snapshotMetadataStore">Optional snapshot metadata store used for initialization.</param>
/// <param name="logger">Optional logger instance.</param>
public BLiteOplogStore(
    TDbContext dbContext,
    IDocumentStore documentStore,
    IConflictResolver conflictResolver,
    IVectorClockService vectorClockService,
    ISnapshotMetadataStore? snapshotMetadataStore = null,
    ILogger<BLiteOplogStore<TDbContext>>? logger = null) : base(documentStore, conflictResolver, vectorClockService, snapshotMetadataStore)
{
    ArgumentNullException.ThrowIfNull(dbContext);
    _context = dbContext;
    _logger = logger ?? NullLogger<BLiteOplogStore<TDbContext>>.Instance;
}
|
||||
|
||||
/// <inheritdoc />
public override async Task ApplyBatchAsync(IEnumerable<OplogEntry> oplogEntries, CancellationToken cancellationToken = default)
{
    // Intentionally a plain passthrough, not wrapped in a transaction:
    // BLite transactions are committed by each SaveChangesAsync internally.
    // Wrapping in an explicit transaction causes "Cannot rollback committed transaction"
    // because PutDocumentAsync → SaveChangesAsync already commits.
    await base.ApplyBatchAsync(oplogEntries, cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
public override async Task DropAsync(CancellationToken cancellationToken = default)
{
    // Use Id (technical key) for deletion, not Hash (business key).
    var allIds = _context.OplogEntries.FindAll().Select(e => e.Id);
    await _context.OplogEntries.DeleteBulkAsync(allIds);
    await _context.SaveChangesAsync(cancellationToken);

    // Clock state derived from the (now empty) oplog is stale.
    _vectorClock.Invalidate();
}
|
||||
|
||||
/// <inheritdoc />
public override async Task<IEnumerable<OplogEntry>> ExportAsync(CancellationToken cancellationToken = default)
{
    // Reads synchronously from BLite; no awaited work in this override.
    var rows = _context.OplogEntries.FindAll();
    return rows.ToDomain();
}
|
||||
|
||||
/// <inheritdoc />
// Returns the chain segment (startHash, endHash] — exclusive of start, inclusive of end —
// scoped to the node that produced the start entry, ordered by HLC.
public override async Task<IEnumerable<OplogEntry>> GetChainRangeAsync(string startHash, string endHash, CancellationToken cancellationToken = default)
{
    // 1. Resolve both boundary entries; an unknown hash yields an empty result.
    var startRow = _context.OplogEntries.Find(o => o.Hash == startHash).FirstOrDefault();
    var endRow = _context.OplogEntries.Find(o => o.Hash == endHash).FirstOrDefault();

    if (startRow == null || endRow == null) return [];

    // The hash chain is per node: scope the range to the start entry's node.
    var nodeId = startRow.TimestampNodeId;

    // 2. Fetch range (Start < Entry <= End)
    var entities = _context.OplogEntries
        .Find(o => o.TimestampNodeId == nodeId &&
                   ((o.TimestampPhysicalTime > startRow.TimestampPhysicalTime) ||
                    (o.TimestampPhysicalTime == startRow.TimestampPhysicalTime && o.TimestampLogicalCounter > startRow.TimestampLogicalCounter)) &&
                   ((o.TimestampPhysicalTime < endRow.TimestampPhysicalTime) ||
                    (o.TimestampPhysicalTime == endRow.TimestampPhysicalTime && o.TimestampLogicalCounter <= endRow.TimestampLogicalCounter)))
        .OrderBy(o => o.TimestampPhysicalTime)
        .ThenBy(o => o.TimestampLogicalCounter)
        .ToList();

    return entities.ToDomain();
}
|
||||
|
||||
/// <inheritdoc />
public override async Task<OplogEntry?> GetEntryByHashAsync(string hash, CancellationToken cancellationToken = default)
{
    // Hash is a regular indexed property, not the collection key.
    var row = _context.OplogEntries.Find(o => o.Hash == hash).FirstOrDefault();
    return row?.ToDomain();
}
|
||||
|
||||
/// <inheritdoc />
public override async Task<IEnumerable<OplogEntry>> GetOplogAfterAsync(HlcTimestamp timestamp, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default)
{
    // HLC ordering: entries strictly after (PhysicalTime, LogicalCounter).
    var query = _context.OplogEntries
        .Find(o => (o.TimestampPhysicalTime > timestamp.PhysicalTime) ||
                   (o.TimestampPhysicalTime == timestamp.PhysicalTime && o.TimestampLogicalCounter > timestamp.LogicalCounter));

    if (collections is not null)
    {
        var wanted = new HashSet<string>(collections);
        query = query.Where(o => wanted.Contains(o.Collection));
    }

    var ordered = query
        .OrderBy(o => o.TimestampPhysicalTime)
        .ThenBy(o => o.TimestampLogicalCounter);
    return ordered.ToDomain().ToList();
}
|
||||
|
||||
/// <inheritdoc />
public override async Task<IEnumerable<OplogEntry>> GetOplogForNodeAfterAsync(string nodeId, HlcTimestamp since, IEnumerable<string>? collections = null, CancellationToken cancellationToken = default)
{
    // Entries from a single node, strictly after the given HLC timestamp.
    var query = _context.OplogEntries.AsQueryable()
        .Where(o => o.TimestampNodeId == nodeId &&
                    ((o.TimestampPhysicalTime > since.PhysicalTime) ||
                     (o.TimestampPhysicalTime == since.PhysicalTime && o.TimestampLogicalCounter > since.LogicalCounter)));

    if (collections is not null)
    {
        var wanted = new HashSet<string>(collections);
        query = query.Where(o => wanted.Contains(o.Collection));
    }

    var ordered = query
        .OrderBy(o => o.TimestampPhysicalTime)
        .ThenBy(o => o.TimestampLogicalCounter);
    return ordered.ToDomain().ToList();
}
|
||||
|
||||
/// <inheritdoc />
public override async Task ImportAsync(IEnumerable<OplogEntry> items, CancellationToken cancellationToken = default)
{
    // Insert every entry, then commit once. Unlike MergeAsync, no dedupe by hash.
    foreach (var item in items)
    {
        await _context.OplogEntries.InsertAsync(item.ToEntity());
    }
    await _context.SaveChangesAsync(cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
public override async Task MergeAsync(IEnumerable<OplogEntry> items, CancellationToken cancellationToken = default)
{
    foreach (var item in items)
    {
        // Hash is a regular indexed property, not the collection key;
        // only insert entries we have not stored yet (dedupe by hash).
        var alreadyStored = _context.OplogEntries.Find(o => o.Hash == item.Hash).FirstOrDefault() is not null;
        if (!alreadyStored)
        {
            await _context.OplogEntries.InsertAsync(item.ToEntity());
        }
    }
    await _context.SaveChangesAsync(cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
public override async Task PruneOplogAsync(HlcTimestamp cutoff, CancellationToken cancellationToken = default)
{
    // Entries at or before the cutoff (PhysicalTime, LogicalCounter) are removed.
    var toDelete = _context.OplogEntries.AsQueryable()
        .Where(o => (o.TimestampPhysicalTime < cutoff.PhysicalTime) ||
                    (o.TimestampPhysicalTime == cutoff.PhysicalTime && o.TimestampLogicalCounter <= cutoff.LogicalCounter))
        // Fix: delete by Id (technical key) as DropAsync documents — Hash is a
        // business property, not the collection key, so bulk-deleting by Hash
        // targeted the wrong key.
        .Select(o => o.Id)
        .ToList();
    await _context.OplogEntries.DeleteBulkAsync(toDelete);
    // Fix: commit the bulk delete, matching DropAsync/ImportAsync/MergeAsync.
    await _context.SaveChangesAsync(cancellationToken);
}
|
||||
|
||||
/// <inheritdoc />
// Seeds the vector clock from persisted state: snapshot metadata first (base state
// after prune), then the latest oplog entry per node (overrides snapshot if newer).
protected override void InitializeVectorClock()
{
    if (_vectorClock.IsInitialized) return;

    // Early check: if context or OplogEntries is null, skip initialization
    if (_context?.OplogEntries == null)
    {
        _vectorClock.IsInitialized = true;
        return;
    }

    // Step 1: Load from SnapshotMetadata FIRST (base state after prune)
    if (_snapshotMetadataStore != null)
    {
        try
        {
            // NOTE(review): sync-over-async (GetResult) during initialization;
            // assumed safe only while no synchronization context is captured — confirm.
            var snapshots = _snapshotMetadataStore.GetAllSnapshotMetadataAsync().GetAwaiter().GetResult();
            foreach (var snapshot in snapshots)
            {
                _vectorClock.UpdateNode(
                    snapshot.NodeId,
                    new HlcTimestamp(snapshot.TimestampPhysicalTime, snapshot.TimestampLogicalCounter, snapshot.NodeId),
                    snapshot.Hash ?? "");
            }
        }
        catch
        {
            // Ignore errors during initialization - oplog data will be used as fallback
        }
    }

    // Step 2: Load from Oplog (Latest State - Overrides Snapshot if newer)
    // For each node, take its single newest entry by HLC (physical, then logical).
    var latestPerNode = _context.OplogEntries.AsQueryable()
        .GroupBy(o => o.TimestampNodeId)
        .Select(g => new
        {
            NodeId = g.Key,
            MaxEntry = g.OrderByDescending(o => o.TimestampPhysicalTime)
                        .ThenByDescending(o => o.TimestampLogicalCounter)
                        .FirstOrDefault()
        })
        .ToList()
        .Where(x => x.MaxEntry != null)
        .ToList();

    foreach (var node in latestPerNode)
    {
        if (node.MaxEntry != null)
        {
            _vectorClock.UpdateNode(
                node.NodeId,
                new HlcTimestamp(node.MaxEntry.TimestampPhysicalTime, node.MaxEntry.TimestampLogicalCounter, node.MaxEntry.TimestampNodeId),
                node.MaxEntry.Hash ?? "");
        }
    }

    _vectorClock.IsInitialized = true;
}
|
||||
|
||||
/// <inheritdoc />
|
||||
// Maps the domain entry to its entity form and inserts it; the commit is
// left to the caller (matches the base-class write pipeline).
protected override async Task InsertOplogEntryAsync(OplogEntry entry, CancellationToken cancellationToken = default)
    => await _context.OplogEntries.InsertAsync(entry.ToEntity());
|
||||
|
||||
/// <inheritdoc />
|
||||
/// <inheritdoc />
/// <remarks>
/// Returns the hash of the newest oplog entry (by HLC order: physical time,
/// then logical counter) written by <paramref name="nodeId"/>, or null when
/// that node has no entries.
/// </remarks>
protected override Task<string?> QueryLastHashForNodeAsync(string nodeId, CancellationToken cancellationToken = default)
{
    // FIX: the method was declared async but contained no await (compiler
    // warning CS1998) - it runs fully synchronously, so return the result
    // via Task.FromResult and skip the async state machine.
    var lastEntry = _context.OplogEntries.AsQueryable()
        .Where(o => o.TimestampNodeId == nodeId)
        .OrderByDescending(o => o.TimestampPhysicalTime)
        .ThenByDescending(o => o.TimestampLogicalCounter)
        .FirstOrDefault();

    return Task.FromResult(lastEntry?.Hash);
}
|
||||
|
||||
/// <inheritdoc />
|
||||
/// <inheritdoc />
/// <remarks>
/// Looks up the HLC timestamp (wall clock + logical counter) of the oplog entry
/// with the given hash; returns null when no such entry exists.
/// </remarks>
protected override Task<(long Wall, int Logic)?> QueryLastHashTimestampFromOplogAsync(string hash, CancellationToken cancellationToken = default)
{
    // FIX: async without any await (CS1998) - return synchronously via Task.FromResult.
    // Hash is now a regular indexed property, not the Key
    var entry = _context.OplogEntries.Find(o => o.Hash == hash).FirstOrDefault();
    return Task.FromResult<(long Wall, int Logic)?>(
        entry == null ? null : (entry.TimestampPhysicalTime, entry.TimestampLogicalCounter));
}
|
||||
}
|
||||
114
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLitePeerConfigurationStore.cs
Executable file
114
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLitePeerConfigurationStore.cs
Executable file
@@ -0,0 +1,114 @@
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
|
||||
/// Provides a peer configuration store implementation that uses a specified CBDDCDocumentDbContext for persistence
|
||||
/// operations.
|
||||
/// </summary>
|
||||
/// <remarks>This class enables storage, retrieval, and management of remote peer configurations using the provided
|
||||
/// database context. It is typically used in scenarios where peer configurations need to be persisted in a document
|
||||
/// database.</remarks>
|
||||
/// <typeparam name="TDbContext">The type of the document database context used for accessing and managing peer configurations. Must inherit from
|
||||
/// CBDDCDocumentDbContext.</typeparam>
|
||||
public class BLitePeerConfigurationStore<TDbContext> : PeerConfigurationStore where TDbContext : CBDDCDocumentDbContext
{
    /// <summary>
    /// Database context through which all peer configuration access is performed.
    /// </summary>
    protected readonly TDbContext _context;

    /// <summary>
    /// Logger for diagnostic output; a no-op logger is used when none is supplied.
    /// </summary>
    protected readonly ILogger<BLitePeerConfigurationStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes the store with the given context and an optional logger.
    /// </summary>
    /// <param name="context">Context used to access and manage peer configuration data. Cannot be null.</param>
    /// <param name="logger">Optional diagnostic logger; when null a <see cref="NullLogger{T}"/> is used.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="context"/> is null.</exception>
    public BLitePeerConfigurationStore(TDbContext context, ILogger<BLitePeerConfigurationStore<TDbContext>>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<BLitePeerConfigurationStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        _logger.LogWarning("Dropping peer configuration store - all remote peer configurations will be permanently deleted!");

        // Deletion goes through the technical key (Id), not the business key (NodeId).
        var ids = await Task.Run(() => _context.RemotePeerConfigurations.FindAll().Select(p => p.Id).ToList(), cancellationToken);
        await _context.RemotePeerConfigurations.DeleteBulkAsync(ids);
        await _context.SaveChangesAsync(cancellationToken);

        _logger.LogInformation("Peer configuration store dropped successfully.");
    }

    /// <inheritdoc />
    public override async Task<IEnumerable<RemotePeerConfiguration>> ExportAsync(CancellationToken cancellationToken = default)
        => await Task.Run(() => _context.RemotePeerConfigurations.FindAll().ToDomain().ToList(), cancellationToken);

    /// <inheritdoc />
    public override async Task<RemotePeerConfiguration?> GetRemotePeerAsync(string nodeId, CancellationToken cancellationToken)
        // NodeId is an indexed property rather than the key, so look it up with Find.
        => await Task.Run(() => _context.RemotePeerConfigurations.Find(p => p.NodeId == nodeId).FirstOrDefault()?.ToDomain(), cancellationToken);

    /// <inheritdoc />
    public override async Task<IEnumerable<RemotePeerConfiguration>> GetRemotePeersAsync(CancellationToken cancellationToken = default)
        => await Task.Run(() => _context.RemotePeerConfigurations.FindAll().ToDomain().ToList(), cancellationToken);

    /// <inheritdoc />
    public override async Task RemoveRemotePeerAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        // NodeId is an indexed property rather than the key.
        var match = await Task.Run(() => _context.RemotePeerConfigurations.Find(p => p.NodeId == nodeId).FirstOrDefault(), cancellationToken);
        if (match == null)
        {
            _logger.LogWarning("Attempted to remove non-existent remote peer: {NodeId}", nodeId);
            return;
        }

        await _context.RemotePeerConfigurations.DeleteAsync(match.Id);
        await _context.SaveChangesAsync(cancellationToken);
        _logger.LogInformation("Removed remote peer configuration: {NodeId}", nodeId);
    }

    /// <inheritdoc />
    public override async Task SaveRemotePeerAsync(RemotePeerConfiguration peer, CancellationToken cancellationToken = default)
    {
        // NodeId is an indexed property rather than the key; upsert by business key.
        var current = await Task.Run(() => _context.RemotePeerConfigurations.Find(p => p.NodeId == peer.NodeId).FirstOrDefault(), cancellationToken);

        if (current == null)
        {
            await _context.RemotePeerConfigurations.InsertAsync(peer.ToEntity());
        }
        else
        {
            current.NodeId = peer.NodeId;
            current.Address = peer.Address;
            current.Type = (int)peer.Type;
            current.IsEnabled = peer.IsEnabled;
            // Empty interest set is stored as an empty string, not "[]".
            current.InterestsJson = peer.InterestingCollections.Count > 0
                ? System.Text.Json.JsonSerializer.Serialize(peer.InterestingCollections)
                : "";
            await _context.RemotePeerConfigurations.UpdateAsync(current);
        }

        await _context.SaveChangesAsync(cancellationToken);
        _logger.LogInformation("Saved remote peer configuration: {NodeId} ({Type})", peer.NodeId, peer.Type);
    }
}
|
||||
@@ -0,0 +1,321 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
|
||||
/// BLite-backed peer oplog confirmation store.
|
||||
/// </summary>
|
||||
/// <typeparam name="TDbContext">The BLite context type.</typeparam>
|
||||
public class BLitePeerOplogConfirmationStore<TDbContext> : PeerOplogConfirmationStore where TDbContext : CBDDCDocumentDbContext
{
    // Sentinel SourceNodeId used for a peer's "registration" placeholder row.
    // Registration rows track that a peer is known/active but are excluded from
    // confirmation queries (see GetConfirmationsAsync / GetConfirmationsForPeerAsync).
    internal const string RegistrationSourceNodeId = "__peer_registration__";

    private readonly TDbContext _context;
    private readonly ILogger<BLitePeerOplogConfirmationStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes a new instance of the <see cref="BLitePeerOplogConfirmationStore{TDbContext}"/> class.
    /// </summary>
    /// <param name="context">The BLite context.</param>
    /// <param name="logger">An optional logger. When null, a no-op logger is used.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="context"/> is null.</exception>
    public BLitePeerOplogConfirmationStore(
        TDbContext context,
        ILogger<BLitePeerOplogConfirmationStore<TDbContext>>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<BLitePeerOplogConfirmationStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    /// <remarks>
    /// Ensures a registration placeholder row exists (and is active) for the peer.
    /// Idempotent: inserts when missing, reactivates when soft-deleted, otherwise no-op.
    /// </remarks>
    public override async Task EnsurePeerRegisteredAsync(
        string peerNodeId,
        string address,
        PeerType type,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }

        var existing = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId && c.SourceNodeId == RegistrationSourceNodeId)
            .FirstOrDefault();

        if (existing == null)
        {
            // No registration row yet - create one with a zero confirmation point.
            await _context.PeerOplogConfirmations.InsertAsync(new PeerOplogConfirmationEntity
            {
                Id = Guid.NewGuid().ToString(),
                PeerNodeId = peerNodeId,
                SourceNodeId = RegistrationSourceNodeId,
                ConfirmedWall = 0,
                ConfirmedLogic = 0,
                ConfirmedHash = "",
                LastConfirmedUtcMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
                IsActive = true
            });

            await _context.SaveChangesAsync(cancellationToken);
            _logger.LogDebug("Registered peer confirmation tracking for {PeerNodeId} ({Address}, {Type}).", peerNodeId, address, type);
            return;
        }

        // Row exists but was soft-deleted: reactivate it.
        if (!existing.IsActive)
        {
            existing.IsActive = true;
            existing.LastConfirmedUtcMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
            await _context.PeerOplogConfirmations.UpdateAsync(existing);
            await _context.SaveChangesAsync(cancellationToken);
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Records that <paramref name="peerNodeId"/> has confirmed the oplog of
    /// <paramref name="sourceNodeId"/> up to <paramref name="timestamp"/>.
    /// Only moves the confirmation point forward (monotonic in HLC order); the sole
    /// exception is a hash change at the exact same timestamp, which is persisted too.
    /// </remarks>
    public override async Task UpdateConfirmationAsync(
        string peerNodeId,
        string sourceNodeId,
        HlcTimestamp timestamp,
        string hash,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }

        if (string.IsNullOrWhiteSpace(sourceNodeId))
        {
            throw new ArgumentException("Source node id is required.", nameof(sourceNodeId));
        }

        var existing = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId && c.SourceNodeId == sourceNodeId)
            .FirstOrDefault();

        var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

        if (existing == null)
        {
            // First confirmation for this (peer, source) pair: insert a fresh row.
            await _context.PeerOplogConfirmations.InsertAsync(new PeerOplogConfirmationEntity
            {
                Id = Guid.NewGuid().ToString(),
                PeerNodeId = peerNodeId,
                SourceNodeId = sourceNodeId,
                ConfirmedWall = timestamp.PhysicalTime,
                ConfirmedLogic = timestamp.LogicalCounter,
                ConfirmedHash = hash ?? "",
                LastConfirmedUtcMs = nowMs,
                IsActive = true
            });
            await _context.SaveChangesAsync(cancellationToken);
            return;
        }

        var isNewer = IsIncomingTimestampNewer(timestamp, existing);
        // Same HLC point but a different hash still needs to be written through.
        var samePointHashChanged = timestamp.PhysicalTime == existing.ConfirmedWall &&
                                   timestamp.LogicalCounter == existing.ConfirmedLogic &&
                                   !string.Equals(existing.ConfirmedHash, hash, StringComparison.Ordinal);

        // Skip the write when nothing moved forward, the hash is unchanged,
        // and the row does not need reactivating.
        if (!isNewer && !samePointHashChanged && existing.IsActive)
        {
            return;
        }

        existing.ConfirmedWall = timestamp.PhysicalTime;
        existing.ConfirmedLogic = timestamp.LogicalCounter;
        existing.ConfirmedHash = hash ?? "";
        existing.LastConfirmedUtcMs = nowMs;
        existing.IsActive = true;

        await _context.PeerOplogConfirmations.UpdateAsync(existing);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    /// <remarks>Returns all real confirmations; registration placeholder rows are excluded.</remarks>
    public override Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsAsync(CancellationToken cancellationToken = default)
    {
        var confirmations = _context.PeerOplogConfirmations
            .Find(c => c.SourceNodeId != RegistrationSourceNodeId)
            .ToDomain()
            .ToList();

        return Task.FromResult<IEnumerable<PeerOplogConfirmation>>(confirmations);
    }

    /// <inheritdoc />
    /// <remarks>Returns confirmations for one peer; registration placeholder rows are excluded.</remarks>
    public override Task<IEnumerable<PeerOplogConfirmation>> GetConfirmationsForPeerAsync(
        string peerNodeId,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }

        var confirmations = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId && c.SourceNodeId != RegistrationSourceNodeId)
            .ToDomain()
            .ToList();

        return Task.FromResult<IEnumerable<PeerOplogConfirmation>>(confirmations);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Soft-deletes all tracking rows for the peer (sets IsActive = false) rather than
    /// removing them, preserving confirmation history.
    /// </remarks>
    public override async Task RemovePeerTrackingAsync(string peerNodeId, CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(peerNodeId))
        {
            throw new ArgumentException("Peer node id is required.", nameof(peerNodeId));
        }

        var matches = _context.PeerOplogConfirmations
            .Find(c => c.PeerNodeId == peerNodeId)
            .ToList();

        if (matches.Count == 0)
        {
            return;
        }

        var nowMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        foreach (var match in matches)
        {
            // Already-inactive rows are left untouched (keeps their original timestamp).
            if (!match.IsActive)
            {
                continue;
            }

            match.IsActive = false;
            match.LastConfirmedUtcMs = nowMs;
            await _context.PeerOplogConfirmations.UpdateAsync(match);
        }

        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    /// <remarks>Distinct peer ids with at least one active row (registration rows included).</remarks>
    public override Task<IEnumerable<string>> GetActiveTrackedPeersAsync(CancellationToken cancellationToken = default)
    {
        var peers = _context.PeerOplogConfirmations
            .Find(c => c.IsActive)
            .Select(c => c.PeerNodeId)
            .Distinct(StringComparer.Ordinal)
            .ToList();

        return Task.FromResult<IEnumerable<string>>(peers);
    }

    /// <inheritdoc />
    /// <remarks>Hard-deletes every row, including registration placeholders.</remarks>
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        var allIds = _context.PeerOplogConfirmations.FindAll().Select(c => c.Id).ToList();
        await _context.PeerOplogConfirmations.DeleteBulkAsync(allIds);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    /// <remarks>Exports every row, including registration placeholders.</remarks>
    public override Task<IEnumerable<PeerOplogConfirmation>> ExportAsync(CancellationToken cancellationToken = default)
    {
        var exported = _context.PeerOplogConfirmations
            .FindAll()
            .ToDomain()
            .ToList();

        return Task.FromResult<IEnumerable<PeerOplogConfirmation>>(exported);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Last-write-wins import: incoming items overwrite existing rows for the same
    /// (peer, source) pair unconditionally. Contrast with <see cref="MergeAsync"/>.
    /// </remarks>
    public override async Task ImportAsync(IEnumerable<PeerOplogConfirmation> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            var existing = _context.PeerOplogConfirmations
                .Find(c => c.PeerNodeId == item.PeerNodeId && c.SourceNodeId == item.SourceNodeId)
                .FirstOrDefault();

            if (existing == null)
            {
                await _context.PeerOplogConfirmations.InsertAsync(item.ToEntity());
                continue;
            }

            existing.ConfirmedWall = item.ConfirmedWall;
            existing.ConfirmedLogic = item.ConfirmedLogic;
            existing.ConfirmedHash = item.ConfirmedHash;
            existing.LastConfirmedUtcMs = item.LastConfirmedUtc.ToUnixTimeMilliseconds();
            existing.IsActive = item.IsActive;
            await _context.PeerOplogConfirmations.UpdateAsync(existing);
        }

        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    /// <remarks>
    /// Conflict-aware merge: the confirmation point only advances when the incoming
    /// HLC timestamp is newer; LastConfirmedUtcMs only advances forward in time;
    /// IsActive is taken from the incoming item when it differs. Rows are written
    /// only when something actually changed.
    /// </remarks>
    public override async Task MergeAsync(IEnumerable<PeerOplogConfirmation> items, CancellationToken cancellationToken = default)
    {
        foreach (var item in items)
        {
            var existing = _context.PeerOplogConfirmations
                .Find(c => c.PeerNodeId == item.PeerNodeId && c.SourceNodeId == item.SourceNodeId)
                .FirstOrDefault();

            if (existing == null)
            {
                await _context.PeerOplogConfirmations.InsertAsync(item.ToEntity());
                continue;
            }

            var changed = false;
            var incomingTimestamp = new HlcTimestamp(item.ConfirmedWall, item.ConfirmedLogic, item.SourceNodeId);
            var existingTimestamp = new HlcTimestamp(existing.ConfirmedWall, existing.ConfirmedLogic, existing.SourceNodeId);

            // Advance the confirmation point only when the incoming one is strictly newer.
            if (incomingTimestamp > existingTimestamp)
            {
                existing.ConfirmedWall = item.ConfirmedWall;
                existing.ConfirmedLogic = item.ConfirmedLogic;
                existing.ConfirmedHash = item.ConfirmedHash;
                changed = true;
            }

            var incomingLastConfirmedMs = item.LastConfirmedUtc.ToUnixTimeMilliseconds();
            if (incomingLastConfirmedMs > existing.LastConfirmedUtcMs)
            {
                existing.LastConfirmedUtcMs = incomingLastConfirmedMs;
                changed = true;
            }

            if (existing.IsActive != item.IsActive)
            {
                existing.IsActive = item.IsActive;
                changed = true;
            }

            if (changed)
            {
                await _context.PeerOplogConfirmations.UpdateAsync(existing);
            }
        }

        await _context.SaveChangesAsync(cancellationToken);
    }

    // True when the incoming HLC timestamp is strictly newer than the stored
    // confirmation point (physical time first, logical counter as tie-breaker).
    private static bool IsIncomingTimestampNewer(HlcTimestamp incomingTimestamp, PeerOplogConfirmationEntity existing)
    {
        if (incomingTimestamp.PhysicalTime > existing.ConfirmedWall)
        {
            return true;
        }

        if (incomingTimestamp.PhysicalTime == existing.ConfirmedWall &&
            incomingTimestamp.LogicalCounter > existing.ConfirmedLogic)
        {
            return true;
        }

        return false;
    }
}
|
||||
145
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteSnapshotMetadataStore.cs
Executable file
145
src/ZB.MOM.WW.CBDDC.Persistence/BLite/BLiteSnapshotMetadataStore.cs
Executable file
@@ -0,0 +1,145 @@
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
|
||||
/// Provides a snapshot metadata store implementation that uses a specified CBDDCDocumentDbContext for persistence
|
||||
/// operations.
|
||||
/// </summary>
|
||||
/// <remarks>This class enables storage, retrieval, and management of snapshot metadata using the provided
|
||||
/// database context. It is typically used in scenarios where snapshot metadata needs to be persisted in a document
|
||||
/// database. The class supports bulk operations and incremental updates, and can be extended for custom database
|
||||
/// contexts. Thread safety depends on the underlying context implementation.</remarks>
|
||||
/// <typeparam name="TDbContext">The type of the document database context used for accessing and managing snapshot metadata. Must inherit from
|
||||
/// CBDDCDocumentDbContext.</typeparam>
|
||||
public class BLiteSnapshotMetadataStore<TDbContext> : SnapshotMetadataStore where TDbContext : CBDDCDocumentDbContext
{
    /// <summary>
    /// Database context through which all snapshot metadata access is performed.
    /// </summary>
    /// <remarks>Lifetime of the context is owned by the application, not this store.</remarks>
    protected readonly TDbContext _context;

    /// <summary>
    /// Logger for diagnostic output; a no-op logger is used when none is supplied.
    /// </summary>
    protected readonly ILogger<BLiteSnapshotMetadataStore<TDbContext>> _logger;

    /// <summary>
    /// Initializes the store with the given context and an optional logger.
    /// </summary>
    /// <param name="context">Context used to access snapshot metadata. Cannot be null.</param>
    /// <param name="logger">Optional diagnostic logger; when null a <see cref="NullLogger{T}"/> is used.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="context"/> is null.</exception>
    public BLiteSnapshotMetadataStore(TDbContext context, ILogger<BLiteSnapshotMetadataStore<TDbContext>>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<BLiteSnapshotMetadataStore<TDbContext>>.Instance;
    }

    /// <inheritdoc />
    public override async Task DropAsync(CancellationToken cancellationToken = default)
    {
        // Deletion goes through the technical key (Id), not the business key (NodeId).
        var ids = await Task.Run(() => _context.SnapshotMetadatas.FindAll().Select(s => s.Id).ToList(), cancellationToken);
        await _context.SnapshotMetadatas.DeleteBulkAsync(ids);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task<IEnumerable<SnapshotMetadata>> ExportAsync(CancellationToken cancellationToken = default)
        => await Task.Run(() => _context.SnapshotMetadatas.FindAll().ToDomain().ToList(), cancellationToken);

    /// <inheritdoc />
    public override async Task<string?> GetSnapshotHashAsync(string nodeId, CancellationToken cancellationToken = default)
    {
        // NodeId is an indexed property rather than the key, so look it up with Find.
        var entity = await Task.Run(() => _context.SnapshotMetadatas.Find(s => s.NodeId == nodeId).FirstOrDefault(), cancellationToken);
        return entity?.Hash;
    }

    /// <inheritdoc />
    public override async Task ImportAsync(IEnumerable<SnapshotMetadata> items, CancellationToken cancellationToken = default)
    {
        // Unconditional inserts, committed once at the end.
        foreach (var item in items)
        {
            await _context.SnapshotMetadatas.InsertAsync(item.ToEntity());
        }

        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task InsertSnapshotMetadataAsync(SnapshotMetadata metadata, CancellationToken cancellationToken = default)
    {
        await _context.SnapshotMetadatas.InsertAsync(metadata.ToEntity());
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task MergeAsync(IEnumerable<SnapshotMetadata> items, CancellationToken cancellationToken = default)
    {
        foreach (var incoming in items)
        {
            // NodeId is an indexed property rather than the key; upsert by business key.
            var current = await Task.Run(() => _context.SnapshotMetadatas.Find(s => s.NodeId == incoming.NodeId).FirstOrDefault(), cancellationToken);

            if (current == null)
            {
                await _context.SnapshotMetadatas.InsertAsync(incoming.ToEntity());
                continue;
            }

            // Update only when the incoming HLC timestamp is strictly newer
            // (physical time first, logical counter as tie-breaker).
            var incomingIsNewer =
                incoming.TimestampPhysicalTime > current.TimestampPhysicalTime ||
                (incoming.TimestampPhysicalTime == current.TimestampPhysicalTime &&
                 incoming.TimestampLogicalCounter > current.TimestampLogicalCounter);

            if (incomingIsNewer)
            {
                current.NodeId = incoming.NodeId;
                current.TimestampPhysicalTime = incoming.TimestampPhysicalTime;
                current.TimestampLogicalCounter = incoming.TimestampLogicalCounter;
                current.Hash = incoming.Hash;
                await _context.SnapshotMetadatas.UpdateAsync(current);
            }
        }

        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task UpdateSnapshotMetadataAsync(SnapshotMetadata existingMeta, CancellationToken cancellationToken)
    {
        // NodeId is an indexed property rather than the key - locate the row by NodeId;
        // silently a no-op when the row does not exist.
        var current = await Task.Run(() => _context.SnapshotMetadatas.Find(s => s.NodeId == existingMeta.NodeId).FirstOrDefault(), cancellationToken);
        if (current == null)
        {
            return;
        }

        current.NodeId = existingMeta.NodeId;
        current.TimestampPhysicalTime = existingMeta.TimestampPhysicalTime;
        current.TimestampLogicalCounter = existingMeta.TimestampLogicalCounter;
        current.Hash = existingMeta.Hash;
        await _context.SnapshotMetadatas.UpdateAsync(current);
        await _context.SaveChangesAsync(cancellationToken);
    }

    /// <inheritdoc />
    public override async Task<SnapshotMetadata?> GetSnapshotMetadataAsync(string nodeId, CancellationToken cancellationToken = default)
        // NodeId is an indexed property rather than the key.
        => await Task.Run(() => _context.SnapshotMetadatas.Find(s => s.NodeId == nodeId).FirstOrDefault()?.ToDomain(), cancellationToken);

    /// <inheritdoc />
    public override async Task<IEnumerable<SnapshotMetadata>> GetAllSnapshotMetadataAsync(CancellationToken cancellationToken = default)
        => await Task.Run(() => _context.SnapshotMetadatas.FindAll().ToDomain().ToList(), cancellationToken);
}
|
||||
102
src/ZB.MOM.WW.CBDDC.Persistence/BLite/CBDDCBLiteExtensions.cs
Executable file
102
src/ZB.MOM.WW.CBDDC.Persistence/BLite/CBDDCBLiteExtensions.cs
Executable file
@@ -0,0 +1,102 @@
|
||||
using ZB.MOM.WW.CBDDC.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Core.Sync;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
|
||||
/// Extension methods for configuring BLite persistence for ZB.MOM.WW.CBDDC.
|
||||
/// </summary>
|
||||
/// <summary>
/// Extension methods for configuring BLite persistence for ZB.MOM.WW.CBDDC.
/// </summary>
public static class CBDDCBLiteExtensions
{
    /// <summary>
    /// Adds BLite persistence to CBDDC using a custom DbContext and DocumentStore implementation.
    /// </summary>
    /// <typeparam name="TDbContext">The type of the BLite document database context. Must inherit from CBDDCDocumentDbContext.</typeparam>
    /// <typeparam name="TDocumentStore">The type of the document store implementation. Must implement IDocumentStore.</typeparam>
    /// <param name="services">The service collection to add the services to.</param>
    /// <param name="contextFactory">A factory function that creates the DbContext instance.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="services"/> or <paramref name="contextFactory"/> is null.</exception>
    public static IServiceCollection AddCBDDCBLite<TDbContext, TDocumentStore>(
        this IServiceCollection services,
        Func<IServiceProvider, TDbContext> contextFactory)
        where TDbContext : CBDDCDocumentDbContext
        where TDocumentStore : class, IDocumentStore
    {
        // FIX(dedup): the two overloads previously duplicated the entire registration
        // list; delegate the shared part to the single-generic overload. TryAdd* is
        // idempotent, so the resulting registration set is identical to before.
        AddCBDDCBLite(services, contextFactory);

        // Vector Clock Service (shared between DocumentStore and OplogStore)
        services.TryAddSingleton<IVectorClockService, VectorClockService>();

        // Register the DocumentStore implementation
        services.TryAddSingleton<IDocumentStore, TDocumentStore>();

        return services;
    }

    /// <summary>
    /// Adds BLite persistence to CBDDC using a custom DbContext (without explicit DocumentStore type).
    /// </summary>
    /// <typeparam name="TDbContext">The type of the BLite document database context. Must inherit from CBDDCDocumentDbContext.</typeparam>
    /// <param name="services">The service collection to add the services to.</param>
    /// <param name="contextFactory">A factory function that creates the DbContext instance.</param>
    /// <returns>The service collection for chaining.</returns>
    /// <remarks>You must manually register IDocumentStore after calling this method.</remarks>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="services"/> or <paramref name="contextFactory"/> is null.</exception>
    public static IServiceCollection AddCBDDCBLite<TDbContext>(
        this IServiceCollection services,
        Func<IServiceProvider, TDbContext> contextFactory)
        where TDbContext : CBDDCDocumentDbContext
    {
        if (services == null) throw new ArgumentNullException(nameof(services));
        if (contextFactory == null) throw new ArgumentNullException(nameof(contextFactory));

        // Register the DbContext as singleton (must match store lifetime),
        // also exposed under its base type for consumers of the abstract context.
        services.TryAddSingleton<TDbContext>(contextFactory);
        services.TryAddSingleton<CBDDCDocumentDbContext>(sp => sp.GetRequiredService<TDbContext>());

        // Default Conflict Resolver (Last Write Wins) if none is provided
        services.TryAddSingleton<IConflictResolver, LastWriteWinsConflictResolver>();

        // Register BLite Stores (all Singleton)
        services.TryAddSingleton<IOplogStore, BLiteOplogStore<TDbContext>>();
        services.TryAddSingleton<IPeerConfigurationStore, BLitePeerConfigurationStore<TDbContext>>();
        services.TryAddSingleton<IPeerOplogConfirmationStore, BLitePeerOplogConfirmationStore<TDbContext>>();
        services.TryAddSingleton<ISnapshotMetadataStore, BLiteSnapshotMetadataStore<TDbContext>>();
        services.TryAddSingleton<IDocumentMetadataStore, BLiteDocumentMetadataStore<TDbContext>>();

        // Register the SnapshotService (uses the generic SnapshotStore from ZB.MOM.WW.CBDDC.Persistence)
        services.TryAddSingleton<ISnapshotService, SnapshotStore>();

        return services;
    }
}
|
||||
|
||||
/// <summary>
/// Configuration options for the BLite persistence layer.
/// </summary>
public class BLiteOptions
{
    /// <summary>
    /// Gets or sets the path (absolute or relative) of the BLite database file on disk.
    /// </summary>
    public string DatabasePath { get; set; } = string.Empty;
}
|
||||
105
src/ZB.MOM.WW.CBDDC.Persistence/BLite/CBDDCDocumentDbContext.cs
Executable file
105
src/ZB.MOM.WW.CBDDC.Persistence/BLite/CBDDCDocumentDbContext.cs
Executable file
@@ -0,0 +1,105 @@
|
||||
using BLite.Core;
|
||||
using BLite.Core.Collections;
|
||||
using BLite.Core.Metadata;
|
||||
using BLite.Core.Storage;
|
||||
using ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite;
|
||||
|
||||
/// <summary>
/// BLite document database context for CBDDC persistence. Exposes the collections
/// used by the BLite-backed stores (oplog, snapshot metadata, remote peer
/// configuration, document metadata, peer oplog confirmations) and configures
/// their keys and indexes in <see cref="OnModelCreating"/>.
/// </summary>
public partial class CBDDCDocumentDbContext : DocumentDbContext
{
    /// <summary>
    /// Gets the collection of operation log entries associated with this instance.
    /// </summary>
    /// <remarks>The collection provides access to all recorded operation log (oplog) entries, which can be
    /// used to track changes or replicate operations. The collection is read-only; entries cannot be added or removed
    /// directly through this property.</remarks>
    public DocumentCollection<string, OplogEntity> OplogEntries { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of snapshot metadata associated with the document.
    /// </summary>
    public DocumentCollection<string, SnapshotMetadataEntity> SnapshotMetadatas { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of remote peer configurations associated with this instance.
    /// </summary>
    /// <remarks>Use this collection to access or enumerate the configuration settings for each remote peer.
    /// The collection is read-only; to modify peer configurations, use the appropriate methods provided by the
    /// containing class.</remarks>
    public DocumentCollection<string, RemotePeerEntity> RemotePeerConfigurations { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of document metadata for sync tracking.
    /// </summary>
    /// <remarks>Stores HLC timestamps and deleted state for each document without modifying application entities.
    /// Used to track document versions for incremental sync instead of full snapshots.</remarks>
    public DocumentCollection<string, DocumentMetadataEntity> DocumentMetadatas { get; private set; } = null!;

    /// <summary>
    /// Gets the collection of peer oplog confirmation records for pruning safety tracking.
    /// </summary>
    public DocumentCollection<string, PeerOplogConfirmationEntity> PeerOplogConfirmations { get; private set; } = null!;

    // NOTE(review): collection properties are declared null! and presumably
    // populated by the base DocumentDbContext at startup — confirm against BLite docs.

    /// <summary>
    /// Initializes a new instance of the CBDDCDocumentDbContext class using the specified database file path.
    /// </summary>
    /// <param name="databasePath">The file system path to the database file to be used by the context. Cannot be null or empty.</param>
    public CBDDCDocumentDbContext(string databasePath) : base(databasePath)
    {
    }

    /// <summary>
    /// Initializes a new instance of the CBDDCDocumentDbContext class using the specified database path and page file
    /// configuration.
    /// </summary>
    /// <param name="databasePath">The file system path to the database file. This value cannot be null or empty.</param>
    /// <param name="config">The configuration settings for the page file. Specifies options that control how the database pages are managed.</param>
    public CBDDCDocumentDbContext(string databasePath, PageFileConfig config) : base(databasePath, config)
    {
    }

    /// <inheritdoc />
    /// <remarks>
    /// Each entity uses an auto-generated GUID Id as the technical key; uniqueness
    /// of the real (business) identity is enforced through a unique index instead.
    /// </remarks>
    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
        base.OnModelCreating(modelBuilder);

        // OplogEntries: Use Id as technical key, Hash as unique business key
        modelBuilder.Entity<OplogEntity>()
            .ToCollection("OplogEntries")
            .HasKey(e => e.Id)
            .HasIndex(e => e.Hash, unique: true) // Hash is unique business key
            .HasIndex(e => new { e.TimestampPhysicalTime, e.TimestampLogicalCounter, e.TimestampNodeId })
            .HasIndex(e => e.Collection);

        // SnapshotMetadatas: Use Id as technical key, NodeId as unique business key
        modelBuilder.Entity<SnapshotMetadataEntity>()
            .ToCollection("SnapshotMetadatas")
            .HasKey(e => e.Id)
            .HasIndex(e => e.NodeId, unique: true) // NodeId is unique business key
            .HasIndex(e => new { e.TimestampPhysicalTime, e.TimestampLogicalCounter });

        // RemotePeerConfigurations: Use Id as technical key, NodeId as unique business key
        modelBuilder.Entity<RemotePeerEntity>()
            .ToCollection("RemotePeerConfigurations")
            .HasKey(e => e.Id)
            .HasIndex(e => e.NodeId, unique: true) // NodeId is unique business key
            .HasIndex(e => e.IsEnabled);

        // DocumentMetadatas: Use Id as technical key, Collection+Key as unique composite business key
        modelBuilder.Entity<DocumentMetadataEntity>()
            .ToCollection("DocumentMetadatas")
            .HasKey(e => e.Id)
            .HasIndex(e => new { e.Collection, e.Key }, unique: true) // Composite business key
            .HasIndex(e => new { e.HlcPhysicalTime, e.HlcLogicalCounter, e.HlcNodeId })
            .HasIndex(e => e.Collection);

        // PeerOplogConfirmations: Use Id as technical key, PeerNodeId+SourceNodeId as unique business key
        modelBuilder.Entity<PeerOplogConfirmationEntity>()
            .ToCollection("PeerOplogConfirmations")
            .HasKey(e => e.Id)
            .HasIndex(e => new { e.PeerNodeId, e.SourceNodeId }, unique: true)
            .HasIndex(e => e.IsActive)
            .HasIndex(e => new { e.SourceNodeId, e.ConfirmedWall, e.ConfirmedLogic });
    }
}
|
||||
47
src/ZB.MOM.WW.CBDDC.Persistence/BLite/Entities/DocumentMetadataEntity.cs
Executable file
47
src/ZB.MOM.WW.CBDDC.Persistence/BLite/Entities/DocumentMetadataEntity.cs
Executable file
@@ -0,0 +1,47 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
|
||||
/// <summary>
/// BLite persistence entity holding per-document sync-tracking metadata.
/// Records the HLC timestamp and tombstone flag for a document without
/// touching the application's own entity types.
/// </summary>
public class DocumentMetadataEntity
{
    /// <summary>
    /// Gets or sets the auto-generated GUID string used as the technical primary key.
    /// </summary>
    [Key]
    public string Id { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the owning collection name (first half of the composite business key).
    /// </summary>
    public string Collection { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the document key within the collection (second half of the composite business key).
    /// </summary>
    public string Key { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the wall-clock (physical) component of the HLC timestamp.
    /// </summary>
    public long HlcPhysicalTime { get; set; }

    /// <summary>
    /// Gets or sets the logical-counter component of the HLC timestamp.
    /// </summary>
    public int HlcLogicalCounter { get; set; }

    /// <summary>
    /// Gets or sets the identifier of the node that last modified the document.
    /// </summary>
    public string HlcNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets a value indicating whether the document is a tombstone (deleted).
    /// </summary>
    public bool IsDeleted { get; set; }
}
|
||||
243
src/ZB.MOM.WW.CBDDC.Persistence/BLite/Entities/EntityMappers.cs
Executable file
243
src/ZB.MOM.WW.CBDDC.Persistence/BLite/Entities/EntityMappers.cs
Executable file
@@ -0,0 +1,243 @@
|
||||
using System.Text.Json;
|
||||
using ZB.MOM.WW.CBDDC.Core;
|
||||
using ZB.MOM.WW.CBDDC.Core.Network;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
|
||||
/// <summary>
/// Extension methods that translate between BLite persistence entities and
/// their CBDDC domain-model counterparts.
/// </summary>
public static class EntityMappers
{
    #region OplogEntity Mappers

    /// <summary>
    /// Maps an <see cref="OplogEntry"/> domain model to a persistable <see cref="OplogEntity"/>.
    /// </summary>
    /// <param name="entry">The oplog entry to convert.</param>
    public static OplogEntity ToEntity(this OplogEntry entry) => new()
    {
        // Technical key is auto-generated; Hash remains the unique business key.
        Id = Guid.NewGuid().ToString(),
        Collection = entry.Collection,
        Key = entry.Key,
        Operation = (int)entry.Operation,
        // Empty string instead of null avoids BLite BSON serialization issues (Delete has no payload).
        PayloadJson = entry.Payload?.GetRawText() ?? "",
        TimestampPhysicalTime = entry.Timestamp.PhysicalTime,
        TimestampLogicalCounter = entry.Timestamp.LogicalCounter,
        TimestampNodeId = entry.Timestamp.NodeId,
        Hash = entry.Hash,
        PreviousHash = entry.PreviousHash
    };

    /// <summary>
    /// Maps a persisted <see cref="OplogEntity"/> back to an <see cref="OplogEntry"/> domain model.
    /// </summary>
    /// <param name="entity">The persisted oplog entity to convert.</param>
    public static OplogEntry ToDomain(this OplogEntity entity)
    {
        // An empty payload string round-trips to a null payload (Delete operations).
        JsonElement? payload = string.IsNullOrEmpty(entity.PayloadJson)
            ? null
            : (JsonElement?)JsonSerializer.Deserialize<JsonElement>(entity.PayloadJson);

        return new OplogEntry(
            entity.Collection,
            entity.Key,
            (OperationType)entity.Operation,
            payload,
            new HlcTimestamp(entity.TimestampPhysicalTime, entity.TimestampLogicalCounter, entity.TimestampNodeId),
            entity.PreviousHash,
            entity.Hash);
    }

    /// <summary>
    /// Lazily maps a sequence of <see cref="OplogEntity"/> instances to domain models.
    /// </summary>
    /// <param name="entities">The oplog entities to convert.</param>
    public static IEnumerable<OplogEntry> ToDomain(this IEnumerable<OplogEntity> entities) =>
        entities.Select(static x => x.ToDomain());

    #endregion

    #region SnapshotMetadataEntity Mappers

    /// <summary>
    /// Maps a <see cref="SnapshotMetadata"/> domain model to a persistable <see cref="SnapshotMetadataEntity"/>.
    /// </summary>
    /// <param name="metadata">The snapshot metadata to convert.</param>
    public static SnapshotMetadataEntity ToEntity(this SnapshotMetadata metadata) => new()
    {
        // Technical key is auto-generated; NodeId remains the unique business key.
        Id = Guid.NewGuid().ToString(),
        NodeId = metadata.NodeId,
        TimestampPhysicalTime = metadata.TimestampPhysicalTime,
        TimestampLogicalCounter = metadata.TimestampLogicalCounter,
        Hash = metadata.Hash
    };

    /// <summary>
    /// Maps a persisted <see cref="SnapshotMetadataEntity"/> back to a <see cref="SnapshotMetadata"/> domain model.
    /// </summary>
    /// <param name="entity">The persisted snapshot metadata entity to convert.</param>
    public static SnapshotMetadata ToDomain(this SnapshotMetadataEntity entity) => new()
    {
        NodeId = entity.NodeId,
        TimestampPhysicalTime = entity.TimestampPhysicalTime,
        TimestampLogicalCounter = entity.TimestampLogicalCounter,
        Hash = entity.Hash
    };

    /// <summary>
    /// Lazily maps a sequence of <see cref="SnapshotMetadataEntity"/> instances to domain models.
    /// </summary>
    /// <param name="entities">The snapshot metadata entities to convert.</param>
    public static IEnumerable<SnapshotMetadata> ToDomain(this IEnumerable<SnapshotMetadataEntity> entities) =>
        entities.Select(static x => x.ToDomain());

    #endregion

    #region RemotePeerEntity Mappers

    /// <summary>
    /// Maps a <see cref="RemotePeerConfiguration"/> domain model to a persistable <see cref="RemotePeerEntity"/>.
    /// </summary>
    /// <param name="config">The remote peer configuration to convert.</param>
    public static RemotePeerEntity ToEntity(this RemotePeerConfiguration config) => new()
    {
        // Technical key is auto-generated; NodeId remains the unique business key.
        Id = Guid.NewGuid().ToString(),
        NodeId = config.NodeId,
        Address = config.Address,
        Type = (int)config.Type,
        IsEnabled = config.IsEnabled,
        // An empty interest list is stored as "" rather than a serialized empty array.
        InterestsJson = config.InterestingCollections.Count > 0
            ? JsonSerializer.Serialize(config.InterestingCollections)
            : ""
    };

    /// <summary>
    /// Maps a persisted <see cref="RemotePeerEntity"/> back to a <see cref="RemotePeerConfiguration"/> domain model.
    /// </summary>
    /// <param name="entity">The persisted remote peer entity to convert.</param>
    public static RemotePeerConfiguration ToDomain(this RemotePeerEntity entity)
    {
        var config = new RemotePeerConfiguration
        {
            NodeId = entity.NodeId,
            Address = entity.Address,
            Type = (PeerType)entity.Type,
            IsEnabled = entity.IsEnabled
        };

        // Only overwrite the default interest list when something was persisted.
        if (entity.InterestsJson is { Length: > 0 })
        {
            config.InterestingCollections =
                JsonSerializer.Deserialize<List<string>>(entity.InterestsJson) ?? [];
        }

        return config;
    }

    /// <summary>
    /// Lazily maps a sequence of <see cref="RemotePeerEntity"/> instances to domain models.
    /// </summary>
    /// <param name="entities">The remote peer entities to convert.</param>
    public static IEnumerable<RemotePeerConfiguration> ToDomain(this IEnumerable<RemotePeerEntity> entities) =>
        entities.Select(static x => x.ToDomain());

    #endregion

    #region PeerOplogConfirmationEntity Mappers

    /// <summary>
    /// Maps a <see cref="PeerOplogConfirmation"/> domain model to a persistable BLite entity.
    /// </summary>
    /// <param name="confirmation">The confirmation to convert.</param>
    public static PeerOplogConfirmationEntity ToEntity(this PeerOplogConfirmation confirmation) => new()
    {
        Id = Guid.NewGuid().ToString(),
        PeerNodeId = confirmation.PeerNodeId,
        SourceNodeId = confirmation.SourceNodeId,
        ConfirmedWall = confirmation.ConfirmedWall,
        ConfirmedLogic = confirmation.ConfirmedLogic,
        ConfirmedHash = confirmation.ConfirmedHash,
        // DateTimeOffset is flattened to unix milliseconds for storage.
        LastConfirmedUtcMs = confirmation.LastConfirmedUtc.ToUnixTimeMilliseconds(),
        IsActive = confirmation.IsActive
    };

    /// <summary>
    /// Maps a persisted peer oplog confirmation entity back to its domain model.
    /// </summary>
    /// <param name="entity">The entity to convert.</param>
    public static PeerOplogConfirmation ToDomain(this PeerOplogConfirmationEntity entity) => new()
    {
        PeerNodeId = entity.PeerNodeId,
        SourceNodeId = entity.SourceNodeId,
        ConfirmedWall = entity.ConfirmedWall,
        ConfirmedLogic = entity.ConfirmedLogic,
        ConfirmedHash = entity.ConfirmedHash,
        LastConfirmedUtc = DateTimeOffset.FromUnixTimeMilliseconds(entity.LastConfirmedUtcMs),
        IsActive = entity.IsActive
    };

    /// <summary>
    /// Lazily maps a sequence of peer oplog confirmation entities to domain models.
    /// </summary>
    /// <param name="entities">The entities to convert.</param>
    public static IEnumerable<PeerOplogConfirmation> ToDomain(this IEnumerable<PeerOplogConfirmationEntity> entities) =>
        entities.Select(static x => x.ToDomain());

    #endregion

    #region DocumentMetadataEntity Helpers

    /// <summary>
    /// Builds a <see cref="DocumentMetadataEntity"/> from collection, key, timestamp, and deleted state.
    /// Used for tracking document sync state.
    /// </summary>
    /// <param name="collection">The collection name that owns the document.</param>
    /// <param name="key">The document key within the collection.</param>
    /// <param name="timestamp">The hybrid logical clock timestamp for the document state.</param>
    /// <param name="isDeleted">Indicates whether the document is marked as deleted.</param>
    public static DocumentMetadataEntity CreateDocumentMetadata(string collection, string key, HlcTimestamp timestamp, bool isDeleted = false) => new()
    {
        Id = Guid.NewGuid().ToString(),
        Collection = collection,
        Key = key,
        HlcPhysicalTime = timestamp.PhysicalTime,
        HlcLogicalCounter = timestamp.LogicalCounter,
        HlcNodeId = timestamp.NodeId,
        IsDeleted = isDeleted
    };

    #endregion
}
|
||||
61
src/ZB.MOM.WW.CBDDC.Persistence/BLite/Entities/OplogEntity.cs
Executable file
61
src/ZB.MOM.WW.CBDDC.Persistence/BLite/Entities/OplogEntity.cs
Executable file
@@ -0,0 +1,61 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
|
||||
/// <summary>
/// BLite persistence entity for a single operation log (oplog) entry.
/// </summary>
public class OplogEntity
{
    /// <summary>
    /// Gets or sets the auto-generated GUID string used as the technical primary key.
    /// </summary>
    [Key]
    public string Id { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the name of the collection the operation applies to.
    /// </summary>
    public string Collection { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the key of the affected document.
    /// </summary>
    public string Key { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the operation type (0 = Put, 1 = Delete).
    /// </summary>
    public int Operation { get; set; }

    /// <summary>
    /// Gets or sets the serialized JSON payload; empty string for Delete operations.
    /// </summary>
    public string PayloadJson { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the wall-clock (physical) component of the HLC timestamp.
    /// </summary>
    public long TimestampPhysicalTime { get; set; }

    /// <summary>
    /// Gets or sets the logical-counter component of the HLC timestamp.
    /// </summary>
    public int TimestampLogicalCounter { get; set; }

    /// <summary>
    /// Gets or sets the node-identifier component of the HLC timestamp.
    /// </summary>
    public string TimestampNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the cryptographic hash of this entry (unique business key).
    /// </summary>
    public string Hash { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the hash of the preceding entry in the oplog chain.
    /// </summary>
    public string PreviousHash { get; set; } = string.Empty;
}
|
||||
@@ -0,0 +1,50 @@
|
||||
using System.ComponentModel.DataAnnotations;
|
||||
|
||||
namespace ZB.MOM.WW.CBDDC.Persistence.BLite.Entities;
|
||||
|
||||
/// <summary>
/// BLite persistence entity recording how far a peer has confirmed the oplog
/// (a confirmation watermark), used for safe oplog pruning.
/// </summary>
public class PeerOplogConfirmationEntity
{
    /// <summary>
    /// Gets or sets the auto-generated technical primary key for this record.
    /// </summary>
    [Key]
    public string Id { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the identifier of the peer node being tracked.
    /// </summary>
    public string PeerNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the identifier of the source node this confirmation refers to.
    /// </summary>
    public string SourceNodeId { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the wall-clock (physical) component of the confirmed HLC timestamp.
    /// </summary>
    public long ConfirmedWall { get; set; }

    /// <summary>
    /// Gets or sets the logical component of the confirmed HLC timestamp.
    /// </summary>
    public int ConfirmedLogic { get; set; }

    /// <summary>
    /// Gets or sets the hash value that has been confirmed.
    /// </summary>
    public string ConfirmedHash { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the UTC instant of the most recent update, in unix milliseconds.
    /// </summary>
    public long LastConfirmedUtcMs { get; set; }

    /// <summary>
    /// Gets or sets a value indicating whether the tracked peer is still active.
    /// Defaults to true.
    /// </summary>
    public bool IsActive { get; set; } = true;
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user