chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)
Group all 69 projects into category subfolders under src/ and tests/ so the Rider Solution Explorer mirrors the module structure. Folders: Core, Server, Drivers (with a nested Driver CLIs subfolder), Client, Tooling. - Move every project folder on disk with git mv (history preserved as renames). - Recompute relative paths in 57 .csproj files: cross-category ProjectReferences, the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external mxaccessgw refs in Driver.Galaxy and its test project. - Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders. - Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL, integration, install). Build green (0 errors); unit tests pass. Docs left for a separate pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,170 @@
|
||||
using LiteDB;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
/// <summary>
|
||||
/// Generation-sealed LiteDB cache per <c>docs/v2/plan.md</c> decision #148 and Phase 6.1
|
||||
/// Stream D.1. Each published generation writes one <b>read-only</b> LiteDB file under
|
||||
/// <c><cache-root>/<clusterId>/<generationId>.db</c>. A per-cluster
|
||||
/// <c>CURRENT</c> text file holds the currently-active generation id; it is updated
|
||||
/// atomically (temp file + <see cref="File.Replace(string, string, string?)"/>) only after
|
||||
/// the sealed file is fully written.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Mixed-generation reads are impossible: any read opens the single file pointed to
|
||||
/// by <c>CURRENT</c>, which is a coherent snapshot. Corruption of the CURRENT file or the
|
||||
/// sealed file surfaces as <see cref="GenerationCacheUnavailableException"/> — the reader
|
||||
/// fails closed rather than silently falling back to an older generation. Recovery path
|
||||
/// is to re-fetch from the central DB (and the Phase 6.1 Stream C <c>UsingStaleConfig</c>
|
||||
/// flag goes true until that succeeds).</para>
|
||||
///
|
||||
/// <para>This cache is the read-path fallback when the central DB is unreachable. The
|
||||
/// write path (draft edits, publish) bypasses the cache and fails hard on DB outage per
|
||||
/// Stream D.2 — inconsistent writes are worse than a temporary inability to edit.</para>
|
||||
/// </remarks>
|
||||
public sealed class GenerationSealedCache
|
||||
{
|
||||
private const string CollectionName = "generation";
|
||||
private const string CurrentPointerFileName = "CURRENT";
|
||||
private readonly string _cacheRoot;
|
||||
|
||||
/// <summary>Root directory for all clusters' sealed caches.</summary>
|
||||
public string CacheRoot => _cacheRoot;
|
||||
|
||||
public GenerationSealedCache(string cacheRoot)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(cacheRoot);
|
||||
_cacheRoot = cacheRoot;
|
||||
Directory.CreateDirectory(_cacheRoot);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Seal a generation: write the snapshot to <c><cluster>/<generationId>.db</c>,
|
||||
/// mark the file read-only, then atomically publish the <c>CURRENT</c> pointer. Existing
|
||||
/// sealed files for prior generations are preserved (prune separately).
|
||||
/// </summary>
|
||||
public async Task SealAsync(GenerationSnapshot snapshot, CancellationToken ct = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(snapshot);
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
var clusterDir = Path.Combine(_cacheRoot, snapshot.ClusterId);
|
||||
Directory.CreateDirectory(clusterDir);
|
||||
var sealedPath = Path.Combine(clusterDir, $"{snapshot.GenerationId}.db");
|
||||
|
||||
if (File.Exists(sealedPath))
|
||||
{
|
||||
// Already sealed — idempotent. Treat as no-op + update pointer in case an earlier
|
||||
// seal succeeded but the pointer update failed (crash recovery).
|
||||
WritePointerAtomically(clusterDir, snapshot.GenerationId);
|
||||
return;
|
||||
}
|
||||
|
||||
var tmpPath = sealedPath + ".tmp";
|
||||
try
|
||||
{
|
||||
using (var db = new LiteDatabase(new ConnectionString { Filename = tmpPath, Upgrade = false }))
|
||||
{
|
||||
var col = db.GetCollection<GenerationSnapshot>(CollectionName);
|
||||
col.Insert(snapshot);
|
||||
}
|
||||
|
||||
File.Move(tmpPath, sealedPath);
|
||||
File.SetAttributes(sealedPath, File.GetAttributes(sealedPath) | FileAttributes.ReadOnly);
|
||||
WritePointerAtomically(clusterDir, snapshot.GenerationId);
|
||||
}
|
||||
catch
|
||||
{
|
||||
try { if (File.Exists(tmpPath)) File.Delete(tmpPath); } catch { /* best-effort */ }
|
||||
throw;
|
||||
}
|
||||
|
||||
await Task.CompletedTask;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Read the current sealed snapshot for <paramref name="clusterId"/>. Throws
|
||||
/// <see cref="GenerationCacheUnavailableException"/> when the pointer is missing
|
||||
/// (first-boot-no-snapshot case) or when the sealed file is corrupt. Never silently
|
||||
/// falls back to a prior generation.
|
||||
/// </summary>
|
||||
public Task<GenerationSnapshot> ReadCurrentAsync(string clusterId, CancellationToken ct = default)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
var clusterDir = Path.Combine(_cacheRoot, clusterId);
|
||||
var pointerPath = Path.Combine(clusterDir, CurrentPointerFileName);
|
||||
if (!File.Exists(pointerPath))
|
||||
throw new GenerationCacheUnavailableException(
|
||||
$"No sealed generation for cluster '{clusterId}' at '{clusterDir}'. First-boot case: the central DB must be reachable at least once before cache fallback is possible.");
|
||||
|
||||
long generationId;
|
||||
try
|
||||
{
|
||||
var text = File.ReadAllText(pointerPath).Trim();
|
||||
generationId = long.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
throw new GenerationCacheUnavailableException(
|
||||
$"CURRENT pointer at '{pointerPath}' is corrupt or unreadable.", ex);
|
||||
}
|
||||
|
||||
var sealedPath = Path.Combine(clusterDir, $"{generationId}.db");
|
||||
if (!File.Exists(sealedPath))
|
||||
throw new GenerationCacheUnavailableException(
|
||||
$"CURRENT points at generation {generationId} but '{sealedPath}' is missing — fails closed rather than serving an older generation.");
|
||||
|
||||
try
|
||||
{
|
||||
using var db = new LiteDatabase(new ConnectionString { Filename = sealedPath, ReadOnly = true });
|
||||
var col = db.GetCollection<GenerationSnapshot>(CollectionName);
|
||||
var snapshot = col.FindAll().FirstOrDefault()
|
||||
?? throw new GenerationCacheUnavailableException(
|
||||
$"Sealed file '{sealedPath}' contains no snapshot row — file is corrupt.");
|
||||
return Task.FromResult(snapshot);
|
||||
}
|
||||
catch (GenerationCacheUnavailableException) { throw; }
|
||||
catch (Exception ex) when (ex is LiteException or InvalidDataException or IOException
|
||||
or NotSupportedException or FormatException)
|
||||
{
|
||||
throw new GenerationCacheUnavailableException(
|
||||
$"Sealed file '{sealedPath}' is corrupt or unreadable — fails closed rather than falling back to an older generation.", ex);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Return the generation id the <c>CURRENT</c> pointer points at, or null if no pointer exists.</summary>
|
||||
public long? TryGetCurrentGenerationId(string clusterId)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
|
||||
var pointerPath = Path.Combine(_cacheRoot, clusterId, CurrentPointerFileName);
|
||||
if (!File.Exists(pointerPath)) return null;
|
||||
try
|
||||
{
|
||||
return long.Parse(File.ReadAllText(pointerPath).Trim(), System.Globalization.CultureInfo.InvariantCulture);
|
||||
}
|
||||
catch
|
||||
{
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static void WritePointerAtomically(string clusterDir, long generationId)
|
||||
{
|
||||
var pointerPath = Path.Combine(clusterDir, CurrentPointerFileName);
|
||||
var tmpPath = pointerPath + ".tmp";
|
||||
File.WriteAllText(tmpPath, generationId.ToString(System.Globalization.CultureInfo.InvariantCulture));
|
||||
if (File.Exists(pointerPath))
|
||||
File.Replace(tmpPath, pointerPath, destinationBackupFileName: null);
|
||||
else
|
||||
File.Move(tmpPath, pointerPath);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Sealed cache is unreachable — caller must fail closed.</summary>
|
||||
public sealed class GenerationCacheUnavailableException : Exception
|
||||
{
|
||||
public GenerationCacheUnavailableException(string message) : base(message) { }
|
||||
public GenerationCacheUnavailableException(string message, Exception inner) : base(message, inner) { }
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
/// <summary>
|
||||
/// A self-contained snapshot of one generation — enough to rebuild the address space on a node
|
||||
/// that has lost DB connectivity. The payload is the JSON-serialized <c>sp_GetGenerationContent</c>
|
||||
/// result; the local cache doesn't inspect the shape, it just round-trips bytes.
|
||||
/// </summary>
|
||||
public sealed class GenerationSnapshot
|
||||
{
|
||||
public int Id { get; set; } // LiteDB auto-ID
|
||||
public required string ClusterId { get; set; }
|
||||
public required long GenerationId { get; set; }
|
||||
public required DateTime CachedAt { get; set; }
|
||||
public required string PayloadJson { get; set; }
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
/// <summary>
|
||||
/// Per-node local cache of the most-recently-applied generation(s). Used to bootstrap the
|
||||
/// address space when the central DB is unreachable (decision #79 — degraded-but-running).
|
||||
/// </summary>
|
||||
public interface ILocalConfigCache
|
||||
{
|
||||
Task<GenerationSnapshot?> GetMostRecentAsync(string clusterId, CancellationToken ct = default);
|
||||
Task PutAsync(GenerationSnapshot snapshot, CancellationToken ct = default);
|
||||
Task PruneOldGenerationsAsync(string clusterId, int keepLatest = 10, CancellationToken ct = default);
|
||||
}
|
||||
@@ -0,0 +1,89 @@
|
||||
using LiteDB;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
/// <summary>
|
||||
/// LiteDB-backed <see cref="ILocalConfigCache"/>. One file per node (default
|
||||
/// <c>config_cache.db</c>), one collection per snapshot. Corruption surfaces as
|
||||
/// <see cref="LocalConfigCacheCorruptException"/> on construction or read — callers should
|
||||
/// delete and re-fetch from the central DB (decision #80).
|
||||
/// </summary>
|
||||
public sealed class LiteDbConfigCache : ILocalConfigCache, IDisposable
|
||||
{
|
||||
private const string CollectionName = "generations";
|
||||
private readonly LiteDatabase _db;
|
||||
private readonly ILiteCollection<GenerationSnapshot> _col;
|
||||
|
||||
public LiteDbConfigCache(string dbPath)
|
||||
{
|
||||
// LiteDB can be tolerant of header-only corruption at construction time (it may overwrite
|
||||
// the header and "recover"), so we force a write + read probe to fail fast on real corruption.
|
||||
try
|
||||
{
|
||||
_db = new LiteDatabase(new ConnectionString { Filename = dbPath, Upgrade = true });
|
||||
_col = _db.GetCollection<GenerationSnapshot>(CollectionName);
|
||||
_col.EnsureIndex(s => s.ClusterId);
|
||||
_col.EnsureIndex(s => s.GenerationId);
|
||||
_ = _col.Count();
|
||||
}
|
||||
catch (Exception ex) when (ex is LiteException or InvalidDataException or IOException
|
||||
or NotSupportedException or UnauthorizedAccessException
|
||||
or ArgumentOutOfRangeException or FormatException)
|
||||
{
|
||||
_db?.Dispose();
|
||||
throw new LocalConfigCacheCorruptException(
|
||||
$"LiteDB cache at '{dbPath}' is corrupt or unreadable — delete the file and refetch from the central DB.",
|
||||
ex);
|
||||
}
|
||||
}
|
||||
|
||||
public Task<GenerationSnapshot?> GetMostRecentAsync(string clusterId, CancellationToken ct = default)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
var snapshot = _col
|
||||
.Find(s => s.ClusterId == clusterId)
|
||||
.OrderByDescending(s => s.GenerationId)
|
||||
.FirstOrDefault();
|
||||
return Task.FromResult<GenerationSnapshot?>(snapshot);
|
||||
}
|
||||
|
||||
public Task PutAsync(GenerationSnapshot snapshot, CancellationToken ct = default)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
// upsert by (ClusterId, GenerationId) — replace in place if already cached
|
||||
var existing = _col
|
||||
.Find(s => s.ClusterId == snapshot.ClusterId && s.GenerationId == snapshot.GenerationId)
|
||||
.FirstOrDefault();
|
||||
|
||||
if (existing is null)
|
||||
_col.Insert(snapshot);
|
||||
else
|
||||
{
|
||||
snapshot.Id = existing.Id;
|
||||
_col.Update(snapshot);
|
||||
}
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public Task PruneOldGenerationsAsync(string clusterId, int keepLatest = 10, CancellationToken ct = default)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
var doomed = _col
|
||||
.Find(s => s.ClusterId == clusterId)
|
||||
.OrderByDescending(s => s.GenerationId)
|
||||
.Skip(keepLatest)
|
||||
.Select(s => s.Id)
|
||||
.ToList();
|
||||
|
||||
foreach (var id in doomed)
|
||||
_col.Delete(id);
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public void Dispose() => _db.Dispose();
|
||||
}
|
||||
|
||||
public sealed class LocalConfigCacheCorruptException(string message, Exception inner)
|
||||
: Exception(message, inner);
|
||||
@@ -0,0 +1,90 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
using Polly.Timeout;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps a central-DB fetch function with Phase 6.1 Stream D.2 resilience:
|
||||
/// <b>timeout 2 s → retry 3× jittered → fallback to sealed cache</b>. Maintains the
|
||||
/// <see cref="StaleConfigFlag"/> — fresh on central-DB success, stale on cache fallback.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>Read-path only per plan. The write path (draft save, publish) bypasses this
|
||||
/// wrapper entirely and fails hard on DB outage so inconsistent writes never land.</para>
|
||||
///
|
||||
/// <para>Fallback is triggered by <b>any exception</b> the fetch raises (central-DB
|
||||
/// unreachable, SqlException, timeout). If the sealed cache also fails (no pointer,
|
||||
/// corrupt file, etc.), <see cref="GenerationCacheUnavailableException"/> surfaces — caller
|
||||
/// must fail the current request (InitializeAsync for a driver, etc.).</para>
|
||||
/// </remarks>
|
||||
public sealed class ResilientConfigReader
|
||||
{
|
||||
private readonly GenerationSealedCache _cache;
|
||||
private readonly StaleConfigFlag _staleFlag;
|
||||
private readonly ResiliencePipeline _pipeline;
|
||||
private readonly ILogger<ResilientConfigReader> _logger;
|
||||
|
||||
public ResilientConfigReader(
|
||||
GenerationSealedCache cache,
|
||||
StaleConfigFlag staleFlag,
|
||||
ILogger<ResilientConfigReader> logger,
|
||||
TimeSpan? timeout = null,
|
||||
int retryCount = 3)
|
||||
{
|
||||
_cache = cache;
|
||||
_staleFlag = staleFlag;
|
||||
_logger = logger;
|
||||
var builder = new ResiliencePipelineBuilder()
|
||||
.AddTimeout(new TimeoutStrategyOptions { Timeout = timeout ?? TimeSpan.FromSeconds(2) });
|
||||
|
||||
if (retryCount > 0)
|
||||
{
|
||||
builder.AddRetry(new RetryStrategyOptions
|
||||
{
|
||||
MaxRetryAttempts = retryCount,
|
||||
BackoffType = DelayBackoffType.Exponential,
|
||||
UseJitter = true,
|
||||
Delay = TimeSpan.FromMilliseconds(100),
|
||||
MaxDelay = TimeSpan.FromSeconds(1),
|
||||
ShouldHandle = new PredicateBuilder().Handle<Exception>(ex => ex is not OperationCanceledException),
|
||||
});
|
||||
}
|
||||
|
||||
_pipeline = builder.Build();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Execute <paramref name="centralFetch"/> through the resilience pipeline. On full failure
|
||||
/// (post-retry), reads the sealed cache for <paramref name="clusterId"/> and passes the
|
||||
/// snapshot to <paramref name="fromSnapshot"/> to extract the requested shape.
|
||||
/// </summary>
|
||||
public async ValueTask<T> ReadAsync<T>(
|
||||
string clusterId,
|
||||
Func<CancellationToken, ValueTask<T>> centralFetch,
|
||||
Func<GenerationSnapshot, T> fromSnapshot,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
|
||||
ArgumentNullException.ThrowIfNull(centralFetch);
|
||||
ArgumentNullException.ThrowIfNull(fromSnapshot);
|
||||
|
||||
try
|
||||
{
|
||||
var result = await _pipeline.ExecuteAsync(centralFetch, cancellationToken).ConfigureAwait(false);
|
||||
_staleFlag.MarkFresh();
|
||||
return result;
|
||||
}
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
{
|
||||
_logger.LogWarning(ex, "Central-DB read failed after retries; falling back to sealed cache for cluster {ClusterId}", clusterId);
|
||||
// GenerationCacheUnavailableException surfaces intentionally — fails the caller's
|
||||
// operation. StaleConfigFlag stays unchanged; the flag only flips when we actually
|
||||
// served a cache snapshot.
|
||||
var snapshot = await _cache.ReadCurrentAsync(clusterId, cancellationToken).ConfigureAwait(false);
|
||||
_staleFlag.MarkStale();
|
||||
return fromSnapshot(snapshot);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
|
||||
/// <summary>
|
||||
/// Thread-safe <c>UsingStaleConfig</c> signal per Phase 6.1 Stream D.3. Flips true whenever
|
||||
/// a read falls back to a sealed cache snapshot; flips false on the next successful central-DB
|
||||
/// round-trip. Surfaced on <c>/healthz</c> body and on the Admin <c>/hosts</c> page.
|
||||
/// </summary>
|
||||
public sealed class StaleConfigFlag
|
||||
{
|
||||
private int _stale;
|
||||
|
||||
/// <summary>True when the last config read was served from the sealed cache, not the central DB.</summary>
|
||||
public bool IsStale => Volatile.Read(ref _stale) != 0;
|
||||
|
||||
/// <summary>Mark the current config as stale (a read fell back to the cache).</summary>
|
||||
public void MarkStale() => Volatile.Write(ref _stale, 1);
|
||||
|
||||
/// <summary>Mark the current config as fresh (a central-DB read succeeded).</summary>
|
||||
public void MarkFresh() => Volatile.Write(ref _stale, 0);
|
||||
}
|
||||
Reference in New Issue
Block a user