review(Configuration): fix LiteDB global BsonMapper cross-instance race (High)

Re-review at 7286d320. Configuration-012 (High): LiteDbConfigCache/GenerationSealedCache built
LiteDatabase on the process-wide BsonMapper.Global whose lazy member resolution races across
concurrently-constructed DBs (NotSupportedException/duplicate-key under contention; also caused
intermittent suite flakiness). Fix: per-cache fresh BsonMapper + pre-registered entity + TDD.
-013 (dead ValidateClusterTopology, ControlPlane) / -014 (collation case-sensitivity, needs
migration) deferred. No migration touched.
This commit is contained in:
Joseph Doherty
2026-06-19 11:06:56 -04:00
parent 145b06bec9
commit c3d148e396
4 changed files with 142 additions and 6 deletions
@@ -28,6 +28,18 @@ public sealed class GenerationSealedCache
private const string CurrentPointerFileName = "CURRENT";
private readonly string _cacheRoot;
// Private per-database BsonMapper with the entity pre-registered. LiteDB's default
// BsonMapper.Global is a process-wide singleton whose lazy per-type member registration is
// not thread-safe across concurrently-constructed LiteDatabase instances; a seal racing a
// read (or this cache racing LiteDbConfigCache) corrupts the global mapper, surfacing as
// "Member … not found on BsonMapper" or a bogus duplicate-_id insert — Configuration-012.
private static BsonMapper BuildMapper()
{
var mapper = new BsonMapper();
mapper.Entity<GenerationSnapshot>();
return mapper;
}
/// <summary>Root directory for all clusters' sealed caches.</summary>
public string CacheRoot => _cacheRoot;
@@ -68,7 +80,7 @@ public sealed class GenerationSealedCache
var tmpPath = sealedPath + ".tmp";
try
{
using (var db = new LiteDatabase(new ConnectionString { Filename = tmpPath, Upgrade = false }))
using (var db = new LiteDatabase(new ConnectionString { Filename = tmpPath, Upgrade = false }, BuildMapper()))
{
var col = db.GetCollection<GenerationSnapshot>(CollectionName);
col.Insert(snapshot);
@@ -126,7 +138,7 @@ public sealed class GenerationSealedCache
try
{
using var db = new LiteDatabase(new ConnectionString { Filename = sealedPath, ReadOnly = true });
using var db = new LiteDatabase(new ConnectionString { Filename = sealedPath, ReadOnly = true }, BuildMapper());
var col = db.GetCollection<GenerationSnapshot>(CollectionName);
var snapshot = col.FindAll().FirstOrDefault()
?? throw new GenerationCacheUnavailableException(
@@ -11,6 +11,21 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
public sealed class LiteDbConfigCache : ILocalConfigCache, IDisposable
{
private const string CollectionName = "generations";
// LiteDB's default BsonMapper.Global is a process-wide singleton whose per-type member
// registration is lazy and NOT thread-safe across concurrently-constructed LiteDatabase
// instances. When several caches (this one + GenerationSealedCache) initialise in parallel
// the global mapper races, surfacing as "Member ClusterId not found on BsonMapper" or a
// bogus "duplicate key _id = 0" (the int auto-id mapping was lost so Insert writes a literal
// 0 twice) — Configuration-012. Give each database a private, pre-registered mapper so member
// resolution happens once, single-threaded, at construction and never touches the global.
private static BsonMapper BuildMapper()
{
var mapper = new BsonMapper();
mapper.Entity<GenerationSnapshot>();
return mapper;
}
private readonly LiteDatabase _db;
private readonly ILiteCollection<GenerationSnapshot> _col;
// PutAsync is a find-then-insert/update; without serialization, two concurrent puts for the
@@ -28,7 +43,7 @@ public sealed class LiteDbConfigCache : ILocalConfigCache, IDisposable
// the header and "recover"), so we force a write + read probe to fail fast on real corruption.
try
{
_db = new LiteDatabase(new ConnectionString { Filename = dbPath, Upgrade = true });
_db = new LiteDatabase(new ConnectionString { Filename = dbPath, Upgrade = true }, BuildMapper());
_col = _db.GetCollection<GenerationSnapshot>(CollectionName);
_col.EnsureIndex(s => s.ClusterId);
_col.EnsureIndex(s => s.GenerationId);