fix(configuration): resolve Low code-review findings (Configuration-004,005,007,010,011)
- Configuration-004: NodePermissions stored as int to match the EF HasConversion<int>() in OtOpcUaConfigDbContext.ConfigureNodeAcl. - Configuration-005: serialise LiteDbConfigCache.PutAsync so concurrent Put for the same (ClusterId, GenerationId) cannot duplicate rows. - Configuration-007: rethrow OperationCanceledException from GenerationApplier.ApplyPass when the caller's token is cancelled. - Configuration-010: scrub secrets and drop the full exception object from the ResilientConfigReader fallback warning log. - Configuration-011: pin the previously-uncovered GenerationApplier cancellation and path-length / publish-validation paths. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,10 @@ public sealed class GenerationApplier(ApplyCallbacks callbacks) : IGenerationApp
|
||||
|
||||
foreach (var kind in new[] { ChangeKind.Added, ChangeKind.Modified })
|
||||
{
|
||||
// Honour cancellation between passes — a caller can abort the apply between Removed
|
||||
// and Added phases even if individual callbacks don't observe the token themselves
|
||||
// (Configuration-007).
|
||||
ct.ThrowIfCancellationRequested();
|
||||
await ApplyPass(diff.Namespaces, kind, callbacks.OnNamespace, errors, ct);
|
||||
await ApplyPass(diff.Drivers, kind, callbacks.OnDriver, errors, ct);
|
||||
await ApplyPass(diff.Devices, kind, callbacks.OnDevice, errors, ct);
|
||||
@@ -42,6 +46,12 @@ public sealed class GenerationApplier(ApplyCallbacks callbacks) : IGenerationApp
|
||||
foreach (var change in changes.Where(c => c.Kind == kind))
|
||||
{
|
||||
try { await callback(change, ct); }
|
||||
// Configuration-007: cancellation must propagate, not be silently recorded as an
|
||||
// entity error. Distinguish caller cancellation (token signalled) from any
|
||||
// OperationCanceledException raised independently of the caller's token, which we
|
||||
// still want to surface as an entity error so a single misbehaving callback does
|
||||
// not crash the entire apply.
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
|
||||
catch (Exception ex) { errors.Add($"{typeof(T).Name} {change.Kind} '{change.LogicalId}': {ex.Message}"); }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
/// Stored as <c>int</c> bitmask in <see cref="Entities.NodeAcl.PermissionFlags"/>.
|
||||
/// </summary>
|
||||
[Flags]
|
||||
public enum NodePermissions : uint
|
||||
public enum NodePermissions : int
|
||||
{
|
||||
None = 0,
|
||||
|
||||
|
||||
@@ -4,6 +4,13 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
|
||||
/// Per-node local cache of the most-recently-applied generation(s). Used to bootstrap the
|
||||
/// address space when the central DB is unreachable (decision #79 — degraded-but-running).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para><b>Concurrency contract:</b> implementations must serialize writes — specifically,
|
||||
/// <see cref="PutAsync"/> for the same <c>(ClusterId, GenerationId)</c> from concurrent
|
||||
/// callers must not produce duplicate rows. Reads may run concurrently with reads and writes.
|
||||
/// The <see cref="LiteDbConfigCache"/> implementation enforces this via an instance-level
|
||||
/// <see cref="SemaphoreSlim"/> around the find-then-insert/update window.</para>
|
||||
/// </remarks>
|
||||
public interface ILocalConfigCache
|
||||
{
|
||||
Task<GenerationSnapshot?> GetMostRecentAsync(string clusterId, CancellationToken ct = default);
|
||||
|
||||
@@ -13,6 +13,12 @@ public sealed class LiteDbConfigCache : ILocalConfigCache, IDisposable
|
||||
private const string CollectionName = "generations";
|
||||
private readonly LiteDatabase _db;
|
||||
private readonly ILiteCollection<GenerationSnapshot> _col;
|
||||
// PutAsync is a find-then-insert/update; without serialization, two concurrent puts for the
|
||||
// same (ClusterId, GenerationId) can both observe `existing is null` and both Insert,
|
||||
// producing duplicate rows (Configuration-005). Serialize writes through this semaphore so
|
||||
// the read-modify-write block is atomic for a given instance. LiteDB itself only locks the
|
||||
// page-level write, not the find-then-insert window.
|
||||
private readonly SemaphoreSlim _writeGate = new(initialCount: 1, maxCount: 1);
|
||||
|
||||
public LiteDbConfigCache(string dbPath)
|
||||
{
|
||||
@@ -47,23 +53,32 @@ public sealed class LiteDbConfigCache : ILocalConfigCache, IDisposable
|
||||
return Task.FromResult<GenerationSnapshot?>(snapshot);
|
||||
}
|
||||
|
||||
public Task PutAsync(GenerationSnapshot snapshot, CancellationToken ct = default)
|
||||
public async Task PutAsync(GenerationSnapshot snapshot, CancellationToken ct = default)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
// upsert by (ClusterId, GenerationId) — replace in place if already cached
|
||||
var existing = _col
|
||||
.Find(s => s.ClusterId == snapshot.ClusterId && s.GenerationId == snapshot.GenerationId)
|
||||
.FirstOrDefault();
|
||||
|
||||
if (existing is null)
|
||||
_col.Insert(snapshot);
|
||||
else
|
||||
// Serialize the find-then-insert/update so concurrent callers do not observe a stale
|
||||
// `existing is null` and both Insert (Configuration-005). LiteDB's per-call lock is
|
||||
// not enough — the read and the write are independent calls.
|
||||
await _writeGate.WaitAsync(ct).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
snapshot.Id = existing.Id;
|
||||
_col.Update(snapshot);
|
||||
}
|
||||
// upsert by (ClusterId, GenerationId) — replace in place if already cached
|
||||
var existing = _col
|
||||
.Find(s => s.ClusterId == snapshot.ClusterId && s.GenerationId == snapshot.GenerationId)
|
||||
.FirstOrDefault();
|
||||
|
||||
return Task.CompletedTask;
|
||||
if (existing is null)
|
||||
_col.Insert(snapshot);
|
||||
else
|
||||
{
|
||||
snapshot.Id = existing.Id;
|
||||
_col.Update(snapshot);
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
_writeGate.Release();
|
||||
}
|
||||
}
|
||||
|
||||
public Task PruneOldGenerationsAsync(string clusterId, int keepLatest = 10, CancellationToken ct = default)
|
||||
@@ -82,7 +97,11 @@ public sealed class LiteDbConfigCache : ILocalConfigCache, IDisposable
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
public void Dispose() => _db.Dispose();
|
||||
public void Dispose()
|
||||
{
|
||||
_writeGate.Dispose();
|
||||
_db.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
public sealed class LocalConfigCacheCorruptException(string message, Exception inner)
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Polly;
|
||||
using Polly.Retry;
|
||||
@@ -61,6 +62,23 @@ public sealed class ResilientConfigReader
|
||||
_pipeline = builder.Build();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Configuration-010: redact connection-string fragments (Password, User Id, Pwd, etc.)
|
||||
/// that a caller's exception message could carry. Conservative regex pass — anything
|
||||
/// matching <c>Key=Value</c> with a known credential key gets its value replaced.
|
||||
/// </summary>
|
||||
private static readonly Regex SecretsRegex = new(
|
||||
@"(?ix)\b(Password|Pwd|User\s*Id|Uid|AccessToken|Authorization|Api[-_]?Key)\s*=\s*[^;,)\s]*",
|
||||
RegexOptions.Compiled);
|
||||
|
||||
internal static string ScrubSecrets(string? message)
|
||||
{
|
||||
if (string.IsNullOrEmpty(message)) return message ?? string.Empty;
|
||||
// Replace the entire matched fragment (key + value) with a redaction marker so the
|
||||
// key name itself doesn't leak — log scrapers grep for "Password=" too.
|
||||
return SecretsRegex.Replace(message, "[redacted credential]");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Execute <paramref name="centralFetch"/> through the resilience pipeline. On full failure
|
||||
/// (post-retry), reads the sealed cache for <paramref name="clusterId"/> and passes the
|
||||
@@ -88,7 +106,15 @@ public sealed class ResilientConfigReader
|
||||
// that case, not propagate. Only rethrow if the caller actually requested cancellation.
|
||||
catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
_logger.LogWarning(ex, "Central-DB read failed after retries; falling back to sealed cache for cluster {ClusterId}", clusterId);
|
||||
// Configuration-010: do NOT pass the raw exception object — it carries the stack
|
||||
// and inner-exception chain, and SqlException/wrapping delegates can surface
|
||||
// connection-string fragments (Password=…, User Id=…) embedded in messages.
|
||||
// Log only the exception type and a scrubbed message so secrets stay out of logs.
|
||||
_logger.LogWarning(
|
||||
"Central-DB read failed after retries ({ExceptionType}: {SanitizedMessage}); falling back to sealed cache for cluster {ClusterId}",
|
||||
ex.GetType().Name,
|
||||
ScrubSecrets(ex.Message),
|
||||
clusterId);
|
||||
// GenerationCacheUnavailableException surfaces intentionally — fails the caller's
|
||||
// operation. StaleConfigFlag stays unchanged; the flag only flips when we actually
|
||||
// served a cache snapshot.
|
||||
|
||||
Reference in New Issue
Block a user