using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
using Polly;
using Polly.Retry;
using Polly.Timeout;

namespace ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;

/// <summary>
/// Wraps a central-DB fetch function with Phase 6.1 Stream D.2 resilience:
/// timeout 2 s → retry 3× jittered → fallback to sealed cache. Maintains the
/// <see cref="StaleConfigFlag"/> — fresh on central-DB success, stale on cache fallback.
/// </summary>
/// <remarks>
/// <para>
/// Read-path only per plan. The write path (draft save, publish) bypasses this
/// wrapper entirely and fails hard on DB outage so inconsistent writes never land.
/// </para>
/// <para>
/// Fallback is triggered by any exception the fetch raises (central-DB
/// unreachable, SqlException, timeout). If the sealed cache also fails (no pointer,
/// corrupt file, etc.), surfaces <see cref="GenerationCacheUnavailableException"/> — caller
/// must fail the current request (InitializeAsync for a driver, etc.).
/// </para>
/// </remarks>
public sealed class ResilientConfigReader
{
    private readonly GenerationSealedCache _cache;
    private readonly StaleConfigFlag _staleFlag;
    private readonly ResiliencePipeline _pipeline;

    // NOTE(review): the logger's category type argument is assumed to be this class — confirm.
    private readonly ILogger<ResilientConfigReader> _logger;

    /// <summary>
    /// Builds the resilience pipeline (timeout, then optional retry) used by <see cref="ReadAsync{T}"/>.
    /// </summary>
    /// <param name="cache">Sealed cache that is read when the central fetch fails.</param>
    /// <param name="staleFlag">Flag marked fresh on central-DB success and stale on cache fallback.</param>
    /// <param name="logger">Receives the sanitized warning emitted before falling back.</param>
    /// <param name="timeout">Timeout handed to the timeout strategy; defaults to 2 seconds when <c>null</c>.</param>
    /// <param name="retryCount">Maximum retry attempts; a value of zero or less disables the retry strategy.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="cache"/>, <paramref name="staleFlag"/> or <paramref name="logger"/> is <c>null</c>.
    /// </exception>
    public ResilientConfigReader(
        GenerationSealedCache cache,
        StaleConfigFlag staleFlag,
        ILogger<ResilientConfigReader> logger,
        TimeSpan? timeout = null,
        int retryCount = 3)
    {
        // Fail at construction rather than with a NullReferenceException on first use.
        ArgumentNullException.ThrowIfNull(cache);
        ArgumentNullException.ThrowIfNull(staleFlag);
        ArgumentNullException.ThrowIfNull(logger);

        _cache = cache;
        _staleFlag = staleFlag;
        _logger = logger;

        // The timeout is added first, so it is the outermost strategy: it wraps the retry
        // strategy and therefore bounds the whole retry sequence, not each attempt.
        var builder = new ResiliencePipelineBuilder()
            .AddTimeout(new TimeoutStrategyOptions { Timeout = timeout ?? TimeSpan.FromSeconds(2) });

        if (retryCount > 0)
        {
            builder.AddRetry(new RetryStrategyOptions
            {
                MaxRetryAttempts = retryCount,
                BackoffType = DelayBackoffType.Exponential,
                UseJitter = true,
                Delay = TimeSpan.FromMilliseconds(100),
                MaxDelay = TimeSpan.FromSeconds(1),
                // Handle ALL exceptions including OperationCanceledException. A SQL command-level
                // timeout surfaces as TaskCanceledException (derives from OperationCanceledException)
                // when the caller's token is NOT cancelled, and must be retried just like any other
                // transient error. Polly itself checks the cancellation token between retries and
                // stops with OperationCanceledException on genuine caller cancellation regardless of
                // this predicate.
                ShouldHandle = new PredicateBuilder().Handle<Exception>(),
            });
        }

        _pipeline = builder.Build();
    }

    /// <summary>
    /// Configuration-010: redact connection-string fragments (Password, User Id, Pwd, etc.)
    /// that a caller's exception message could carry. Conservative regex pass — anything
    /// matching Key=Value with a known credential key gets its value replaced.
    /// </summary>
    /// <remarks>
    /// The value part matches, in order: a complete double-quoted value, a complete single-quoted
    /// value, or a bare token that ends at <c>;</c>, <c>,</c>, <c>)</c> or whitespace. Matching the
    /// quoted forms first keeps values that contain spaces or separators from being only partly
    /// redacted. An unterminated quote falls through to the bare-token rule.
    /// </remarks>
    private static readonly Regex SecretsRegex = new(
        @"(?ix)\b(Password|Pwd|User\s*Id|Uid|AccessToken|Authorization|Api[-_]?Key)\s*=\s*(?:""[^""\r\n]*""|'[^'\r\n]*'|[^;,)\s]*)",
        RegexOptions.Compiled);

    /// <summary>
    /// Replaces every credential-looking <c>Key=Value</c> fragment in <paramref name="message"/>
    /// with a fixed redaction marker.
    /// </summary>
    /// <param name="message">Text to sanitize; may be <c>null</c> or empty.</param>
    /// <returns>
    /// The sanitized text. A <c>null</c> input yields <see cref="string.Empty"/>; an empty input is
    /// returned unchanged.
    /// </returns>
    internal static string ScrubSecrets(string? message)
    {
        if (string.IsNullOrEmpty(message))
        {
            return message ?? string.Empty;
        }

        // Replace the entire matched fragment (key + value) with a redaction marker so the
        // key name itself doesn't leak — log scrapers grep for "Password=" too.
        return SecretsRegex.Replace(message, "[redacted credential]");
    }

    // NOTE(review): the snapshot type passed to fromSnapshot is assumed to be GenerationSnapshot,
    // i.e. whatever GenerationSealedCache.ReadCurrentAsync returns — confirm against that class.

    /// <summary>
    /// Execute <paramref name="centralFetch"/> through the resilience pipeline. On full failure
    /// (post-retry), reads the sealed cache for <paramref name="clusterId"/> and passes the
    /// snapshot to <paramref name="fromSnapshot"/> to extract the requested shape.
    /// </summary>
    /// <typeparam name="T">Shape of the configuration data being read.</typeparam>
    /// <param name="clusterId">Cluster whose sealed cache is read on fallback; must not be blank.</param>
    /// <param name="centralFetch">Central-DB fetch delegate executed inside the pipeline.</param>
    /// <param name="fromSnapshot">Projects the cached snapshot into <typeparamref name="T"/> on fallback.</param>
    /// <param name="cancellationToken">Caller's token; passed to both the pipeline and the cache read.</param>
    /// <returns>The central-DB result, or the projection of the sealed-cache snapshot on fallback.</returns>
    /// <exception cref="ArgumentException"><paramref name="clusterId"/> is empty or whitespace.</exception>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="clusterId"/>, <paramref name="centralFetch"/> or <paramref name="fromSnapshot"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="OperationCanceledException">
    /// The fetch was cancelled and <paramref name="cancellationToken"/> has cancellation requested.
    /// </exception>
    public async ValueTask<T> ReadAsync<T>(
        string clusterId,
        Func<CancellationToken, ValueTask<T>> centralFetch,
        Func<GenerationSnapshot, T> fromSnapshot,
        CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(clusterId);
        ArgumentNullException.ThrowIfNull(centralFetch);
        ArgumentNullException.ThrowIfNull(fromSnapshot);

        try
        {
            var result = await _pipeline.ExecuteAsync(centralFetch, cancellationToken).ConfigureAwait(false);
            _staleFlag.MarkFresh();
            return result;
        }
        // Catch all exceptions that are NOT genuine caller cancellations. A SQL command-level
        // timeout surfaces as TaskCanceledException (derives from OperationCanceledException)
        // but the caller's token is NOT cancelled — we must fall back to the sealed cache for
        // that case, not propagate. Only rethrow if the caller actually requested cancellation.
        catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
        {
            // Configuration-010: do NOT pass the raw exception object — it carries the stack
            // and inner-exception chain, and SqlException/wrapping delegates can surface
            // connection-string fragments (Password=…, User Id=…) embedded in messages.
            // Log only the exception type and a scrubbed message so secrets stay out of logs.
            _logger.LogWarning(
                "Central-DB read failed after retries ({ExceptionType}: {SanitizedMessage}); falling back to sealed cache for cluster {ClusterId}",
                ex.GetType().Name,
                ScrubSecrets(ex.Message),
                clusterId);

            // GenerationCacheUnavailableException surfaces intentionally — fails the caller's
            // operation. StaleConfigFlag stays unchanged; the flag only flips when we actually
            // served a cache snapshot.
            var snapshot = await _cache.ReadCurrentAsync(clusterId, cancellationToken).ConfigureAwait(false);
            _staleFlag.MarkStale();
            return fromSnapshot(snapshot);
        }
    }
}