refactor: rename ScadaLink → ZB.MOM.WW.ScadaBridge (code + projects + namespaces)

Solution + 23 src projects + 26 test projects renamed; folders, csproj, namespaces, and ScadaLinkDbContext/ScadaBridgeDbContext class updated. ActorSystem "scadalink" → "scadabridge", Akka seed-node URLs migrated. SQL roles/logins, LDAP domains, CLI command name, and CLI config dir (~/.scadalink → ~/.scadabridge) also renamed. Build green; 5 Host.Tests fail awaiting SQL login rename in next commit. Pre-existing StaleTagMonitor timing flakes unchanged. Rename script committed at tools/rename-to-scadabridge.sh.
2026-05-28 09:37:45 -04:00
parent 6d87ee3c3b
commit 7b0b9c7365
1531 changed files with 11180 additions and 11054 deletions
@@ -0,0 +1,165 @@
+using Microsoft.Extensions.Logging;
+using ZB.MOM.WW.ScadaBridge.AuditLog.Payload;
+using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
+using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
+
+namespace ZB.MOM.WW.ScadaBridge.AuditLog.Site;
+
+/// <summary>
+/// Composes the primary <see cref="SqliteAuditWriter"/> with a drop-oldest
+/// <see cref="RingBufferFallback"/>. Audit writes are best-effort by contract
+/// (see <see cref="IAuditWriter"/>) — a primary failure must NEVER bubble out
+/// to the calling script. Failed events are stashed in the ring; on the next
+/// successful primary write the ring is drained back through the primary in
+/// FIFO order.
+/// </summary>
+/// <remarks>
+/// <para>
+/// Each primary failure increments <see cref="IAuditWriteFailureCounter"/> so
+/// Site Health Monitoring can surface a sustained outage as
+/// <c>SiteAuditWriteFailures</c> (Bundle G).
+/// </para>
+/// <para>
+/// A failure during the recovery drain is logged, the event is re-enqueued at
+/// the ring's tail and the drain stops — the trigger event itself succeeded,
+/// and retrying the drain on the NEXT successful write is the recovery path.
+/// </para>
+/// </remarks>
+public sealed class FallbackAuditWriter : IAuditWriter
+{
+    private readonly IAuditWriter _primary;
+    private readonly RingBufferFallback _ring;
+    private readonly IAuditWriteFailureCounter _failureCounter;
+    private readonly ILogger<FallbackAuditWriter> _logger;
+    private readonly IAuditPayloadFilter _filter;
+
+    // Single-permit gate: at most one ring drain runs at a time.
+    private readonly SemaphoreSlim _drainGate = new(1, 1);
+
+    /// <summary>
+    /// Bundle C (M5-T6) wires the singleton <see cref="IAuditPayloadFilter"/>
+    /// here so every event written via the site hot path is truncated +
+    /// header/body/SQL-param redacted before it hits both the primary SQLite
+    /// writer AND the ring fallback. The parameter is optional (null falls back
+    /// to <c>SafeDefaultAuditPayloadFilter.Instance</c>) so the long tail of test
+    /// composition roots that don't care about the filter need no change — the
+    /// production <see cref="ServiceCollectionExtensions.AddAuditLog"/>
+    /// registration always passes the real filter through.
+    /// </summary>
+    /// <param name="primary">The primary audit writer (typically the SQLite writer).</param>
+    /// <param name="ring">Drop-oldest ring buffer used to stash events when the primary fails.</param>
+    /// <param name="failureCounter">Counter incremented on each primary failure for health reporting.</param>
+    /// <param name="logger">Logger for diagnostics.</param>
+    /// <param name="filter">Optional payload filter applied before writing; null selects the safe default header-redacting filter.</param>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="primary"/>, <paramref name="ring"/>,
+    /// <paramref name="failureCounter"/> or <paramref name="logger"/> is null.
+    /// </exception>
+    public FallbackAuditWriter(
+        IAuditWriter primary,
+        RingBufferFallback ring,
+        IAuditWriteFailureCounter failureCounter,
+        ILogger<FallbackAuditWriter> logger,
+        IAuditPayloadFilter? filter = null)
+    {
+        _primary = primary ?? throw new ArgumentNullException(nameof(primary));
+        _ring = ring ?? throw new ArgumentNullException(nameof(ring));
+        _failureCounter = failureCounter ?? throw new ArgumentNullException(nameof(failureCounter));
+        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
+        // AuditLog-008: never default to a null filter — over-redact instead.
+        // SafeDefaultAuditPayloadFilter.Instance performs HTTP header
+        // redaction with the hard-coded sensitive defaults (Authorization,
+        // X-Api-Key, Cookie, Set-Cookie) so a test composition root that
+        // doesn't bind the real options never persists those headers
+        // verbatim. The real DefaultAuditPayloadFilter (truncation + body /
+        // SQL-param redaction) is wired by AddAuditLog and takes precedence.
+        _filter = filter ?? Payload.SafeDefaultAuditPayloadFilter.Instance;
+    }
+
+    /// <inheritdoc />
+    public async Task WriteAsync(AuditEvent evt, CancellationToken ct = default)
+    {
+        ArgumentNullException.ThrowIfNull(evt);
+
+        // Filter once, up-front. The filtered event flows BOTH to the primary
+        // and (on failure) to the ring buffer — so a primary outage that
+        // drains later still hands the SqliteAuditWriter a row that has
+        // already been truncated and redacted. The filter contract is
+        // "MUST NOT throw". AuditLog-008: _filter is now non-null (defaults
+        // to SafeDefaultAuditPayloadFilter so header redaction is always
+        // applied even in composition roots that don't wire the real filter).
+        var filtered = _filter.Apply(evt);
+
+        try
+        {
+            await _primary.WriteAsync(filtered, ct).ConfigureAwait(false);
+        }
+        catch (Exception ex)
+        {
+            // Primary down: record the failure, stash in the ring, return
+            // success to the caller. Audit-write failures NEVER abort the
+            // user-facing action (alog.md §7). DO NOT attempt the ring drain
+            // here — primary is throwing, draining would just scramble FIFO
+            // order across re-enqueues.
+            _failureCounter.Increment();
+            _logger.LogWarning(ex,
+                "Primary audit writer threw; routing EventId {EventId} to drop-oldest ring.",
+                filtered.EventId);
+            // Ring stores the filtered copy so the eventual drain replays a
+            // payload that has already been capped/redacted — no second
+            // filter pass needed on recovery, and no risk of the ring
+            // holding the raw oversized blob in memory.
+            _ring.TryEnqueue(filtered);
+            return;
+        }
+
+        // Primary succeeded — opportunistically drain anything that piled up
+        // in the ring during the outage. Best-effort: a failure during the
+        // drain re-enqueues the popped event and is logged; the next
+        // successful write will retry. Drain order in the audit log is
+        // therefore: <triggering event>, <backlog FIFO>.
+        if (_ring.Count > 0)
+        {
+            await TryDrainRingAsync(ct).ConfigureAwait(false);
+        }
+    }
+
+    /// <summary>
+    /// Replays the events currently buffered in the ring through the primary
+    /// writer. Only one drain runs at a time; a caller that finds the gate
+    /// taken returns immediately. The drain stops at the first primary
+    /// failure (the event is re-enqueued) or once <paramref name="ct"/> is
+    /// cancelled.
+    /// </summary>
+    /// <param name="ct">Token forwarded to the primary writer; cancellation ends the drain early.</param>
+    private async Task TryDrainRingAsync(CancellationToken ct)
+    {
+        // Serialise drains so two concurrent recoveries don't double-replay.
+        // The try-acquire is deliberately token-free: WaitAsync(0, ct) yields
+        // a cancelled task when ct is already cancelled, and that
+        // OperationCanceledException would escape WriteAsync AFTER the
+        // triggering event was persisted, breaking the best-effort contract.
+        // Wait(0) never blocks, so calling it synchronously is safe here.
+        if (!_drainGate.Wait(0))
+        {
+            return;
+        }
+
+        try
+        {
+            // Pull only what is currently buffered; do NOT wait for new events.
+            // We iterate with a snapshot of Count so we never starve under
+            // concurrent enqueues.
+            var pending = _ring.Count;
+            for (var i = 0; i < pending; i++)
+            {
+                // Don't replay against a cancelled token: the primary would
+                // most likely throw, which would count a spurious failure and
+                // push the head event to the tail. Leave the backlog intact
+                // for the next successful write instead.
+                if (ct.IsCancellationRequested)
+                {
+                    break;
+                }
+
+                if (!_ring.TryDequeue(out var queued))
+                {
+                    break;
+                }
+
+                try
+                {
+                    await _primary.WriteAsync(queued, ct).ConfigureAwait(false);
+                }
+                catch (Exception ex)
+                {
+                    // Primary fell over again. Putting the event back at the
+                    // head of the queue is impossible with Channel<T>, so route
+                    // it to the tail (drop-oldest preserves the most-recent
+                    // picture) and stop draining until the next recovery.
+                    _failureCounter.Increment();
+                    _logger.LogWarning(ex,
+                        "Ring drain re-throw on EventId {EventId}; re-enqueuing.",
+                        queued.EventId);
+                    _ring.TryEnqueue(queued);
+                    break;
+                }
+            }
+        }
+        finally
+        {
+            _drainGate.Release();
+        }
+    }
+}