refactor: rename ScadaLink → ZB.MOM.WW.ScadaBridge (code + projects + namespaces)
Solution + 23 src projects + 26 test projects renamed; folders, csproj, namespaces, and ScadaLinkDbContext/ScadaBridgeDbContext class updated. ActorSystem "scadalink" → "scadabridge", Akka seed-node URLs migrated. SQL roles/logins, LDAP domains, CLI command name, and CLI config dir (~/.scadalink → ~/.scadabridge) also renamed. Build green; 5 Host.Tests fail awaiting SQL login rename in next commit. Pre-existing StaleTagMonitor timing flakes unchanged. Rename script committed at tools/rename-to-scadabridge.sh.
This commit is contained in:
@@ -0,0 +1,165 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ZB.MOM.WW.ScadaBridge.AuditLog.Payload;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Site;
|
||||
|
||||
/// <summary>
|
||||
/// Composes the primary <see cref="SqliteAuditWriter"/> with a drop-oldest
|
||||
/// <see cref="RingBufferFallback"/>. Audit writes are best-effort by contract
|
||||
/// (see <see cref="IAuditWriter"/>) — a primary failure must NEVER bubble out
|
||||
/// to the calling script. Failed events are stashed in the ring; on the next
|
||||
/// successful primary write the ring is drained back through the primary in
|
||||
/// FIFO order.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Each primary failure increments <see cref="IAuditWriteFailureCounter"/> so
|
||||
/// Site Health Monitoring can surface a sustained outage as
|
||||
/// <c>SiteAuditWriteFailures</c> (Bundle G).
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Errors raised by the ring drain on recovery are logged and silently dropped
|
||||
/// so we don't loop the failure mode — the trigger event itself succeeded, and
|
||||
/// retrying the drain on the NEXT successful write is the recovery path.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class FallbackAuditWriter : IAuditWriter
|
||||
{
|
||||
private readonly IAuditWriter _primary;
|
||||
private readonly RingBufferFallback _ring;
|
||||
private readonly IAuditWriteFailureCounter _failureCounter;
|
||||
private readonly ILogger<FallbackAuditWriter> _logger;
|
||||
private readonly IAuditPayloadFilter _filter;
|
||||
private readonly SemaphoreSlim _drainGate = new(1, 1);
|
||||
|
||||
/// <summary>
|
||||
/// Bundle C (M5-T6) wires the singleton <see cref="IAuditPayloadFilter"/>
|
||||
/// here so every event written via the site hot path is truncated +
|
||||
/// header/body/SQL-param redacted before it hits both the primary SQLite
|
||||
/// writer AND the ring fallback. The parameter is optional (defaults to
|
||||
/// no filtering) so the long tail of test composition roots that don't
|
||||
/// care about the filter need no change — the production
|
||||
/// <see cref="ServiceCollectionExtensions.AddAuditLog"/> registration
|
||||
/// always passes the real filter through.
|
||||
/// </summary>
|
||||
/// <param name="primary">The primary audit writer (typically the SQLite writer).</param>
|
||||
/// <param name="ring">Drop-oldest ring buffer used to stash events when the primary fails.</param>
|
||||
/// <param name="failureCounter">Counter incremented on each primary failure for health reporting.</param>
|
||||
/// <param name="logger">Logger for diagnostics.</param>
|
||||
/// <param name="filter">Optional payload filter applied before writing; null means no filtering.</param>
|
||||
public FallbackAuditWriter(
|
||||
IAuditWriter primary,
|
||||
RingBufferFallback ring,
|
||||
IAuditWriteFailureCounter failureCounter,
|
||||
ILogger<FallbackAuditWriter> logger,
|
||||
IAuditPayloadFilter? filter = null)
|
||||
{
|
||||
_primary = primary ?? throw new ArgumentNullException(nameof(primary));
|
||||
_ring = ring ?? throw new ArgumentNullException(nameof(ring));
|
||||
_failureCounter = failureCounter ?? throw new ArgumentNullException(nameof(failureCounter));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
// AuditLog-008: never default to a null filter — over-redact instead.
|
||||
// SafeDefaultAuditPayloadFilter.Instance performs HTTP header
|
||||
// redaction with the hard-coded sensitive defaults (Authorization,
|
||||
// X-Api-Key, Cookie, Set-Cookie) so a test composition root that
|
||||
// doesn't bind the real options never persists those headers
|
||||
// verbatim. The real DefaultAuditPayloadFilter (truncation + body /
|
||||
// SQL-param redaction) is wired by AddAuditLog and takes precedence.
|
||||
_filter = filter ?? Payload.SafeDefaultAuditPayloadFilter.Instance;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task WriteAsync(AuditEvent evt, CancellationToken ct = default)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(evt);
|
||||
|
||||
// Filter once, up-front. The filtered event flows BOTH to the primary
|
||||
// and (on failure) to the ring buffer — so a primary outage that
|
||||
// drains later still hands the SqliteAuditWriter a row that has
|
||||
// already been truncated and redacted. The filter contract is
|
||||
// "MUST NOT throw". AuditLog-008: _filter is now non-null (defaults
|
||||
// to SafeDefaultAuditPayloadFilter so header redaction is always
|
||||
// applied even in composition roots that don't wire the real filter).
|
||||
var filtered = _filter.Apply(evt);
|
||||
|
||||
try
|
||||
{
|
||||
await _primary.WriteAsync(filtered, ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Primary down: record the failure, stash in the ring, return
|
||||
// success to the caller. Audit-write failures NEVER abort the
|
||||
// user-facing action (alog.md §7). DO NOT attempt the ring drain
|
||||
// here — primary is throwing, draining would just scramble FIFO
|
||||
// order across re-enqueues.
|
||||
_failureCounter.Increment();
|
||||
_logger.LogWarning(ex,
|
||||
"Primary audit writer threw; routing EventId {EventId} to drop-oldest ring.",
|
||||
filtered.EventId);
|
||||
// Ring stores the filtered copy so the eventual drain replays a
|
||||
// payload that has already been capped/redacted — no second
|
||||
// filter pass needed on recovery, and no risk of the ring
|
||||
// holding the raw oversized blob in memory.
|
||||
_ring.TryEnqueue(filtered);
|
||||
return;
|
||||
}
|
||||
|
||||
// Primary succeeded — opportunistically drain anything that piled up
|
||||
// in the ring during the outage. Best-effort: a failure during the
|
||||
// drain re-enqueues the popped event and is logged; the next
|
||||
// successful write will retry. Drain order in the audit log is
|
||||
// therefore: <triggering event>, <backlog FIFO>.
|
||||
if (_ring.Count > 0)
|
||||
{
|
||||
await TryDrainRingAsync(ct).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task TryDrainRingAsync(CancellationToken ct)
|
||||
{
|
||||
// Serialise drains so two concurrent recoveries don't double-replay.
|
||||
if (!await _drainGate.WaitAsync(0, ct).ConfigureAwait(false))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
// Pull only what is currently buffered; do NOT wait for new events.
|
||||
// We iterate with a snapshot of Count so we never starve under
|
||||
// concurrent enqueues.
|
||||
var pending = _ring.Count;
|
||||
for (var i = 0; i < pending; i++)
|
||||
{
|
||||
if (!_ring.TryDequeue(out var queued))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
await _primary.WriteAsync(queued, ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Primary fell over again. Put the event back at the head
|
||||
// of the queue is impossible with Channel<T>; route to the
|
||||
// tail (drop-oldest preserves the most-recent picture).
|
||||
_failureCounter.Increment();
|
||||
_logger.LogWarning(ex,
|
||||
"Ring drain re-throw on EventId {EventId}; re-enqueuing.",
|
||||
queued.EventId);
|
||||
_ring.TryEnqueue(queued);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
_drainGate.Release();
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user