refactor: rename ScadaLink → ZB.MOM.WW.ScadaBridge (code + projects + namespaces)

Solution + 23 src projects + 26 test projects renamed; folders, csproj,
namespaces, and ScadaLinkDbContext/ScadaBridgeDbContext class updated.
ActorSystem "scadalink" → "scadabridge", Akka seed-node URLs migrated.
SQL roles/logins, LDAP domains, CLI command name, and CLI config dir
(~/.scadalink → ~/.scadabridge) also renamed.

Build green; 5 Host.Tests fail awaiting SQL login rename in next commit.
Pre-existing StaleTagMonitor timing flakes unchanged.

Rename script committed at tools/rename-to-scadabridge.sh.
This commit is contained in:
Joseph Doherty
2026-05-28 09:37:45 -04:00
parent 6d87ee3c3b
commit 7b0b9c7365
1531 changed files with 11180 additions and 11054 deletions
@@ -0,0 +1,913 @@
using System.Threading.Channels;
using Microsoft.Data.Sqlite;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Services;
using ZB.MOM.WW.ScadaBridge.Commons.Types;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Site;
/// <summary>
/// Site-side SQLite hot-path writer for Audit Log (#23) events. Mirrors the
/// <see cref="ZB.MOM.WW.ScadaBridge.SiteEventLogging.SiteEventLogger"/> design — a single
/// owned <see cref="SqliteConnection"/> serialised behind a write lock, fed by a
/// bounded <see cref="Channel{T}"/> drained on a dedicated background writer
/// task — so script-thread callers never block on disk I/O.
/// </summary>
/// <remarks>
/// <para>
/// The schema is bootstrapped in the constructor (Bundle B-T1). The
/// Channel-based <see cref="WriteAsync"/> hot-path + Bundle D
/// <see cref="ReadPendingAsync"/> / <see cref="MarkForwardedAsync"/> support
/// surface are wired in Bundle B-T2.
/// </para>
/// <para>
/// Site rows always carry <see cref="AuditForwardState.Pending"/> on first
/// insert; the central row-shape's <c>IngestedAtUtc</c> column does NOT live in
/// the site SQLite schema — central stamps it on ingest.
/// </para>
/// </remarks>
public class SqliteAuditWriter : IAuditWriter, ISiteAuditQueue, IAsyncDisposable, IDisposable
{
// Microsoft.Data.Sqlite reports a generic SQLITE_CONSTRAINT (error code 19)
// on a PRIMARY KEY violation; the extended subcode 1555 (SQLITE_CONSTRAINT_PRIMARYKEY)
// is exposed via SqliteException.SqliteExtendedErrorCode but isn't reliably
// surfaced across all SQLite builds. We treat any constraint error on insert
// as a duplicate-EventId race and swallow it (first-write-wins). The table
// also declares NOT NULL columns, which would report the same primary code,
// but FlushBatch always binds those columns to non-null values, so the
// PRIMARY KEY on EventId is the only constraint expected to fire here.
private const int SqliteErrorConstraint = 19;
// Writer connection: used for schema bootstrap, batched inserts and the
// Mark*Async updates. Every use after construction happens under _writeLock.
private readonly SqliteConnection _connection;
// AuditLog-005: dedicated read-only connection used by GetBacklogStatsAsync,
// ReadPendingAsync, ReadPendingSinceAsync, and ReadForwardedAsync so a slow
// backlog scan (COUNT(*) over hundreds of thousands of Pending rows under a
// central outage) never parks the hot-path writer behind _writeLock.
// SQLite-with-WAL allows a second connection on the same file to read
// concurrently with the writer; the writer's WAL pragma is set in
// InitializeSchema before this connection is opened. The reader connection
// has its own _readLock because SqliteConnection itself is not thread-safe
// even in read-only mode — multiple read callers can otherwise interleave
// commands on the shared connection.
private readonly SqliteConnection _readConnection;
// Serialises every command issued on _readConnection.
private readonly object _readLock = new();
// Snapshot of the configured options (DatabasePath, ChannelCapacity, BatchSize).
private readonly SqliteAuditWriterOptions _options;
private readonly ILogger<SqliteAuditWriter> _logger;
// Supplies the local node name stamped into SourceNode when an event does
// not already carry one (see FlushBatch).
private readonly INodeIdentityProvider _nodeIdentity;
// Serialises every command issued on _connection and guards the _disposed flip.
private readonly object _writeLock = new();
// Bounded queue between WriteAsync callers and the background writer loop.
private readonly Channel<PendingAuditEvent> _writeQueue;
// The background task running ProcessWriteQueueAsync; awaited during dispose.
private readonly Task _writerLoop;
// Set to true (under _writeLock) by DisposeAsync once the writer loop has
// drained or timed out; checked under _writeLock by the write paths and
// under _readLock by the read paths.
private bool _disposed;
/// <summary>
/// Initializes a new instance of the SqliteAuditWriter class: opens the writer
/// connection, bootstraps the schema, opens the dedicated read connection and
/// starts the background writer loop.
/// </summary>
/// <param name="options">Configuration options for the audit writer.</param>
/// <param name="logger">Logger instance.</param>
/// <param name="nodeIdentity">Node identity provider.</param>
/// <param name="connectionStringOverride">Optional connection string override.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="options"/>, <paramref name="logger"/> or
/// <paramref name="nodeIdentity"/> is <c>null</c>.
/// </exception>
/// <remarks>
/// If any step after the first connection is created fails, both connections
/// are disposed before the exception propagates — a constructor that throws
/// never hands the caller an instance to dispose, so nothing else would
/// release the SQLite handles.
/// </remarks>
public SqliteAuditWriter(
    IOptions<SqliteAuditWriterOptions> options,
    ILogger<SqliteAuditWriter> logger,
    INodeIdentityProvider nodeIdentity,
    string? connectionStringOverride = null)
{
    ArgumentNullException.ThrowIfNull(options);
    ArgumentNullException.ThrowIfNull(logger);
    ArgumentNullException.ThrowIfNull(nodeIdentity);

    _options = options.Value;
    _logger = logger;
    _nodeIdentity = nodeIdentity;

    // NOTE(review): Microsoft.Data.Sqlite's documentation discourages mixing
    // Cache=Shared with write-ahead logging. It is kept here because the
    // shared cache is what lets the two connections see the same database
    // for in-memory data sources — confirm before changing.
    var connectionString = connectionStringOverride
        ?? $"Data Source={_options.DatabasePath};Cache=Shared";

    var writeConnection = new SqliteConnection(connectionString);
    SqliteConnection? readConnection = null;
    try
    {
        writeConnection.Open();
        _connection = writeConnection;
        InitializeSchema();

        // AuditLog-005: open a second connection for read-only callers
        // (GetBacklogStatsAsync, ReadPendingAsync, ReadPendingSinceAsync,
        // ReadForwardedAsync). InitializeSchema set journal_mode=WAL on the
        // writer connection, which is a database-level setting that persists
        // for the file — subsequent connections to the same file see WAL and
        // can read concurrently with the writer without taking _writeLock.
        // Reuse the same connection string so the read connection sees the
        // same Data Source / Cache settings as the writer.
        readConnection = new SqliteConnection(connectionString);
        readConnection.Open();
        _readConnection = readConnection;

        _writeQueue = Channel.CreateBounded<PendingAuditEvent>(
            new BoundedChannelOptions(_options.ChannelCapacity)
            {
                // The hot-path enqueue must back-pressure if the background
                // writer falls behind; a higher-level fallback (Bundle B-T4)
                // handles truly catastrophic primary failure with a drop-oldest
                // ring buffer.
                FullMode = BoundedChannelFullMode.Wait,
                SingleReader = true,
                SingleWriter = false,
            });
    }
    catch
    {
        // Release whatever was opened so a failed bootstrap does not leak
        // SQLite handles (and the file locks that come with them).
        readConnection?.Dispose();
        writeConnection.Dispose();
        throw;
    }

    // Started last so the loop never observes a half-initialised instance.
    _writerLoop = Task.Run(ProcessWriteQueueAsync);
}
/// <summary>
/// Bootstraps the site audit database on the writer connection: applies the
/// database-level pragmas, creates the <c>AuditLog</c> table and its index
/// when absent, then additively upgrades tables created by older builds.
/// </summary>
private void InitializeSchema()
{
    // Order matters for the pragmas:
    //  1. auto_vacuum must be set before any table is created for it to take
    //     effect on a fresh database. INCREMENTAL lets a future
    //     `PRAGMA incremental_vacuum` shrink the file after the 7-day
    //     retention purge — see alog.md §10.
    //  2. AuditLog-005: WAL lets the dedicated read connection serve
    //     GetBacklogStatsAsync / ReadPendingAsync / ReadPendingSinceAsync /
    //     ReadForwardedAsync concurrently with the batched writer. The mode
    //     actually adopted is not checked: an in-memory database reports
    //     "memory" instead of "wal", and read correctness does not depend on
    //     WAL — only the concurrency benefit does.
    string[] pragmas =
    {
        "PRAGMA auto_vacuum = INCREMENTAL",
        "PRAGMA journal_mode = WAL",
    };
    foreach (var pragma in pragmas)
    {
        using var pragmaCommand = _connection.CreateCommand();
        pragmaCommand.CommandText = pragma;
        pragmaCommand.ExecuteNonQuery();
    }

    const string createSql = """
        CREATE TABLE IF NOT EXISTS AuditLog (
        EventId TEXT NOT NULL,
        OccurredAtUtc TEXT NOT NULL,
        Channel TEXT NOT NULL,
        Kind TEXT NOT NULL,
        CorrelationId TEXT NULL,
        SourceSiteId TEXT NULL,
        SourceNode TEXT NULL,
        SourceInstanceId TEXT NULL,
        SourceScript TEXT NULL,
        Actor TEXT NULL,
        Target TEXT NULL,
        Status TEXT NOT NULL,
        HttpStatus INTEGER NULL,
        DurationMs INTEGER NULL,
        ErrorMessage TEXT NULL,
        ErrorDetail TEXT NULL,
        RequestSummary TEXT NULL,
        ResponseSummary TEXT NULL,
        PayloadTruncated INTEGER NOT NULL,
        Extra TEXT NULL,
        ForwardState TEXT NOT NULL,
        ExecutionId TEXT NULL,
        ParentExecutionId TEXT NULL,
        PRIMARY KEY (EventId)
        );
        CREATE INDEX IF NOT EXISTS IX_SiteAuditLog_ForwardState_Occurred
        ON AuditLog (ForwardState, OccurredAtUtc);
        """;
    using (var createCommand = _connection.CreateCommand())
    {
        createCommand.CommandText = createSql;
        createCommand.ExecuteNonQuery();
    }

    // Additive upgrades. CREATE TABLE IF NOT EXISTS never adds columns to a
    // table that already exists, and auditlog.db is durable across
    // restart/failover by design, so a file written by an older build must
    // have the newer columns ALTER-ed in — otherwise every insert would bind
    // against a missing column and the best-effort write path would drop
    // every site audit row. Each column is nullable with no default, so rows
    // written before the upgrade read back null for it.
    string[] additiveColumns = { "ExecutionId", "ParentExecutionId", "SourceNode" };
    foreach (var column in additiveColumns)
    {
        AddColumnIfMissing(column, "TEXT NULL");
    }
}
/// <summary>
/// Audit Log #23: adds <paramref name="columnName"/> to <c>AuditLog</c> unless
/// the table already has it (used for <c>ExecutionId</c>,
/// <c>ParentExecutionId</c> and <c>SourceNode</c>). SQLite has no
/// <c>ADD COLUMN IF NOT EXISTS</c>, so <c>pragma_table_info</c> is queried
/// first and the ALTER is skipped when the column is found — which makes the
/// call idempotent and safe on every <see cref="InitializeSchema"/>. Mirrors
/// <c>StoreAndForwardStorage.AddColumnIfMissingAsync</c>; synchronous to match
/// the rest of this writer's bootstrap DDL.
/// </summary>
/// <param name="columnName">Name of the column to add.</param>
/// <param name="columnDefinition">Type/constraint text placed after the name.</param>
private void AddColumnIfMissing(string columnName, string columnDefinition)
{
    int matchingColumns;
    using (var lookup = _connection.CreateCommand())
    {
        lookup.CommandText = "SELECT COUNT(*) FROM pragma_table_info('AuditLog') WHERE name = $name";
        lookup.Parameters.AddWithValue("$name", columnName);
        matchingColumns = Convert.ToInt32(lookup.ExecuteScalar());
    }

    if (matchingColumns <= 0)
    {
        // DDL cannot take bound parameters; both pieces are constants supplied
        // by InitializeSchema, never user input, so interpolation is safe.
        using var addColumn = _connection.CreateCommand();
        addColumn.CommandText = $"ALTER TABLE AuditLog ADD COLUMN {columnName} {columnDefinition}";
        addColumn.ExecuteNonQuery();
    }
}
/// <inheritdoc />
public Task WriteAsync(AuditEvent evt, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(evt);

    // Site rows always carry a ForwardState (central rows leave it null), so a
    // bare event is defaulted to Pending on a copy; the caller's instance is
    // left untouched.
    var pending = new PendingAuditEvent(
        evt.ForwardState is not null
            ? evt
            : evt with { ForwardState = AuditForwardState.Pending });

    // Fast path: a synchronous enqueue. TryWrite returns false when the
    // channel is full or already completed; the slow path then either awaits
    // room (FullMode=Wait back-pressure) or reports the disposal.
    return _writeQueue.Writer.TryWrite(pending)
        ? pending.Completion.Task
        : WriteSlowPathAsync(pending, ct);
}
/// <summary>
/// Enqueues <paramref name="pending"/> through the channel's asynchronous
/// write, then awaits the event's completion. A closed channel is reported to
/// the caller as an <see cref="ObjectDisposedException"/>.
/// </summary>
/// <param name="pending">The queued event wrapper to enqueue.</param>
/// <param name="ct">Cancels the wait for channel capacity.</param>
private async Task WriteSlowPathAsync(PendingAuditEvent pending, CancellationToken ct)
{
    var queueWriter = _writeQueue.Writer;
    try
    {
        await queueWriter.WriteAsync(pending, ct).ConfigureAwait(false);
    }
    catch (ChannelClosedException)
    {
        // The channel was completed by dispose: fault the event's own task so
        // the await below surfaces the disposal to the caller.
        var disposedError = new ObjectDisposedException(
            nameof(SqliteAuditWriter),
            "Event could not be recorded: the audit writer has been disposed.");
        pending.Completion.TrySetException(disposedError);
    }

    await pending.Completion.Task.ConfigureAwait(false);
}
/// <summary>
/// Background writer loop: drains the write queue, groups ready events into
/// batches of up to <c>BatchSize</c> and hands each batch to
/// <see cref="FlushBatch"/>. Runs until the channel is completed by dispose.
/// </summary>
/// <remarks>
/// This loop is the channel's only reader. If it terminated on an exception,
/// every queued and future <see cref="WriteAsync"/> caller would wait forever
/// on a task nobody completes. Any exception escaping a flush is therefore
/// logged, used to fault that batch's events, and the loop carries on.
/// </remarks>
private async Task ProcessWriteQueueAsync()
{
    // Guard against a zero/negative configured batch size: a negative list
    // capacity would throw here and kill the loop before it read anything.
    var batchSize = Math.Max(1, _options.BatchSize);
    var batch = new List<PendingAuditEvent>(batchSize);

    // ReadAllAsync completes when the channel is marked complete (Dispose).
    await foreach (var first in _writeQueue.Reader.ReadAllAsync().ConfigureAwait(false))
    {
        batch.Clear();
        batch.Add(first);

        // Pull additional ready events up to the batch size. TryRead is non-
        // blocking and lets us amortise the transaction overhead across a
        // burst of concurrent enqueues.
        while (batch.Count < batchSize &&
               _writeQueue.Reader.TryRead(out var next))
        {
            batch.Add(next);
        }

        try
        {
            FlushBatch(batch);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex,
                "SqliteAuditWriter writer loop caught an unhandled flush failure; faulting {Count} pending events",
                batch.Count);
            foreach (var pending in batch)
            {
                // No-op for events FlushBatch already completed.
                pending.Completion.TrySetException(ex);
            }
        }
    }
}
/// <summary>
/// Persists one batch of queued audit events in a single SQLite transaction
/// on the writer connection, then completes each event's task.
/// </summary>
/// <param name="batch">The events to insert, in queue order.</param>
/// <remarks>
/// <para>
/// Events are completed only after the transaction outcome is known: success
/// is signalled after <c>Commit</c> returns, and any failure (including a
/// failure to begin or commit the transaction) faults every event in the
/// batch. Signalling success per row before the commit would tell callers a
/// row was durable even though a later failure rolled it back.
/// </para>
/// <para>
/// A constraint violation on an individual insert is treated as a duplicate
/// <c>EventId</c> (first-write-wins) and does not fail the batch.
/// </para>
/// </remarks>
private void FlushBatch(IReadOnlyList<PendingAuditEvent> batch)
{
    Exception? failure = null;

    lock (_writeLock)
    {
        if (_disposed)
        {
            foreach (var pending in batch)
            {
                pending.Completion.TrySetException(
                    new ObjectDisposedException(nameof(SqliteAuditWriter),
                        "Event could not be recorded: the audit writer was disposed before the write completed."));
            }
            return;
        }

        SqliteTransaction? transaction = null;
        try
        {
            // Inside the try so a failure to begin the transaction faults the
            // batch instead of escaping into the writer loop.
            transaction = _connection.BeginTransaction();

            using var cmd = _connection.CreateCommand();
            cmd.Transaction = transaction;
            cmd.CommandText = """
                INSERT INTO AuditLog (
                EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
                SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
                Status, HttpStatus, DurationMs, ErrorMessage, ErrorDetail,
                RequestSummary, ResponseSummary, PayloadTruncated, Extra, ForwardState,
                ExecutionId, ParentExecutionId
                ) VALUES (
                $EventId, $OccurredAtUtc, $Channel, $Kind, $CorrelationId,
                $SourceSiteId, $SourceNode, $SourceInstanceId, $SourceScript, $Actor, $Target,
                $Status, $HttpStatus, $DurationMs, $ErrorMessage, $ErrorDetail,
                $RequestSummary, $ResponseSummary, $PayloadTruncated, $Extra, $ForwardState,
                $ExecutionId, $ParentExecutionId
                );
                """;

            // Parameters are created once and re-bound per row so the batch
            // reuses a single command.
            var pEventId = cmd.Parameters.Add("$EventId", SqliteType.Text);
            var pOccurredAt = cmd.Parameters.Add("$OccurredAtUtc", SqliteType.Text);
            var pChannel = cmd.Parameters.Add("$Channel", SqliteType.Text);
            var pKind = cmd.Parameters.Add("$Kind", SqliteType.Text);
            var pCorrelationId = cmd.Parameters.Add("$CorrelationId", SqliteType.Text);
            var pSourceSiteId = cmd.Parameters.Add("$SourceSiteId", SqliteType.Text);
            var pSourceNode = cmd.Parameters.Add("$SourceNode", SqliteType.Text);
            var pSourceInstanceId = cmd.Parameters.Add("$SourceInstanceId", SqliteType.Text);
            var pSourceScript = cmd.Parameters.Add("$SourceScript", SqliteType.Text);
            var pActor = cmd.Parameters.Add("$Actor", SqliteType.Text);
            var pTarget = cmd.Parameters.Add("$Target", SqliteType.Text);
            var pStatus = cmd.Parameters.Add("$Status", SqliteType.Text);
            var pHttpStatus = cmd.Parameters.Add("$HttpStatus", SqliteType.Integer);
            var pDurationMs = cmd.Parameters.Add("$DurationMs", SqliteType.Integer);
            var pErrorMessage = cmd.Parameters.Add("$ErrorMessage", SqliteType.Text);
            var pErrorDetail = cmd.Parameters.Add("$ErrorDetail", SqliteType.Text);
            var pRequestSummary = cmd.Parameters.Add("$RequestSummary", SqliteType.Text);
            var pResponseSummary = cmd.Parameters.Add("$ResponseSummary", SqliteType.Text);
            var pPayloadTruncated = cmd.Parameters.Add("$PayloadTruncated", SqliteType.Integer);
            var pExtra = cmd.Parameters.Add("$Extra", SqliteType.Text);
            var pForwardState = cmd.Parameters.Add("$ForwardState", SqliteType.Text);
            var pExecutionId = cmd.Parameters.Add("$ExecutionId", SqliteType.Text);
            var pParentExecutionId = cmd.Parameters.Add("$ParentExecutionId", SqliteType.Text);

            foreach (var pending in batch)
            {
                var e = pending.Event;
                pEventId.Value = e.EventId.ToString();
                pOccurredAt.Value = e.OccurredAtUtc.ToString("o");
                pChannel.Value = e.Channel.ToString();
                pKind.Value = e.Kind.ToString();
                pCorrelationId.Value = (object?)e.CorrelationId?.ToString() ?? DBNull.Value;
                pSourceSiteId.Value = (object?)e.SourceSiteId ?? DBNull.Value;
                // SourceNode-stamping: caller-provided value wins (preserves
                // rows reconciled in from other nodes via the same writer);
                // otherwise stamp from the local INodeIdentityProvider. The
                // event record itself is NOT mutated — stamping is at write
                // time only. If the provider also returns null (unconfigured
                // node), the row's SourceNode stays NULL — operators see
                // "needs config" via the schema, not a magic fallback string.
                var sourceNode = e.SourceNode ?? _nodeIdentity.NodeName;
                pSourceNode.Value = (object?)sourceNode ?? DBNull.Value;
                pSourceInstanceId.Value = (object?)e.SourceInstanceId ?? DBNull.Value;
                pSourceScript.Value = (object?)e.SourceScript ?? DBNull.Value;
                pActor.Value = (object?)e.Actor ?? DBNull.Value;
                pTarget.Value = (object?)e.Target ?? DBNull.Value;
                pStatus.Value = e.Status.ToString();
                pHttpStatus.Value = (object?)e.HttpStatus ?? DBNull.Value;
                pDurationMs.Value = (object?)e.DurationMs ?? DBNull.Value;
                pErrorMessage.Value = (object?)e.ErrorMessage ?? DBNull.Value;
                pErrorDetail.Value = (object?)e.ErrorDetail ?? DBNull.Value;
                pRequestSummary.Value = (object?)e.RequestSummary ?? DBNull.Value;
                pResponseSummary.Value = (object?)e.ResponseSummary ?? DBNull.Value;
                pPayloadTruncated.Value = e.PayloadTruncated ? 1 : 0;
                pExtra.Value = (object?)e.Extra ?? DBNull.Value;
                pForwardState.Value = (e.ForwardState ?? AuditForwardState.Pending).ToString();
                pExecutionId.Value = (object?)e.ExecutionId?.ToString() ?? DBNull.Value;
                pParentExecutionId.Value = (object?)e.ParentExecutionId?.ToString() ?? DBNull.Value;

                try
                {
                    cmd.ExecuteNonQuery();
                }
                catch (SqliteException ex) when (ex.SqliteErrorCode == SqliteErrorConstraint)
                {
                    // Duplicate EventId — first-write-wins (alog.md §11).
                    // Treat as success: the lifecycle event is durably
                    // recorded under the first writer's payload. The event's
                    // task is still completed only after the commit below.
                    _logger.LogDebug(ex,
                        "Duplicate EventId {EventId} swallowed by SqliteAuditWriter",
                        e.EventId);
                }
            }

            transaction.Commit();
        }
        catch (Exception ex)
        {
            failure = ex;
            try
            {
                transaction?.Rollback();
            }
            catch (Exception rollbackEx)
            {
                // Never let a failed rollback mask the original failure or
                // escape into the writer loop.
                _logger.LogWarning(rollbackEx,
                    "SqliteAuditWriter could not roll back a failed batch insert.");
            }

            _logger.LogError(ex, "SqliteAuditWriter batch insert failed; faulting {Count} pending events", batch.Count);
        }
        finally
        {
            transaction?.Dispose();
        }
    }

    // Outcome is now final: report it to every waiting caller.
    foreach (var pending in batch)
    {
        if (failure is null)
        {
            pending.Completion.TrySetResult();
        }
        else
        {
            pending.Completion.TrySetException(failure);
        }
    }
}
// AuditLog-001: names of the cached-lifecycle audit kinds. These rows travel
// with the combined-telemetry drain (joined with the operational tracking row
// and pushed via IngestCachedTelemetryAsync into the central dual-write
// transaction). ReadPendingAsync filters them OUT so the audit-only drain does
// not emit them a second time; ReadPendingCachedTelemetryAsync filters them IN
// and is the read surface the SiteAuditTelemetryActor cached-drain uses.
// Both queries bind exactly four entries ($k0..$k3) from this array.
private static readonly string[] CachedTelemetryKindNames = new[]
{
    nameof(AuditKind.CachedSubmit),
    nameof(AuditKind.ApiCallCached),
    nameof(AuditKind.DbWriteCached),
    nameof(AuditKind.CachedResolve),
};
/// <inheritdoc />
public Task<IReadOnlyList<AuditEvent>> ReadPendingAsync(int limit, CancellationToken ct = default)
{
    if (limit < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(limit), "limit must be > 0.");
    }

    // AuditLog-001: NOT IN ($k0, $k1, $k2, $k3) drops the cached-lifecycle
    // kinds, which are drained through ReadPendingCachedTelemetryAsync
    // instead. Kind is stored as the enum member's name (see FlushBatch), so
    // comparing against the constant kind names matches the on-disk values.
    const string sql = """
        SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
        SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
        Status, HttpStatus, DurationMs, ErrorMessage, ErrorDetail,
        RequestSummary, ResponseSummary, PayloadTruncated, Extra, ForwardState,
        ExecutionId, ParentExecutionId
        FROM AuditLog
        WHERE ForwardState = $pending
        AND Kind NOT IN ($k0, $k1, $k2, $k3)
        ORDER BY OccurredAtUtc ASC, EventId ASC
        LIMIT $limit;
        """;

    List<AuditEvent> rows;

    // AuditLog-005: the scan runs on the dedicated read connection so a large
    // backlog never holds up the batched writer on _writeLock. _readLock
    // serialises use of that connection because SqliteConnection is not
    // thread-safe.
    lock (_readLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        using var query = _readConnection.CreateCommand();
        query.CommandText = sql;
        query.Parameters.AddWithValue("$pending", AuditForwardState.Pending.ToString());
        query.Parameters.AddWithValue("$k0", CachedTelemetryKindNames[0]);
        query.Parameters.AddWithValue("$k1", CachedTelemetryKindNames[1]);
        query.Parameters.AddWithValue("$k2", CachedTelemetryKindNames[2]);
        query.Parameters.AddWithValue("$k3", CachedTelemetryKindNames[3]);
        query.Parameters.AddWithValue("$limit", limit);

        rows = new List<AuditEvent>(Math.Min(limit, 256));
        using (var cursor = query.ExecuteReader())
        {
            while (cursor.Read())
            {
                rows.Add(MapRow(cursor));
            }
        }
    }

    return Task.FromResult<IReadOnlyList<AuditEvent>>(rows);
}
/// <inheritdoc />
public Task<IReadOnlyList<AuditEvent>> ReadPendingCachedTelemetryAsync(
    int limit, CancellationToken ct = default)
{
    if (limit < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(limit), "limit must be > 0.");
    }

    // AuditLog-001: the counterpart of ReadPendingAsync for the cached-call
    // lifecycle drain — identical shape, but restricted to the four cached
    // AuditKinds instead of excluding them.
    const string sql = """
        SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
        SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
        Status, HttpStatus, DurationMs, ErrorMessage, ErrorDetail,
        RequestSummary, ResponseSummary, PayloadTruncated, Extra, ForwardState,
        ExecutionId, ParentExecutionId
        FROM AuditLog
        WHERE ForwardState = $pending
        AND Kind IN ($k0, $k1, $k2, $k3)
        ORDER BY OccurredAtUtc ASC, EventId ASC
        LIMIT $limit;
        """;

    List<AuditEvent> rows;

    // Read connection + _readLock, so the hot-path writer is not contended.
    lock (_readLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        using var query = _readConnection.CreateCommand();
        query.CommandText = sql;
        query.Parameters.AddWithValue("$pending", AuditForwardState.Pending.ToString());
        query.Parameters.AddWithValue("$k0", CachedTelemetryKindNames[0]);
        query.Parameters.AddWithValue("$k1", CachedTelemetryKindNames[1]);
        query.Parameters.AddWithValue("$k2", CachedTelemetryKindNames[2]);
        query.Parameters.AddWithValue("$k3", CachedTelemetryKindNames[3]);
        query.Parameters.AddWithValue("$limit", limit);

        rows = new List<AuditEvent>(Math.Min(limit, 256));
        using (var cursor = query.ExecuteReader())
        {
            while (cursor.Read())
            {
                rows.Add(MapRow(cursor));
            }
        }
    }

    return Task.FromResult<IReadOnlyList<AuditEvent>>(rows);
}
/// <summary>
/// Reads at most <paramref name="limit"/> rows whose state is
/// <see cref="AuditForwardState.Forwarded"/>, ordered by
/// <see cref="AuditEvent.OccurredAtUtc"/> ascending with
/// <see cref="AuditEvent.EventId"/> breaking ties deterministically. This is
/// the <see cref="AuditForwardState.Forwarded"/>-only sibling of
/// <see cref="ReadPendingAsync"/>: tests use it to assert a row reached
/// <see cref="AuditForwardState.Forwarded"/> specifically, which
/// <see cref="ReadPendingSinceAsync"/> cannot show because it also returns
/// <see cref="AuditForwardState.Pending"/> rows.
/// </summary>
/// <param name="limit">Maximum number of rows to return.</param>
/// <param name="ct">Cancellation token.</param>
public Task<IReadOnlyList<AuditEvent>> ReadForwardedAsync(int limit, CancellationToken ct = default)
{
    if (limit < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(limit), "limit must be > 0.");
    }

    const string sql = """
        SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
        SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
        Status, HttpStatus, DurationMs, ErrorMessage, ErrorDetail,
        RequestSummary, ResponseSummary, PayloadTruncated, Extra, ForwardState,
        ExecutionId, ParentExecutionId
        FROM AuditLog
        WHERE ForwardState = $forwarded
        ORDER BY OccurredAtUtc ASC, EventId ASC
        LIMIT $limit;
        """;

    List<AuditEvent> rows;

    // AuditLog-005: same as ReadPendingAsync — read connection under
    // _readLock, so the query never contends with the batched writer.
    lock (_readLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        using var query = _readConnection.CreateCommand();
        query.CommandText = sql;
        query.Parameters.AddWithValue("$forwarded", AuditForwardState.Forwarded.ToString());
        query.Parameters.AddWithValue("$limit", limit);

        rows = new List<AuditEvent>(Math.Min(limit, 256));
        using (var cursor = query.ExecuteReader())
        {
            while (cursor.Read())
            {
                rows.Add(MapRow(cursor));
            }
        }
    }

    return Task.FromResult<IReadOnlyList<AuditEvent>>(rows);
}
/// <inheritdoc />
public Task MarkForwardedAsync(IReadOnlyList<Guid> eventIds, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(eventIds);

    if (eventIds.Count != 0)
    {
        lock (_writeLock)
        {
            ObjectDisposedException.ThrowIf(_disposed, this);

            using var update = _connection.CreateCommand();

            // One UPDATE per call: every id gets its own bound parameter and
            // only the generated placeholder names are spliced into the SQL
            // text, so no caller data is ever concatenated into the statement.
            var placeholders = new string[eventIds.Count];
            for (var index = 0; index < placeholders.Length; index++)
            {
                var placeholder = $"$id{index}";
                placeholders[index] = placeholder;
                update.Parameters.AddWithValue(placeholder, eventIds[index].ToString());
            }

            update.CommandText =
                "UPDATE AuditLog SET ForwardState = $forwarded WHERE EventId IN ("
                + string.Join(",", placeholders)
                + ");";
            update.Parameters.AddWithValue("$forwarded", AuditForwardState.Forwarded.ToString());
            update.ExecuteNonQuery();
        }
    }

    return Task.CompletedTask;
}
/// <inheritdoc />
public Task<IReadOnlyList<AuditEvent>> ReadPendingSinceAsync(
    DateTime sinceUtc, int batchSize, CancellationToken ct = default)
{
    if (batchSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(batchSize), "batchSize must be > 0.");
    }

    const string sql = """
        SELECT EventId, OccurredAtUtc, Channel, Kind, CorrelationId,
        SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target,
        Status, HttpStatus, DurationMs, ErrorMessage, ErrorDetail,
        RequestSummary, ResponseSummary, PayloadTruncated, Extra, ForwardState,
        ExecutionId, ParentExecutionId
        FROM AuditLog
        WHERE ForwardState IN ($pending, $forwarded)
        AND OccurredAtUtc >= $since
        ORDER BY OccurredAtUtc ASC, EventId ASC
        LIMIT $limit;
        """;

    // The cursor is rendered in the same round-trip ("o") format that
    // FlushBatch uses when storing OccurredAtUtc, so the comparison can be
    // done on the stored text.
    var sinceText = EnsureUtc(sinceUtc).ToString(
        "o", System.Globalization.CultureInfo.InvariantCulture);

    List<AuditEvent> rows;

    // AuditLog-005: read connection under _readLock, decoupled from the
    // writer exactly like ReadPendingAsync.
    lock (_readLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        using var query = _readConnection.CreateCommand();
        query.CommandText = sql;
        query.Parameters.AddWithValue("$pending", AuditForwardState.Pending.ToString());
        query.Parameters.AddWithValue("$forwarded", AuditForwardState.Forwarded.ToString());
        query.Parameters.AddWithValue("$since", sinceText);
        query.Parameters.AddWithValue("$limit", batchSize);

        rows = new List<AuditEvent>(Math.Min(batchSize, 256));
        using (var cursor = query.ExecuteReader())
        {
            while (cursor.Read())
            {
                rows.Add(MapRow(cursor));
            }
        }
    }

    return Task.FromResult<IReadOnlyList<AuditEvent>>(rows);
}
/// <inheritdoc />
public Task MarkReconciledAsync(IReadOnlyList<Guid> eventIds, CancellationToken ct = default)
{
    ArgumentNullException.ThrowIfNull(eventIds);

    if (eventIds.Count != 0)
    {
        lock (_writeLock)
        {
            ObjectDisposedException.ThrowIf(_disposed, this);

            using var update = _connection.CreateCommand();

            // One bound parameter per id; only generated placeholder names
            // are spliced into the SQL text.
            var placeholders = new string[eventIds.Count];
            for (var index = 0; index < placeholders.Length; index++)
            {
                var placeholder = $"$id{index}";
                placeholders[index] = placeholder;
                update.Parameters.AddWithValue(placeholder, eventIds[index].ToString());
            }

            // Only rows currently Pending or Forwarded are moved to Reconciled.
            update.CommandText =
                "UPDATE AuditLog SET ForwardState = $reconciled "
                + "WHERE ForwardState IN ($pending, $forwarded) AND EventId IN ("
                + string.Join(",", placeholders)
                + ");";
            update.Parameters.AddWithValue("$reconciled", AuditForwardState.Reconciled.ToString());
            update.Parameters.AddWithValue("$pending", AuditForwardState.Pending.ToString());
            update.Parameters.AddWithValue("$forwarded", AuditForwardState.Forwarded.ToString());
            update.ExecuteNonQuery();
        }
    }

    return Task.CompletedTask;
}
/// <inheritdoc />
public Task<SiteAuditBacklogSnapshot> GetBacklogStatsAsync(CancellationToken ct = default)
{
    int backlogSize;
    DateTime? oldestUtc = null;

    // AuditLog-005: the probe runs on the dedicated read connection under
    // _readLock, so a long COUNT(*) over a large Pending backlog never adds
    // latency to the batched hot-path writer on _writeLock.
    lock (_readLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        // Both aggregates come back in one round-trip over the same
        // ForwardState range, avoiding a second scan.
        using var stats = _readConnection.CreateCommand();
        stats.CommandText = """
            SELECT COUNT(*), MIN(OccurredAtUtc)
            FROM AuditLog
            WHERE ForwardState = $pending;
            """;
        stats.Parameters.AddWithValue("$pending", AuditForwardState.Pending.ToString());

        using var row = stats.ExecuteReader();
        row.Read();
        backlogSize = row.GetInt32(0);
        if (!row.IsDBNull(1))
        {
            // MIN() is NULL when there are no Pending rows.
            oldestUtc = DateTime.Parse(
                row.GetString(1),
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.RoundtripKind);
        }
    }

    // The file-size probe happens outside the lock and is driven purely by
    // the DatabasePath option. In-memory style paths, and paths with no file
    // on disk, report 0.
    long fileBytes = 0;
    try
    {
        var path = _options.DatabasePath;
        if (!string.IsNullOrEmpty(path) &&
            !path.StartsWith(":memory:", StringComparison.Ordinal) &&
            !path.Contains("mode=memory", StringComparison.OrdinalIgnoreCase) &&
            File.Exists(path))
        {
            fileBytes = new FileInfo(path).Length;
        }
    }
    catch (Exception ex)
    {
        // Best-effort health metric: a failed stat() must never abort the
        // snapshot. Log it and report 0 bytes.
        _logger.LogDebug(ex,
            "SqliteAuditWriter could not stat DB path {Path} for backlog snapshot.",
            _options.DatabasePath);
    }

    var snapshot = new SiteAuditBacklogSnapshot(
        PendingCount: backlogSize,
        OldestPendingUtc: oldestUtc,
        OnDiskBytes: fileBytes);
    return Task.FromResult(snapshot);
}
/// <summary>
/// Returns <paramref name="value"/> as a <see cref="DateTimeKind.Utc"/>
/// <see cref="DateTime"/>.
/// </summary>
/// <param name="value">The timestamp to normalise.</param>
/// <returns>
/// The value unchanged when it is already UTC; the converted instant when it
/// is <see cref="DateTimeKind.Local"/>; and the same clock reading re-tagged
/// as UTC when it is <see cref="DateTimeKind.Unspecified"/>.
/// </returns>
/// <remarks>
/// <see cref="DateTime.ToUniversalTime"/> treats an unspecified-kind value as
/// local time, which would shift a UTC cursor by the machine's UTC offset.
/// The only caller passes a parameter documented as UTC
/// (<c>sinceUtc</c>), so an unspecified kind is taken to already be UTC.
/// </remarks>
private static DateTime EnsureUtc(DateTime value) => value.Kind switch
{
    DateTimeKind.Utc => value,
    DateTimeKind.Local => value.ToUniversalTime(),
    _ => DateTime.SpecifyKind(value, DateTimeKind.Utc),
};
/// <summary>
/// Materialises the reader's current row as an <see cref="AuditEvent"/>.
/// Columns are read by ordinal, so the reader must come from a query that
/// selects the 23 columns in the order used by the Read*Async methods
/// (EventId first, ParentExecutionId last).
/// </summary>
/// <param name="reader">A reader positioned on the row to map.</param>
private static AuditEvent MapRow(SqliteDataReader reader)
{
    // Nullable-column helpers: a database NULL maps to a null property.
    string? TextAt(int ordinal) => reader.IsDBNull(ordinal) ? null : reader.GetString(ordinal);
    int? IntAt(int ordinal) => reader.IsDBNull(ordinal) ? null : reader.GetInt32(ordinal);
    Guid? GuidAt(int ordinal) => reader.IsDBNull(ordinal) ? null : Guid.Parse(reader.GetString(ordinal));

    var eventId = Guid.Parse(reader.GetString(0));
    // Timestamps are stored in round-trip ("o") format; RoundtripKind keeps
    // whatever kind information the stored text carries.
    var occurredAt = DateTime.Parse(
        reader.GetString(1),
        System.Globalization.CultureInfo.InvariantCulture,
        System.Globalization.DateTimeStyles.RoundtripKind);

    return new AuditEvent
    {
        EventId = eventId,
        OccurredAtUtc = occurredAt,
        // Enum columns hold the member name written by FlushBatch.
        Channel = Enum.Parse<AuditChannel>(reader.GetString(2)),
        Kind = Enum.Parse<AuditKind>(reader.GetString(3)),
        CorrelationId = GuidAt(4),
        SourceSiteId = TextAt(5),
        SourceNode = TextAt(6),
        SourceInstanceId = TextAt(7),
        SourceScript = TextAt(8),
        Actor = TextAt(9),
        Target = TextAt(10),
        Status = Enum.Parse<AuditStatus>(reader.GetString(11)),
        HttpStatus = IntAt(12),
        DurationMs = IntAt(13),
        ErrorMessage = TextAt(14),
        ErrorDetail = TextAt(15),
        RequestSummary = TextAt(16),
        ResponseSummary = TextAt(17),
        // Stored as 0/1.
        PayloadTruncated = reader.GetInt32(18) != 0,
        Extra = TextAt(19),
        ForwardState = Enum.Parse<AuditForwardState>(reader.GetString(20)),
        ExecutionId = GuidAt(21),
        ParentExecutionId = GuidAt(22),
    };
}
/// <summary>
/// Synchronously disposes the audit writer by running
/// <see cref="DisposeAsync"/> to completion.
/// </summary>
/// <remarks>
/// AuditLog-006: use <see cref="DisposeAsync"/> wherever it is available (DI
/// honours <see cref="IAsyncDisposable"/> on singletons). This synchronous
/// entry point exists for callers that only know <see cref="IDisposable"/> —
/// legacy composition roots, or <c>using</c> statements without <c>await</c>.
/// The asynchronous dispose is started on the thread pool via
/// <see cref="Task.Run(Func{Task})"/> before this thread blocks on it, so its
/// continuations never need the calling thread or a
/// <see cref="SynchronizationContext"/> captured from it (ASP.NET request
/// thread, some Akka dispatcher configurations). That avoids the classic
/// sync-over-async deadlock.
/// </remarks>
public void Dispose() =>
    Task.Run(() => DisposeAsync().AsTask())
        .GetAwaiter()
        .GetResult();
/// <summary>
/// Asynchronously disposes the audit writer: stops intake, gives the writer
/// loop up to five seconds to drain, then closes the writer connection
/// followed by the read connection. Calling it on an already-disposed
/// instance does nothing.
/// </summary>
public async ValueTask DisposeAsync()
{
    Task? pendingDrain;
    lock (_writeLock)
    {
        if (_disposed)
        {
            return;
        }

        // Completing the channel writer is the shutdown signal: new WriteAsync
        // calls see the completion and fault, while the writer loop finishes
        // whatever is already buffered and then exits. _disposed deliberately
        // stays false here — it flips only in the second lock block, after
        // the drain, so FlushBatch's _disposed check covers just the window
        // in which the connection is about to close.
        _writeQueue.Writer.TryComplete();
        pendingDrain = _writerLoop;
    }

    // Awaited without holding the lock: the loop takes it for every batch.
    if (pendingDrain is not null)
    {
        var drainTimeout = TimeSpan.FromSeconds(5);
        try
        {
            await pendingDrain.WaitAsync(drainTimeout).ConfigureAwait(false);
        }
        catch (TimeoutException)
        {
            _logger.LogWarning("SqliteAuditWriter writer loop did not drain within 5s of dispose.");
        }
        catch (Exception ex)
        {
            // Per-batch failures are already routed to the pending events'
            // tasks, so a fault surfacing here is unexpected.
            _logger.LogError(ex, "SqliteAuditWriter writer loop faulted during dispose.");
        }
    }

    lock (_writeLock)
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;
        _connection.Dispose();
    }

    // AuditLog-005: the read connection goes last, once the writer side is
    // closed. Taking _readLock waits out any reader that acquired the lock
    // before _disposed flipped; later read attempts observe
    // ObjectDisposedException.
    lock (_readLock)
    {
        _readConnection.Dispose();
    }
}
/// <summary>
/// Pairs a queued audit event with the completion source its
/// <c>WriteAsync</c> caller awaits until the background writer has handled it.
/// </summary>
private sealed class PendingAuditEvent
{
    /// <summary>Wraps <paramref name="evt"/> for queuing.</summary>
    /// <param name="evt">The audit event to persist.</param>
    public PendingAuditEvent(AuditEvent evt) => Event = evt;

    /// <summary>The audit event to persist.</summary>
    public AuditEvent Event { get; }

    /// <summary>
    /// Signals the outcome of the write. Continuations run asynchronously so
    /// completing it never executes caller code inline on the writer thread.
    /// </summary>
    public TaskCompletionSource Completion { get; } =
        new(TaskCreationOptions.RunContinuationsAsynchronously);
}
}