feat(audit)!: ScadaBridge C5 — collapse central dbo.AuditLog to 10 canonical cols + persisted computed cols; CollapseAuditLogToCanonical migration; repo writes canonical directly (Task 2.5)

This commit is contained in:
Joseph Doherty
2026-06-02 14:06:46 -04:00
parent 1737d15f04
commit 68a6bd1720
12 changed files with 2592 additions and 440 deletions
@@ -46,36 +46,35 @@ public class AuditLogRepository : IAuditLogRepository
throw new ArgumentNullException(nameof(evt));
}
// C3 transitional shim: the canonical record carries the ScadaBridge domain
// fields inside DetailsJson — decompose it into the typed 24-column values the
// existing dbo.AuditLog table expects. Central rows leave ForwardState null
// (it is a site-storage-only concern, never on a central row).
var r = AuditRowProjection.Decompose(evt);
// C5 (Task 2.5): write the 10 canonical columns DIRECTLY — no Decompose.
// The five queryability columns (Kind/Status/SourceSiteId/ExecutionId/
// ParentExecutionId) are PERSISTED computed columns on dbo.AuditLog, and
// IngestedAtUtc is a computed column as well; SQL Server derives all six
// from DetailsJson, so they are intentionally absent from this column list
// (writing a computed column is an error). The canonical OccurredAtUtc is
// UTC by construction; store a Kind=Utc DateTime so downstream UTC/local
// conversions are safe.
var occurred = DateTime.SpecifyKind(evt.OccurredAtUtc.UtcDateTime, DateTimeKind.Utc);
// Enum columns are stored as varchar(32) (HasConversion<string>()), so do
// the conversion in C# rather than relying on parameter type inference —
// SqlClient would otherwise bind enums as int by default.
var channel = r.Channel.ToString();
var kind = r.Kind.ToString();
var status = r.Status.ToString();
string? forwardState = null;
// Canonical Actor is a required non-null string; an empty Actor maps to a
// NULL column (legacy/central rows stored null for system/anon).
string? actor = string.IsNullOrEmpty(evt.Actor) ? null : evt.Actor;
// Outcome / Category are varchar columns (Outcome via HasConversion<string>;
// Category carries the channel name). Bind as strings rather than relying on
// parameter type inference.
var outcome = evt.Outcome.ToString();
string? category = evt.Category;
// FormattableString interpolation parameterises every value (no concatenation),
// so this is safe against injection even for the string columns.
try
{
await _context.Database.ExecuteSqlInterpolatedAsync(
$@"IF NOT EXISTS (SELECT 1 FROM dbo.AuditLog WHERE EventId = {r.EventId})
$@"IF NOT EXISTS (SELECT 1 FROM dbo.AuditLog WHERE EventId = {evt.EventId})
INSERT INTO dbo.AuditLog
(EventId, OccurredAtUtc, IngestedAtUtc, Channel, Kind, CorrelationId, ExecutionId, ParentExecutionId,
SourceSiteId, SourceNode, SourceInstanceId, SourceScript, Actor, Target, Status,
HttpStatus, DurationMs, ErrorMessage, ErrorDetail, RequestSummary,
ResponseSummary, PayloadTruncated, Extra, ForwardState)
(EventId, OccurredAtUtc, Actor, Action, Outcome, Category, Target, SourceNode, CorrelationId, DetailsJson)
VALUES
({r.EventId}, {r.OccurredAtUtc}, {r.IngestedAtUtc}, {channel}, {kind}, {r.CorrelationId}, {r.ExecutionId}, {r.ParentExecutionId},
{r.SourceSiteId}, {r.SourceNode}, {r.SourceInstanceId}, {r.SourceScript}, {r.Actor}, {r.Target}, {status},
{r.HttpStatus}, {r.DurationMs}, {r.ErrorMessage}, {r.ErrorDetail}, {r.RequestSummary},
{r.ResponseSummary}, {r.PayloadTruncated}, {r.Extra}, {forwardState});",
({evt.EventId}, {occurred}, {actor}, {evt.Action}, {outcome}, {category}, {evt.Target}, {evt.SourceNode}, {evt.CorrelationId}, {evt.DetailsJson});",
ct);
}
catch (SqlException ex) when (
@@ -92,7 +91,7 @@ VALUES
ex,
"InsertIfNotExistsAsync swallowed duplicate-key violation (error {SqlErrorNumber}) for EventId {EventId}; treating as no-op.",
ex.Number,
r.EventId);
evt.EventId);
}
}
@@ -110,9 +109,11 @@ VALUES
throw new ArgumentNullException(nameof(paging));
}
// C3 transitional shim: the typed-column filter predicates query the
// AuditLogRow persistence shape as before (C6 retargets how the filter is
// applied); the materialized rows are recomposed into canonical records.
// C5 (Task 2.5): the filter predicates bind directly to the canonical columns
// and the persisted computed columns: the Channel filter targets the canonical
// Category column, while the Kind/Status/SourceSiteId/ExecutionId/
// ParentExecutionId filters target the computed columns. The materialized rows
// are projected to the canonical record by reading the 10 canonical columns
// (no 24-column Recompose).
var query = _context.Set<AuditLogRow>().AsNoTracking();
// Multi-value dimensions: a null OR empty list means "no constraint"
@@ -201,36 +202,29 @@ VALUES
}
/// <summary>
/// C3 transitional shim: recompose a canonical <see cref="AuditEvent"/> from a
/// materialized <see cref="AuditLogRow"/> read back from <c>dbo.AuditLog</c>.
/// <c>ForwardState</c> is dropped (central rows never carry it; it is not a
/// canonical / DetailsJson field).
/// C5 (Task 2.5): build the canonical <see cref="AuditEvent"/> DIRECTLY from the
/// 10 canonical columns of a materialized <see cref="AuditLogRow"/> read back from
/// <c>dbo.AuditLog</c> — no 24-column <c>Recompose</c>, because the table now holds
/// the canonical shape (every ScadaBridge domain field already lives in
/// <c>DetailsJson</c>). The persisted computed columns are read helpers only and
/// are not part of the canonical record. <see cref="AuditLogRow.Channel"/> is the
/// canonical <c>Category</c> column (Category = channel name for ScadaBridge).
/// </summary>
private static AuditEvent RowToCanonical(AuditLogRow row)
=> AuditRowProjection.Recompose(new AuditRowProjection.AuditRowValues(
EventId: row.EventId,
OccurredAtUtc: row.OccurredAtUtc,
IngestedAtUtc: row.IngestedAtUtc,
Channel: row.Channel,
Kind: row.Kind,
Status: row.Status,
CorrelationId: row.CorrelationId,
ExecutionId: row.ExecutionId,
ParentExecutionId: row.ParentExecutionId,
SourceSiteId: row.SourceSiteId,
SourceNode: row.SourceNode,
SourceInstanceId: row.SourceInstanceId,
SourceScript: row.SourceScript,
Actor: row.Actor,
Target: row.Target,
HttpStatus: row.HttpStatus,
DurationMs: row.DurationMs,
ErrorMessage: row.ErrorMessage,
ErrorDetail: row.ErrorDetail,
RequestSummary: row.RequestSummary,
ResponseSummary: row.ResponseSummary,
PayloadTruncated: row.PayloadTruncated,
Extra: row.Extra));
=> new()
{
EventId = row.EventId,
OccurredAtUtc = new DateTimeOffset(
DateTime.SpecifyKind(row.OccurredAtUtc, DateTimeKind.Utc)),
Actor = row.Actor ?? string.Empty,
Action = row.Action,
Outcome = row.Outcome,
Category = row.Channel.ToString(),
Target = row.Target,
SourceNode = row.SourceNode,
CorrelationId = row.CorrelationId,
DetailsJson = row.DetailsJson,
};
/// <inheritdoc />
public async Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default)
@@ -270,38 +264,29 @@ VALUES
DROP INDEX UX_AuditLog_EventId ON dbo.AuditLog;
-- 2. Staging table on [PRIMARY] (non-partitioned) with column shapes
-- byte-identical to dbo.AuditLog. Any drift here causes SWITCH to
-- reject the operation with msg 4904/4915.
-- byte-identical to the C5 dbo.AuditLog — INCLUDING the persisted
-- computed columns, whose definitions must match EXACTLY (same
-- expression text + PERSISTED) or ALTER TABLE ... SWITCH PARTITION
-- rejects the operation with msg 4904/4948. The ordinal order also
-- matches dbo.AuditLog_v2 (the CollapseAuditLogToCanonical migration):
-- 10 canonical columns first, then the 6 computed columns.
CREATE TABLE dbo.[{stagingTableName}] (
EventId uniqueidentifier NOT NULL,
OccurredAtUtc datetime2(7) NOT NULL,
IngestedAtUtc datetime2(7) NULL,
Channel varchar(32) NOT NULL,
Kind varchar(32) NOT NULL,
CorrelationId uniqueidentifier NULL,
SourceSiteId varchar(64) NULL,
SourceInstanceId varchar(128) NULL,
SourceScript varchar(128) NULL,
Actor varchar(128) NULL,
Target varchar(256) NULL,
Status varchar(32) NOT NULL,
HttpStatus int NULL,
DurationMs int NULL,
ErrorMessage nvarchar(1024) NULL,
ErrorDetail nvarchar(max) NULL,
RequestSummary nvarchar(max) NULL,
ResponseSummary nvarchar(max) NULL,
PayloadTruncated bit NOT NULL,
Extra nvarchar(max) NULL,
ForwardState varchar(32) NULL,
-- ExecutionId, ParentExecutionId, and SourceNode are last (in this
-- ordinal order) because each was added to the live AuditLog table
-- by a later ALTER TABLE ADD migration; the staging table must
-- match the live table column shape ordinal-for-ordinal or
-- ALTER TABLE ... SWITCH PARTITION fails (msg 4904/4915).
ExecutionId uniqueidentifier NULL,
ParentExecutionId uniqueidentifier NULL,
Actor nvarchar(256) NULL,
Action varchar(64) NOT NULL,
Outcome varchar(16) NOT NULL,
Category varchar(32) NOT NULL,
Target nvarchar(256) NULL,
SourceNode varchar(64) NULL,
CorrelationId uniqueidentifier NULL,
DetailsJson nvarchar(max) NULL,
Kind AS JSON_VALUE(DetailsJson,'$.kind') PERSISTED,
Status AS JSON_VALUE(DetailsJson,'$.status') PERSISTED,
SourceSiteId AS JSON_VALUE(DetailsJson,'$.sourceSiteId') PERSISTED,
ExecutionId AS CAST(JSON_VALUE(DetailsJson,'$.executionId') AS uniqueidentifier) PERSISTED,
ParentExecutionId AS CAST(JSON_VALUE(DetailsJson,'$.parentExecutionId') AS uniqueidentifier) PERSISTED,
IngestedAtUtc AS CAST(SWITCHOFFSET(CAST(JSON_VALUE(DetailsJson,'$.ingestedAtUtc') AS datetimeoffset), 0) AS datetime2(7)),
CONSTRAINT PK_{stagingTableName} PRIMARY KEY CLUSTERED (EventId, OccurredAtUtc)
) ON [PRIMARY];
@@ -648,24 +633,28 @@ VALUES
SELECT ParentExecutionId FROM Chain
WHERE ParentExecutionId IS NOT NULL
)
-- ParentExecutionId / SourceSiteId / SourceInstanceId are
-- derived via MIN: every audit row of one execution carries
-- the SAME ParentExecutionId (and source identity) — it is
-- stamped once per script run / inbound request — so MIN
-- simply picks that one shared value, it is not collapsing a
-- genuine disagreement across rows.
-- C5 (Task 2.5): ExecutionId / ParentExecutionId / SourceSiteId
-- are persisted computed columns (same names); Channel is now the
-- canonical Category column (Category = channel name, so the
-- Channels aggregate still yields channel names); SourceInstanceId
-- is no longer a column — read it from DetailsJson via JSON_VALUE.
-- ParentExecutionId / SourceSiteId / SourceInstanceId are derived
-- via MIN: every audit row of one execution carries the SAME value
-- (stamped once per script run / inbound request) — MIN simply
-- picks that one shared value, not collapsing a genuine
-- disagreement across rows.
SELECT
ids.ExecutionId AS [ExecutionId],
MIN(a.ParentExecutionId) AS [ParentExecutionId],
COUNT(a.EventId) AS [RowCount],
(SELECT STRING_AGG(d.Channel, ',')
FROM (SELECT DISTINCT a2.Channel FROM dbo.AuditLog a2
(SELECT STRING_AGG(d.Category, ',')
FROM (SELECT DISTINCT a2.Category FROM dbo.AuditLog a2
WHERE a2.ExecutionId = ids.ExecutionId) d) AS [Channels],
(SELECT STRING_AGG(d.Status, ',')
FROM (SELECT DISTINCT a2.Status FROM dbo.AuditLog a2
WHERE a2.ExecutionId = ids.ExecutionId) d) AS [Statuses],
MIN(a.SourceSiteId) AS [SourceSiteId],
MIN(a.SourceInstanceId) AS [SourceInstanceId],
MIN(JSON_VALUE(a.DetailsJson,'$.sourceInstanceId')) AS [SourceInstanceId],
MIN(a.OccurredAtUtc) AS [FirstOccurredAtUtc],
MAX(a.OccurredAtUtc) AS [LastOccurredAtUtc]
FROM ChainIds ids