using Microsoft.Data.SqlClient;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Types.Audit;
namespace ScadaLink.ConfigurationDatabase.Repositories;
///
/// EF Core implementation of . See the interface
/// for the append-only contract; this class only adds notes on the data-access
/// strategy used by each method.
///
public class AuditLogRepository : IAuditLogRepository
{
// SQL Server error numbers for duplicate-key violations on
// UX_AuditLog_EventId. 2601 is a unique-index violation; 2627 is a
// primary-key/unique-constraint violation. The IF NOT EXISTS … INSERT
// pattern has a check-then-act race window — two sessions can both pass
// the EXISTS check and then both attempt the INSERT — and the loser
// surfaces as one of these errors. Idempotency demands we swallow them.
private const int SqlErrorUniqueIndexViolation = 2601;
private const int SqlErrorPrimaryKeyViolation = 2627;
private readonly ScadaLinkDbContext _context;
private readonly ILogger _logger;
public AuditLogRepository(ScadaLinkDbContext context, ILogger? logger = null)
{
_context = context ?? throw new ArgumentNullException(nameof(context));
_logger = logger ?? NullLogger.Instance;
}
///
/// Issues a single IF NOT EXISTS … INSERT INTO dbo.AuditLog (…) VALUES (…)
/// via .
/// Bypasses the EF change tracker so the row never enters a tracked state and
/// the enum-as-string conversion is done explicitly in C# (the columns are
/// declared varchar(32) via HasConversion<string>() in
/// ).
///
public async Task InsertIfNotExistsAsync(AuditEvent evt, CancellationToken ct = default)
{
if (evt is null)
{
throw new ArgumentNullException(nameof(evt));
}
// Enum columns are stored as varchar(32) (HasConversion()), so do
// the conversion in C# rather than relying on parameter type inference —
// SqlClient would otherwise bind enums as int by default.
var channel = evt.Channel.ToString();
var kind = evt.Kind.ToString();
var status = evt.Status.ToString();
var forwardState = evt.ForwardState?.ToString();
// FormattableString interpolation parameterises every value (no concatenation),
// so this is safe against injection even for the string columns.
try
{
await _context.Database.ExecuteSqlInterpolatedAsync(
$@"IF NOT EXISTS (SELECT 1 FROM dbo.AuditLog WHERE EventId = {evt.EventId})
INSERT INTO dbo.AuditLog
(EventId, OccurredAtUtc, IngestedAtUtc, Channel, Kind, CorrelationId,
SourceSiteId, SourceInstanceId, SourceScript, Actor, Target, Status,
HttpStatus, DurationMs, ErrorMessage, ErrorDetail, RequestSummary,
ResponseSummary, PayloadTruncated, Extra, ForwardState)
VALUES
({evt.EventId}, {evt.OccurredAtUtc}, {evt.IngestedAtUtc}, {channel}, {kind}, {evt.CorrelationId},
{evt.SourceSiteId}, {evt.SourceInstanceId}, {evt.SourceScript}, {evt.Actor}, {evt.Target}, {status},
{evt.HttpStatus}, {evt.DurationMs}, {evt.ErrorMessage}, {evt.ErrorDetail}, {evt.RequestSummary},
{evt.ResponseSummary}, {evt.PayloadTruncated}, {evt.Extra}, {forwardState});",
ct);
}
catch (SqlException ex) when (
ex.Number == SqlErrorUniqueIndexViolation
|| ex.Number == SqlErrorPrimaryKeyViolation)
{
// Two concurrent sessions both passed the IF NOT EXISTS check and
// both attempted the INSERT — the loser raises 2601/2627 against
// UX_AuditLog_EventId. First-write-wins idempotency is already the
// documented contract for this method, so the race outcome is
// semantically a no-op. Swallow at Debug; other SqlExceptions
// bubble.
_logger.LogDebug(
ex,
"InsertIfNotExistsAsync swallowed duplicate-key violation (error {SqlErrorNumber}) for EventId {EventId}; treating as no-op.",
ex.Number,
evt.EventId);
}
}
///
/// Builds an AsNoTracking queryable over , applies
/// every non-null filter predicate, and pages by keyset on
/// (OccurredAtUtc DESC, EventId DESC). The keyset clause is expressed
/// directly (occurred < after || (occurred == after && eventId.CompareTo(afterId) < 0))
/// — EF Core 10 translates against SQL Server's
/// uniqueidentifier sort order.
///
public async Task> QueryAsync(
AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default)
{
if (filter is null)
{
throw new ArgumentNullException(nameof(filter));
}
if (paging is null)
{
throw new ArgumentNullException(nameof(paging));
}
var query = _context.Set().AsNoTracking();
if (filter.Channel is { } channel)
{
query = query.Where(e => e.Channel == channel);
}
if (filter.Kind is { } kind)
{
query = query.Where(e => e.Kind == kind);
}
if (filter.Status is { } status)
{
query = query.Where(e => e.Status == status);
}
if (!string.IsNullOrEmpty(filter.SourceSiteId))
{
var siteId = filter.SourceSiteId;
query = query.Where(e => e.SourceSiteId == siteId);
}
if (!string.IsNullOrEmpty(filter.Target))
{
var target = filter.Target;
query = query.Where(e => e.Target == target);
}
if (!string.IsNullOrEmpty(filter.Actor))
{
var actor = filter.Actor;
query = query.Where(e => e.Actor == actor);
}
if (filter.CorrelationId is { } correlationId)
{
query = query.Where(e => e.CorrelationId == correlationId);
}
if (filter.FromUtc is { } fromUtc)
{
query = query.Where(e => e.OccurredAtUtc >= fromUtc);
}
if (filter.ToUtc is { } toUtc)
{
query = query.Where(e => e.OccurredAtUtc <= toUtc);
}
// Keyset cursor on (OccurredAtUtc desc, EventId desc).
if (paging.AfterOccurredAtUtc is { } afterOccurred && paging.AfterEventId is { } afterEventId)
{
query = query.Where(e =>
e.OccurredAtUtc < afterOccurred
|| (e.OccurredAtUtc == afterOccurred && e.EventId.CompareTo(afterEventId) < 0));
}
return await query
.OrderByDescending(e => e.OccurredAtUtc)
.ThenByDescending(e => e.EventId)
.Take(paging.PageSize)
.ToListAsync(ct);
}
///
/// M6-T4 production implementation of the drop-and-rebuild dance documented
/// on .
///
///
///
/// The staging table name is GUID-suffixed so concurrent purge attempts on
/// different boundaries cannot collide. The staging schema is byte-identical
/// to the live AuditLog table (same column types, lengths,
/// nullability, and clustered-key shape) — SQL Server's
/// ALTER TABLE … SWITCH PARTITION rejects any drift. Keep this CREATE
/// in sync with both the migration that ships the live table
/// (20260520142214_AddAuditLogTable) and
/// AuditLogEntityTypeConfiguration.
///
///
/// All five steps run inside an explicit transaction so the SWITCH +
/// staging-DROP are atomic from the perspective of a consumer reading via
/// snapshot isolation; the CATCH rolls back and runs an idempotent
/// "rebuild UX_AuditLog_EventId if it doesn't exist" so a partial failure
/// never leaves the live table without its idempotency-supporting unique
/// index.
///
///
public async Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default)
{
// GUID-suffixed staging name: prevents collision with any concurrent
// purge attempt and avoids polluting the AuditLog object namespace with
// a predictable identifier.
var stagingTableName = $"AuditLog_Staging_{Guid.NewGuid():N}";
// ISO 8601 in UTC — SQL Server's datetime2 literal parser accepts this
// unambiguously and the value is round-trip-safe across SET DATEFORMAT
// settings.
var monthBoundaryStr = monthBoundary.ToUniversalTime().ToString("yyyy-MM-dd HH:mm:ss");
// Two-statement batch: the first SELECT samples the per-partition row
// count BEFORE the dance so we can report it back to the purge actor;
// the second batch performs the drop-and-rebuild. We use OUTPUT-style
// variables wired through @@ROWCOUNT after the SWITCH is not viable
// because SWITCH is a metadata-only operation that doesn't move rows in
// a way @@ROWCOUNT can observe.
var sampleSql = $@"
SELECT COUNT_BIG(*) FROM dbo.AuditLog
WHERE $PARTITION.pf_AuditLog_Month(OccurredAtUtc) =
$partition.pf_AuditLog_Month('{monthBoundaryStr}');";
var sql = $@"
BEGIN TRY
BEGIN TRANSACTION;
-- 1. Drop the non-aligned unique index. ALTER TABLE SWITCH refuses
-- to run while it exists.
IF EXISTS (SELECT 1 FROM sys.indexes WHERE name = 'UX_AuditLog_EventId' AND object_id = OBJECT_ID('dbo.AuditLog'))
DROP INDEX UX_AuditLog_EventId ON dbo.AuditLog;
-- 2. Staging table on [PRIMARY] (non-partitioned) with column shapes
-- byte-identical to dbo.AuditLog. Any drift here causes SWITCH to
-- reject the operation with msg 4904/4915.
CREATE TABLE dbo.[{stagingTableName}] (
EventId uniqueidentifier NOT NULL,
OccurredAtUtc datetime2(7) NOT NULL,
IngestedAtUtc datetime2(7) NULL,
Channel varchar(32) NOT NULL,
Kind varchar(32) NOT NULL,
CorrelationId uniqueidentifier NULL,
SourceSiteId varchar(64) NULL,
SourceInstanceId varchar(128) NULL,
SourceScript varchar(128) NULL,
Actor varchar(128) NULL,
Target varchar(256) NULL,
Status varchar(32) NOT NULL,
HttpStatus int NULL,
DurationMs int NULL,
ErrorMessage nvarchar(1024) NULL,
ErrorDetail nvarchar(max) NULL,
RequestSummary nvarchar(max) NULL,
ResponseSummary nvarchar(max) NULL,
PayloadTruncated bit NOT NULL,
Extra nvarchar(max) NULL,
ForwardState varchar(32) NULL,
CONSTRAINT PK_{stagingTableName} PRIMARY KEY CLUSTERED (EventId, OccurredAtUtc)
) ON [PRIMARY];
-- 3. Switch the partition out. $partition.pf_AuditLog_Month returns
-- the partition number that contains the supplied boundary value;
-- SWITCH PARTITION N moves that partition's pages to the staging
-- table (metadata-only, no row copying).
DECLARE @partitionNumber int = $partition.pf_AuditLog_Month('{monthBoundaryStr}');
DECLARE @sql nvarchar(max) = 'ALTER TABLE dbo.AuditLog SWITCH PARTITION ' + CAST(@partitionNumber AS nvarchar(10)) + ' TO dbo.[{stagingTableName}];';
EXEC sp_executesql @sql;
-- 4. Drop staging — the rows are discarded here. This is the purge.
DROP TABLE dbo.[{stagingTableName}];
-- 5. Rebuild the non-aligned unique index. Live traffic that hit the
-- table during steps 1-4 saw composite-PK uniqueness only; from
-- here on, single-column EventId uniqueness is restored.
CREATE UNIQUE NONCLUSTERED INDEX UX_AuditLog_EventId ON dbo.AuditLog (EventId) ON [PRIMARY];
COMMIT TRANSACTION;
END TRY
BEGIN CATCH
IF @@TRANCOUNT > 0 ROLLBACK TRANSACTION;
-- Best-effort staging cleanup. The DROP INDEX in step 1 is now
-- rolled back (so the index is back), but the staging table from
-- step 2 may or may not survive the rollback depending on the
-- failure point. Guard the DROP so a missing staging table doesn't
-- mask the original error.
IF OBJECT_ID('dbo.[{stagingTableName}]', 'U') IS NOT NULL DROP TABLE dbo.[{stagingTableName}];
-- Idempotent index rebuild — covers the niche case where ROLLBACK
-- failed to restore UX_AuditLog_EventId (or the failure happened
-- AFTER the COMMIT, which shouldn't be possible inside this TRY
-- but is cheap insurance). Without this, a failed switch could
-- leave the live table without its idempotency-supporting index.
IF NOT EXISTS (SELECT 1 FROM sys.indexes WHERE name = 'UX_AuditLog_EventId' AND object_id = OBJECT_ID('dbo.AuditLog'))
CREATE UNIQUE NONCLUSTERED INDEX UX_AuditLog_EventId ON dbo.AuditLog (EventId) ON [PRIMARY];
-- Surface the original error to the caller — the purge actor logs
-- and continues with the next boundary.
THROW;
END CATCH;";
// Sample the row count before the switch. The sample is best-effort
// (no transaction wrapping the sample-then-switch pair) because the
// central singleton is the only writer to this RPC and a daily-purge
// tick doesn't compete with concurrent SwitchOut callers. A
// concurrent INSERT racing the sample under-reports by at most a
// few rows, which is acceptable for an "approximate" purged-row
// count surfaced via AuditLogPurgedEvent.
long rowsDeleted = 0;
var conn = _context.Database.GetDbConnection();
var openedHere = false;
if (conn.State != System.Data.ConnectionState.Open)
{
await conn.OpenAsync(ct).ConfigureAwait(false);
openedHere = true;
}
try
{
await using (var sampleCmd = conn.CreateCommand())
{
sampleCmd.CommandText = sampleSql;
var sampleResult = await sampleCmd.ExecuteScalarAsync(ct).ConfigureAwait(false);
if (sampleResult is not null && sampleResult is not DBNull)
{
rowsDeleted = Convert.ToInt64(sampleResult);
}
}
}
finally
{
if (openedHere)
{
await conn.CloseAsync().ConfigureAwait(false);
}
}
await _context.Database.ExecuteSqlRawAsync(sql, ct);
return rowsDeleted;
}
///
/// Returns the set of pf_AuditLog_Month boundaries whose partition's
/// MAX(OccurredAtUtc) is strictly older than .
/// Boundaries with empty partitions are excluded — purging an empty
/// partition is wasted I/O.
///
///
///
/// The CTE pulls every boundary value defined by the partition function and
/// joins it (via $PARTITION.pf_AuditLog_Month) to the live AuditLog
/// to compute per-partition MAX(OccurredAtUtc). The outer filter
/// keeps only those whose MAX is non-NULL (partition has rows) AND strictly
/// less than the threshold (every row is past retention).
///
///
/// Note: the query scans the live OccurredAtUtc column to compute
/// the MAX per partition. With IX_AuditLog_OccurredAtUtc on the
/// partition-aligned scheme this is a single index seek per partition; for
/// 24 partitions and a daily purge cadence the cost is negligible.
///
///
public async Task> GetPartitionBoundariesOlderThanAsync(
DateTime threshold,
CancellationToken ct = default)
{
var thresholdUtc = threshold.ToUniversalTime();
var thresholdStr = thresholdUtc.ToString("yyyy-MM-dd HH:mm:ss.fffffff");
// Per-partition MAX over the live table. We materialise the boundary
// list first (24 rows) then LEFT JOIN to the MAX aggregate so empty
// partitions surface as NULL and get filtered out by the WHERE clause.
var sql = $@"
WITH Boundaries AS (
SELECT CAST(rv.value AS datetime2(7)) AS BoundaryValue,
rv.boundary_id AS BoundaryId
FROM sys.partition_range_values rv
INNER JOIN sys.partition_functions pf ON rv.function_id = pf.function_id
WHERE pf.name = 'pf_AuditLog_Month'
)
SELECT b.BoundaryValue
FROM Boundaries b
CROSS APPLY (
SELECT MAX(a.OccurredAtUtc) AS MaxOccurredAt
FROM dbo.AuditLog a
WHERE $PARTITION.pf_AuditLog_Month(a.OccurredAtUtc) = b.BoundaryId + 1
) x
WHERE x.MaxOccurredAt IS NOT NULL
AND x.MaxOccurredAt < CAST('{thresholdStr}' AS datetime2(7))
ORDER BY b.BoundaryValue;";
var conn = _context.Database.GetDbConnection();
var openedHere = false;
if (conn.State != System.Data.ConnectionState.Open)
{
await conn.OpenAsync(ct).ConfigureAwait(false);
openedHere = true;
}
var results = new List();
try
{
await using var cmd = conn.CreateCommand();
cmd.CommandText = sql;
await using var reader = await cmd.ExecuteReaderAsync(ct).ConfigureAwait(false);
while (await reader.ReadAsync(ct).ConfigureAwait(false))
{
results.Add(reader.GetDateTime(0));
}
}
finally
{
if (openedHere)
{
await conn.CloseAsync().ConfigureAwait(false);
}
}
return results;
}
}