fix(configdb): replace SwitchOutPartitionAsync stub with drop-and-rebuild dance (#23 M6)
Replaces M1's NotSupportedException stub with the production DROP-INDEX → CREATE-staging → SWITCH PARTITION → DROP-staging → CREATE-INDEX dance documented in alog.md §4. UX_AuditLog_EventId is intentionally non-aligned with ps_AuditLog_Month so single-column EventId uniqueness can be enforced cheaply for InsertIfNotExistsAsync; SQL Server rejects ALTER TABLE SWITCH while a non-aligned unique index is present, so the implementation drops it, switches the partition data into a GUID-suffixed staging table on [PRIMARY], drops staging (discarding the rows), and rebuilds the unique index — all inside an explicit transaction with a CATCH that guarantees the unique index is rebuilt regardless of failure point. Also adds GetPartitionBoundariesOlderThanAsync to IAuditLogRepository: a CROSS APPLY over sys.partition_range_values + per-partition MAX(OccurredAtUtc) to enumerate retention-eligible months for the M6 purge actor (next commit). Tests verify: * Old partition's rows are removed; other months untouched * UX_AuditLog_EventId is rebuilt after a successful switch * InsertIfNotExistsAsync's first-write-wins idempotency still holds after switch * On engineered SWITCH failure (inbound FK from a probe table), SqlException propagates AND UX_AuditLog_EventId is still present (CATCH branch ran) * GetPartitionBoundariesOlderThanAsync returns only boundaries whose partition's MAX(OccurredAtUtc) is strictly older than the threshold; empty partitions excluded
This commit is contained in:
@@ -216,5 +216,9 @@ public class AuditLogIngestActorTests : TestKit, IClassFixture<MsSqlMigrationFix
|
||||
|
||||
// Pass-through: hands the switch-out request to _inner with the same
// boundary and cancellation token, returning its task unchanged.
public Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default)
{
    return _inner.SwitchOutPartitionAsync(monthBoundary, ct);
}
|
||||
|
||||
// Pass-through: forwards the boundary query to _inner with the same
// threshold and cancellation token, returning its task unchanged.
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
    DateTime threshold, CancellationToken ct = default)
{
    return _inner.GetPartitionBoundariesOlderThanAsync(threshold, ct);
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,6 +89,10 @@ public class SiteAuditReconciliationActorTests : TestKit, IClassFixture<MsSqlMig
|
||||
|
||||
// No-op stub: ignores both arguments and reports immediate completion.
public Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default)
{
    return Task.CompletedTask;
}
|
||||
|
||||
// Stub: ignores both arguments and always yields an already-completed task
// carrying an empty boundary list.
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
    DateTime threshold, CancellationToken ct = default)
{
    IReadOnlyList<DateTime> noBoundaries = Array.Empty<DateTime>();
    return Task.FromResult(noBoundaries);
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Types.Audit;
|
||||
@@ -309,21 +310,221 @@ public class AuditLogRepositoryTests : IClassFixture<MsSqlMigrationFixture>
|
||||
Assert.True(events.Select(e => e.EventId).ToHashSet().SetEquals(allIds));
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// M6-T4 Bundle C: SwitchOutPartitionAsync drop-and-rebuild integration tests
|
||||
// ------------------------------------------------------------------------
|
||||
//
|
||||
// The partition-switch path replaces M1's NotSupportedException stub with
|
||||
// the production DROP-INDEX → CREATE-staging → SWITCH PARTITION →
|
||||
// DROP-staging → CREATE-INDEX dance documented in alog.md §4. These tests
|
||||
// verify the side effects an outsider can observe:
|
||||
// * rows in the targeted month are removed
|
||||
// * rows in OTHER months are NOT touched
|
||||
// * UX_AuditLog_EventId still exists after a successful switch
|
||||
// * InsertIfNotExistsAsync's first-write-wins idempotency still holds
|
||||
// after a switch (the rebuilt index is real)
|
||||
// * a thrown SqlException leaves UX_AuditLog_EventId rebuilt (the CATCH
|
||||
// branch's recovery path runs)
|
||||
|
||||
[SkippableFact]
|
||||
public async Task SwitchOutPartitionAsync_OldPartition_RemovesRows_NewPartitionsKept()
{
    // Verifies that switching out one month's partition removes only that
    // month's rows and leaves rows in the neighbouring months in place.
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    var siteId = NewSiteId();
    await using var context = CreateContext();
    var repo = new AuditLogRepository(context);

    // Three distinct months — Jan, Feb, Mar 2026 — so the switch on Jan's
    // boundary purges exactly one month's worth of rows. Boundary values
    // come from the partition function's pre-seeded list (alog.md §4).
    var janEvt = NewEvent(siteId, occurredAtUtc: new DateTime(2026, 1, 15, 10, 0, 0, DateTimeKind.Utc));
    var febEvt = NewEvent(siteId, occurredAtUtc: new DateTime(2026, 2, 15, 10, 0, 0, DateTimeKind.Utc));
    var marEvt = NewEvent(siteId, occurredAtUtc: new DateTime(2026, 3, 15, 10, 0, 0, DateTimeKind.Utc));
    await repo.InsertIfNotExistsAsync(janEvt);
    await repo.InsertIfNotExistsAsync(febEvt);
    await repo.InsertIfNotExistsAsync(marEvt);

    // Boundary value '2026-01-01' identifies the January 2026 partition under
    // RANGE RIGHT semantics ($PARTITION returns the partition into which the
    // boundary value itself falls — the partition whose lower bound is the
    // boundary).
    await repo.SwitchOutPartitionAsync(new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc));

    // Read back through a fresh context, filtered to this test's site id so
    // rows written by other tests against the shared database are ignored.
    await using var readContext = CreateContext();
    var remaining = await readContext.Set<AuditEvent>()
        .Where(e => e.SourceSiteId == siteId)
        .ToListAsync();

    Assert.DoesNotContain(remaining, e => e.EventId == janEvt.EventId);
    Assert.Contains(remaining, e => e.EventId == febEvt.EventId);
    Assert.Contains(remaining, e => e.EventId == marEvt.EventId);
}
|
||||
|
||||
[SkippableFact]
|
||||
public async Task SwitchOutPartitionAsync_RebuildsUxIndex_AfterSwitch()
{
    // Verifies that UX_AuditLog_EventId exists on dbo.AuditLog once a
    // successful partition switch has completed.
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    await using var context = CreateContext();
    var repo = new AuditLogRepository(context);

    // Pick a different month per test so successive test runs (which share
    // the fixture's MSSQL database) don't tread on each other.
    await repo.SwitchOutPartitionAsync(new DateTime(2026, 4, 1, 0, 0, 0, DateTimeKind.Utc));

    // Query the catalog through a separate context: exactly one index with
    // this name must be attached to dbo.AuditLog.
    await using var verifyContext = CreateContext();
    var indexExists = await ScalarAsync<int>(
        verifyContext,
        "SELECT COUNT(*) FROM sys.indexes " +
        "WHERE name = 'UX_AuditLog_EventId' AND object_id = OBJECT_ID('dbo.AuditLog');");
    Assert.Equal(1, indexExists);
}
|
||||
|
||||
[SkippableFact]
|
||||
public async Task SwitchOutPartitionAsync_InsertIfNotExistsAsync_StillEnforcesFirstWriteWins_AfterSwitch()
{
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    var site = NewSiteId();
    var mayOccurredAt = new DateTime(2026, 5, 20, 9, 0, 0, DateTimeKind.Utc);
    var juneBoundary = new DateTime(2026, 6, 1, 0, 0, 0, DateTimeKind.Utc);

    await using var writeContext = CreateContext();
    var repository = new AuditLogRepository(writeContext);

    // Seed one row in May 2026. It lives in a different partition from the
    // one switched out below, so it must still be there afterwards.
    var original = NewEvent(site, occurredAtUtc: mayOccurredAt);
    await repository.InsertIfNotExistsAsync(original);

    // Run the switch against the June 2026 boundary — a different month
    // from the seeded row.
    await repository.SwitchOutPartitionAsync(juneBoundary);

    // Replay the same EventId with a different payload. If the index that
    // backs the idempotency check had not been rebuilt properly, this would
    // add a second row and the single-row assertion below would fail.
    var replay = original with { ErrorMessage = "second-should-be-ignored-after-switch" };
    await repository.InsertIfNotExistsAsync(replay);

    await using var readContext = CreateContext();
    var stored = await readContext.Set<AuditEvent>()
        .Where(e => e.SourceSiteId == site)
        .ToListAsync();

    var survivor = Assert.Single(stored);
    Assert.Equal(original.EventId, survivor.EventId);
    // First-write-wins: the replay's ErrorMessage must not have been stored.
    Assert.Null(survivor.ErrorMessage);
}
|
||||
|
||||
[SkippableFact]
|
||||
public async Task SwitchOutPartitionAsync_PartialFailure_RebuildsUxIndex_RaisesException()
{
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    await using var context = CreateContext();
    var repo = new AuditLogRepository(context);

    // Runs one statement on its own short-lived connection to the fixture DB.
    async Task RunSqlAsync(string sql)
    {
        await using var connection = new SqlConnection(_fixture.ConnectionString);
        await connection.OpenAsync();
        await using var command = connection.CreateCommand();
        command.CommandText = sql;
        await command.ExecuteNonQueryAsync();
    }

    // Engineer a deterministic SWITCH failure: a probe table with a
    // composite FOREIGN KEY onto AuditLog's (EventId, OccurredAtUtc) key.
    // The switch is expected to be refused while that inbound FK exists.
    // NOTE(review): the exact SQL Server error number/text is not asserted
    // here (believed to be msg 4928 — confirm); only the presence of
    // "SWITCH" in the message is checked below.
    //
    // The guid suffix keeps the probe name unique across reruns against the
    // same fixture database; the name is truncated to 32 characters.
    var probeTable = $"AuditFkProbe_{Guid.NewGuid():N}"[..32];
    var createProbeSql =
        $"CREATE TABLE dbo.[{probeTable}] ( " +
        $" EventId uniqueidentifier NOT NULL, " +
        $" OccurredAtUtc datetime2(7) NOT NULL, " +
        $" CONSTRAINT FK_{probeTable}_AuditLog FOREIGN KEY (EventId, OccurredAtUtc) " +
        $" REFERENCES dbo.AuditLog(EventId, OccurredAtUtc));";
    await RunSqlAsync(createProbeSql);

    try
    {
        var failure = await Assert.ThrowsAnyAsync<SqlException>(
            () => repo.SwitchOutPartitionAsync(new DateTime(2026, 9, 1, 0, 0, 0, DateTimeKind.Utc)));

        // Guard against passing on an unrelated SqlException: the message
        // has to mention the SWITCH statement.
        Assert.Contains("SWITCH", failure.Message, StringComparison.OrdinalIgnoreCase);
    }
    finally
    {
        // Remove the probe table (and with it the FK) no matter how the
        // assertions above went, so later tests on the shared fixture are
        // not affected by it.
        await RunSqlAsync(
            $"IF OBJECT_ID('dbo.[{probeTable}]', 'U') IS NOT NULL DROP TABLE dbo.[{probeTable}];");
    }

    // After the failed switch the unique index must still be present on
    // dbo.AuditLog — i.e. the failure path restored it.
    await using var verifyContext = CreateContext();
    var indexCount = await ScalarAsync<int>(
        verifyContext,
        "SELECT COUNT(*) FROM sys.indexes " +
        "WHERE name = 'UX_AuditLog_EventId' AND object_id = OBJECT_ID('dbo.AuditLog');");
    Assert.Equal(1, indexCount);
}
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
// M6-T4 Bundle C: GetPartitionBoundariesOlderThanAsync
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
[SkippableFact]
|
||||
public async Task GetPartitionBoundariesOlderThanAsync_ReturnsBoundaries_WithMaxOccurredOlderThanThreshold()
{
    Skip.IfNot(_fixture.Available, _fixture.SkipReason);

    var siteId = NewSiteId();
    await using var context = CreateContext();
    var repo = new AuditLogRepository(context);

    var julyBoundary = new DateTime(2026, 7, 1, 0, 0, 0, DateTimeKind.Utc);
    var augustBoundary = new DateTime(2026, 8, 1, 0, 0, 0, DateTimeKind.Utc);

    // One event per month: July 10 (expected to count as "old") and
    // August 10 (expected to count as "new").
    var julyEvent = NewEvent(siteId, occurredAtUtc: new DateTime(2026, 7, 10, 0, 0, 0, DateTimeKind.Utc));
    var augustEvent = NewEvent(siteId, occurredAtUtc: new DateTime(2026, 8, 10, 0, 0, 0, DateTimeKind.Utc));
    await repo.InsertIfNotExistsAsync(julyEvent);
    await repo.InsertIfNotExistsAsync(augustEvent);

    // With the threshold at Aug 1 2026, the July partition's newest row
    // (July 10) is older than the threshold while the August partition's
    // newest row (August 10) is not.
    var threshold = augustBoundary;
    var boundaries = await repo.GetPartitionBoundariesOlderThanAsync(threshold);

    // Other boundaries may legitimately appear in the result too (for
    // example older partitions holding rows from other tests on the shared
    // database), so only the July/August inclusion and exclusion driven by
    // the rows seeded here is asserted.
    Assert.Contains(julyBoundary, boundaries);
    Assert.DoesNotContain(augustBoundary, boundaries);
}
|
||||
|
||||
// Executes `sql` on the context's underlying connection (opening it first if
// it is not already open) and converts the first column of the first row to T.
// A null or DBNull result yields default(T).
private async Task<T> ScalarAsync<T>(ScadaLinkDbContext context, string sql)
{
    var connection = context.Database.GetDbConnection();
    if (connection.State != System.Data.ConnectionState.Open)
    {
        await connection.OpenAsync();
    }

    await using var command = connection.CreateCommand();
    command.CommandText = sql;

    var raw = await command.ExecuteScalarAsync();
    if (raw is null or DBNull)
    {
        return default!;
    }

    // Convert.ChangeType cannot target Nullable<X> directly, so unwrap to X
    // when T is nullable; any other T (string included) is used as-is.
    var targetType = Nullable.GetUnderlyingType(typeof(T)) ?? typeof(T);
    return (T)Convert.ChangeType(raw, targetType)!;
}
|
||||
|
||||
// --- helpers ------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user