feat(auditlog): AuditLogPurgeActor daily partition-switch purge (#23 M6)

Central singleton (M6-T4 Bundle C) that drives the daily AuditLog partition
purge. On a configurable timer (default 24 hours) the actor:
  1. Queries IAuditLogRepository.GetPartitionBoundariesOlderThanAsync for
     monthly boundaries whose latest OccurredAtUtc is older than
     DateTime.UtcNow - AuditLogOptions.RetentionDays.
  2. For each eligible boundary calls SwitchOutPartitionAsync, which runs
     the drop-and-rebuild dance around UX_AuditLog_EventId.
  3. Publishes AuditLogPurgedEvent(boundary, rowsDeleted, durationMs) on
     the actor-system EventStream so the Bundle E central health collector
     and ops surfaces can subscribe without coupling to this actor.

Co-changes:
* SwitchOutPartitionAsync returns long (rows deleted) — sampled BEFORE the
  switch via COUNT_BIG over the per-partition  filter so the count
  reflects what the switch removed, not a post-purge scan of a table that
  no longer exists. All stub implementations updated.
* AuditLogPurgeOptions: IntervalHours (default 24), IntervalOverride for
  tests, Interval property resolving either.
* AuditLogPurgedEvent: record with MonthBoundary, RowsDeleted, DurationMs.

Behavior:
* Continue-on-error per boundary — one partition that throws does NOT
  abandon the rest of the tick.
* DI scope opened per tick (IAuditLogRepository is a SCOPED EF Core
  service); mirrors SiteAuditReconciliationActor and AuditLogIngestActor.
* SupervisorStrategy Resume keeps the singleton alive across leaked
  exceptions.
* EventStream capture BEFORE the first await — Context is unsafe after
  await in async receive handlers (same pattern as Sender-capture in
  AuditLogIngestActor.OnIngestAsync).

Tests:
* Tick_Fires_OnDailyInterval — visible timer side effect.
* Tick_OldPartitions_SwitchedOut — both seeded boundaries purged.
* Tick_NewerPartitions_Untouched — empty enumerator → no switches.
* Tick_PublishesPurgedEvent_WithRowCount — AuditLogPurgedEvent carries
  RowsDeleted and DurationMs.
* Tick_SwitchThrows_OtherPartitionsStillProcessed — continue-on-error.
* Threshold_UsesAuditLogOptionsRetentionDays — non-default 30-day window
  computed from UtcNow - RetentionDays.
* EndToEnd_RealPartition_RowsRemoved_PurgedEventPublished — TestKit +
  MsSqlMigrationFixture: real partitioned table, Jan-2026 row purged,
  Apr-2026 row kept, AuditLogPurgedEvent observed via probe.
This commit is contained in:
Joseph Doherty
2026-05-20 18:36:31 -04:00
parent 6069a20e0f
commit 660fdc4e93
8 changed files with 718 additions and 6 deletions

View File

@@ -214,7 +214,7 @@ public class AuditLogIngestActorTests : TestKit, IClassFixture<MsSqlMigrationFix
AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default) =>
_inner.QueryAsync(filter, paging, ct);
public Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
_inner.SwitchOutPartitionAsync(monthBoundary, ct);
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(

View File

@@ -0,0 +1,376 @@
using Akka.Actor;
using Akka.TestKit.Xunit2;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using ScadaLink.AuditLog.Central;
using ScadaLink.AuditLog.Configuration;
using ScadaLink.Commons.Entities.Audit;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Types.Audit;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.ConfigurationDatabase;
using ScadaLink.ConfigurationDatabase.Repositories;
using ScadaLink.ConfigurationDatabase.Tests.Migrations;
namespace ScadaLink.AuditLog.Tests.Central;
/// <summary>
/// Bundle C (#23 M6-T4) tests for <see cref="AuditLogPurgeActor"/>. The fast,
/// schedule-only tests substitute a recording stub for
/// <see cref="IAuditLogRepository"/> so the timer + per-boundary error-isolation
/// + event-publish machinery can be exercised without an MSSQL container.
/// The end-to-end "real partition gets switched out" assertion lives in the
/// repository tests (Bundle C of M6-T4); this actor file is purely about the
/// actor's policy decisions.
/// </summary>
public class AuditLogPurgeActorTests : TestKit, IClassFixture<MsSqlMigrationFixture>
{
private readonly MsSqlMigrationFixture _fixture;
public AuditLogPurgeActorTests(MsSqlMigrationFixture fixture)
{
_fixture = fixture;
}
/// <summary>
/// In-memory recording stub. Captures every
/// <see cref="GetPartitionBoundariesOlderThanAsync"/> + every
/// <see cref="SwitchOutPartitionAsync"/> so tests can assert which boundaries
/// the actor chose to purge and how many ticks it issued. Also lets a
/// specific boundary be configured to throw so the continue-on-error path
/// is exercisable.
/// </summary>
private sealed class RecordingRepo : IAuditLogRepository
{
public List<DateTime> ThresholdQueries { get; } = new();
public List<DateTime> SwitchedBoundaries { get; } = new();
public Func<DateTime, long> RowsPerBoundary { get; set; } = _ => 0L;
public DateTime? ThrowOnBoundary { get; set; }
public Exception? BoundaryException { get; set; }
// The actor enumerator returns whichever list is configured here.
// Mutating this between ticks lets tests simulate "no longer
// eligible" boundaries on the second tick.
public List<DateTime> Boundaries { get; set; } = new();
public Task InsertIfNotExistsAsync(AuditEvent evt, CancellationToken ct = default) =>
Task.CompletedTask;
public Task<IReadOnlyList<AuditEvent>> QueryAsync(
AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default) =>
Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>());
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default)
{
if (ThrowOnBoundary.HasValue && monthBoundary == ThrowOnBoundary.Value)
{
throw BoundaryException ?? new InvalidOperationException("simulated switch failure");
}
SwitchedBoundaries.Add(monthBoundary);
return Task.FromResult(RowsPerBoundary(monthBoundary));
}
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
DateTime threshold, CancellationToken ct = default)
{
ThresholdQueries.Add(threshold);
return Task.FromResult<IReadOnlyList<DateTime>>(Boundaries.ToArray());
}
}
private IServiceProvider BuildScopedProvider(IAuditLogRepository repo)
{
var services = new ServiceCollection();
// Mirror AddConfigurationDatabase: IAuditLogRepository is scoped, so
// the actor opens a fresh scope per tick and resolves there.
services.AddScoped(_ => repo);
return services.BuildServiceProvider();
}
private IActorRef CreateActor(
IAuditLogRepository repo,
AuditLogPurgeOptions purgeOptions,
AuditLogOptions? auditOptions = null)
{
var sp = BuildScopedProvider(repo);
return Sys.ActorOf(Props.Create(() => new AuditLogPurgeActor(
sp,
Options.Create(purgeOptions),
Options.Create(auditOptions ?? new AuditLogOptions()),
NullLogger<AuditLogPurgeActor>.Instance)));
}
private static AuditLogPurgeOptions FastTickOptions(TimeSpan? interval = null) => new()
{
IntervalHours = 24,
IntervalOverride = interval ?? TimeSpan.FromMilliseconds(100),
};
/// <summary>
/// Subscribe a probe to the EventStream so the test can observe
/// <see cref="AuditLogPurgedEvent"/> publications synchronously.
/// </summary>
private Akka.TestKit.TestProbe SubscribePurged()
{
var probe = CreateTestProbe();
Sys.EventStream.Subscribe(probe.Ref, typeof(AuditLogPurgedEvent));
return probe;
}
// ---------------------------------------------------------------------
// 1. Tick_Fires_OnDailyInterval
// ---------------------------------------------------------------------
[Fact]
public void Tick_Fires_OnDailyInterval()
{
var repo = new RecordingRepo();
CreateActor(repo, FastTickOptions());
// The first scheduled tick fires after the configured interval. We
// assert the visible side effect (the enumerator was called) rather
// than racing on internal state.
AwaitAssert(
() => Assert.True(repo.ThresholdQueries.Count >= 1,
$"expected >= 1 enumerator call, got {repo.ThresholdQueries.Count}"),
duration: TimeSpan.FromSeconds(3),
interval: TimeSpan.FromMilliseconds(50));
}
// ---------------------------------------------------------------------
// 2. Tick_OldPartitions_SwitchedOut
// ---------------------------------------------------------------------
[Fact]
public void Tick_OldPartitions_SwitchedOut()
{
var repo = new RecordingRepo
{
Boundaries = new List<DateTime>
{
new(2025, 11, 1, 0, 0, 0, DateTimeKind.Utc),
new(2025, 12, 1, 0, 0, 0, DateTimeKind.Utc),
},
RowsPerBoundary = _ => 42L,
};
CreateActor(repo, FastTickOptions());
AwaitAssert(
() =>
{
Assert.Contains(new DateTime(2025, 11, 1, 0, 0, 0, DateTimeKind.Utc), repo.SwitchedBoundaries);
Assert.Contains(new DateTime(2025, 12, 1, 0, 0, 0, DateTimeKind.Utc), repo.SwitchedBoundaries);
},
duration: TimeSpan.FromSeconds(3),
interval: TimeSpan.FromMilliseconds(50));
}
// ---------------------------------------------------------------------
// 3. Tick_NewerPartitions_Untouched
// ---------------------------------------------------------------------
[Fact]
public void Tick_NewerPartitions_Untouched()
{
// The actor's contract: it only touches whatever the enumerator
// returns. The enumerator (in production) filters out non-eligible
// boundaries; here we simulate that by handing back an empty list
// and asserting the actor switched nothing despite the tick firing.
var repo = new RecordingRepo { Boundaries = new List<DateTime>() };
CreateActor(repo, FastTickOptions());
// Wait for at least one tick (visible via the enumerator call) then
// assert no switch happened.
AwaitAssert(
() => Assert.True(repo.ThresholdQueries.Count >= 1),
duration: TimeSpan.FromSeconds(3),
interval: TimeSpan.FromMilliseconds(50));
Assert.Empty(repo.SwitchedBoundaries);
}
// ---------------------------------------------------------------------
// 4. Tick_PublishesPurgedEvent_WithRowCount
// ---------------------------------------------------------------------
[Fact]
public void Tick_PublishesPurgedEvent_WithRowCount()
{
var boundary = new DateTime(2025, 6, 1, 0, 0, 0, DateTimeKind.Utc);
var repo = new RecordingRepo
{
Boundaries = new List<DateTime> { boundary },
RowsPerBoundary = _ => 1234L,
};
var probe = SubscribePurged();
CreateActor(repo, FastTickOptions());
var msg = probe.ExpectMsg<AuditLogPurgedEvent>(TimeSpan.FromSeconds(5));
Assert.Equal(boundary, msg.MonthBoundary);
Assert.Equal(1234L, msg.RowsDeleted);
Assert.True(msg.DurationMs >= 0,
$"DurationMs should be non-negative; was {msg.DurationMs}");
}
// ---------------------------------------------------------------------
// 5. Tick_SwitchThrows_OtherPartitionsStillProcessed (continue-on-error)
// ---------------------------------------------------------------------
[Fact]
public void Tick_SwitchThrows_OtherPartitionsStillProcessed()
{
var poisonBoundary = new DateTime(2025, 7, 1, 0, 0, 0, DateTimeKind.Utc);
var goodBoundary = new DateTime(2025, 8, 1, 0, 0, 0, DateTimeKind.Utc);
var repo = new RecordingRepo
{
Boundaries = new List<DateTime> { poisonBoundary, goodBoundary },
ThrowOnBoundary = poisonBoundary,
BoundaryException = new InvalidOperationException("simulated switch failure for poison boundary"),
};
CreateActor(repo, FastTickOptions());
AwaitAssert(
() =>
{
// The good boundary was still switched even though the poison
// boundary threw.
Assert.Contains(goodBoundary, repo.SwitchedBoundaries);
Assert.DoesNotContain(poisonBoundary, repo.SwitchedBoundaries);
},
duration: TimeSpan.FromSeconds(3),
interval: TimeSpan.FromMilliseconds(50));
}
// ---------------------------------------------------------------------
// 6. EndToEnd_RealPartition_RowsRemoved_PurgedEventPublished
// ---------------------------------------------------------------------
[SkippableFact]
public async Task EndToEnd_RealPartition_RowsRemoved_PurgedEventPublished()
{
Skip.IfNot(_fixture.Available, _fixture.SkipReason);
// Today is ~2026-05-20 per the test environment. With RetentionDays =
// 60 the actor computes threshold ≈ 2026-03-21:
// * Jan partition (MAX = Jan 15) → older than threshold → PURGED
// * Apr partition (MAX = Apr 15) → newer than threshold → KEPT
var siteId = "purge-e2e-" + Guid.NewGuid().ToString("N").Substring(0, 8);
var janEvt = new AuditEvent
{
EventId = Guid.NewGuid(),
OccurredAtUtc = new DateTime(2026, 1, 15, 0, 0, 0, DateTimeKind.Utc),
Channel = AuditChannel.ApiOutbound,
Kind = AuditKind.ApiCall,
Status = AuditStatus.Delivered,
SourceSiteId = siteId,
};
var aprEvt = new AuditEvent
{
EventId = Guid.NewGuid(),
OccurredAtUtc = new DateTime(2026, 4, 15, 0, 0, 0, DateTimeKind.Utc),
Channel = AuditChannel.ApiOutbound,
Kind = AuditKind.ApiCall,
Status = AuditStatus.Delivered,
SourceSiteId = siteId,
};
await using (var seedContext = CreateMsSqlContext())
{
var seedRepo = new AuditLogRepository(seedContext);
await seedRepo.InsertIfNotExistsAsync(janEvt);
await seedRepo.InsertIfNotExistsAsync(aprEvt);
}
// Wire the actor's DI scope to the real repository against the
// fixture's MSSQL database. The actor opens a fresh scope per tick,
// so register the context as scoped (mirroring the production
// AddConfigurationDatabase wiring).
var services = new ServiceCollection();
services.AddDbContext<ScadaLinkDbContext>(
opts => opts.UseSqlServer(_fixture.ConnectionString),
ServiceLifetime.Scoped);
services.AddScoped<IAuditLogRepository, AuditLogRepository>();
var sp = services.BuildServiceProvider();
var auditOptions = new AuditLogOptions { RetentionDays = 60 };
var purgeOptions = new AuditLogPurgeOptions
{
IntervalHours = 24,
IntervalOverride = TimeSpan.FromMilliseconds(100),
};
var probe = SubscribePurged();
Sys.ActorOf(Props.Create(() => new AuditLogPurgeActor(
sp,
Options.Create(purgeOptions),
Options.Create(auditOptions),
NullLogger<AuditLogPurgeActor>.Instance)));
// The probe receives one AuditLogPurgedEvent per partition the actor
// purges per tick — other test runs that share the fixture DB may
// also leave behind eligible partitions, but this test creates its
// own fixture DB so the Jan-2026 partition is the only eligible one.
// Use FishForMessage to filter just in case, with a generous timeout
// because the real drop-and-rebuild dance against MSSQL routinely
// takes a couple of seconds on a busy dev container.
var janBoundary = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
var matched = probe.FishForMessage<AuditLogPurgedEvent>(
isMessage: m => m.MonthBoundary == janBoundary,
max: TimeSpan.FromSeconds(30));
Assert.True(matched.RowsDeleted >= 1,
$"Expected RowsDeleted >= 1 for the Jan-2026 partition; got {matched.RowsDeleted}.");
// Settle: allow any in-flight tick to commit before reading.
await Task.Delay(TimeSpan.FromMilliseconds(500));
await using var verifyContext = CreateMsSqlContext();
var rows = await verifyContext.Set<AuditEvent>()
.Where(e => e.SourceSiteId == siteId)
.ToListAsync();
Assert.DoesNotContain(rows, r => r.EventId == janEvt.EventId);
Assert.Contains(rows, r => r.EventId == aprEvt.EventId);
}
private ScadaLinkDbContext CreateMsSqlContext() =>
new(new DbContextOptionsBuilder<ScadaLinkDbContext>()
.UseSqlServer(_fixture.ConnectionString).Options);
// ---------------------------------------------------------------------
// 7. Threshold_UsesAuditLogOptionsRetentionDays
// ---------------------------------------------------------------------
[Fact]
public void Threshold_UsesAuditLogOptionsRetentionDays()
{
// The actor computes the threshold from AuditLogOptions.RetentionDays;
// assert the enumerator received a threshold whose value is in the
// expected window (today - retentionDays) rather than DateTime.MinValue
// or some other accidental default. We use a non-default retention
// (30 days) so the assertion isn't satisfied by the 365 default.
var repo = new RecordingRepo();
CreateActor(
repo,
FastTickOptions(),
auditOptions: new AuditLogOptions { RetentionDays = 30 });
AwaitAssert(
() => Assert.True(repo.ThresholdQueries.Count >= 1),
duration: TimeSpan.FromSeconds(3),
interval: TimeSpan.FromMilliseconds(50));
var threshold = repo.ThresholdQueries[0];
var expected = DateTime.UtcNow - TimeSpan.FromDays(30);
// 1-minute slack covers test-thread scheduling jitter between the
// tick firing and the assertion running.
Assert.True(
Math.Abs((threshold - expected).TotalMinutes) < 1.0,
$"threshold {threshold:o} should be within 1 minute of {expected:o}");
}
}

View File

@@ -87,8 +87,8 @@ public class SiteAuditReconciliationActorTests : TestKit, IClassFixture<MsSqlMig
AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default) =>
Task.FromResult<IReadOnlyList<AuditEvent>>(Inserted);
public Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
Task.CompletedTask;
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
Task.FromResult(0L);
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
DateTime threshold, CancellationToken ct = default) =>