feat(auditlog): AuditLogPurgeActor daily partition-switch purge (#23 M6)
Central singleton (M6-T4 Bundle C) that drives the daily AuditLog partition
purge. On a configurable timer (default 24 hours) the actor:
1. Queries IAuditLogRepository.GetPartitionBoundariesOlderThanAsync for
monthly boundaries whose latest OccurredAtUtc is older than
DateTime.UtcNow - AuditLogOptions.RetentionDays.
2. For each eligible boundary calls SwitchOutPartitionAsync, which runs
the drop-and-rebuild dance around UX_AuditLog_EventId.
3. Publishes AuditLogPurgedEvent(boundary, rowsDeleted, durationMs) on
the actor-system EventStream so the Bundle E central health collector
and ops surfaces can subscribe without coupling to this actor.
Co-changes:
* SwitchOutPartitionAsync returns long (rows deleted) — sampled BEFORE the
switch via COUNT_BIG over the per-partition filter so the count
reflects what the switch removed, not a post-purge scan of a table that
no longer exists. All stub implementations updated.
* AuditLogPurgeOptions: IntervalHours (default 24), IntervalOverride for
tests, Interval property resolving either.
* AuditLogPurgedEvent: record with MonthBoundary, RowsDeleted, DurationMs.
Behavior:
* Continue-on-error per boundary — one partition that throws does NOT
abandon the rest of the tick.
* DI scope opened per tick (IAuditLogRepository is a SCOPED EF Core
service); mirrors SiteAuditReconciliationActor and AuditLogIngestActor.
* SupervisorStrategy Resume keeps the singleton alive across leaked
exceptions.
* EventStream capture BEFORE the first await — Context is unsafe after
await in async receive handlers (same pattern as Sender-capture in
AuditLogIngestActor.OnIngestAsync).
Tests:
* Tick_Fires_OnDailyInterval — visible timer side effect.
* Tick_OldPartitions_SwitchedOut — both seeded boundaries purged.
* Tick_NewerPartitions_Untouched — empty enumerator → no switches.
* Tick_PublishesPurgedEvent_WithRowCount — AuditLogPurgedEvent carries
RowsDeleted and DurationMs.
* Tick_SwitchThrows_OtherPartitionsStillProcessed — continue-on-error.
* Threshold_UsesAuditLogOptionsRetentionDays — non-default 30-day window
computed from UtcNow - RetentionDays.
* EndToEnd_RealPartition_RowsRemoved_PurgedEventPublished — TestKit +
MsSqlMigrationFixture: real partitioned table, Jan-2026 row purged,
Apr-2026 row kept, AuditLogPurgedEvent observed via probe.
This commit is contained in:
@@ -214,7 +214,7 @@ public class AuditLogIngestActorTests : TestKit, IClassFixture<MsSqlMigrationFix
|
||||
AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default) =>
|
||||
_inner.QueryAsync(filter, paging, ct);
|
||||
|
||||
public Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
|
||||
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
|
||||
_inner.SwitchOutPartitionAsync(monthBoundary, ct);
|
||||
|
||||
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
|
||||
@@ -0,0 +1,376 @@
|
||||
using Akka.Actor;
|
||||
using Akka.TestKit.Xunit2;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Central;
|
||||
using ScadaLink.AuditLog.Configuration;
|
||||
using ScadaLink.Commons.Entities.Audit;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Types.Audit;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.ConfigurationDatabase;
|
||||
using ScadaLink.ConfigurationDatabase.Repositories;
|
||||
using ScadaLink.ConfigurationDatabase.Tests.Migrations;
|
||||
|
||||
namespace ScadaLink.AuditLog.Tests.Central;
|
||||
|
||||
/// <summary>
|
||||
/// Bundle C (#23 M6-T4) tests for <see cref="AuditLogPurgeActor"/>. The fast,
|
||||
/// schedule-only tests substitute a recording stub for
|
||||
/// <see cref="IAuditLogRepository"/> so the timer + per-boundary error-isolation
|
||||
/// + event-publish machinery can be exercised without an MSSQL container.
|
||||
/// The end-to-end "real partition gets switched out" assertion lives in the
|
||||
/// repository tests (Bundle C of M6-T4); this actor file is purely about the
|
||||
/// actor's policy decisions.
|
||||
/// </summary>
|
||||
public class AuditLogPurgeActorTests : TestKit, IClassFixture<MsSqlMigrationFixture>
|
||||
{
|
||||
private readonly MsSqlMigrationFixture _fixture;
|
||||
|
||||
public AuditLogPurgeActorTests(MsSqlMigrationFixture fixture)
|
||||
{
|
||||
_fixture = fixture;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// In-memory recording stub. Captures every
|
||||
/// <see cref="GetPartitionBoundariesOlderThanAsync"/> + every
|
||||
/// <see cref="SwitchOutPartitionAsync"/> so tests can assert which boundaries
|
||||
/// the actor chose to purge and how many ticks it issued. Also lets a
|
||||
/// specific boundary be configured to throw so the continue-on-error path
|
||||
/// is exercisable.
|
||||
/// </summary>
|
||||
private sealed class RecordingRepo : IAuditLogRepository
|
||||
{
|
||||
public List<DateTime> ThresholdQueries { get; } = new();
|
||||
public List<DateTime> SwitchedBoundaries { get; } = new();
|
||||
public Func<DateTime, long> RowsPerBoundary { get; set; } = _ => 0L;
|
||||
public DateTime? ThrowOnBoundary { get; set; }
|
||||
public Exception? BoundaryException { get; set; }
|
||||
|
||||
// The actor enumerator returns whichever list is configured here.
|
||||
// Mutating this between ticks lets tests simulate "no longer
|
||||
// eligible" boundaries on the second tick.
|
||||
public List<DateTime> Boundaries { get; set; } = new();
|
||||
|
||||
public Task InsertIfNotExistsAsync(AuditEvent evt, CancellationToken ct = default) =>
|
||||
Task.CompletedTask;
|
||||
|
||||
public Task<IReadOnlyList<AuditEvent>> QueryAsync(
|
||||
AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default) =>
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>());
|
||||
|
||||
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default)
|
||||
{
|
||||
if (ThrowOnBoundary.HasValue && monthBoundary == ThrowOnBoundary.Value)
|
||||
{
|
||||
throw BoundaryException ?? new InvalidOperationException("simulated switch failure");
|
||||
}
|
||||
SwitchedBoundaries.Add(monthBoundary);
|
||||
return Task.FromResult(RowsPerBoundary(monthBoundary));
|
||||
}
|
||||
|
||||
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
DateTime threshold, CancellationToken ct = default)
|
||||
{
|
||||
ThresholdQueries.Add(threshold);
|
||||
return Task.FromResult<IReadOnlyList<DateTime>>(Boundaries.ToArray());
|
||||
}
|
||||
}
|
||||
|
||||
private IServiceProvider BuildScopedProvider(IAuditLogRepository repo)
|
||||
{
|
||||
var services = new ServiceCollection();
|
||||
// Mirror AddConfigurationDatabase: IAuditLogRepository is scoped, so
|
||||
// the actor opens a fresh scope per tick and resolves there.
|
||||
services.AddScoped(_ => repo);
|
||||
return services.BuildServiceProvider();
|
||||
}
|
||||
|
||||
private IActorRef CreateActor(
|
||||
IAuditLogRepository repo,
|
||||
AuditLogPurgeOptions purgeOptions,
|
||||
AuditLogOptions? auditOptions = null)
|
||||
{
|
||||
var sp = BuildScopedProvider(repo);
|
||||
return Sys.ActorOf(Props.Create(() => new AuditLogPurgeActor(
|
||||
sp,
|
||||
Options.Create(purgeOptions),
|
||||
Options.Create(auditOptions ?? new AuditLogOptions()),
|
||||
NullLogger<AuditLogPurgeActor>.Instance)));
|
||||
}
|
||||
|
||||
private static AuditLogPurgeOptions FastTickOptions(TimeSpan? interval = null) => new()
|
||||
{
|
||||
IntervalHours = 24,
|
||||
IntervalOverride = interval ?? TimeSpan.FromMilliseconds(100),
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Subscribe a probe to the EventStream so the test can observe
|
||||
/// <see cref="AuditLogPurgedEvent"/> publications synchronously.
|
||||
/// </summary>
|
||||
private Akka.TestKit.TestProbe SubscribePurged()
|
||||
{
|
||||
var probe = CreateTestProbe();
|
||||
Sys.EventStream.Subscribe(probe.Ref, typeof(AuditLogPurgedEvent));
|
||||
return probe;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 1. Tick_Fires_OnDailyInterval
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void Tick_Fires_OnDailyInterval()
|
||||
{
|
||||
var repo = new RecordingRepo();
|
||||
CreateActor(repo, FastTickOptions());
|
||||
|
||||
// The first scheduled tick fires after the configured interval. We
|
||||
// assert the visible side effect (the enumerator was called) rather
|
||||
// than racing on internal state.
|
||||
AwaitAssert(
|
||||
() => Assert.True(repo.ThresholdQueries.Count >= 1,
|
||||
$"expected >= 1 enumerator call, got {repo.ThresholdQueries.Count}"),
|
||||
duration: TimeSpan.FromSeconds(3),
|
||||
interval: TimeSpan.FromMilliseconds(50));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 2. Tick_OldPartitions_SwitchedOut
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void Tick_OldPartitions_SwitchedOut()
|
||||
{
|
||||
var repo = new RecordingRepo
|
||||
{
|
||||
Boundaries = new List<DateTime>
|
||||
{
|
||||
new(2025, 11, 1, 0, 0, 0, DateTimeKind.Utc),
|
||||
new(2025, 12, 1, 0, 0, 0, DateTimeKind.Utc),
|
||||
},
|
||||
RowsPerBoundary = _ => 42L,
|
||||
};
|
||||
|
||||
CreateActor(repo, FastTickOptions());
|
||||
|
||||
AwaitAssert(
|
||||
() =>
|
||||
{
|
||||
Assert.Contains(new DateTime(2025, 11, 1, 0, 0, 0, DateTimeKind.Utc), repo.SwitchedBoundaries);
|
||||
Assert.Contains(new DateTime(2025, 12, 1, 0, 0, 0, DateTimeKind.Utc), repo.SwitchedBoundaries);
|
||||
},
|
||||
duration: TimeSpan.FromSeconds(3),
|
||||
interval: TimeSpan.FromMilliseconds(50));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 3. Tick_NewerPartitions_Untouched
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void Tick_NewerPartitions_Untouched()
|
||||
{
|
||||
// The actor's contract: it only touches whatever the enumerator
|
||||
// returns. The enumerator (in production) filters out non-eligible
|
||||
// boundaries; here we simulate that by handing back an empty list
|
||||
// and asserting the actor switched nothing despite the tick firing.
|
||||
var repo = new RecordingRepo { Boundaries = new List<DateTime>() };
|
||||
|
||||
CreateActor(repo, FastTickOptions());
|
||||
|
||||
// Wait for at least one tick (visible via the enumerator call) then
|
||||
// assert no switch happened.
|
||||
AwaitAssert(
|
||||
() => Assert.True(repo.ThresholdQueries.Count >= 1),
|
||||
duration: TimeSpan.FromSeconds(3),
|
||||
interval: TimeSpan.FromMilliseconds(50));
|
||||
|
||||
Assert.Empty(repo.SwitchedBoundaries);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 4. Tick_PublishesPurgedEvent_WithRowCount
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void Tick_PublishesPurgedEvent_WithRowCount()
|
||||
{
|
||||
var boundary = new DateTime(2025, 6, 1, 0, 0, 0, DateTimeKind.Utc);
|
||||
var repo = new RecordingRepo
|
||||
{
|
||||
Boundaries = new List<DateTime> { boundary },
|
||||
RowsPerBoundary = _ => 1234L,
|
||||
};
|
||||
|
||||
var probe = SubscribePurged();
|
||||
CreateActor(repo, FastTickOptions());
|
||||
|
||||
var msg = probe.ExpectMsg<AuditLogPurgedEvent>(TimeSpan.FromSeconds(5));
|
||||
Assert.Equal(boundary, msg.MonthBoundary);
|
||||
Assert.Equal(1234L, msg.RowsDeleted);
|
||||
Assert.True(msg.DurationMs >= 0,
|
||||
$"DurationMs should be non-negative; was {msg.DurationMs}");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 5. Tick_SwitchThrows_OtherPartitionsStillProcessed (continue-on-error)
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void Tick_SwitchThrows_OtherPartitionsStillProcessed()
|
||||
{
|
||||
var poisonBoundary = new DateTime(2025, 7, 1, 0, 0, 0, DateTimeKind.Utc);
|
||||
var goodBoundary = new DateTime(2025, 8, 1, 0, 0, 0, DateTimeKind.Utc);
|
||||
var repo = new RecordingRepo
|
||||
{
|
||||
Boundaries = new List<DateTime> { poisonBoundary, goodBoundary },
|
||||
ThrowOnBoundary = poisonBoundary,
|
||||
BoundaryException = new InvalidOperationException("simulated switch failure for poison boundary"),
|
||||
};
|
||||
|
||||
CreateActor(repo, FastTickOptions());
|
||||
|
||||
AwaitAssert(
|
||||
() =>
|
||||
{
|
||||
// The good boundary was still switched even though the poison
|
||||
// boundary threw.
|
||||
Assert.Contains(goodBoundary, repo.SwitchedBoundaries);
|
||||
Assert.DoesNotContain(poisonBoundary, repo.SwitchedBoundaries);
|
||||
},
|
||||
duration: TimeSpan.FromSeconds(3),
|
||||
interval: TimeSpan.FromMilliseconds(50));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 6. EndToEnd_RealPartition_RowsRemoved_PurgedEventPublished
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[SkippableFact]
|
||||
public async Task EndToEnd_RealPartition_RowsRemoved_PurgedEventPublished()
|
||||
{
|
||||
Skip.IfNot(_fixture.Available, _fixture.SkipReason);
|
||||
|
||||
// Today is ~2026-05-20 per the test environment. With RetentionDays =
|
||||
// 60 the actor computes threshold ≈ 2026-03-21:
|
||||
// * Jan partition (MAX = Jan 15) → older than threshold → PURGED
|
||||
// * Apr partition (MAX = Apr 15) → newer than threshold → KEPT
|
||||
var siteId = "purge-e2e-" + Guid.NewGuid().ToString("N").Substring(0, 8);
|
||||
var janEvt = new AuditEvent
|
||||
{
|
||||
EventId = Guid.NewGuid(),
|
||||
OccurredAtUtc = new DateTime(2026, 1, 15, 0, 0, 0, DateTimeKind.Utc),
|
||||
Channel = AuditChannel.ApiOutbound,
|
||||
Kind = AuditKind.ApiCall,
|
||||
Status = AuditStatus.Delivered,
|
||||
SourceSiteId = siteId,
|
||||
};
|
||||
var aprEvt = new AuditEvent
|
||||
{
|
||||
EventId = Guid.NewGuid(),
|
||||
OccurredAtUtc = new DateTime(2026, 4, 15, 0, 0, 0, DateTimeKind.Utc),
|
||||
Channel = AuditChannel.ApiOutbound,
|
||||
Kind = AuditKind.ApiCall,
|
||||
Status = AuditStatus.Delivered,
|
||||
SourceSiteId = siteId,
|
||||
};
|
||||
|
||||
await using (var seedContext = CreateMsSqlContext())
|
||||
{
|
||||
var seedRepo = new AuditLogRepository(seedContext);
|
||||
await seedRepo.InsertIfNotExistsAsync(janEvt);
|
||||
await seedRepo.InsertIfNotExistsAsync(aprEvt);
|
||||
}
|
||||
|
||||
// Wire the actor's DI scope to the real repository against the
|
||||
// fixture's MSSQL database. The actor opens a fresh scope per tick,
|
||||
// so register the context as scoped (mirroring the production
|
||||
// AddConfigurationDatabase wiring).
|
||||
var services = new ServiceCollection();
|
||||
services.AddDbContext<ScadaLinkDbContext>(
|
||||
opts => opts.UseSqlServer(_fixture.ConnectionString),
|
||||
ServiceLifetime.Scoped);
|
||||
services.AddScoped<IAuditLogRepository, AuditLogRepository>();
|
||||
var sp = services.BuildServiceProvider();
|
||||
|
||||
var auditOptions = new AuditLogOptions { RetentionDays = 60 };
|
||||
var purgeOptions = new AuditLogPurgeOptions
|
||||
{
|
||||
IntervalHours = 24,
|
||||
IntervalOverride = TimeSpan.FromMilliseconds(100),
|
||||
};
|
||||
|
||||
var probe = SubscribePurged();
|
||||
Sys.ActorOf(Props.Create(() => new AuditLogPurgeActor(
|
||||
sp,
|
||||
Options.Create(purgeOptions),
|
||||
Options.Create(auditOptions),
|
||||
NullLogger<AuditLogPurgeActor>.Instance)));
|
||||
|
||||
// The probe receives one AuditLogPurgedEvent per partition the actor
|
||||
// purges per tick — other test runs that share the fixture DB may
|
||||
// also leave behind eligible partitions, but this test creates its
|
||||
// own fixture DB so the Jan-2026 partition is the only eligible one.
|
||||
// Use FishForMessage to filter just in case, with a generous timeout
|
||||
// because the real drop-and-rebuild dance against MSSQL routinely
|
||||
// takes a couple of seconds on a busy dev container.
|
||||
var janBoundary = new DateTime(2026, 1, 1, 0, 0, 0, DateTimeKind.Utc);
|
||||
var matched = probe.FishForMessage<AuditLogPurgedEvent>(
|
||||
isMessage: m => m.MonthBoundary == janBoundary,
|
||||
max: TimeSpan.FromSeconds(30));
|
||||
|
||||
Assert.True(matched.RowsDeleted >= 1,
|
||||
$"Expected RowsDeleted >= 1 for the Jan-2026 partition; got {matched.RowsDeleted}.");
|
||||
|
||||
// Settle: allow any in-flight tick to commit before reading.
|
||||
await Task.Delay(TimeSpan.FromMilliseconds(500));
|
||||
await using var verifyContext = CreateMsSqlContext();
|
||||
var rows = await verifyContext.Set<AuditEvent>()
|
||||
.Where(e => e.SourceSiteId == siteId)
|
||||
.ToListAsync();
|
||||
|
||||
Assert.DoesNotContain(rows, r => r.EventId == janEvt.EventId);
|
||||
Assert.Contains(rows, r => r.EventId == aprEvt.EventId);
|
||||
}
|
||||
|
||||
private ScadaLinkDbContext CreateMsSqlContext() =>
|
||||
new(new DbContextOptionsBuilder<ScadaLinkDbContext>()
|
||||
.UseSqlServer(_fixture.ConnectionString).Options);
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 7. Threshold_UsesAuditLogOptionsRetentionDays
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void Threshold_UsesAuditLogOptionsRetentionDays()
|
||||
{
|
||||
// The actor computes the threshold from AuditLogOptions.RetentionDays;
|
||||
// assert the enumerator received a threshold whose value is in the
|
||||
// expected window (today - retentionDays) rather than DateTime.MinValue
|
||||
// or some other accidental default. We use a non-default retention
|
||||
// (30 days) so the assertion isn't satisfied by the 365 default.
|
||||
var repo = new RecordingRepo();
|
||||
CreateActor(
|
||||
repo,
|
||||
FastTickOptions(),
|
||||
auditOptions: new AuditLogOptions { RetentionDays = 30 });
|
||||
|
||||
AwaitAssert(
|
||||
() => Assert.True(repo.ThresholdQueries.Count >= 1),
|
||||
duration: TimeSpan.FromSeconds(3),
|
||||
interval: TimeSpan.FromMilliseconds(50));
|
||||
|
||||
var threshold = repo.ThresholdQueries[0];
|
||||
var expected = DateTime.UtcNow - TimeSpan.FromDays(30);
|
||||
// 1-minute slack covers test-thread scheduling jitter between the
|
||||
// tick firing and the assertion running.
|
||||
Assert.True(
|
||||
Math.Abs((threshold - expected).TotalMinutes) < 1.0,
|
||||
$"threshold {threshold:o} should be within 1 minute of {expected:o}");
|
||||
}
|
||||
}
|
||||
@@ -87,8 +87,8 @@ public class SiteAuditReconciliationActorTests : TestKit, IClassFixture<MsSqlMig
|
||||
AuditLogQueryFilter filter, AuditLogPaging paging, CancellationToken ct = default) =>
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(Inserted);
|
||||
|
||||
public Task SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
|
||||
Task.CompletedTask;
|
||||
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
|
||||
Task.FromResult(0L);
|
||||
|
||||
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
DateTime threshold, CancellationToken ct = default) =>
|
||||
|
||||
Reference in New Issue
Block a user