using System.Diagnostics;
using Akka.Actor;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.AuditLog.Configuration;
using ScadaLink.Commons.Interfaces.Repositories;
namespace ScadaLink.AuditLog.Central;
/// <summary>
/// Central singleton (M6 Bundle C) that drives the daily AuditLog partition
/// purge. On a configurable timer (default 24 hours) the actor:
/// <list type="number">
///   <item><description>
///   Queries <c>IAuditLogRepository.GetPartitionBoundariesOlderThanAsync</c>
///   for monthly boundaries whose latest <c>OccurredAtUtc</c> is older
///   than <c>DateTime.UtcNow - RetentionDays</c>.
///   </description></item>
///   <item><description>
///   For each eligible boundary, calls
///   <c>IAuditLogRepository.SwitchOutPartitionAsync</c>, which runs
///   the drop-and-rebuild dance around <c>UX_AuditLog_EventId</c>.
///   </description></item>
///   <item><description>
///   Publishes an <c>AuditLogPurgedEvent</c> on the actor-system
///   EventStream so the Bundle E central health collector and ops surfaces
///   can subscribe without coupling to this actor.
///   </description></item>
/// </list>
/// </summary>
/// <remarks>
/// <para>
/// <b>Daily cadence.</b> Partition switch is metadata-only but the
/// drop-and-rebuild dance briefly removes <c>UX_AuditLog_EventId</c>; running
/// more often than necessary trades unique-index rebuild outages for
/// negligible freshness wins. The default 24-hour interval matches
/// alog.md §10's retention policy.
/// </para>
/// <para>
/// <b>Continue-on-error.</b> A single boundary that throws (transient SQL
/// failure, contention with backup, missing object) must NOT prevent the
/// other eligible boundaries from being purged on the same tick, so
/// per-boundary work runs inside its own try/catch. Failures to create the
/// DI scope or to enumerate boundaries are logged and end the tick early.
/// The <see cref="SupervisorStrategy"/> override resumes failed children;
/// in Akka.NET that strategy applies to an actor's children only, so how
/// this actor itself recovers from an exception that escapes a tick is
/// decided by its parent's strategy.
/// </para>
/// <para>
/// <b>DI scopes.</b> <c>IAuditLogRepository</c> is a scoped EF Core
/// service registered by <c>AddConfigurationDatabase</c>. The singleton
/// opens one DI scope per tick and reuses the same repository across every
/// boundary in that tick.
/// </para>
/// <para>
/// <b>EventStream.</b> Publishing <c>AuditLogPurgedEvent</c> through
/// the EventStream rather than direct messaging avoids coupling this actor
/// to its consumers; M6 Bundle E will subscribe a central health-counter
/// bridge that surfaces purge progress on the central health report.
/// </para>
/// </remarks>
public class AuditLogPurgeActor : ReceiveActor
{
    private readonly IServiceProvider _services;
    private readonly AuditLogPurgeOptions _purgeOptions;
    private readonly AuditLogOptions _auditOptions;
    private readonly ILogger<AuditLogPurgeActor> _logger;
    private ICancelable? _timer;

    /// <summary>
    /// Creates the purge actor and registers the <see cref="PurgeTick"/> handler.
    /// </summary>
    /// <param name="services">Root provider used to open one DI scope per tick.</param>
    /// <param name="purgeOptions">Supplies the tick interval; the value is read once here.</param>
    /// <param name="auditOptions">Supplies <c>RetentionDays</c>; the value is read once here.</param>
    /// <param name="logger">Logger for purge progress and failures.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AuditLogPurgeActor(
        IServiceProvider services,
        IOptions<AuditLogPurgeOptions> purgeOptions,
        IOptions<AuditLogOptions> auditOptions,
        ILogger<AuditLogPurgeActor> logger)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(purgeOptions);
        ArgumentNullException.ThrowIfNull(auditOptions);
        ArgumentNullException.ThrowIfNull(logger);

        _services = services;
        _purgeOptions = purgeOptions.Value;
        _auditOptions = auditOptions.Value;
        _logger = logger;

        ReceiveAsync<PurgeTick>(_ => OnTickAsync());
    }

    /// <summary>
    /// Schedules the repeating <see cref="PurgeTick"/> self-message. The first
    /// tick fires one full interval after start, not immediately.
    /// </summary>
    protected override void PreStart()
    {
        base.PreStart();

        var interval = _purgeOptions.Interval;
        _timer = Context.System.Scheduler.ScheduleTellRepeatedlyCancelable(
            initialDelay: interval,
            interval: interval,
            receiver: Self,
            message: PurgeTick.Instance,
            sender: Self);
    }

    /// <summary>Cancels the repeating tick so no further messages are scheduled.</summary>
    protected override void PostStop()
    {
        _timer?.Cancel();
        base.PostStop();
    }

    /// <summary>
    /// Supervision strategy for this actor's children: always
    /// <see cref="Directive.Resume"/>, so a failing child keeps its state and
    /// carries on with its next message instead of being restarted or stopped.
    /// </summary>
    /// <remarks>
    /// In Akka.NET this override governs children only. No children are
    /// created in this class, and this actor's own failure handling is
    /// determined by its parent's strategy, not by this method.
    /// </remarks>
    protected override SupervisorStrategy SupervisorStrategy()
    {
        // Resume is never counted against the retry budget, so the
        // retry/window arguments are inert here; they are kept only to stay
        // on the same constructor overload.
        return new OneForOneStrategy(
            maxNrOfRetries: 0,
            withinTimeRange: TimeSpan.Zero,
            decider: Akka.Actor.Decider.From(_ => Directive.Resume));
    }

    /// <summary>
    /// Runs one purge pass: resolves a scoped repository, lists the partition
    /// boundaries older than the retention cutoff, and switches each one out,
    /// publishing an <c>AuditLogPurgedEvent</c> per successful boundary.
    /// </summary>
    /// <returns>A task that completes when every eligible boundary has been attempted.</returns>
    private async Task OnTickAsync()
    {
        // Capture EventStream BEFORE the first await. Accessing Context (and
        // therefore Context.System) after an await is unsafe because Akka's
        // ActorBase.Context throws "no active ActorContext" once the
        // continuation runs on a thread that isn't currently dispatching this
        // actor — mirrors the same Sender-capture pattern in
        // AuditLogIngestActor.OnIngestAsync.
        var eventStream = Context.System.EventStream;

        // Recompute the cutoff on every tick so it tracks the current clock.
        // RetentionDays itself comes from the options snapshot taken in the
        // constructor; IOptions<T>.Value does not observe configuration
        // reloads, so a changed RetentionDays only takes effect once the
        // actor is recreated.
        var threshold = DateTime.UtcNow - TimeSpan.FromDays(_auditOptions.RetentionDays);

        IServiceScope? scope = null;
        IAuditLogRepository repository;
        try
        {
            scope = _services.CreateScope();
            repository = scope.ServiceProvider.GetRequiredService<IAuditLogRepository>();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to resolve IAuditLogRepository for AuditLog purge tick.");
            // The scope may have been created before resolution failed.
            scope?.Dispose();
            return;
        }

        try
        {
            IReadOnlyList<DateTime> boundaries;
            try
            {
                boundaries = await repository
                    .GetPartitionBoundariesOlderThanAsync(threshold)
                    .ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogError(
                    ex,
                    "Failed to enumerate eligible AuditLog partition boundaries (threshold {ThresholdUtc:o}); skipping purge tick.",
                    threshold);
                return;
            }

            if (boundaries.Count == 0)
            {
                return;
            }

            foreach (var boundary in boundaries)
            {
                // Per-boundary try/catch: one bad partition (transient SQL
                // failure, missing object, contention with backup) does NOT
                // abandon the rest of the tick.
                var sw = Stopwatch.StartNew();
                try
                {
                    var rowsDeleted = await repository
                        .SwitchOutPartitionAsync(boundary)
                        .ConfigureAwait(false);
                    sw.Stop();

                    eventStream.Publish(
                        new AuditLogPurgedEvent(boundary, rowsDeleted, sw.ElapsedMilliseconds));

                    _logger.LogInformation(
                        "Purged AuditLog partition {MonthBoundary:yyyy-MM-dd}; {RowsDeleted} rows in {DurationMs} ms.",
                        boundary,
                        rowsDeleted,
                        sw.ElapsedMilliseconds);
                }
                catch (Exception ex)
                {
                    sw.Stop();
                    _logger.LogError(
                        ex,
                        "Failed to purge AuditLog partition {MonthBoundary:yyyy-MM-dd}; other partitions continue. Elapsed {DurationMs} ms.",
                        boundary,
                        sw.ElapsedMilliseconds);
                }
            }
        }
        finally
        {
            // Releases the scoped repository once the tick is done, whether it
            // finished, returned early, or threw.
            scope.Dispose();
        }
    }

    /// <summary>Self-tick triggering a purge pass across all eligible partitions.</summary>
    internal sealed class PurgeTick
    {
        public static readonly PurgeTick Instance = new();

        private PurgeTick() { }
    }
}