using System.Diagnostics;
using Akka.Actor;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.AuditLog.Configuration;
using ScadaLink.Commons.Interfaces.Repositories;

namespace ScadaLink.AuditLog.Central;

/// <summary>
/// Central singleton (M6 Bundle C) that drives the daily AuditLog partition
/// purge. On a configurable timer (default 24 hours) the actor:
/// <list type="number">
///   <item><description>
///   Queries <c>IAuditLogRepository.GetPartitionBoundariesOlderThanAsync</c>
///   for monthly boundaries whose latest <c>OccurredAtUtc</c> is older than
///   <c>DateTime.UtcNow - RetentionDays</c>.
///   </description></item>
///   <item><description>
///   For each eligible boundary, calls
///   <c>IAuditLogRepository.SwitchOutPartitionAsync</c>, which runs the
///   drop-and-rebuild dance around <c>UX_AuditLog_EventId</c>.
///   </description></item>
///   <item><description>
///   Publishes an <see cref="AuditLogPurgedEvent"/> on the actor-system
///   EventStream so the Bundle E central health collector and ops surfaces
///   can subscribe without coupling to this actor.
///   </description></item>
/// </list>
/// </summary>
/// <remarks>
/// <para>
/// <b>Daily cadence.</b> Partition switch is metadata-only, but the
/// drop-and-rebuild dance briefly removes <c>UX_AuditLog_EventId</c>; running
/// more often than necessary trades unique-index rebuild outages for
/// negligible freshness wins. The default 24-hour interval matches
/// alog.md §10's retention policy.
/// </para>
/// <para>
/// <b>Continue-on-error.</b> A single boundary that throws (transient SQL
/// failure, contention with backup, missing object) must NOT prevent the
/// other eligible boundaries from being purged on the same tick. Per-boundary
/// work runs inside its own try/catch, so the tick handler itself is not
/// expected to leak exceptions.
/// </para>
/// <para>
/// <b>DI scopes.</b> <see cref="IAuditLogRepository"/> is a scoped EF Core
/// service registered by <c>AddConfigurationDatabase</c>. The singleton opens
/// one DI scope per tick and reuses the same repository across every boundary
/// in that tick.
/// </para>
/// <para>
/// <b>EventStream.</b> Publishing through the EventStream rather than direct
/// messaging avoids coupling this actor to its consumers; M6 Bundle E will
/// subscribe a central health-counter bridge that surfaces purge progress on
/// the central health report.
/// </para>
/// </remarks>
public class AuditLogPurgeActor : ReceiveActor
{
    private readonly IServiceProvider _services;
    private readonly AuditLogPurgeOptions _purgeOptions;
    private readonly AuditLogOptions _auditOptions;
    private readonly ILogger _logger;
    private ICancelable? _timer;

    /// <summary>
    /// Creates the purge actor and registers the <see cref="PurgeTick"/> handler.
    /// </summary>
    /// <param name="services">Root provider used to open one DI scope per tick.</param>
    /// <param name="purgeOptions">Purge settings; <c>Interval</c> drives the timer.</param>
    /// <param name="auditOptions">Audit settings; <c>RetentionDays</c> drives the purge threshold.</param>
    /// <param name="logger">Logger for per-tick and per-boundary outcomes.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AuditLogPurgeActor(
        IServiceProvider services,
        IOptions<AuditLogPurgeOptions> purgeOptions,
        IOptions<AuditLogOptions> auditOptions,
        ILogger logger)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(purgeOptions);
        ArgumentNullException.ThrowIfNull(auditOptions);
        ArgumentNullException.ThrowIfNull(logger);

        _services = services;
        // Both option objects are snapshotted once here; later configuration
        // changes are not observed by this instance.
        _purgeOptions = purgeOptions.Value;
        _auditOptions = auditOptions.Value;
        _logger = logger;

        ReceiveAsync<PurgeTick>(_ => OnTickAsync());
    }

    /// <summary>
    /// Starts the repeating self-tick. The first tick fires after one full
    /// interval, not immediately on start.
    /// </summary>
    protected override void PreStart()
    {
        base.PreStart();

        var interval = _purgeOptions.Interval;
        _timer = Context.System.Scheduler.ScheduleTellRepeatedlyCancelable(
            initialDelay: interval,
            interval: interval,
            receiver: Self,
            message: PurgeTick.Instance,
            sender: Self);
    }

    /// <summary>Cancels the repeating tick so no further messages are scheduled.</summary>
    protected override void PostStop()
    {
        _timer?.Cancel();
        base.PostStop();
    }

    /// <summary>
    /// Supervision strategy that always resumes.
    /// </summary>
    /// <remarks>
    /// <para>
    /// Resume keeps the failing actor's state and lets it carry on. Restart
    /// would re-run PreStart and reschedule the timer (harmless but wasteful);
    /// Stop is wrong because the singleton must keep ticking until shutdown.
    /// </para>
    /// <para>
    /// An explicit Resume decider is required: Akka's default decider maps
    /// ordinary exceptions to Restart, and with a retry budget of zero a
    /// Restart is refused and the failing actor is stopped instead.
    /// </para>
    /// <para>
    /// NOTE(review): in Akka.NET this override governs actors created as
    /// children of this actor; failures of this actor itself are decided by
    /// its parent's strategy. Confirm the parent (singleton manager) is
    /// configured accordingly if "keep alive on leaked exception" is required.
    /// </para>
    /// </remarks>
    protected override SupervisorStrategy SupervisorStrategy()
    {
        return new OneForOneStrategy(
            maxNrOfRetries: 0,
            withinTimeRange: TimeSpan.Zero,
            decider: Decider.From(Directive.Resume));
    }

    /// <summary>
    /// Runs one purge pass: resolves the repository, enumerates eligible
    /// partition boundaries, and switches each one out, publishing an
    /// <see cref="AuditLogPurgedEvent"/> per success. All failures are logged
    /// and swallowed so the next tick still runs.
    /// </summary>
    private async Task OnTickAsync()
    {
        // Capture EventStream BEFORE the first await. Accessing Context (and
        // therefore Context.System) after an await is unsafe because Akka's
        // ActorBase.Context throws "no active ActorContext" once the
        // continuation runs on a thread that isn't currently dispatching this
        // actor — mirrors the same Sender-capture pattern in
        // AuditLogIngestActor.OnIngestAsync.
        var eventStream = Context.System.EventStream;

        // The threshold is recomputed every tick from the current clock.
        // NOTE: _auditOptions is the IOptions<T>.Value snapshot taken in the
        // constructor, so a changed RetentionDays is NOT picked up until the
        // actor is recreated. Hot reload would require injecting
        // IOptionsMonitor<AuditLogOptions> and reading CurrentValue here.
        var threshold = DateTime.UtcNow - TimeSpan.FromDays(_auditOptions.RetentionDays);

        IServiceScope? scope = null;
        IAuditLogRepository repository;
        try
        {
            scope = _services.CreateScope();
            repository = scope.ServiceProvider.GetRequiredService<IAuditLogRepository>();
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to resolve IAuditLogRepository for AuditLog purge tick.");
            // The scope may have been created before resolution failed.
            scope?.Dispose();
            return;
        }

        try
        {
            IReadOnlyList<DateTime> boundaries;
            try
            {
                boundaries = await repository
                    .GetPartitionBoundariesOlderThanAsync(threshold)
                    .ConfigureAwait(false);
            }
            catch (Exception ex)
            {
                _logger.LogError(
                    ex,
                    "Failed to enumerate eligible AuditLog partition boundaries (threshold {ThresholdUtc:o}); skipping purge tick.",
                    threshold);
                return;
            }

            if (boundaries.Count == 0)
            {
                return;
            }

            foreach (var boundary in boundaries)
            {
                // Per-boundary try/catch: one bad partition (transient SQL
                // failure, missing object, contention with backup) does NOT
                // abandon the rest of the tick.
                var sw = Stopwatch.StartNew();
                try
                {
                    var rowsDeleted = await repository
                        .SwitchOutPartitionAsync(boundary)
                        .ConfigureAwait(false);
                    sw.Stop();
                    var elapsedMs = sw.ElapsedMilliseconds;

                    eventStream.Publish(
                        new AuditLogPurgedEvent(boundary, rowsDeleted, elapsedMs));
                    _logger.LogInformation(
                        "Purged AuditLog partition {MonthBoundary:yyyy-MM-dd}; {RowsDeleted} rows in {DurationMs} ms.",
                        boundary,
                        rowsDeleted,
                        elapsedMs);
                }
                catch (Exception ex)
                {
                    sw.Stop();
                    _logger.LogError(
                        ex,
                        "Failed to purge AuditLog partition {MonthBoundary:yyyy-MM-dd}; other partitions continue. Elapsed {DurationMs} ms.",
                        boundary,
                        sw.ElapsedMilliseconds);
                }
            }
        }
        finally
        {
            // Releases the scoped repository on every exit path of the tick.
            scope.Dispose();
        }
    }

    /// <summary>Self-tick triggering a purge pass across all eligible partitions.</summary>
    internal sealed class PurgeTick
    {
        public static readonly PurgeTick Instance = new();

        private PurgeTick()
        {
        }
    }
}