146 lines
5.9 KiB
C#
146 lines
5.9 KiB
C#
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Hosting;
|
|
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using ScadaLink.Commons.Interfaces;
|
|
|
|
namespace ScadaLink.AuditLog.Central;
|
|
|
|
/// <summary>
/// Central <see cref="IHostedService"/> (M6-T5, Bundle D) that rolls
/// <c>pf_AuditLog_Month</c> forward once a day. Each tick opens a fresh DI
/// scope, resolves <see cref="IPartitionMaintenance"/>, and calls
/// <see cref="IPartitionMaintenance.EnsureLookaheadAsync"/> to SPLIT any
/// missing future boundaries — the partition function must always cover at
/// least <see cref="AuditLogPartitionMaintenanceOptions.LookaheadMonths"/>
/// future months, otherwise inserts past the highest boundary accumulate in
/// a single unbounded tail partition that <c>SwitchOutPartitionAsync</c>
/// cannot purge cleanly.
/// </summary>
/// <remarks>
/// <para>
/// <b>Why a hosted service, not an actor.</b> Bundle C's
/// <see cref="AuditLogPurgeActor"/> sits inside the central singleton
/// because it needs supervised lifecycle alongside the rest of the
/// reconciliation / ingest pipeline. Roll-forward is genuinely a once-a-day
/// chore with no cross-actor coordination, so we use the much simpler
/// hosted-service pattern: <c>Task.Run</c> on start, <c>Task.Delay</c>
/// between ticks, cancellation on stop. Reusing
/// <see cref="IPartitionMaintenance"/> from the central node-only DI graph
/// keeps the contract testable without any actor framework involvement.
/// </para>
/// <para>
/// <b>Failure containment.</b> The tick body wraps the maintenance call in
/// a try/catch so a transient SQL Server error never tears down the hosted
/// service — the next tick simply retries. The exception is logged with
/// the original stack trace at <c>Error</c> level; ops surfaces (M6 Bundle
/// E's central health collector) can subscribe to the logger to alert on
/// repeated failures. A cancellation caused by the service stopping is not
/// a failure and is not logged as one. Anything that escapes the loop
/// itself (for example an invalid interval) is logged at <c>Critical</c>
/// level because roll-forward stops until the service is restarted.
/// </para>
/// <para>
/// <b>Startup ordering.</b> A first tick fires immediately at
/// <see cref="StartAsync"/> so a fresh deployment doesn't need to wait
/// <see cref="AuditLogPartitionMaintenanceOptions.IntervalSeconds"/> for
/// the partition function to come up to spec. This is also what the brief
/// asks for ("Run once on startup").
/// </para>
/// <para>
/// <b>DI scope per tick.</b> <see cref="IPartitionMaintenance"/> is scoped
/// (alongside the rest of the EF repositories) because the implementation
/// reuses the per-scope <c>ScadaLinkDbContext</c>. A hosted service is a
/// singleton, so it must open and dispose a scope around each tick — the
/// same pattern <see cref="AuditLogPurgeActor"/> uses.
/// </para>
/// <para>
/// <b>Shutdown.</b> <see cref="StopAsync"/> cancels the loop and waits for
/// it to finish, but gives up waiting once the token it was handed fires,
/// so an in-flight maintenance call that does not honour cancellation
/// cannot hold up the caller indefinitely. <see cref="Dispose"/> also
/// cancels the loop and is safe to call more than once.
/// </para>
/// </remarks>
public sealed class AuditLogPartitionMaintenanceService : IHostedService, IDisposable
{
    private readonly IServiceScopeFactory _scopeFactory;
    private readonly IOptions<AuditLogPartitionMaintenanceOptions> _options;
    private readonly ILogger<AuditLogPartitionMaintenanceService> _logger;
    private CancellationTokenSource? _cts;
    private Task? _loop;

    /// <summary>
    /// Creates the service. All three dependencies are required.
    /// </summary>
    /// <param name="scopeFactory">Factory used to open one DI scope per tick.</param>
    /// <param name="options">Source of the lookahead and interval settings; read on every tick.</param>
    /// <param name="logger">Receives the "boundaries added" and failure log entries.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AuditLogPartitionMaintenanceService(
        IServiceScopeFactory scopeFactory,
        IOptions<AuditLogPartitionMaintenanceOptions> options,
        ILogger<AuditLogPartitionMaintenanceService> logger)
    {
        _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public Task StartAsync(CancellationToken ct)
    {
        // The loop stops when either StopAsync/Dispose cancels our source or
        // the start token (which signals an aborted startup) fires.
        var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);

        // Capture the token now. Reading _cts.Token lazily inside the lambda
        // would race with StopAsync/Dispose and could hit a disposed source.
        var loopToken = cts.Token;

        _cts = cts;
        _loop = Task.Run(() => RunLoopAsync(loopToken));
        return Task.CompletedTask;
    }

    /// <summary>
    /// Runs one maintenance tick immediately, then one tick after every
    /// <see cref="AuditLogPartitionMaintenanceOptions.IntervalSeconds"/>
    /// delay until <paramref name="ct"/> is cancelled. Never throws.
    /// </summary>
    private async Task RunLoopAsync(CancellationToken ct)
    {
        try
        {
            // Run once on startup so a fresh deployment isn't gated on the
            // IntervalSeconds initial wait — the brief calls this out explicitly.
            await SafeMaintainAsync(ct).ConfigureAwait(false);

            while (!ct.IsCancellationRequested)
            {
                try
                {
                    await Task.Delay(TimeSpan.FromSeconds(_options.Value.IntervalSeconds), ct)
                        .ConfigureAwait(false);
                }
                catch (OperationCanceledException)
                {
                    break;
                }

                await SafeMaintainAsync(ct).ConfigureAwait(false);
            }
        }
        catch (Exception ex)
        {
            // Only reachable when the delay step itself fails (e.g. an
            // interval Task.Delay rejects). Nobody observes this task until
            // shutdown, so without this log the loop would die silently.
            _logger.LogCritical(
                ex,
                "AuditLogPartitionMaintenance loop terminated unexpectedly; partition roll-forward is stopped until the service restarts");
        }
    }

    /// <summary>
    /// Executes a single maintenance tick inside its own DI scope and logs
    /// any boundaries that were added. Failures are logged, never propagated.
    /// </summary>
    private async Task SafeMaintainAsync(CancellationToken ct)
    {
        try
        {
            await using var scope = _scopeFactory.CreateAsyncScope();
            var maintenance = scope.ServiceProvider.GetRequiredService<IPartitionMaintenance>();
            var added = await maintenance
                .EnsureLookaheadAsync(_options.Value.LookaheadMonths, ct)
                .ConfigureAwait(false);

            if (added.Count > 0)
            {
                // Invariant culture keeps the logged dates stable regardless
                // of the host's default calendar.
                var boundaries = string.Join(
                    ", ",
                    added.Select(b => b.ToString(
                        "yyyy-MM-dd",
                        System.Globalization.CultureInfo.InvariantCulture)));

                _logger.LogInformation(
                    "AuditLogPartitionMaintenance added {Count} boundaries: {Boundaries}",
                    added.Count,
                    boundaries);
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The service is stopping; an aborted tick is expected here and
            // must not show up as an Error-level failure.
        }
        catch (Exception ex)
        {
            // Catch-all is deliberate: the hosted service must survive every
            // class of tick failure (transient SQL, DI resolution, etc.) so
            // the next tick gets a chance. The brief's contract is
            // "exception logged, not propagated".
            _logger.LogError(ex, "AuditLogPartitionMaintenance tick failed");
        }
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken ct)
    {
        var loop = _loop;
        if (loop is null)
        {
            // StartAsync was never called; nothing to stop.
            return;
        }

        try
        {
            _cts?.Cancel();
        }
        finally
        {
            // Wait for the loop, but stop waiting when the caller's token
            // fires so a tick that ignores cancellation cannot block the
            // caller forever. WhenAny never throws, so neither outcome
            // surfaces as an exception here.
            await Task.WhenAny(loop, Task.Delay(Timeout.Infinite, ct)).ConfigureAwait(false);
        }
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Swap the field out first so a second Dispose (or a late StopAsync)
        // never touches an already-disposed source.
        var cts = Interlocked.Exchange(ref _cts, null);
        if (cts is null)
        {
            return;
        }

        try
        {
            // Ensure the loop ends even if StopAsync was never called.
            cts.Cancel();
        }
        finally
        {
            cts.Dispose();
        }
    }
}
|