feat(auditlog): AuditLogPartitionMaintenanceService monthly roll-forward (#23 M6)
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
namespace ScadaLink.AuditLog.Central;
|
||||
|
||||
/// <summary>
/// Configuration for the central
/// <see cref="AuditLogPartitionMaintenanceService"/> hosted service (M6-T5).
/// Out of the box the service ticks once every 24 hours and keeps at least
/// one future monthly boundary ahead of <see cref="DateTime.UtcNow"/>.
/// </summary>
/// <remarks>
/// <para>
/// On every tick the hosted service reads the current maximum boundary of
/// <c>pf_AuditLog_Month</c> and SPLITs additional monthly boundaries until
/// at least <see cref="LookaheadMonths"/> future months are covered. The
/// default of one month is deliberately conservative: a smaller value risks
/// an end-of-month race in which inserts land in the unbounded tail
/// partition, while a larger value wastes nothing but commits to boundaries
/// earlier than necessary.
/// </para>
/// <para>
/// A 24-hour cadence is the cheapest interval that still guarantees at most
/// one missed boundary in steady state; even after a hard failover the
/// hosted service recovers on its very next tick. Lowering the interval
/// below an hour would generate more metadata churn than it saves.
/// </para>
/// </remarks>
public sealed class AuditLogPartitionMaintenanceOptions
{
    // Named defaults so the initialisers below document themselves.
    private const int SecondsPerDay = 24 * 60 * 60;
    private const int DefaultLookaheadMonths = 1;

    /// <summary>
    /// Time between two maintenance ticks, in seconds. Defaults to
    /// 86 400 seconds (24 hours).
    /// </summary>
    public int IntervalSeconds { get; set; } = SecondsPerDay;

    /// <summary>
    /// Minimum number of future months <c>pf_AuditLog_Month</c> must cover
    /// once a tick has completed. Defaults to 1: in mid-May, for example,
    /// the partition for the next full month (June) must already exist.
    /// </summary>
    public int LookaheadMonths { get; set; } = DefaultLookaheadMonths;
}
|
||||
@@ -0,0 +1,145 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Interfaces;
|
||||
|
||||
namespace ScadaLink.AuditLog.Central;
|
||||
|
||||
/// <summary>
/// Central <see cref="IHostedService"/> (M6-T5, Bundle D) that rolls
/// <c>pf_AuditLog_Month</c> forward once a day. Each tick opens a fresh DI
/// scope, resolves <see cref="IPartitionMaintenance"/>, and calls
/// <see cref="IPartitionMaintenance.EnsureLookaheadAsync"/> to SPLIT any
/// missing future boundaries — the partition function must always cover at
/// least <see cref="AuditLogPartitionMaintenanceOptions.LookaheadMonths"/>
/// future months, otherwise inserts past the highest boundary accumulate in
/// a single unbounded tail partition that <c>SwitchOutPartitionAsync</c>
/// cannot purge cleanly.
/// </summary>
/// <remarks>
/// <para>
/// <b>Why a hosted service, not an actor.</b> Bundle C's
/// <see cref="AuditLogPurgeActor"/> sits inside the central singleton
/// because it needs supervised lifecycle alongside the rest of the
/// reconciliation / ingest pipeline. Roll-forward is genuinely a once-a-day
/// chore with no cross-actor coordination, so we use the much simpler
/// hosted-service pattern: <c>Task.Run</c> on start, <c>Task.Delay</c>
/// between ticks, cancellation on stop. Reusing
/// <see cref="IPartitionMaintenance"/> from the central node-only DI graph
/// keeps the contract testable without any actor framework involvement.
/// </para>
/// <para>
/// <b>Failure containment.</b> The tick body wraps the maintenance call in
/// a try/catch so a transient SQL Server error never tears down the hosted
/// service — the next tick simply retries. The exception is logged with
/// the original stack trace at <c>Error</c> level; ops surfaces (M6 Bundle
/// E's central health collector) can subscribe to the logger to alert on
/// repeated failures. A tick that is interrupted by shutdown is not a
/// failure and is logged at <c>Debug</c> level only. An out-of-range
/// <see cref="AuditLogPartitionMaintenanceOptions.IntervalSeconds"/> is
/// clamped (with a warning) instead of faulting the loop.
/// </para>
/// <para>
/// <b>Startup ordering.</b> A first tick fires immediately at
/// <see cref="StartAsync"/> so a fresh deployment doesn't need to wait
/// <see cref="AuditLogPartitionMaintenanceOptions.IntervalSeconds"/> for
/// the partition function to come up to spec. This is also what the brief
/// asks for ("Run once on startup").
/// </para>
/// <para>
/// <b>DI scope per tick.</b> <see cref="IPartitionMaintenance"/> is scoped
/// (alongside the rest of the EF repositories) because the implementation
/// reuses the per-scope <c>ScadaLinkDbContext</c>. A hosted service is a
/// singleton, so it must open and dispose a scope around each tick — the
/// same pattern <see cref="AuditLogPurgeActor"/> uses.
/// </para>
/// </remarks>
public sealed class AuditLogPartitionMaintenanceService : IHostedService, IDisposable
{
    // Task.Delay throws ArgumentOutOfRangeException for negative delays and
    // for delays beyond its supported maximum; a zero delay would spin the
    // loop against the database. Clamp to a range that is valid on every
    // runtime: 1 second .. int.MaxValue milliseconds (about 24.8 days).
    private const int MinIntervalSeconds = 1;
    private const int MaxIntervalSeconds = int.MaxValue / 1000;

    private readonly IServiceScopeFactory _scopeFactory;
    private readonly IOptions<AuditLogPartitionMaintenanceOptions> _options;
    private readonly ILogger<AuditLogPartitionMaintenanceService> _logger;
    private CancellationTokenSource? _cts;
    private Task? _loop;

    /// <summary>Creates the service; every dependency is required.</summary>
    /// <param name="scopeFactory">Factory used to open one DI scope per tick.</param>
    /// <param name="options">Tick interval and lookahead settings.</param>
    /// <param name="logger">Sink for tick results, warnings, and failures.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AuditLogPartitionMaintenanceService(
        IServiceScopeFactory scopeFactory,
        IOptions<AuditLogPartitionMaintenanceOptions> options,
        ILogger<AuditLogPartitionMaintenanceService> logger)
    {
        _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public Task StartAsync(CancellationToken ct)
    {
        // Linked CTS lets StopAsync's cancellation AND the token handed to
        // StartAsync both terminate the loop; either side firing aborts the
        // pending Task.Delay.
        _cts = CancellationTokenSource.CreateLinkedTokenSource(ct);

        // Capture the token now: reading _cts.Token lazily on the pool
        // thread could race with Dispose and throw ObjectDisposedException.
        var loopToken = _cts.Token;
        _loop = Task.Run(() => RunLoopAsync(loopToken));
        return Task.CompletedTask;
    }

    /// <summary>
    /// Runs one tick immediately, then one tick per configured interval
    /// until <paramref name="ct"/> is cancelled.
    /// </summary>
    private async Task RunLoopAsync(CancellationToken ct)
    {
        // Run once on startup so a fresh deployment isn't gated on the
        // IntervalSeconds initial wait — the brief calls this out explicitly.
        await SafeMaintainAsync(ct).ConfigureAwait(false);

        while (!ct.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(ResolveInterval(), ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                break;
            }

            await SafeMaintainAsync(ct).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Returns the delay between ticks, clamping the configured value into
    /// the range <c>Task.Delay</c> accepts and warning when it had to.
    /// </summary>
    private TimeSpan ResolveInterval()
    {
        var configured = _options.Value.IntervalSeconds;
        var effective = Math.Clamp(configured, MinIntervalSeconds, MaxIntervalSeconds);
        if (effective != configured)
        {
            _logger.LogWarning(
                "AuditLogPartitionMaintenance IntervalSeconds {Configured} is outside [{Min}, {Max}]; using {Effective}",
                configured,
                MinIntervalSeconds,
                MaxIntervalSeconds,
                effective);
        }

        return TimeSpan.FromSeconds(effective);
    }

    /// <summary>
    /// Executes a single maintenance tick inside its own DI scope. Never
    /// throws: failures are logged so the next tick gets a chance.
    /// </summary>
    private async Task SafeMaintainAsync(CancellationToken ct)
    {
        try
        {
            await using var scope = _scopeFactory.CreateAsyncScope();
            var maintenance = scope.ServiceProvider.GetRequiredService<IPartitionMaintenance>();
            var added = await maintenance
                .EnsureLookaheadAsync(_options.Value.LookaheadMonths, ct)
                .ConfigureAwait(false);
            if (added.Count > 0)
            {
                // Invariant culture keeps the logged dates stable regardless
                // of the host's current culture/calendar.
                var invariant = System.Globalization.CultureInfo.InvariantCulture;
                _logger.LogInformation(
                    "AuditLogPartitionMaintenance added {Count} boundaries: {Boundaries}",
                    added.Count,
                    string.Join(", ", added.Select(b => b.ToString("yyyy-MM-dd", invariant))));
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Shutdown interrupted an in-flight tick. That is expected and
            // must not show up as an Error that alerting would count.
            _logger.LogDebug("AuditLogPartitionMaintenance tick cancelled by shutdown");
        }
        catch (Exception ex)
        {
            // Catch-all is deliberate: the hosted service must survive every
            // class of tick failure (transient SQL, DI resolution, etc.) so
            // the next tick gets a chance. The brief's contract is
            // "exception logged, not propagated".
            _logger.LogError(ex, "AuditLogPartitionMaintenance tick failed");
        }
    }

    /// <inheritdoc />
    public async Task StopAsync(CancellationToken ct)
    {
        var loop = _loop;
        if (loop is null)
        {
            // StartAsync never ran, so there is nothing to stop.
            return;
        }

        _cts?.Cancel();

        // Wait for the loop to wind down, but give up once the host's
        // graceful-shutdown token fires so a tick that is slow to observe
        // cancellation cannot block process exit.
        var shutdownDeadline = Task.Delay(Timeout.Infinite, ct);
        var finished = await Task.WhenAny(loop, shutdownDeadline).ConfigureAwait(false);
        if (ReferenceEquals(finished, loop))
        {
            // Surface an unexpected loop fault to the host, as before.
            await loop.ConfigureAwait(false);
        }
    }

    /// <inheritdoc />
    public void Dispose()
    {
        _cts?.Dispose();
    }
}
|
||||
@@ -1,6 +1,7 @@
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Central;
|
||||
@@ -43,6 +44,9 @@ public static class ServiceCollectionExtensions
|
||||
/// <summary>Configuration section bound to <see cref="SiteAuditTelemetryOptions"/>.</summary>
|
||||
public const string SiteTelemetrySectionName = "AuditLog:SiteTelemetry";
|
||||
|
||||
/// <summary>Configuration section bound to <see cref="AuditLogPartitionMaintenanceOptions"/>.</summary>
|
||||
public const string PartitionMaintenanceSectionName = "AuditLog:PartitionMaintenance";
|
||||
|
||||
/// <summary>
|
||||
/// Registers the Audit Log (#23) component services: options, the site
|
||||
/// SQLite writer chain (primary + ring fallback + failure-counter sink),
|
||||
@@ -216,4 +220,37 @@ public static class ServiceCollectionExtensions
|
||||
ServiceDescriptor.Singleton<IAuditRedactionFailureCounter, HealthMetricsAuditRedactionFailureCounter>());
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
/// Audit Log (#23) M6-T5 Bundle D — central-only registration for the
/// <see cref="AuditLogPartitionMaintenanceService"/> hosted service plus
/// its <see cref="AuditLogPartitionMaintenanceOptions"/> binding. Must be
/// called from the Central role's composition root (not from a site
/// composition root); the underlying <c>IPartitionMaintenance</c>
/// implementation is registered by <c>AddConfigurationDatabase</c> and
/// only exists on the central node.
/// </summary>
/// <remarks>
/// <para>
/// Separated from <see cref="AddAuditLog"/> because <c>AddAuditLog</c> is
/// also invoked from site composition roots — silently starting a
/// hosted service that resolves an unregistered dependency on a site
/// would fail every tick. Keeping the central-only registration in its
/// own helper preserves the "every <c>Add*</c> call is safe to issue
/// from any composition root" invariant.
/// </para>
/// <para>
/// The bound options are validated when the host starts: a non-positive
/// <see cref="AuditLogPartitionMaintenanceOptions.IntervalSeconds"/> or a
/// negative <see cref="AuditLogPartitionMaintenanceOptions.LookaheadMonths"/>
/// aborts startup with an <c>OptionsValidationException</c> instead of
/// surfacing later as a misbehaving maintenance loop.
/// </para>
/// </remarks>
/// <param name="services">Service collection to register into.</param>
/// <param name="config">
/// Root configuration; the <see cref="PartitionMaintenanceSectionName"/>
/// section is bound to the options.
/// </param>
/// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="services"/> or <paramref name="config"/> is <c>null</c>.
/// </exception>
public static IServiceCollection AddAuditLogCentralMaintenance(
    this IServiceCollection services,
    IConfiguration config)
{
    ArgumentNullException.ThrowIfNull(services);
    ArgumentNullException.ThrowIfNull(config);

    services.AddOptions<AuditLogPartitionMaintenanceOptions>()
        .Bind(config.GetSection(PartitionMaintenanceSectionName))
        // The tick interval feeds Task.Delay: zero would spin the loop and
        // a negative value would throw, so reject both up front.
        .Validate(
            o => o.IntervalSeconds > 0,
            $"{PartitionMaintenanceSectionName}:{nameof(AuditLogPartitionMaintenanceOptions.IntervalSeconds)} must be greater than zero.")
        // A negative number of future months has no meaning.
        .Validate(
            o => o.LookaheadMonths >= 0,
            $"{PartitionMaintenanceSectionName}:{nameof(AuditLogPartitionMaintenanceOptions.LookaheadMonths)} must not be negative.")
        // Fail fast at host start rather than on first options access.
        .ValidateOnStart();

    services.AddHostedService<AuditLogPartitionMaintenanceService>();

    return services;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user