feat(auditlog): AuditLogPartitionMaintenanceService monthly roll-forward (#23 M6)
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
namespace ScadaLink.AuditLog.Central;
|
||||
|
||||
/// <summary>
/// Configuration for the central
/// <see cref="AuditLogPartitionMaintenanceService"/> hosted service (M6-T5).
/// Out of the box the service ticks every 24 hours and keeps at least one
/// future monthly boundary ahead of <see cref="DateTime.UtcNow"/>.
/// </summary>
/// <remarks>
/// <para>
/// On every tick the hosted service inspects the highest boundary of
/// <c>pf_AuditLog_Month</c> and SPLITs additional monthly boundaries until
/// <see cref="LookaheadMonths"/> future months are covered. One month is a
/// deliberately conservative default: a smaller value risks an end-of-month
/// race in which inserts land in the unbounded tail partition, while a larger
/// value costs nothing but commits to partitions earlier than necessary.
/// </para>
/// <para>
/// A 24-hour cadence is the cheapest interval that still guarantees at most
/// one missed boundary in steady state — even after a hard failover, the
/// hosted service recovers on its very next tick. Going below one hour would
/// generate more metadata churn than it saves.
/// </para>
/// </remarks>
public sealed class AuditLogPartitionMaintenanceOptions
{
    /// <summary>Seconds between two maintenance ticks; defaults to 86 400 (24 h).</summary>
    public int IntervalSeconds { get; set; } = 24 * 60 * 60;

    /// <summary>
    /// How many future months <c>pf_AuditLog_Month</c> has to cover once a
    /// tick has finished. The default of 1 means that in mid-May the
    /// partition for the next full month (June) must already exist.
    /// </summary>
    public int LookaheadMonths { get; set; } = 1;
}
|
||||
@@ -0,0 +1,145 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.Commons.Interfaces;
|
||||
|
||||
namespace ScadaLink.AuditLog.Central;
|
||||
|
||||
/// <summary>
/// Central <see cref="IHostedService"/> (M6-T5, Bundle D) that rolls
/// <c>pf_AuditLog_Month</c> forward on a fixed cadence (daily by default).
/// Each tick opens a fresh DI scope, resolves
/// <see cref="IPartitionMaintenance"/>, and calls
/// <see cref="IPartitionMaintenance.EnsureLookaheadAsync"/> to SPLIT any
/// missing future boundaries — the partition function must always cover at
/// least <see cref="AuditLogPartitionMaintenanceOptions.LookaheadMonths"/>
/// future months, otherwise inserts past the highest boundary accumulate in
/// a single unbounded tail partition that <c>SwitchOutPartitionAsync</c>
/// cannot purge cleanly.
/// </summary>
/// <remarks>
/// <para>
/// <b>Why a hosted service, not an actor.</b> Bundle C's
/// <see cref="AuditLogPurgeActor"/> sits inside the central singleton
/// because it needs supervised lifecycle alongside the rest of the
/// reconciliation / ingest pipeline. Roll-forward is a once-a-day chore with
/// no cross-actor coordination, so the simpler hosted-service pattern is
/// used: <c>Task.Run</c> on start, <c>Task.Delay</c> between ticks,
/// cancellation on stop.
/// </para>
/// <para>
/// <b>Failure containment.</b> The tick body wraps the maintenance call in a
/// try/catch so a failing tick never tears down the hosted service — the
/// next tick simply retries. Failures are logged at <c>Error</c> level with
/// the original exception. A tick interrupted by shutdown is NOT a failure
/// and is logged at <c>Debug</c> level only.
/// </para>
/// <para>
/// <b>Startup ordering.</b> A first tick fires immediately after
/// <see cref="StartAsync"/> so a fresh deployment does not have to wait
/// <see cref="AuditLogPartitionMaintenanceOptions.IntervalSeconds"/> for the
/// partition function to come up to spec ("Run once on startup").
/// </para>
/// <para>
/// <b>Interval sanitising.</b> A non-positive
/// <see cref="AuditLogPartitionMaintenanceOptions.IntervalSeconds"/> falls
/// back to the options type's default, and a value larger than
/// <c>Task.Delay</c> accepts is clamped; either case is reported once at
/// <c>Warning</c> level instead of crashing (or busy-spinning) the loop.
/// </para>
/// <para>
/// <b>DI scope per tick.</b> <see cref="IPartitionMaintenance"/> is resolved
/// from a scope created and disposed around each tick, because the hosted
/// service outlives any single scope.
/// </para>
/// </remarks>
public sealed class AuditLogPartitionMaintenanceService : IHostedService, IDisposable
{
    /// <summary>Fallback cadence used when the configured interval is not positive.</summary>
    private static readonly TimeSpan DefaultInterval =
        TimeSpan.FromSeconds(new AuditLogPartitionMaintenanceOptions().IntervalSeconds);

    /// <summary>
    /// Longest delay <c>Task.Delay</c> accepts on .NET 6 and later
    /// (4 294 967 294 ms); anything larger throws
    /// <see cref="ArgumentOutOfRangeException"/>.
    /// </summary>
    private static readonly TimeSpan MaximumDelay = TimeSpan.FromMilliseconds(uint.MaxValue - 1);

    private readonly IServiceScopeFactory _scopeFactory;
    private readonly IOptions<AuditLogPartitionMaintenanceOptions> _options;
    private readonly ILogger<AuditLogPartitionMaintenanceService> _logger;
    private CancellationTokenSource? _cts;
    private Task? _loop;

    // Only touched from the loop task; keeps the out-of-range warning from
    // being repeated on every tick.
    private bool _intervalWarningLogged;

    /// <summary>Creates the service; every dependency is mandatory.</summary>
    /// <param name="scopeFactory">Factory used to open one DI scope per tick.</param>
    /// <param name="options">Cadence and lookahead settings.</param>
    /// <param name="logger">Sink for tick results and failures.</param>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public AuditLogPartitionMaintenanceService(
        IServiceScopeFactory scopeFactory,
        IOptions<AuditLogPartitionMaintenanceOptions> options,
        ILogger<AuditLogPartitionMaintenanceService> logger)
    {
        _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory));
        _options = options ?? throw new ArgumentNullException(nameof(options));
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    /// <inheritdoc />
    public Task StartAsync(CancellationToken ct)
    {
        // Linked CTS lets StopAsync's cancellation AND the token handed to
        // StartAsync both terminate the loop; either side firing aborts the
        // pending Task.Delay.
        _cts = CancellationTokenSource.CreateLinkedTokenSource(ct);

        // Capture the token now: reading _cts inside the lambda would race
        // with Dispose() and could observe a disposed/null source.
        var loopToken = _cts.Token;
        _loop = Task.Run(() => RunLoopAsync(loopToken));
        return Task.CompletedTask;
    }

    /// <summary>
    /// Runs one eager tick, then one tick per interval until
    /// <paramref name="ct"/> is cancelled.
    /// </summary>
    private async Task RunLoopAsync(CancellationToken ct)
    {
        // Run once on startup so a fresh deployment isn't gated on the
        // IntervalSeconds initial wait.
        await SafeMaintainAsync(ct).ConfigureAwait(false);

        while (!ct.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(ResolveInterval(), ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException)
            {
                break;
            }

            await SafeMaintainAsync(ct).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Turns the configured <c>IntervalSeconds</c> into a delay that
    /// <c>Task.Delay</c> is guaranteed to accept.
    /// </summary>
    private TimeSpan ResolveInterval()
    {
        var configuredSeconds = _options.Value.IntervalSeconds;

        if (configuredSeconds <= 0)
        {
            // Zero would busy-spin against the database; a negative value
            // would make Task.Delay throw and silently end the loop.
            WarnAboutIntervalOnce(configuredSeconds, DefaultInterval);
            return DefaultInterval;
        }

        var requested = TimeSpan.FromSeconds(configuredSeconds);
        if (requested > MaximumDelay)
        {
            WarnAboutIntervalOnce(configuredSeconds, MaximumDelay);
            return MaximumDelay;
        }

        return requested;
    }

    /// <summary>Logs the out-of-range interval warning the first time it is hit.</summary>
    private void WarnAboutIntervalOnce(int configuredSeconds, TimeSpan effective)
    {
        if (_intervalWarningLogged)
        {
            return;
        }

        _intervalWarningLogged = true;
        _logger.LogWarning(
            "AuditLogPartitionMaintenance IntervalSeconds {IntervalSeconds} is out of range; using {EffectiveInterval} instead",
            configuredSeconds,
            effective);
    }

    /// <summary>
    /// Executes a single maintenance tick inside its own DI scope. Never
    /// throws: failures are logged so the loop can retry on the next tick.
    /// </summary>
    private async Task SafeMaintainAsync(CancellationToken ct)
    {
        try
        {
            await using var scope = _scopeFactory.CreateAsyncScope();
            var maintenance = scope.ServiceProvider.GetRequiredService<IPartitionMaintenance>();
            var added = await maintenance
                .EnsureLookaheadAsync(_options.Value.LookaheadMonths, ct)
                .ConfigureAwait(false);
            if (added.Count > 0)
            {
                _logger.LogInformation(
                    "AuditLogPartitionMaintenance added {Count} boundaries: {Boundaries}",
                    added.Count,
                    string.Join(", ", added.Select(b => b.ToString("yyyy-MM-dd"))));
            }
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Shutdown interrupted the tick. That is the expected way to
            // stop, not a maintenance failure, so keep it out of the Error log.
            _logger.LogDebug("AuditLogPartitionMaintenance tick cancelled by shutdown");
        }
        catch (Exception ex)
        {
            // Catch-all is deliberate: the hosted service must survive every
            // class of tick failure (transient SQL, DI resolution, etc.) so
            // the next tick gets a chance. The contract is
            // "exception logged, not propagated".
            _logger.LogError(ex, "AuditLogPartitionMaintenance tick failed");
        }
    }

    /// <inheritdoc />
    /// <remarks>
    /// Cancels the loop and waits for it to finish, but gives up waiting when
    /// <paramref name="ct"/> (the host's shutdown deadline) fires so a tick
    /// that ignores cancellation cannot block shutdown forever.
    /// </remarks>
    public async Task StopAsync(CancellationToken ct)
    {
        var loop = _loop;
        if (loop is null)
        {
            // StartAsync was never called; nothing to stop.
            return;
        }

        _cts?.Cancel();

        try
        {
            await loop.WaitAsync(ct).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested && !loop.IsCompleted)
        {
            _logger.LogWarning(
                "AuditLogPartitionMaintenance loop did not stop before the shutdown deadline");
        }
    }

    /// <inheritdoc />
    public void Dispose()
    {
        // Swap the field out first so a repeated Dispose (or a late
        // StopAsync) never touches a disposed source.
        var cts = Interlocked.Exchange(ref _cts, null);
        if (cts is null)
        {
            return;
        }

        // Cancel before disposing: if the host disposes without calling
        // StopAsync, the loop would otherwise keep ticking forever.
        cts.Cancel();
        cts.Dispose();
    }
}
|
||||
@@ -1,6 +1,7 @@
|
||||
using Microsoft.Extensions.Configuration;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.DependencyInjection.Extensions;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Central;
|
||||
@@ -43,6 +44,9 @@ public static class ServiceCollectionExtensions
|
||||
/// <summary>Configuration section bound to <see cref="SiteAuditTelemetryOptions"/>.</summary>
|
||||
public const string SiteTelemetrySectionName = "AuditLog:SiteTelemetry";
|
||||
|
||||
/// <summary>Configuration section bound to <see cref="AuditLogPartitionMaintenanceOptions"/>.</summary>
|
||||
public const string PartitionMaintenanceSectionName = "AuditLog:PartitionMaintenance";
|
||||
|
||||
/// <summary>
|
||||
/// Registers the Audit Log (#23) component services: options, the site
|
||||
/// SQLite writer chain (primary + ring fallback + failure-counter sink),
|
||||
@@ -216,4 +220,37 @@ public static class ServiceCollectionExtensions
|
||||
ServiceDescriptor.Singleton<IAuditRedactionFailureCounter, HealthMetricsAuditRedactionFailureCounter>());
|
||||
return services;
|
||||
}
|
||||
|
||||
/// <summary>
/// Central-only wiring for Audit Log (#23) M6-T5 Bundle D: binds
/// <see cref="AuditLogPartitionMaintenanceOptions"/> to the
/// <see cref="PartitionMaintenanceSectionName"/> configuration section and
/// registers <see cref="AuditLogPartitionMaintenanceService"/> as a hosted
/// service. Call it from the Central role's composition root only (never
/// from a site composition root); the <c>IPartitionMaintenance</c>
/// implementation the service resolves is registered by
/// <c>AddConfigurationDatabase</c> and only exists on the central node.
/// </summary>
/// <remarks>
/// <para>
/// This lives outside <see cref="AddAuditLog"/> because <c>AddAuditLog</c>
/// is also called from site composition roots. Starting the hosted service
/// there would resolve an unregistered dependency and fail on every tick.
/// A dedicated helper keeps the "every <c>Add*</c> call is safe to issue
/// from any composition root" invariant intact for <c>AddAuditLog</c>.
/// </para>
/// </remarks>
/// <param name="services">Service collection to register into.</param>
/// <param name="config">Configuration root that contains the maintenance section.</param>
/// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="services"/> or <paramref name="config"/> is <c>null</c>.
/// </exception>
public static IServiceCollection AddAuditLogCentralMaintenance(
    this IServiceCollection services,
    IConfiguration config)
{
    ArgumentNullException.ThrowIfNull(services);
    ArgumentNullException.ThrowIfNull(config);

    var maintenanceSection = config.GetSection(PartitionMaintenanceSectionName);
    services
        .AddOptions<AuditLogPartitionMaintenanceOptions>()
        .Bind(maintenanceSection);

    return services.AddHostedService<AuditLogPartitionMaintenanceService>();
}
|
||||
}
|
||||
|
||||
48
src/ScadaLink.Commons/Interfaces/IPartitionMaintenance.cs
Normal file
48
src/ScadaLink.Commons/Interfaces/IPartitionMaintenance.cs
Normal file
@@ -0,0 +1,48 @@
|
||||
namespace ScadaLink.Commons.Interfaces;
|
||||
|
||||
/// <summary>
/// Contract for rolling the central AuditLog partition function
/// (<c>pf_AuditLog_Month</c>) forward. The daily hosted service introduced
/// by M6-T5 (<c>AuditLogPartitionMaintenanceService</c>) calls
/// <see cref="EnsureLookaheadAsync"/> so that the function always has at
/// least <c>LookaheadMonths</c> of future boundaries. Without that, inserts
/// past the highest boundary pile up in one ever-growing tail partition that
/// <c>SwitchOutPartitionAsync</c> cannot purge cleanly.
/// </summary>
/// <remarks>
/// <para>
/// The interface is declared in <c>ScadaLink.Commons</c> so that the hosted
/// service in <c>ScadaLink.AuditLog</c> can use it without referencing
/// <c>ScadaLink.ConfigurationDatabase</c>. The EF-based implementation is
/// <c>ScadaLink.ConfigurationDatabase.Maintenance.AuditLogPartitionMaintenance</c>,
/// registered by <c>AddConfigurationDatabase</c>.
/// </para>
/// <para>
/// There is no EF model for a partition function, so implementations work
/// through raw SQL: <see cref="GetMaxBoundaryAsync"/> reads
/// <c>sys.partition_range_values</c>, and <see cref="EnsureLookaheadAsync"/>
/// additionally alters <c>pf_AuditLog_Month</c>. Only the two operations the
/// hosted service needs are exposed; this is deliberately not a general
/// partition-DDL surface.
/// </para>
/// </remarks>
public interface IPartitionMaintenance
{
    /// <summary>
    /// Adds monthly boundaries to <c>pf_AuditLog_Month</c> until the function
    /// covers at least <paramref name="lookaheadMonths"/> future months
    /// measured from <see cref="DateTime.UtcNow"/>. The call is idempotent:
    /// boundaries that are already present are skipped, not re-issued.
    /// </summary>
    /// <param name="lookaheadMonths">Number of future months that must be covered.</param>
    /// <param name="ct">Token that cancels the operation.</param>
    /// <returns>The boundaries that were actually added, oldest first.</returns>
    Task<IReadOnlyList<DateTime>> EnsureLookaheadAsync(int lookaheadMonths, CancellationToken ct = default);

    /// <summary>
    /// Looks up the highest boundary currently defined for
    /// <c>pf_AuditLog_Month</c> in <c>sys.partition_range_values</c>.
    /// </summary>
    /// <param name="ct">Token that cancels the operation.</param>
    /// <returns>
    /// The maximum boundary, or <c>null</c> if the partition function is
    /// missing or has no boundaries.
    /// </returns>
    Task<DateTime?> GetMaxBoundaryAsync(CancellationToken ct = default);
}
|
||||
@@ -0,0 +1,218 @@
|
||||
using System.Globalization;
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ScadaLink.Commons.Interfaces;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Maintenance;
|
||||
|
||||
/// <summary>
/// EF/SQL-Server implementation of <see cref="IPartitionMaintenance"/> that
/// rolls forward <c>pf_AuditLog_Month</c> by issuing
/// <c>ALTER PARTITION FUNCTION … SPLIT RANGE</c> for each missing future
/// monthly boundary.
/// </summary>
/// <remarks>
/// <para>
/// The class is registered as scoped in <c>AddConfigurationDatabase</c>
/// because it works through the per-scope <see cref="ScadaLinkDbContext"/>.
/// It keeps no state between calls.
/// </para>
/// <para>
/// <b>Idempotency model.</b> Each call reads the current max boundary from
/// <c>sys.partition_range_values</c> and only issues SPLIT RANGE for
/// first-of-month boundaries strictly after it, so an existing boundary is
/// never re-issued in the normal case and the returned list contains only
/// boundaries this call created.
/// </para>
/// <para>
/// <b>No gaps.</b> Because the next call starts from the max boundary, a
/// failed SPLIT must never be followed by a successful SPLIT of a later
/// month — the skipped month would never be revisited. When a SPLIT fails
/// the max boundary is re-read: if the boundary is now present (another
/// process raced us) the loop carries on, otherwise the pass stops and the
/// next call retries from the same month.
/// </para>
/// <para>
/// <b>Why "first of month".</b> Per the original author, the migration
/// seeds boundaries on the first-of-month at midnight UTC; new boundaries
/// follow the same convention so the partition layout stays uniform.
/// <see cref="NormalizeToFirstOfMonth"/> rounds an arbitrary timestamp up to
/// the next first-of-month (e.g. 2026-05-20 → 2026-06-01),
/// <see cref="FirstOfMonthAfter"/> yields the first-of-month strictly after
/// a boundary, and <see cref="NextMonthBoundary"/> walks one month at a time.
/// </para>
/// <para>
/// <b>Permissions.</b> NOTE(review): the original comment stated that the
/// migration's <c>scadalink_audit_purger</c> role carries
/// <c>ALTER ON SCHEMA::dbo</c> and that this is sufficient. SQL Server's
/// documentation lists <c>ALTER ANY DATASPACE</c> (or database-level
/// ALTER / CONTROL) for <c>ALTER PARTITION FUNCTION</c> — confirm the grant
/// against the migration.
/// </para>
/// </remarks>
public sealed class AuditLogPartitionMaintenance : IPartitionMaintenance
{
    private const string PartitionFunctionName = "pf_AuditLog_Month";
    private const string PartitionSchemeName = "ps_AuditLog_Month";
    private const string TargetFileGroup = "PRIMARY";

    private readonly ScadaLinkDbContext _context;
    private readonly ILogger<AuditLogPartitionMaintenance> _logger;

    /// <summary>Creates the maintenance helper on top of a scoped context.</summary>
    /// <param name="context">Context whose connection is used for the raw SQL.</param>
    /// <param name="logger">Optional logger; a no-op logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
    public AuditLogPartitionMaintenance(
        ScadaLinkDbContext context,
        ILogger<AuditLogPartitionMaintenance>? logger = null)
    {
        _context = context ?? throw new ArgumentNullException(nameof(context));
        _logger = logger ?? NullLogger<AuditLogPartitionMaintenance>.Instance;
    }

    /// <inheritdoc />
    public async Task<DateTime?> GetMaxBoundaryAsync(CancellationToken ct = default)
    {
        // The sql_variant `value` column is CAST to datetime2(7); per the
        // original author every boundary is declared as datetime2(7) by the
        // migration, so the cast loses no precision. The function name comes
        // from the shared constant so the read and the SPLIT can never drift.
        const string sql = $@"
SELECT MAX(CAST(rv.value AS datetime2(7)))
FROM sys.partition_range_values rv
INNER JOIN sys.partition_functions pf ON rv.function_id = pf.function_id
WHERE pf.name = '{PartitionFunctionName}';";

        var conn = _context.Database.GetDbConnection();

        // Only close the connection if this method opened it; a connection
        // the caller already had open is left untouched.
        var openedHere = false;
        if (conn.State != System.Data.ConnectionState.Open)
        {
            await conn.OpenAsync(ct).ConfigureAwait(false);
            openedHere = true;
        }

        try
        {
            await using var cmd = conn.CreateCommand();
            cmd.CommandText = sql;
            var raw = await cmd.ExecuteScalarAsync(ct).ConfigureAwait(false);

            // MAX over zero rows yields NULL: function missing or no boundaries.
            if (raw is null || raw is DBNull)
            {
                return null;
            }

            // datetime2 materialises as DateTime with Kind=Unspecified; the
            // boundaries are UTC midnight by convention, so re-tag the kind
            // to keep comparisons against DateTime.UtcNow in one kind space.
            return DateTime.SpecifyKind((DateTime)raw, DateTimeKind.Utc);
        }
        finally
        {
            if (openedHere)
            {
                await conn.CloseAsync().ConfigureAwait(false);
            }
        }
    }

    /// <inheritdoc />
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="lookaheadMonths"/> is less than 1.
    /// </exception>
    public async Task<IReadOnlyList<DateTime>> EnsureLookaheadAsync(
        int lookaheadMonths,
        CancellationToken ct = default)
    {
        if (lookaheadMonths < 1)
        {
            throw new ArgumentOutOfRangeException(
                nameof(lookaheadMonths),
                lookaheadMonths,
                "Lookahead must be at least one month — the partition function would otherwise be allowed to fall behind 'now'.");
        }

        // Horizon: the last first-of-month boundary that must exist after
        // this call. Example: now = 2026-05-20, lookaheadMonths = 1 →
        // horizon = 2026-07-01 (so the June 2026 partition is bounded on
        // both sides by mid-May).
        var horizon = NormalizeToFirstOfMonth(DateTime.UtcNow).AddMonths(lookaheadMonths);

        var max = await GetMaxBoundaryAsync(ct).ConfigureAwait(false);
        if (max is null)
        {
            // No partition function (e.g. migrations not applied) — nothing
            // we can safely SPLIT against.
            _logger.LogWarning(
                "EnsureLookaheadAsync: partition function {PartitionFunctionName} not found; skipping.",
                PartitionFunctionName);
            return Array.Empty<DateTime>();
        }

        var added = new List<DateTime>();

        // Start from the first-of-month strictly after max. When max is
        // already past the horizon the loop body never runs.
        var next = FirstOfMonthAfter(max.Value);
        while (next <= horizon)
        {
            // SPLIT RANGE does not accept @-parameters, so the boundary is
            // rendered into the DDL text. The value is a DateTime formatted
            // with a fixed, culture-invariant pattern — no caller-supplied
            // text reaches the statement.
            var literal = next.ToString("yyyy-MM-ddTHH:mm:ss.fffffff", CultureInfo.InvariantCulture);

            // NEXT USED is re-set before every SPLIT: per the original
            // author, SQL Server consumes the hint on each split and a
            // subsequent split fails without it. Re-issuing it is harmless.
            var sql = $@"
ALTER PARTITION SCHEME {PartitionSchemeName} NEXT USED [{TargetFileGroup}];
ALTER PARTITION FUNCTION {PartitionFunctionName}() SPLIT RANGE ('{literal}');";

            try
            {
                await _context.Database.ExecuteSqlRawAsync(sql, ct).ConfigureAwait(false);
                added.Add(next);
            }
            catch (SqlException ex)
            {
                // Distinguish "someone else already added it" from a real
                // failure by looking at the catalog again.
                var observedMax = await GetMaxBoundaryAsync(ct).ConfigureAwait(false);
                if (observedMax is { } current && current >= next)
                {
                    // Raced by another process: the desired end state
                    // (boundary present) holds, so keep going.
                    _logger.LogWarning(
                        ex,
                        "EnsureLookaheadAsync: SPLIT RANGE for boundary {Boundary:o} failed; continuing.",
                        next);
                }
                else
                {
                    // Boundary is still missing. Splitting a LATER month now
                    // would move max past it and the gap would never be
                    // filled, so stop and let the next call retry.
                    _logger.LogError(
                        ex,
                        "EnsureLookaheadAsync: SPLIT RANGE for boundary {Boundary:o} failed and the boundary is still missing; stopping this pass.",
                        next);
                    break;
                }
            }

            next = NextMonthBoundary(next);
        }

        return added;
    }

    /// <summary>
    /// Rounds an arbitrary instant UP to the next first-of-month at midnight.
    /// An input that already is a first-of-month at midnight is returned
    /// unchanged. A non-UTC <see cref="DateTime.Kind"/> is re-tagged as UTC
    /// without converting the clock value.
    /// </summary>
    private static DateTime NormalizeToFirstOfMonth(DateTime instant)
    {
        var utc = instant.Kind == DateTimeKind.Utc
            ? instant
            : DateTime.SpecifyKind(instant, DateTimeKind.Utc);

        var firstOfThisMonth = new DateTime(utc.Year, utc.Month, 1, 0, 0, 0, DateTimeKind.Utc);
        return utc == firstOfThisMonth ? firstOfThisMonth : firstOfThisMonth.AddMonths(1);
    }

    /// <summary>
    /// Returns the first-of-month (midnight, UTC-tagged) that lies strictly
    /// after <paramref name="boundary"/>, whatever day or time of day the
    /// boundary itself falls on.
    /// </summary>
    private static DateTime FirstOfMonthAfter(DateTime boundary) =>
        new DateTime(boundary.Year, boundary.Month, 1, 0, 0, 0, DateTimeKind.Utc).AddMonths(1);

    /// <summary>Advances a first-of-month boundary by exactly one month.</summary>
    private static DateTime NextMonthBoundary(DateTime boundary) =>
        boundary.AddMonths(1);
}
|
||||
@@ -1,8 +1,10 @@
|
||||
using Microsoft.AspNetCore.DataProtection;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using ScadaLink.Commons.Interfaces;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.ConfigurationDatabase.Maintenance;
|
||||
using ScadaLink.ConfigurationDatabase.Repositories;
|
||||
using ScadaLink.ConfigurationDatabase.Services;
|
||||
|
||||
@@ -52,6 +54,13 @@ public static class ServiceCollectionExtensions
|
||||
services.AddScoped<IAuditService, AuditService>();
|
||||
services.AddScoped<IInstanceLocator, InstanceLocator>();
|
||||
|
||||
// #23 M6 Bundle D: IPartitionMaintenance drives the daily roll-forward
|
||||
// of pf_AuditLog_Month from the central AuditLogPartitionMaintenanceService
|
||||
// hosted service. Scoped because the implementation reuses the per-scope
|
||||
// ScadaLinkDbContext for raw-SQL execution; the hosted service opens a
|
||||
// fresh scope on each tick (mirrors AuditLogPurgeActor / AuditLogIngestActor).
|
||||
services.AddScoped<IPartitionMaintenance, AuditLogPartitionMaintenance>();
|
||||
|
||||
services.AddDataProtection()
|
||||
.PersistKeysToDbContext<ScadaLinkDbContext>();
|
||||
|
||||
|
||||
@@ -84,6 +84,10 @@ try
|
||||
// IAuditLogRepository. The site writer chain is still registered (lazy
|
||||
// singletons) but is never resolved on a central node.
|
||||
builder.Services.AddAuditLog(builder.Configuration);
|
||||
// #23 M6-T5 Bundle D — central-only hosted service that rolls
|
||||
// pf_AuditLog_Month forward monthly. Depends on IPartitionMaintenance
|
||||
// (registered below by AddConfigurationDatabase).
|
||||
builder.Services.AddAuditLogCentralMaintenance(builder.Configuration);
|
||||
// Site Call Audit (#22) — central node owns the SiteCallAuditActor
|
||||
// singleton (M3 Bundle F). The extension itself currently registers
|
||||
// nothing — actor Props are constructed inline in AkkaHostedService —
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ScadaLink.AuditLog.Central;
|
||||
using ScadaLink.Commons.Interfaces;
|
||||
using Xunit;
|
||||
|
||||
namespace ScadaLink.AuditLog.Tests.Central;
|
||||
|
||||
/// <summary>
/// Bundle D (#23 M6-T5) tests for <see cref="AuditLogPartitionMaintenanceService"/>.
/// All tests use an in-memory <see cref="IPartitionMaintenance"/> stub —
/// the real EF/MSSQL implementation is exercised by the
/// <c>AuditLogPartitionMaintenanceTests</c> integration suite in
/// <c>ScadaLink.ConfigurationDatabase.Tests</c>. This file is purely
/// about the hosted service's policy decisions (start/stop, exception
/// containment).
/// </summary>
public class AuditLogPartitionMaintenanceServiceTests
{
    /// <summary>Upper bound for waiting on the background loop in a test.</summary>
    private static readonly TimeSpan PollTimeout = TimeSpan.FromSeconds(3);

    /// <summary>
    /// Recording stub — counts EnsureLookaheadAsync invocations and lets the
    /// test inject one exception to drive the catch-all path.
    /// </summary>
    private sealed class RecordingMaintenance : IPartitionMaintenance
    {
        public int EnsureCallCount;
        public Exception? ThrowOnce;

        public Task<IReadOnlyList<DateTime>> EnsureLookaheadAsync(int lookaheadMonths, CancellationToken ct = default)
        {
            Interlocked.Increment(ref EnsureCallCount);
            if (ThrowOnce is { } ex)
            {
                // Clear before throwing so only the first call fails.
                ThrowOnce = null;
                throw ex;
            }
            return Task.FromResult<IReadOnlyList<DateTime>>(Array.Empty<DateTime>());
        }

        public Task<DateTime?> GetMaxBoundaryAsync(CancellationToken ct = default) =>
            Task.FromResult<DateTime?>(DateTime.UtcNow.AddMonths(6));
    }

    /// <summary>
    /// Captures log entries so a test can prove an exception was actually
    /// logged (not silently swallowed) and was the exact instance the stub
    /// threw. The service logs from a background task while the test thread
    /// polls, so writes and reads are serialised and <see cref="Entries"/>
    /// hands out a snapshot rather than the live list.
    /// </summary>
    private sealed class CapturingLogger : ILogger<AuditLogPartitionMaintenanceService>
    {
        private readonly object _gate = new();
        private readonly List<(LogLevel Level, Exception? Exception, string Message)> _entries = new();

        /// <summary>Point-in-time copy of everything logged so far.</summary>
        public IReadOnlyList<(LogLevel Level, Exception? Exception, string Message)> Entries
        {
            get
            {
                lock (_gate)
                {
                    return _entries.ToArray();
                }
            }
        }

        public IDisposable? BeginScope<TState>(TState state) where TState : notnull => null;

        public bool IsEnabled(LogLevel logLevel) => true;

        public void Log<TState>(
            LogLevel logLevel,
            EventId eventId,
            TState state,
            Exception? exception,
            Func<TState, Exception?, string> formatter)
        {
            // Format outside the lock; only the list mutation needs guarding.
            var message = formatter(state, exception);
            lock (_gate)
            {
                _entries.Add((logLevel, exception, message));
            }
        }
    }

    /// <summary>
    /// Builds a container in which <see cref="IPartitionMaintenance"/> is
    /// scoped (mirroring <c>AddConfigurationDatabase</c>) so the hosted
    /// service's CreateAsyncScope + GetRequiredService resolves the stub.
    /// The caller owns, and must dispose, the returned provider.
    /// </summary>
    private static ServiceProvider BuildProvider(IPartitionMaintenance maintenance)
    {
        var services = new ServiceCollection();
        services.AddScoped(_ => maintenance);
        return services.BuildServiceProvider();
    }

    /// <summary>
    /// Polls <paramref name="condition"/> until it holds or
    /// <see cref="PollTimeout"/> elapses. The loop under test runs on a
    /// background task, so its effects cannot be observed synchronously.
    /// </summary>
    private static async Task WaitUntilAsync(Func<bool> condition)
    {
        var deadline = DateTime.UtcNow + PollTimeout;
        while (!condition() && DateTime.UtcNow < deadline)
        {
            await Task.Delay(20);
        }
    }

    [Fact]
    public async Task StartStop_NoExceptions()
    {
        // Long interval so only the eager startup tick fires inside the test
        // window — keeps assertions deterministic without relying on
        // multiple cadence loops.
        var opts = Options.Create(new AuditLogPartitionMaintenanceOptions
        {
            IntervalSeconds = 60,
            LookaheadMonths = 1,
        });
        var maintenance = new RecordingMaintenance();
        await using var sp = BuildProvider(maintenance);

        var svc = new AuditLogPartitionMaintenanceService(
            sp.GetRequiredService<IServiceScopeFactory>(),
            opts,
            NullLogger<AuditLogPartitionMaintenanceService>.Instance);

        await svc.StartAsync(CancellationToken.None);

        // Wait for the startup tick to have fired.
        await WaitUntilAsync(() => Volatile.Read(ref maintenance.EnsureCallCount) >= 1);

        await svc.StopAsync(CancellationToken.None);
        svc.Dispose();

        Assert.True(maintenance.EnsureCallCount >= 1, $"expected at least 1 ensure call, got {maintenance.EnsureCallCount}");
    }

    [Fact]
    public async Task SafeMaintain_ExceptionLogged_NotPropagated()
    {
        var opts = Options.Create(new AuditLogPartitionMaintenanceOptions
        {
            IntervalSeconds = 60,
            LookaheadMonths = 1,
        });
        // The injected exception fires on the FIRST EnsureLookaheadAsync call
        // (the startup tick) — the hosted service must contain it and
        // continue running.
        var boom = new InvalidOperationException("simulated maintenance failure");
        var maintenance = new RecordingMaintenance { ThrowOnce = boom };
        await using var sp = BuildProvider(maintenance);
        var logger = new CapturingLogger();

        var svc = new AuditLogPartitionMaintenanceService(
            sp.GetRequiredService<IServiceScopeFactory>(),
            opts,
            logger);

        // StartAsync must not throw even though the very first tick will fail.
        await svc.StartAsync(CancellationToken.None);

        // Wait for the error to surface in the logger.
        await WaitUntilAsync(() => logger.Entries.Any(e => e.Exception == boom));

        await svc.StopAsync(CancellationToken.None);
        svc.Dispose();

        var errorEntry = Assert.Single(logger.Entries, e => e.Exception == boom);
        Assert.Equal(LogLevel.Error, errorEntry.Level);
        Assert.Equal(1, maintenance.EnsureCallCount);
    }
}
|
||||
@@ -0,0 +1,182 @@
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ScadaLink.ConfigurationDatabase.Maintenance;
|
||||
using ScadaLink.ConfigurationDatabase.Tests.Migrations;
|
||||
using Xunit;
|
||||
|
||||
namespace ScadaLink.ConfigurationDatabase.Tests.Maintenance;
|
||||
|
||||
/// <summary>
/// Integration tests (Bundle D, #23 M6-T5) for
/// <see cref="AuditLogPartitionMaintenance"/>. They share the
/// <see cref="MsSqlMigrationFixture"/> used by the AuditLog migration and
/// repository tests, so the ALTER PARTITION FUNCTION DDL is exercised against
/// the real, seeded <c>pf_AuditLog_Month</c>.
/// </summary>
/// <remarks>
/// The migration seeds one boundary per month for 2026 and 2027 (Jan 2026
/// through Dec 2027). Because the fixture database is shared, earlier tests
/// in this class may already have appended boundaries; every test therefore
/// derives its lookahead from the max boundary it observes at its own start
/// instead of from a fixed target date. Tests in the class run one after
/// another thanks to xunit's per-class collection serialisation.
/// </remarks>
public class AuditLogPartitionMaintenanceTests : IClassFixture<MsSqlMigrationFixture>
{
    private readonly MsSqlMigrationFixture _fixture;

    public AuditLogPartitionMaintenanceTests(MsSqlMigrationFixture fixture) => _fixture = fixture;

    /// <summary>Opens a fresh context against the fixture's SQL Server database.</summary>
    private ScadaLinkDbContext CreateContext()
    {
        var builder = new DbContextOptionsBuilder<ScadaLinkDbContext>();
        builder.UseSqlServer(_fixture.ConnectionString);
        return new ScadaLinkDbContext(builder.Options);
    }

    /// <summary>Builds the system under test over <paramref name="ctx"/> with a no-op logger.</summary>
    private AuditLogPartitionMaintenance NewMaintenance(ScadaLinkDbContext ctx)
    {
        return new AuditLogPartitionMaintenance(ctx, NullLogger<AuditLogPartitionMaintenance>.Instance);
    }

    /// <summary>
    /// Returns a lookahead (in months) intended to stay inside the range that
    /// is already covered up to <paramref name="max"/>, so that no new SPLIT
    /// is required.
    /// </summary>
    /// <remarks>
    /// The value is the whole-month distance from the current UTC month to
    /// <paramref name="max"/>, reduced by a one-month safety margin and
    /// clamped to a minimum of 1.
    /// </remarks>
    private static int LookaheadInsideExistingRange(DateTime max)
    {
        var today = DateTime.UtcNow;

        // Compare absolute month indices (year * 12 + month) of both dates.
        var monthGap = ((max.Year * 12) + max.Month) - ((today.Year * 12) + today.Month);

        // One month of margin keeps the horizon off the boundary edge case.
        var withMargin = monthGap - 1;
        return withMargin < 1 ? 1 : withMargin;
    }

    /// <summary>
    /// Returns the lookahead (in months) meant to make the maintenance add
    /// exactly <paramref name="extraBoundaries"/> boundaries beyond
    /// <paramref name="max"/>.
    /// </summary>
    /// <remarks>
    /// The result is the month distance from the first day of the month
    /// following the current UTC instant to <paramref name="max"/>, plus
    /// <paramref name="extraBoundaries"/>.
    /// NOTE(review): this assumes EnsureLookaheadAsync measures its horizon
    /// from that same first-of-next-month reference point and issues one
    /// boundary per month strictly after the current max up to and including
    /// the horizon — confirm against the implementation.
    /// </remarks>
    private static int LookaheadForExtraBoundaries(DateTime max, int extraBoundaries)
    {
        var horizonBase = FirstOfNextMonth(DateTime.UtcNow);
        var yearPart = (max.Year - horizonBase.Year) * 12;
        var monthPart = max.Month - horizonBase.Month;
        return yearPart + monthPart + extraBoundaries;
    }

    /// <summary>
    /// Returns midnight UTC on the first day of the month after the month
    /// containing <paramref name="instant"/>.
    /// </summary>
    private static DateTime FirstOfNextMonth(DateTime instant) =>
        new DateTime(instant.Year, instant.Month, 1, 0, 0, 0, DateTimeKind.Utc).AddMonths(1);

    /// <summary>
    /// A lookahead that is already satisfied by existing boundaries must add
    /// nothing and leave the max boundary untouched.
    /// </summary>
    [SkippableFact]
    public async Task EnsureLookahead_AlreadyHasFutureRange_NoSplit_ReturnsEmpty()
    {
        Skip.IfNot(_fixture.Available, _fixture.SkipReason);

        await using var ctx = CreateContext();
        var maintenance = NewMaintenance(ctx);

        var max = await maintenance.GetMaxBoundaryAsync();
        Assert.NotNull(max);

        // Choose a lookahead whose horizon stays within the covered range,
        // so the call is expected to issue no SPLIT at all.
        var lookahead = LookaheadInsideExistingRange(max.Value);

        var added = await maintenance.EnsureLookaheadAsync(lookahead);

        Assert.Empty(added);

        // The no-op call must not have moved the max boundary.
        var maxAfter = await maintenance.GetMaxBoundaryAsync();
        Assert.Equal(max, maxAfter);
    }

    /// <summary>
    /// A lookahead reaching one month past the current max must add exactly
    /// that one boundary.
    /// </summary>
    [SkippableFact]
    public async Task EnsureLookahead_NeedsOneMoreBoundary_Splits_Returns1Boundary()
    {
        Skip.IfNot(_fixture.Available, _fixture.SkipReason);

        await using var ctx = CreateContext();
        var maintenance = NewMaintenance(ctx);

        var maxBefore = await maintenance.GetMaxBoundaryAsync();
        Assert.NotNull(maxBefore);

        var expectedAdded = maxBefore.Value.AddMonths(1);
        var lookahead = LookaheadForExtraBoundaries(maxBefore.Value, extraBoundaries: 1);

        var added = await maintenance.EnsureLookaheadAsync(lookahead);

        Assert.Single(added);
        Assert.Equal(expectedAdded, added[0]);

        var maxAfter = await maintenance.GetMaxBoundaryAsync();
        Assert.Equal(expectedAdded, maxAfter);
    }

    /// <summary>
    /// A lookahead reaching three months past the current max must add three
    /// consecutive monthly boundaries, in ascending order.
    /// </summary>
    [SkippableFact]
    public async Task EnsureLookahead_NeedsThreeBoundaries_Splits_Returns3Boundaries()
    {
        Skip.IfNot(_fixture.Available, _fixture.SkipReason);

        await using var ctx = CreateContext();
        var maintenance = NewMaintenance(ctx);

        var maxBefore = await maintenance.GetMaxBoundaryAsync();
        Assert.NotNull(maxBefore);
        var start = maxBefore.Value;

        var lookahead = LookaheadForExtraBoundaries(start, extraBoundaries: 3);

        var added = await maintenance.EnsureLookaheadAsync(lookahead);

        Assert.Equal(3, added.Count);
        for (var offset = 1; offset <= 3; offset++)
        {
            Assert.Equal(start.AddMonths(offset), added[offset - 1]);
        }

        var maxAfter = await maintenance.GetMaxBoundaryAsync();
        Assert.Equal(start.AddMonths(3), maxAfter);
    }

    /// <summary>
    /// Repeating a call with the same lookahead after its boundary has been
    /// added must be a no-op: no exception and no duplicate SPLIT.
    /// </summary>
    [SkippableFact]
    public async Task EnsureLookahead_BoundaryAlreadyExists_NoError_Idempotent()
    {
        Skip.IfNot(_fixture.Available, _fixture.SkipReason);

        await using var ctx1 = CreateContext();
        var m1 = NewMaintenance(ctx1);

        var maxStart = await m1.GetMaxBoundaryAsync();
        Assert.NotNull(maxStart);

        // Round one: extend the partition function by a single boundary.
        var lookahead = LookaheadForExtraBoundaries(maxStart.Value, extraBoundaries: 1);
        var firstAdded = await m1.EnsureLookaheadAsync(lookahead);
        Assert.Single(firstAdded);

        // Round two, on a fresh context: the boundary now exists in
        // pf_AuditLog_Month, so the identical lookahead has nothing to do.
        await using var ctx2 = CreateContext();
        var m2 = NewMaintenance(ctx2);
        var secondAdded = await m2.EnsureLookaheadAsync(lookahead);

        Assert.Empty(secondAdded);

        // The max boundary is still the one added in round one.
        var maxAfter = await m2.GetMaxBoundaryAsync();
        Assert.Equal(firstAdded[0], maxAfter);
    }
}
|
||||
Reference in New Issue
Block a user