From 75b060e0a8449a85c7f0d1f8b9f4aa9470eeb570 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Wed, 20 May 2026 18:51:43 -0400 Subject: [PATCH] feat(auditlog): AuditLogPartitionMaintenanceService monthly roll-forward (#23 M6) --- .../AuditLogPartitionMaintenanceOptions.cs | 37 +++ .../AuditLogPartitionMaintenanceService.cs | 145 ++++++++++++ .../ServiceCollectionExtensions.cs | 37 +++ .../Interfaces/IPartitionMaintenance.cs | 48 ++++ .../AuditLogPartitionMaintenance.cs | 218 ++++++++++++++++++ .../ServiceCollectionExtensions.cs | 9 + src/ScadaLink.Host/Program.cs | 4 + ...uditLogPartitionMaintenanceServiceTests.cs | 154 +++++++++++++ .../AuditLogPartitionMaintenanceTests.cs | 182 +++++++++++++++ 9 files changed, 834 insertions(+) create mode 100644 src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceOptions.cs create mode 100644 src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceService.cs create mode 100644 src/ScadaLink.Commons/Interfaces/IPartitionMaintenance.cs create mode 100644 src/ScadaLink.ConfigurationDatabase/Maintenance/AuditLogPartitionMaintenance.cs create mode 100644 tests/ScadaLink.AuditLog.Tests/Central/AuditLogPartitionMaintenanceServiceTests.cs create mode 100644 tests/ScadaLink.ConfigurationDatabase.Tests/Maintenance/AuditLogPartitionMaintenanceTests.cs diff --git a/src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceOptions.cs b/src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceOptions.cs new file mode 100644 index 0000000..317e6e7 --- /dev/null +++ b/src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceOptions.cs @@ -0,0 +1,37 @@ +namespace ScadaLink.AuditLog.Central; + +/// +/// Tuning knobs for the central +/// hosted service (M6-T5). +/// Defaults: once every 24 hours, keep at least one future monthly +/// boundary ahead of . 
+/// +/// +/// +/// The hosted service drives a daily roll-forward of +/// pf_AuditLog_Month: each tick reads the current max boundary and +/// SPLITs new monthly boundaries until at least +/// future months are covered. The 1-month +/// default is intentionally conservative — anything less risks an +/// end-of-month race where inserts land in the unbounded tail partition; +/// anything more wastes nothing but represents premature commitment. +/// +/// +/// The 24-hour cadence is the cheapest interval that still guarantees +/// at-most-one missed boundary in steady state (even after a hard failover the +/// hosted service can recover on its very next tick). Lowering this below +/// an hour would generate more metadata churn than it saves. +/// +/// +/// +public sealed class AuditLogPartitionMaintenanceOptions +{ + /// Period of the maintenance tick in seconds (default 86 400 = 24 h). + public int IntervalSeconds { get; set; } = 86_400; + + /// + /// Minimum number of future months that pf_AuditLog_Month must + /// cover after each tick. Default 1 — i.e. as of mid-May the partition + /// for the next full month (June) must already be present. + /// + public int LookaheadMonths { get; set; } = 1; +} diff --git a/src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceService.cs b/src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceService.cs new file mode 100644 index 0000000..2aa02f8 --- /dev/null +++ b/src/ScadaLink.AuditLog/Central/AuditLogPartitionMaintenanceService.cs @@ -0,0 +1,145 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using ScadaLink.Commons.Interfaces; + +namespace ScadaLink.AuditLog.Central; + +/// +/// Central (M6-T5, Bundle D) that rolls +/// pf_AuditLog_Month forward once a day. 
Each tick opens a fresh DI +/// scope, resolves , and calls +/// to SPLIT any +/// missing future boundaries — the partition function must always cover at +/// least +/// future months, otherwise inserts past the highest boundary accumulate in +/// a single unbounded tail partition that SwitchOutPartitionAsync +/// cannot purge cleanly. +/// +/// +/// +/// Why a hosted service, not an actor. Bundle C's +/// sits inside the central singleton +/// because it needs supervised lifecycle alongside the rest of the +/// reconciliation / ingest pipeline. Roll-forward is genuinely a once-a-day +/// chore with no cross-actor coordination, so we use the much simpler +/// hosted-service pattern: Task.Run on start, Task.Delay +/// between ticks, cancellation on stop. Reusing +/// from the central node-only DI graph +/// keeps the contract testable without any actor framework involvement. +/// +/// +/// Failure containment. The tick body wraps the maintenance call in +/// a try/catch so a transient SQL Server error never tears down the hosted +/// service — the next tick simply retries. The exception is logged with +/// the original stack trace at Error level; ops surfaces (M6 Bundle +/// E's central health collector) can subscribe to the logger to alert on +/// repeated failures. +/// +/// +/// Startup ordering. A first tick fires immediately at +/// so a fresh deployment doesn't need to wait +/// for +/// the partition function to come up to spec. This is also what the brief +/// asks for ("Run once on startup"). +/// +/// +/// DI scope per tick. is scoped +/// (alongside the rest of the EF repositories) because the implementation +/// reuses the per-scope ScadaLinkDbContext. A hosted service is a +/// singleton, so it must open and dispose a scope around each tick — the +/// same pattern uses. 
+/// +/// +public sealed class AuditLogPartitionMaintenanceService : IHostedService, IDisposable +{ + private readonly IServiceScopeFactory _scopeFactory; + private readonly IOptions _options; + private readonly ILogger _logger; + private CancellationTokenSource? _cts; + private Task? _loop; + + public AuditLogPartitionMaintenanceService( + IServiceScopeFactory scopeFactory, + IOptions options, + ILogger logger) + { + _scopeFactory = scopeFactory ?? throw new ArgumentNullException(nameof(scopeFactory)); + _options = options ?? throw new ArgumentNullException(nameof(options)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + /// + public Task StartAsync(CancellationToken ct) + { + // Linked CTS lets StopAsync's cancellation AND the host's shutdown + // token both terminate the loop; either side firing aborts the + // pending Task.Delay. + _cts = CancellationTokenSource.CreateLinkedTokenSource(ct); + _loop = Task.Run(() => RunLoopAsync(_cts.Token)); + return Task.CompletedTask; + } + + private async Task RunLoopAsync(CancellationToken ct) + { + // Run once on startup so a fresh deployment isn't gated on the + // IntervalSeconds initial wait — the brief calls this out explicitly. 
+        await SafeMaintainAsync(ct).ConfigureAwait(false);
+
+        // Task.Delay rejects negative delays and delays above its millisecond
+        // ceiling with ArgumentOutOfRangeException, and a zero delay would
+        // spin the loop. Neither is an OperationCanceledException, so an
+        // unguarded bad IntervalSeconds would fault this task unobserved and
+        // silently end all future maintenance. Normalise the value instead.
+        const int DefaultIntervalSeconds = 86_400;
+        const int MaxIntervalSeconds = int.MaxValue / 1000;
+
+        while (!ct.IsCancellationRequested)
+        {
+            var configuredSeconds = _options.Value.IntervalSeconds;
+            var effectiveSeconds = configuredSeconds <= 0
+                ? DefaultIntervalSeconds
+                : Math.Min(configuredSeconds, MaxIntervalSeconds);
+            if (effectiveSeconds != configuredSeconds)
+            {
+                _logger.LogWarning(
+                    "AuditLogPartitionMaintenance IntervalSeconds {Configured} is out of range; using {Effective} instead",
+                    configuredSeconds,
+                    effectiveSeconds);
+            }
+
+            try
+            {
+                await Task.Delay(TimeSpan.FromSeconds(effectiveSeconds), ct)
+                    .ConfigureAwait(false);
+            }
+            catch (OperationCanceledException)
+            {
+                break;
+            }
+
+            await SafeMaintainAsync(ct).ConfigureAwait(false);
+        }
+    }
+
+    /// <summary>
+    /// Runs one maintenance tick inside its own DI scope and contains every
+    /// failure so the loop survives to the next tick. Cancellation triggered
+    /// by <paramref name="ct"/> is treated as shutdown, not as a failed tick.
+    /// </summary>
+    /// <param name="ct">Token that is cancelled when the service stops.</param>
+    private async Task SafeMaintainAsync(CancellationToken ct)
+    {
+        try
+        {
+            await using var scope = _scopeFactory.CreateAsyncScope();
+            var maintenance = scope.ServiceProvider.GetRequiredService<IPartitionMaintenance>();
+            var added = await maintenance
+                .EnsureLookaheadAsync(_options.Value.LookaheadMonths, ct)
+                .ConfigureAwait(false);
+            if (added.Count > 0)
+            {
+                // Invariant culture: the custom format would otherwise render
+                // with the current culture's calendar.
+                _logger.LogInformation(
+                    "AuditLogPartitionMaintenance added {Count} boundaries: {Boundaries}",
+                    added.Count,
+                    string.Join(
+                        ", ",
+                        added.Select(b => b.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture))));
+            }
+        }
+        catch (OperationCanceledException) when (ct.IsCancellationRequested)
+        {
+            // Shutdown interrupted the tick. This is the normal stop path, so
+            // it must not be reported as an Error-level tick failure.
+        }
+        catch (Exception ex)
+        {
+            // Catch-all is deliberate: the hosted service must survive every
+            // class of tick failure (transient SQL, DI resolution, etc.) so
+            // the next tick gets a chance. The brief's contract is
+            // "exception logged, not propagated".
+            _logger.LogError(ex, "AuditLogPartitionMaintenance tick failed");
+        }
+    }
+
+    /// <inheritdoc />
+    public Task StopAsync(CancellationToken ct)
+    {
+        _cts?.Cancel();
+        return _loop ??
Task.CompletedTask; + } + + /// + public void Dispose() + { + _cts?.Dispose(); + } +} diff --git a/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs b/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs index cf04abd..2216eb2 100644 --- a/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs +++ b/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs @@ -1,6 +1,7 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection.Extensions; +using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using ScadaLink.AuditLog.Central; @@ -43,6 +44,9 @@ public static class ServiceCollectionExtensions /// Configuration section bound to . public const string SiteTelemetrySectionName = "AuditLog:SiteTelemetry"; + /// Configuration section bound to . + public const string PartitionMaintenanceSectionName = "AuditLog:PartitionMaintenance"; + /// /// Registers the Audit Log (#23) component services: options, the site /// SQLite writer chain (primary + ring fallback + failure-counter sink), @@ -216,4 +220,37 @@ public static class ServiceCollectionExtensions ServiceDescriptor.Singleton()); return services; } + + /// + /// Audit Log (#23) M6-T5 Bundle D — central-only registration for the + /// hosted service plus + /// its binding. Must be + /// called from the Central role's composition root (not from a site + /// composition root); the underlying IPartitionMaintenance + /// implementation is registered by AddConfigurationDatabase and + /// only exists on the central node. + /// + /// + /// + /// Separated from because AddAuditLog is + /// also invoked from site composition roots — silently starting a + /// hosted service that resolves an unregistered dependency on a site + /// would fail every tick. 
Keeping the central-only registration in its + /// own helper preserves the "every Add* call is safe to issue + /// from any composition root" invariant. + /// + /// + public static IServiceCollection AddAuditLogCentralMaintenance( + this IServiceCollection services, + IConfiguration config) + { + ArgumentNullException.ThrowIfNull(services); + ArgumentNullException.ThrowIfNull(config); + + services.AddOptions() + .Bind(config.GetSection(PartitionMaintenanceSectionName)); + services.AddHostedService(); + + return services; + } } diff --git a/src/ScadaLink.Commons/Interfaces/IPartitionMaintenance.cs b/src/ScadaLink.Commons/Interfaces/IPartitionMaintenance.cs new file mode 100644 index 0000000..b8b3ec5 --- /dev/null +++ b/src/ScadaLink.Commons/Interfaces/IPartitionMaintenance.cs @@ -0,0 +1,48 @@ +namespace ScadaLink.Commons.Interfaces; + +/// +/// Abstraction over the central AuditLog partition-function roll-forward +/// operation. M6-T5 introduces a daily-cadence hosted service +/// (AuditLogPartitionMaintenanceService) that calls +/// to make sure +/// pf_AuditLog_Month always has at least LookaheadMonths of +/// future boundaries available — otherwise inserts past the highest +/// boundary land in a single ever-growing tail partition that +/// SwitchOutPartitionAsync cannot purge cleanly. +/// +/// +/// +/// The interface lives in ScadaLink.Commons so the central hosted +/// service in ScadaLink.AuditLog can depend on it without taking a +/// reference on ScadaLink.ConfigurationDatabase; the EF-based +/// implementation ships in +/// ScadaLink.ConfigurationDatabase.Maintenance.AuditLogPartitionMaintenance +/// and is registered by AddConfigurationDatabase. +/// +/// +/// Both methods read sys.partition_range_values / mutate +/// pf_AuditLog_Month via raw SQL — there is no EF model for a +/// partition function. The interface deliberately exposes only the two +/// operations the hosted service needs; it is not a general partition-DDL +/// surface. 
+/// +/// +public interface IPartitionMaintenance +{ + /// + /// Splits new monthly boundaries on pf_AuditLog_Month so the + /// function covers at least future + /// months relative to . Idempotent — a + /// boundary that already exists is skipped rather than re-issued. + /// Returns the boundaries actually added, in chronological order. + /// + Task> EnsureLookaheadAsync(int lookaheadMonths, CancellationToken ct = default); + + /// + /// Reads the current maximum boundary value from + /// sys.partition_range_values for pf_AuditLog_Month. + /// Returns null when the partition function does not exist or + /// has no boundaries. + /// + Task GetMaxBoundaryAsync(CancellationToken ct = default); +} diff --git a/src/ScadaLink.ConfigurationDatabase/Maintenance/AuditLogPartitionMaintenance.cs b/src/ScadaLink.ConfigurationDatabase/Maintenance/AuditLogPartitionMaintenance.cs new file mode 100644 index 0000000..cdbd54b --- /dev/null +++ b/src/ScadaLink.ConfigurationDatabase/Maintenance/AuditLogPartitionMaintenance.cs @@ -0,0 +1,218 @@ +using System.Globalization; +using Microsoft.Data.SqlClient; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; +using ScadaLink.Commons.Interfaces; + +namespace ScadaLink.ConfigurationDatabase.Maintenance; + +/// +/// EF/SQL-Server implementation of that +/// rolls forward pf_AuditLog_Month by issuing +/// ALTER PARTITION FUNCTION … SPLIT RANGE for each missing future +/// monthly boundary. +/// +/// +/// +/// The class is scoped (registered alongside the other repositories in +/// AddConfigurationDatabase) because it shares +/// — the hosted service opens a per-tick DI scope, resolves a fresh instance, +/// and lets the scope's DbContext dispose with it. The class itself +/// holds no state between calls. +/// +/// +/// Idempotency model. 
Each tick reads the current max boundary from +/// sys.partition_range_values and only issues SPLIT RANGE for +/// boundaries that strictly follow it — a boundary already covered is never +/// re-issued, so the "boundary already exists" failure (SQL Server msg 7708 +/// / 7711) is avoided by construction rather than caught. The pre-check is +/// cheaper than the alternative TRY/CATCH around every SPLIT call and also +/// keeps the returned added list semantically precise. +/// +/// +/// Why "first of next month". The migration seeds boundaries on the +/// first-of-month at midnight UTC; we preserve that convention so the +/// resulting partition layout is uniform. +/// rounds an arbitrary timestamp up to the next first-of-month boundary +/// (e.g. 2026-05-20 → 2026-06-01), and +/// walks one month at a time from there. +/// +/// +/// Permissions. The migration's scadalink_audit_purger role +/// already carries ALTER ON SCHEMA::dbo, which is sufficient for +/// ALTER PARTITION FUNCTION SPLIT RANGE. No additional grant is +/// required. +/// +/// +public sealed class AuditLogPartitionMaintenance : IPartitionMaintenance +{ + private const string PartitionFunctionName = "pf_AuditLog_Month"; + private const string PartitionSchemeName = "ps_AuditLog_Month"; + private const string TargetFileGroup = "PRIMARY"; + + private readonly ScadaLinkDbContext _context; + private readonly ILogger _logger; + + public AuditLogPartitionMaintenance( + ScadaLinkDbContext context, + ILogger? logger = null) + { + _context = context ?? throw new ArgumentNullException(nameof(context)); + _logger = logger ?? NullLogger.Instance; + } + + /// + public async Task GetMaxBoundaryAsync(CancellationToken ct = default) + { + // CAST the sql_variant `value` column to datetime2(7) — every boundary in + // pf_AuditLog_Month is declared as datetime2(7) by the migration, so the + // cast never loses precision. 
+        const string sql = @"
+SELECT MAX(CAST(rv.value AS datetime2(7)))
+FROM sys.partition_range_values rv
+INNER JOIN sys.partition_functions pf ON rv.function_id = pf.function_id
+WHERE pf.name = 'pf_AuditLog_Month';";
+
+        var conn = _context.Database.GetDbConnection();
+        var openedHere = false;
+        if (conn.State != System.Data.ConnectionState.Open)
+        {
+            await conn.OpenAsync(ct).ConfigureAwait(false);
+            openedHere = true;
+        }
+
+        try
+        {
+            await using var cmd = conn.CreateCommand();
+            cmd.CommandText = sql;
+            var raw = await cmd.ExecuteScalarAsync(ct).ConfigureAwait(false);
+            if (raw is null || raw is DBNull)
+            {
+                return null;
+            }
+
+            // ExecuteScalarAsync materialises datetime2 as DateTime with
+            // DateTimeKind.Unspecified; the boundary values are stored at
+            // UTC midnight by convention (migration seeds with 'T00:00:00'),
+            // so we re-tag the kind so downstream comparisons against
+            // DateTime.UtcNow stay in the same kind space.
+            var dt = (DateTime)raw;
+            return DateTime.SpecifyKind(dt, DateTimeKind.Utc);
+        }
+        finally
+        {
+            if (openedHere)
+            {
+                await conn.CloseAsync().ConfigureAwait(false);
+            }
+        }
+    }
+
+    /// <inheritdoc />
+    public async Task<IReadOnlyList<DateTime>> EnsureLookaheadAsync(
+        int lookaheadMonths,
+        CancellationToken ct = default)
+    {
+        if (lookaheadMonths < 1)
+        {
+            throw new ArgumentOutOfRangeException(
+                nameof(lookaheadMonths),
+                lookaheadMonths,
+                "Lookahead must be at least one month — the partition function would otherwise be allowed to fall behind 'now'.");
+        }
+
+        var nowUtc = DateTime.UtcNow;
+        // Horizon: the FIRST-OF-MONTH that must be the strictly-greater-than
+        // max boundary after this call. Example: nowUtc = 2026-05-20 and
+        // lookaheadMonths = 1 → horizon = 2026-07-01 (so the partition for
+        // June 2026 is already in place by mid-May).
+        var horizon = NormalizeToFirstOfMonth(nowUtc).AddMonths(lookaheadMonths);
+
+        var max = await GetMaxBoundaryAsync(ct).ConfigureAwait(false);
+        if (max is null)
+        {
+            // No partition function (e.g. migrations not applied) — nothing
+            // we can safely SPLIT against. Log and return; the absence is a
+            // genuine misconfiguration that other parts of the system will
+            // surface louder than we could here.
+            _logger.LogWarning(
+                "EnsureLookaheadAsync: partition function {PartitionFunctionName} not found; skipping.",
+                PartitionFunctionName);
+            return Array.Empty<DateTime>();
+        }
+
+        // Start splitting from the FIRST month strictly after max — if max is
+        // already first-of-month (the common case), that's max + 1 month;
+        // otherwise NormalizeToFirstOfMonth rounds up.
+        var next = NormalizeToFirstOfMonth(max.Value.AddDays(1));
+
+        // Edge case: max already past horizon → no work to do.
+        if (next > horizon)
+        {
+            return Array.Empty<DateTime>();
+        }
+
+        var added = new List<DateTime>();
+        while (next <= horizon)
+        {
+            // Boundary literal must be a deterministic, culture-invariant ISO
+            // string — SQL Server parses it as datetime2 via implicit conversion.
+            // SPLIT RANGE does NOT accept @-parameters; the value is part of the
+            // DDL statement, so we render it directly. The format is
+            // guaranteed (yyyy-MM-ddTHH:mm:ss.fffffff) so there is no injection
+            // surface.
+            var literal = next.ToString("yyyy-MM-ddTHH:mm:ss.fffffff", CultureInfo.InvariantCulture);
+
+            // Before every SPLIT we must (re-)set the NEXT USED filegroup on
+            // ps_AuditLog_Month. Even though the scheme was created with
+            // `ALL TO ([PRIMARY])` (which auto-populates NEXT USED once), SQL
+            // Server consumes that hint on the FIRST split — subsequent splits
+            // raise msg 7707 ("partition scheme … does not have any next used
+            // filegroup") unless NEXT USED is explicitly re-set. Re-issuing it
+            // before every split is idempotent and keeps the loop simple.
+            var sql = $@"
+ALTER PARTITION SCHEME {PartitionSchemeName} NEXT USED [{TargetFileGroup}];
+ALTER PARTITION FUNCTION {PartitionFunctionName}() SPLIT RANGE ('{literal}');";
+
+            try
+            {
+                await _context.Database.ExecuteSqlRawAsync(sql, ct).ConfigureAwait(false);
+                added.Add(next);
+            }
+            catch (SqlException ex)
+            {
+                // A failed SPLIT is only harmless when the boundary exists
+                // anyway (another process raced us). Re-read the max boundary
+                // to tell that apart from a real failure. Continuing after a
+                // real failure would split LATER months, push the max past
+                // the missing one, and — because each pass only looks after
+                // the max — leave that month permanently unsplit.
+                var currentMax = await GetMaxBoundaryAsync(ct).ConfigureAwait(false);
+                if (currentMax is null || currentMax.Value < next)
+                {
+                    _logger.LogWarning(
+                        ex,
+                        "EnsureLookaheadAsync: SPLIT RANGE for boundary {Boundary:o} failed; stopping this pass so no later boundary is split ahead of it.",
+                        next);
+                    break;
+                }
+
+                // Boundary is present: the desired end state is satisfied, so
+                // Warning rather than Error, and carry on with the next month.
+                _logger.LogWarning(
+                    ex,
+                    "EnsureLookaheadAsync: SPLIT RANGE for boundary {Boundary:o} failed; continuing.",
+                    next);
+            }
+
+            next = NextMonthBoundary(next);
+        }
+
+        return added;
+    }
+
+    /// <summary>
+    /// Rounds an arbitrary instant UP to the next first-of-month UTC. Inputs
+    /// that ARE already a first-of-month at midnight are returned as-is so
+    /// callers can compose this freely without double-incrementing.
+    /// </summary>
+    private static DateTime NormalizeToFirstOfMonth(DateTime instant)
+    {
+        var utc = instant.Kind == DateTimeKind.Utc
+            ? instant
+            : DateTime.SpecifyKind(instant, DateTimeKind.Utc);
+
+        var firstOfThisMonth = new DateTime(utc.Year, utc.Month, 1, 0, 0, 0, DateTimeKind.Utc);
+        return utc == firstOfThisMonth ?
firstOfThisMonth : firstOfThisMonth.AddMonths(1); + } + + private static DateTime NextMonthBoundary(DateTime boundary) => + boundary.AddMonths(1); +} diff --git a/src/ScadaLink.ConfigurationDatabase/ServiceCollectionExtensions.cs b/src/ScadaLink.ConfigurationDatabase/ServiceCollectionExtensions.cs index bf79b29..d926f1e 100644 --- a/src/ScadaLink.ConfigurationDatabase/ServiceCollectionExtensions.cs +++ b/src/ScadaLink.ConfigurationDatabase/ServiceCollectionExtensions.cs @@ -1,8 +1,10 @@ using Microsoft.AspNetCore.DataProtection; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.DependencyInjection; +using ScadaLink.Commons.Interfaces; using ScadaLink.Commons.Interfaces.Repositories; using ScadaLink.Commons.Interfaces.Services; +using ScadaLink.ConfigurationDatabase.Maintenance; using ScadaLink.ConfigurationDatabase.Repositories; using ScadaLink.ConfigurationDatabase.Services; @@ -52,6 +54,13 @@ public static class ServiceCollectionExtensions services.AddScoped(); services.AddScoped(); + // #23 M6 Bundle D: IPartitionMaintenance drives the daily roll-forward + // of pf_AuditLog_Month from the central AuditLogPartitionMaintenanceService + // hosted service. Scoped because the implementation reuses the per-scope + // ScadaLinkDbContext for raw-SQL execution; the hosted service opens a + // fresh scope on each tick (mirrors AuditLogPurgeActor / AuditLogIngestActor). + services.AddScoped(); + services.AddDataProtection() .PersistKeysToDbContext(); diff --git a/src/ScadaLink.Host/Program.cs b/src/ScadaLink.Host/Program.cs index b1119d1..3632824 100644 --- a/src/ScadaLink.Host/Program.cs +++ b/src/ScadaLink.Host/Program.cs @@ -84,6 +84,10 @@ try // IAuditLogRepository. The site writer chain is still registered (lazy // singletons) but is never resolved on a central node. builder.Services.AddAuditLog(builder.Configuration); + // #23 M6-T5 Bundle D — central-only hosted service that rolls + // pf_AuditLog_Month forward monthly. 
Depends on IPartitionMaintenance + // (registered below by AddConfigurationDatabase). + builder.Services.AddAuditLogCentralMaintenance(builder.Configuration); // Site Call Audit (#22) — central node owns the SiteCallAuditActor // singleton (M3 Bundle F). The extension itself currently registers // nothing — actor Props are constructed inline in AkkaHostedService — diff --git a/tests/ScadaLink.AuditLog.Tests/Central/AuditLogPartitionMaintenanceServiceTests.cs b/tests/ScadaLink.AuditLog.Tests/Central/AuditLogPartitionMaintenanceServiceTests.cs new file mode 100644 index 0000000..4e65207 --- /dev/null +++ b/tests/ScadaLink.AuditLog.Tests/Central/AuditLogPartitionMaintenanceServiceTests.cs @@ -0,0 +1,154 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using ScadaLink.AuditLog.Central; +using ScadaLink.Commons.Interfaces; +using Xunit; + +namespace ScadaLink.AuditLog.Tests.Central; + +/// +/// Bundle D (#23 M6-T5) tests for . +/// All tests use an in-memory stub — +/// the real EF/MSSQL implementation is exercised by the +/// AuditLogPartitionMaintenanceTests integration suite in +/// ScadaLink.ConfigurationDatabase.Tests. This file is purely +/// about the hosted service's policy decisions (start/stop, exception +/// containment). +/// +public class AuditLogPartitionMaintenanceServiceTests +{ + /// + /// Recording stub — counts EnsureLookaheadAsync invocations and lets the + /// test inject an exception per invocation to drive the catch-all path. + /// + private sealed class RecordingMaintenance : IPartitionMaintenance + { + public int EnsureCallCount; + public Exception? 
ThrowOnce;
+
+        public Task<IReadOnlyList<DateTime>> EnsureLookaheadAsync(int lookaheadMonths, CancellationToken ct = default)
+        {
+            Interlocked.Increment(ref EnsureCallCount);
+            if (ThrowOnce is { } ex)
+            {
+                ThrowOnce = null;
+                throw ex;
+            }
+            return Task.FromResult<IReadOnlyList<DateTime>>(Array.Empty<DateTime>());
+        }
+
+        public Task<DateTime?> GetMaxBoundaryAsync(CancellationToken ct = default) =>
+            Task.FromResult<DateTime?>(DateTime.UtcNow.AddMonths(6));
+    }
+
+    /// <summary>
+    /// Captures logged exceptions so the catch-all assertion can prove
+    /// the exception was actually logged (not silently swallowed) and was
+    /// the exact instance the stub threw.
+    /// </summary>
+    /// <remarks>
+    /// The service logs from its background loop while the test thread polls
+    /// <see cref="Entries"/>, so the store must tolerate concurrent append and
+    /// enumeration. A plain list can throw "Collection was modified" in that
+    /// window; a concurrent queue enumerates over a snapshot instead.
+    /// </remarks>
+    private sealed class CapturingLogger : ILogger<AuditLogPartitionMaintenanceService>
+    {
+        public System.Collections.Concurrent.ConcurrentQueue<(LogLevel Level, Exception? Exception, string Message)> Entries { get; } = new();
+
+        public IDisposable? BeginScope<TState>(TState state) where TState : notnull => null;
+
+        public bool IsEnabled(LogLevel logLevel) => true;
+
+        public void Log<TState>(
+            LogLevel logLevel,
+            EventId eventId,
+            TState state,
+            Exception? exception,
+            Func<TState, Exception?, string> formatter)
+        {
+            Entries.Enqueue((logLevel, exception, formatter(state, exception)));
+        }
+    }
+
+    private static IServiceProvider BuildProvider(IPartitionMaintenance maintenance)
+    {
+        var services = new ServiceCollection();
+        // IPartitionMaintenance is registered as scoped by AddConfigurationDatabase;
+        // we mirror that here so the hosted service's CreateAsyncScope +
+        // GetRequiredService resolves the stub the test injected.
+        services.AddScoped(_ => maintenance);
+        return services.BuildServiceProvider();
+    }
+
+    [Fact]
+    public async Task StartStop_NoExceptions()
+    {
+        // Long interval so only the eager startup tick fires inside the test
+        // window — keeps assertions deterministic without relying on
+        // multiple cadence loops.
+ var opts = Options.Create(new AuditLogPartitionMaintenanceOptions + { + IntervalSeconds = 60, + LookaheadMonths = 1, + }); + var maintenance = new RecordingMaintenance(); + var sp = BuildProvider(maintenance); + + var svc = new AuditLogPartitionMaintenanceService( + sp.GetRequiredService(), + opts, + NullLogger.Instance); + + await svc.StartAsync(CancellationToken.None); + + // Spin briefly until the startup tick has fired — the loop's first + // SafeMaintainAsync runs on a background Task.Run continuation, so + // we can't synchronously rely on its completion. + var deadline = DateTime.UtcNow.AddSeconds(3); + while (Volatile.Read(ref maintenance.EnsureCallCount) < 1 && DateTime.UtcNow < deadline) + { + await Task.Delay(20); + } + + await svc.StopAsync(CancellationToken.None); + svc.Dispose(); + + Assert.True(maintenance.EnsureCallCount >= 1, $"expected at least 1 ensure call, got {maintenance.EnsureCallCount}"); + } + + [Fact] + public async Task SafeMaintain_ExceptionLogged_NotPropagated() + { + var opts = Options.Create(new AuditLogPartitionMaintenanceOptions + { + IntervalSeconds = 60, + LookaheadMonths = 1, + }); + // The injected exception fires on the FIRST EnsureLookaheadAsync call + // (the startup tick) — the hosted service must contain it and + // continue running. + var boom = new InvalidOperationException("simulated maintenance failure"); + var maintenance = new RecordingMaintenance { ThrowOnce = boom }; + var sp = BuildProvider(maintenance); + var logger = new CapturingLogger(); + + var svc = new AuditLogPartitionMaintenanceService( + sp.GetRequiredService(), + opts, + logger); + + // StartAsync must not throw even though the very first tick will fail. + await svc.StartAsync(CancellationToken.None); + + // Wait for the error to surface in the logger. 
+ var deadline = DateTime.UtcNow.AddSeconds(3); + while (!logger.Entries.Any(e => e.Exception == boom) && DateTime.UtcNow < deadline) + { + await Task.Delay(20); + } + + await svc.StopAsync(CancellationToken.None); + svc.Dispose(); + + var errorEntry = Assert.Single(logger.Entries, e => e.Exception == boom); + Assert.Equal(LogLevel.Error, errorEntry.Level); + Assert.Equal(1, maintenance.EnsureCallCount); + } +} diff --git a/tests/ScadaLink.ConfigurationDatabase.Tests/Maintenance/AuditLogPartitionMaintenanceTests.cs b/tests/ScadaLink.ConfigurationDatabase.Tests/Maintenance/AuditLogPartitionMaintenanceTests.cs new file mode 100644 index 0000000..2d8c6c8 --- /dev/null +++ b/tests/ScadaLink.ConfigurationDatabase.Tests/Maintenance/AuditLogPartitionMaintenanceTests.cs @@ -0,0 +1,182 @@ +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Logging.Abstractions; +using ScadaLink.ConfigurationDatabase.Maintenance; +using ScadaLink.ConfigurationDatabase.Tests.Migrations; +using Xunit; + +namespace ScadaLink.ConfigurationDatabase.Tests.Maintenance; + +/// +/// Bundle D (#23 M6-T5) integration tests for +/// . Uses the same +/// as the AuditLog migration / repository +/// tests so the ALTER PARTITION FUNCTION DDL runs against the actual seeded +/// pf_AuditLog_Month. +/// +/// +/// The migration seeds boundaries for every month in 2026 and 2027 (Jan 2026 +/// through Dec 2027). Tests pick a lookahead relative to the current +/// max-boundary at test start (rather than a fixed-target date) so each test +/// is robust against earlier tests in the class having added boundaries to +/// the shared fixture DB. Tests run sequentially within the class via xunit's +/// per-class collection serialisation. 
+/// +public class AuditLogPartitionMaintenanceTests : IClassFixture +{ + private readonly MsSqlMigrationFixture _fixture; + + public AuditLogPartitionMaintenanceTests(MsSqlMigrationFixture fixture) + { + _fixture = fixture; + } + + private ScadaLinkDbContext CreateContext() => + new(new DbContextOptionsBuilder() + .UseSqlServer(_fixture.ConnectionString).Options); + + private AuditLogPartitionMaintenance NewMaintenance(ScadaLinkDbContext ctx) => + new(ctx, NullLogger.Instance); + + /// + /// Computes the lookahead-in-months required to fall strictly inside the + /// already-covered boundary range. Picks something well below the + /// distance from "now" to the current max — guaranteed not to need any + /// new SPLIT. + /// + private static int LookaheadInsideExistingRange(DateTime max) + { + var now = DateTime.UtcNow; + // (max - now) in whole months, minus a 1-month safety margin so we + // never accidentally hit the boundary horizon edge case. + var months = ((max.Year - now.Year) * 12) + max.Month - now.Month - 1; + return Math.Max(1, months); + } + + /// + /// Computes the lookahead-in-months required to add exactly + /// new boundaries past the current max. + /// + /// + /// EnsureLookaheadAsync defines horizon = + /// NormalizeToFirstOfMonth(UtcNow) + lookaheadMonths. The new + /// boundaries it issues are first-of-month values strictly greater than + /// max, up to and including horizon. So + /// lookaheadMonths = monthsBetween(NormalizeToFirstOfMonth(UtcNow), max) + extraBoundaries + /// is the exact value that lands horizon on max + extraBoundaries + /// months. 
+ /// + private static int LookaheadForExtraBoundaries(DateTime max, int extraBoundaries) + { + var nowFirstOfMonth = FirstOfNextMonth(DateTime.UtcNow); + var monthsToMax = ((max.Year - nowFirstOfMonth.Year) * 12) + max.Month - nowFirstOfMonth.Month; + return monthsToMax + extraBoundaries; + } + + private static DateTime FirstOfNextMonth(DateTime instant) + { + var firstOfThisMonth = new DateTime(instant.Year, instant.Month, 1, 0, 0, 0, DateTimeKind.Utc); + return firstOfThisMonth.AddMonths(1); + } + + [SkippableFact] + public async Task EnsureLookahead_AlreadyHasFutureRange_NoSplit_ReturnsEmpty() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + await using var ctx = CreateContext(); + var maintenance = NewMaintenance(ctx); + + var max = await maintenance.GetMaxBoundaryAsync(); + Assert.NotNull(max); + + // Pick a lookahead small enough that horizon (NormalizeToFirstOfMonth(now) + // + lookahead) lands well INSIDE the already-covered range — no SPLIT + // should fire. + var lookahead = LookaheadInsideExistingRange(max.Value); + + var added = await maintenance.EnsureLookaheadAsync(lookahead); + + Assert.Empty(added); + + // Sanity: the max boundary is unchanged after the no-op call. 
+ var maxAfter = await maintenance.GetMaxBoundaryAsync(); + Assert.Equal(max, maxAfter); + } + + [SkippableFact] + public async Task EnsureLookahead_NeedsOneMoreBoundary_Splits_Returns1Boundary() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + await using var ctx = CreateContext(); + var maintenance = NewMaintenance(ctx); + + var maxBefore = await maintenance.GetMaxBoundaryAsync(); + Assert.NotNull(maxBefore); + + var lookahead = LookaheadForExtraBoundaries(maxBefore.Value, extraBoundaries: 1); + var expectedAdded = maxBefore.Value.AddMonths(1); + + var added = await maintenance.EnsureLookaheadAsync(lookahead); + + Assert.Single(added); + Assert.Equal(expectedAdded, added[0]); + + var maxAfter = await maintenance.GetMaxBoundaryAsync(); + Assert.Equal(expectedAdded, maxAfter); + } + + [SkippableFact] + public async Task EnsureLookahead_NeedsThreeBoundaries_Splits_Returns3Boundaries() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + await using var ctx = CreateContext(); + var maintenance = NewMaintenance(ctx); + + var maxBefore = await maintenance.GetMaxBoundaryAsync(); + Assert.NotNull(maxBefore); + + var lookahead = LookaheadForExtraBoundaries(maxBefore.Value, extraBoundaries: 3); + + var added = await maintenance.EnsureLookaheadAsync(lookahead); + + Assert.Equal(3, added.Count); + Assert.Equal(maxBefore.Value.AddMonths(1), added[0]); + Assert.Equal(maxBefore.Value.AddMonths(2), added[1]); + Assert.Equal(maxBefore.Value.AddMonths(3), added[2]); + + var maxAfter = await maintenance.GetMaxBoundaryAsync(); + Assert.Equal(maxBefore.Value.AddMonths(3), maxAfter); + } + + [SkippableFact] + public async Task EnsureLookahead_BoundaryAlreadyExists_NoError_Idempotent() + { + Skip.IfNot(_fixture.Available, _fixture.SkipReason); + + await using var ctx1 = CreateContext(); + var m1 = NewMaintenance(ctx1); + + var maxStart = await m1.GetMaxBoundaryAsync(); + Assert.NotNull(maxStart); + + // First call: add one boundary. 
+ var lookahead = LookaheadForExtraBoundaries(maxStart.Value, extraBoundaries: 1); + var firstAdded = await m1.EnsureLookaheadAsync(lookahead); + Assert.Single(firstAdded); + + // Second call: the boundary just added is now part of pf_AuditLog_Month, + // so the same lookahead value should be a no-op — no exception, no + // duplicate SPLIT. + await using var ctx2 = CreateContext(); + var m2 = NewMaintenance(ctx2); + var secondAdded = await m2.EnsureLookaheadAsync(lookahead); + + Assert.Empty(secondAdded); + + // The max boundary is unchanged across the second call. + var maxAfter = await m2.GetMaxBoundaryAsync(); + Assert.Equal(firstAdded[0], maxAfter); + } +}