feat(audit): M5.5 per-channel retention overrides via purge-role bounded delete (T3)
This commit is contained in:
@@ -167,6 +167,9 @@ public class AuditLogPurgeActor : ReceiveActor
|
||||
|
||||
if (boundaries.Count == 0)
|
||||
{
|
||||
// No whole-month partitions are eligible, but per-channel overrides may
|
||||
// still expire rows earlier than the global window — run them below.
|
||||
await RunPerChannelOverridesAsync(repository).ConfigureAwait(false);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -202,6 +205,80 @@ public class AuditLogPurgeActor : ReceiveActor
|
||||
sw.ElapsedMilliseconds);
|
||||
}
|
||||
}
|
||||
|
||||
// M5.5 (T3): after the channel-blind global partition switch-out, apply any
|
||||
// per-channel retention overrides that are SHORTER than the global window via
|
||||
// a bounded, batched row DELETE on the same maintenance path. The global
|
||||
// switch-out has already dropped whole months older than RetentionDays; these
|
||||
// deletes only ever expire rows EARLIER than that, so they run last and are a
|
||||
// strict tightening.
|
||||
await RunPerChannelOverridesAsync(repository).ConfigureAwait(false);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// M5.5 (T3): runs each per-channel retention override whose window is strictly
|
||||
/// shorter than the global <see cref="AuditLogOptions.RetentionDays"/>, deleting
|
||||
/// rows of that channel older than the channel-specific threshold via a bounded,
|
||||
/// batched maintenance-path DELETE. Each channel runs inside its own try/catch so
|
||||
/// one bad channel does not abandon the others on the same tick, mirroring the
|
||||
/// per-boundary error isolation of the partition switch-out loop.
|
||||
/// </summary>
|
||||
/// <param name="repository">The repository resolved for this tick's DI scope.</param>
|
||||
private async Task RunPerChannelOverridesAsync(IAuditLogRepository repository)
|
||||
{
|
||||
var overrides = _auditOptions.PerChannelRetentionDays;
|
||||
if (overrides is null || overrides.Count == 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
var globalDays = _auditOptions.RetentionDays;
|
||||
|
||||
foreach (var (channel, days) in overrides)
|
||||
{
|
||||
// Only act when the per-channel window is strictly shorter than the global
|
||||
// one. Equal/longer windows are already covered by the global partition
|
||||
// switch-out, so a row DELETE would be redundant work (and a longer window
|
||||
// is meaningless — the partition is dropped on the global schedule).
|
||||
if (days >= globalDays)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var channelThreshold = DateTime.UtcNow - TimeSpan.FromDays(days);
|
||||
var sw = Stopwatch.StartNew();
|
||||
try
|
||||
{
|
||||
var rowsDeleted = await repository
|
||||
.PurgeChannelOlderThanAsync(channel, channelThreshold, _purgeOptions.ChannelPurgeBatchSize)
|
||||
.ConfigureAwait(false);
|
||||
sw.Stop();
|
||||
|
||||
if (rowsDeleted > 0)
|
||||
{
|
||||
_logger.LogInformation(
|
||||
"Purged {RowsDeleted} AuditLog rows for channel {Channel} older than {Threshold:o} " +
|
||||
"(per-channel override {Days}d < global {GlobalDays}d) in {DurationMs} ms.",
|
||||
rowsDeleted,
|
||||
channel,
|
||||
channelThreshold,
|
||||
days,
|
||||
globalDays,
|
||||
sw.ElapsedMilliseconds);
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
sw.Stop();
|
||||
_logger.LogError(
|
||||
ex,
|
||||
"Failed to apply per-channel retention override for channel {Channel} " +
|
||||
"({Days}d); other channels continue. Elapsed {DurationMs} ms.",
|
||||
channel,
|
||||
days,
|
||||
sw.ElapsedMilliseconds);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>Self-tick triggering a purge pass across all eligible partitions.</summary>
|
||||
|
||||
@@ -28,6 +28,24 @@ public sealed class AuditLogPurgeOptions
|
||||
/// <summary>Period of the purge tick in hours (default 24).</summary>
|
||||
public int IntervalHours { get; set; } = 24;
|
||||
|
||||
/// <summary>
|
||||
/// M5.5 (T3): batch size for the per-channel retention-override row DELETE
|
||||
/// (<see cref="ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Repositories.IAuditLogRepository.PurgeChannelOlderThanAsync"/>).
|
||||
/// Each <c>DELETE TOP (@batch)</c> caps the transaction-log and lock footprint
|
||||
/// per statement; the repository loops batches until no rows remain. Default
|
||||
/// 5000 keeps individual deletes short on a busy central DB while still draining
|
||||
/// a large backlog within a tick. Clamped to a sane minimum in
|
||||
/// <see cref="ChannelPurgeBatchSize"/>.
|
||||
/// </summary>
|
||||
public int ChannelPurgeBatchSizeConfigured { get; set; } = 5000;
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the effective per-channel purge batch size, clamped to at least 1 so
|
||||
/// a misconfigured <c>0</c>/negative value cannot make the repository's DELETE
|
||||
/// loop spin or throw.
|
||||
/// </summary>
|
||||
public int ChannelPurgeBatchSize => ChannelPurgeBatchSizeConfigured < 1 ? 1 : ChannelPurgeBatchSizeConfigured;
|
||||
|
||||
/// <summary>
|
||||
/// Test-only override for finer control over the tick cadence than
|
||||
/// whole-hour resolution allows. When non-null, takes precedence over
|
||||
|
||||
@@ -37,6 +37,33 @@ public sealed class AuditLogOptions
|
||||
/// <summary>Central retention window in days (default 365, range [30, 3650]).</summary>
|
||||
public int RetentionDays { get; set; } = 365;
|
||||
|
||||
/// <summary>
|
||||
/// M5.5 (T3) per-channel retention overrides, keyed by the canonical channel name
|
||||
/// (the <see cref="AuditChannel"/> enum name — e.g. <c>ApiOutbound</c>,
|
||||
/// <c>DbOutbound</c>, <c>Notification</c>, <c>ApiInbound</c>). The value is a
|
||||
/// retention window in days that MUST be SHORTER than or equal to the global
|
||||
/// <see cref="RetentionDays"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The global <see cref="RetentionDays"/> window is enforced by month-partition
|
||||
/// switch-out, which is channel-blind: it can only drop a whole month once every
|
||||
/// row in it is older than the global window. A per-channel override therefore
|
||||
/// can only ever expire rows EARLIER than the global purge would — never later
|
||||
/// (a longer per-channel window is meaningless because the partition switch-out
|
||||
/// would already have dropped the month). Overrides shorter than the global window
|
||||
/// are honoured by the purge actor as a bounded, batched row DELETE on the
|
||||
/// maintenance path (see <c>AuditLogPurgeActor</c>); the append-only writer/ingest
|
||||
/// role is unaffected.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Each value is validated to be in <c>[30, RetentionDays]</c> by
|
||||
/// <c>AuditLogOptionsValidator</c>; keys that are not recognized
|
||||
/// <see cref="AuditChannel"/> names are rejected.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public Dictionary<string, int> PerChannelRetentionDays { get; set; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// Per-body byte ceiling applied to <see cref="AuditEvent.RequestSummary"/> and
|
||||
/// <see cref="AuditEvent.ResponseSummary"/> for <see cref="AuditChannel.ApiInbound"/> rows
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using ZB.MOM.WW.Configuration;
|
||||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.ScadaBridge.AuditLog.Configuration;
|
||||
|
||||
@@ -52,5 +53,27 @@ public sealed class AuditLogOptionsValidator : OptionsValidatorBase<AuditLogOpti
|
||||
!(options.InboundMaxBytes < MinInboundMaxBytes || options.InboundMaxBytes > MaxInboundMaxBytes),
|
||||
$"AuditLog:{nameof(AuditLogOptions.InboundMaxBytes)} ({options.InboundMaxBytes}) " +
|
||||
$"must be in [{MinInboundMaxBytes}, {MaxInboundMaxBytes}] bytes.");
|
||||
|
||||
// M5.5 (T3): per-channel retention overrides. Each entry must be keyed by a
|
||||
// recognized AuditChannel name and carry a window in [MinRetentionDays,
|
||||
// RetentionDays] — i.e. SHORTER than or equal to the global window. A longer
|
||||
// per-channel window is meaningless under month-partition switch-out (governed
|
||||
// by the global window), so it is rejected rather than silently ignored.
|
||||
foreach (var (channelKey, days) in options.PerChannelRetentionDays)
|
||||
{
|
||||
builder.RequireThat(
|
||||
Enum.TryParse<AuditChannel>(channelKey, ignoreCase: false, out _),
|
||||
$"AuditLog:{nameof(AuditLogOptions.PerChannelRetentionDays)} key '{channelKey}' " +
|
||||
$"is not a recognized channel name. Valid keys: {string.Join(", ", Enum.GetNames<AuditChannel>())}.");
|
||||
|
||||
// Valid when days is within [MinRetentionDays, RetentionDays] inclusive.
|
||||
// The lower bound matches the global RetentionDays floor; the upper bound
|
||||
// is the configured global window (longer is meaningless — see remarks).
|
||||
builder.RequireThat(
|
||||
!(days < MinRetentionDays || days > options.RetentionDays),
|
||||
$"AuditLog:{nameof(AuditLogOptions.PerChannelRetentionDays)}['{channelKey}'] ({days}) " +
|
||||
$"must be in [{MinRetentionDays}, {nameof(AuditLogOptions.RetentionDays)}={options.RetentionDays}] days " +
|
||||
"— a per-channel window must be shorter than or equal to the global retention window.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,6 +87,42 @@ public interface IAuditLogRepository
|
||||
/// <returns>A task that resolves to the approximate number of rows discarded by the partition switch.</returns>
|
||||
Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// M5.5 (T3) per-channel retention override purge. Deletes <c>AuditLog</c> rows for a
|
||||
/// single <paramref name="channel"/> (matched against the canonical
|
||||
/// <c>Category</c> column — the bare channel name, e.g. <c>ApiOutbound</c>) whose
|
||||
/// <c>OccurredAtUtc</c> is strictly older than <paramref name="threshold"/>, in
|
||||
/// bounded batches of <paramref name="batchSize"/> rows, looping until no further
|
||||
/// rows match. Returns the total number of rows deleted across all batches.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// <b>Maintenance path — NOT the writer role.</b> The append-only invariant binds
|
||||
/// the <c>scadabridge_audit_writer</c> ingest role (INSERT + SELECT only). This row
|
||||
/// DELETE runs on the purge/maintenance connection, the same path that performs the
|
||||
/// global partition switch-out (also a destructive operation forbidden to the writer
|
||||
/// role). Per-channel overrides can only ever expire rows EARLIER than the global
|
||||
/// month-partition switch-out would — never later — so this is a strict tightening
|
||||
/// of the retention window, applied AFTER the global purge on the same tick.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <b>Bounded + idempotent.</b> Each batch is a <c>DELETE TOP (@batch)</c> so the
|
||||
/// transaction log and lock footprint stay bounded regardless of backlog. Re-running
|
||||
/// the purge is a no-op once every eligible row is gone (the loop exits when a batch
|
||||
/// deletes zero rows), so a crash mid-loop is recoverable by simply running again.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
/// <param name="channel">Canonical channel name (the <c>Category</c> column value, e.g. <c>ApiOutbound</c>).</param>
|
||||
/// <param name="threshold">Rows with <c>OccurredAtUtc</c> strictly older than this UTC datetime are deleted.</param>
|
||||
/// <param name="batchSize">Maximum rows deleted per batch; must be > 0.</param>
|
||||
/// <param name="ct">Cancellation token.</param>
|
||||
/// <returns>A task that resolves to the total number of rows deleted across all batches.</returns>
|
||||
Task<long> PurgeChannelOlderThanAsync(
|
||||
string channel,
|
||||
DateTime threshold,
|
||||
int batchSize,
|
||||
CancellationToken ct = default);
|
||||
|
||||
/// <summary>
|
||||
/// Returns the set of <c>pf_AuditLog_Month</c> partition lower-bound
|
||||
/// boundaries whose partitions contain only rows with
|
||||
|
||||
@@ -370,6 +370,99 @@ VALUES
|
||||
return rowsDeleted;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<long> PurgeChannelOlderThanAsync(
|
||||
string channel,
|
||||
DateTime threshold,
|
||||
int batchSize,
|
||||
CancellationToken ct = default)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(channel))
|
||||
{
|
||||
throw new ArgumentException("Channel must be a non-empty channel name.", nameof(channel));
|
||||
}
|
||||
|
||||
if (batchSize <= 0)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be > 0.");
|
||||
}
|
||||
|
||||
var thresholdUtc = DateTime.SpecifyKind(threshold.ToUniversalTime(), DateTimeKind.Utc);
|
||||
|
||||
// M5.5 (T3) per-channel retention override purge. This is the ONLY DELETE
|
||||
// against dbo.AuditLog in the codebase and it runs on the purge/maintenance
|
||||
// path, NOT the append-only writer role (which has INSERT + SELECT only — see
|
||||
// the DENY UPDATE/DENY DELETE grants in CollapseAuditLogToCanonical). The
|
||||
// AuditLog append-only CI guard (AuditLogAppendOnlyGuardTests) is intentionally
|
||||
// widened to allow ONLY the single marked DELETE below; any other UPDATE/DELETE
|
||||
// targeting AuditLog still trips the guard.
|
||||
//
|
||||
// Bounded + idempotent: DELETE TOP (@batch) caps the log/lock footprint per
|
||||
// statement; the loop repeats until a batch deletes zero rows, so re-running
|
||||
// after a crash mid-loop simply resumes. Category is the canonical
|
||||
// channel-name column (e.g. 'ApiOutbound'); Action holds "{channel}.{kind}" so
|
||||
// it is NOT the right column to match a bare channel name against.
|
||||
//
|
||||
// The trailing AUDIT-PURGE-ALLOWED marker on the DELETE line below is the
|
||||
// single narrow exemption the append-only CI guard (AuditLogAppendOnlyGuardTests)
|
||||
// recognizes; any other UPDATE/DELETE targeting AuditLog still trips the guard.
|
||||
const string deleteBatchSql =
|
||||
"DELETE TOP (@batch) FROM dbo.AuditLog WHERE Category = @channel AND OccurredAtUtc < @threshold;"; // AUDIT-PURGE-ALLOWED: per-channel retention override (M5.5 T3), maintenance path
|
||||
|
||||
long totalDeleted = 0;
|
||||
|
||||
var conn = _context.Database.GetDbConnection();
|
||||
var openedHere = false;
|
||||
if (conn.State != System.Data.ConnectionState.Open)
|
||||
{
|
||||
await conn.OpenAsync(ct).ConfigureAwait(false);
|
||||
openedHere = true;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
|
||||
await using var cmd = conn.CreateCommand();
|
||||
cmd.CommandText = deleteBatchSql;
|
||||
|
||||
var pBatch = cmd.CreateParameter();
|
||||
pBatch.ParameterName = "@batch";
|
||||
pBatch.Value = batchSize;
|
||||
cmd.Parameters.Add(pBatch);
|
||||
|
||||
var pChannel = cmd.CreateParameter();
|
||||
pChannel.ParameterName = "@channel";
|
||||
pChannel.Value = channel;
|
||||
cmd.Parameters.Add(pChannel);
|
||||
|
||||
var pThreshold = cmd.CreateParameter();
|
||||
pThreshold.ParameterName = "@threshold";
|
||||
pThreshold.Value = thresholdUtc;
|
||||
cmd.Parameters.Add(pThreshold);
|
||||
|
||||
var rows = await cmd.ExecuteNonQueryAsync(ct).ConfigureAwait(false);
|
||||
if (rows <= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
totalDeleted += rows;
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (openedHere)
|
||||
{
|
||||
await conn.CloseAsync().ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
return totalDeleted;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
DateTime threshold,
|
||||
|
||||
@@ -216,6 +216,10 @@ public class AuditLogIngestActorTests : TestKit, IClassFixture<MsSqlMigrationFix
|
||||
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
|
||||
_inner.SwitchOutPartitionAsync(monthBoundary, ct);
|
||||
|
||||
public Task<long> PurgeChannelOlderThanAsync(
|
||||
string channel, DateTime threshold, int batchSize, CancellationToken ct = default) =>
|
||||
_inner.PurgeChannelOlderThanAsync(channel, threshold, batchSize, ct);
|
||||
|
||||
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
DateTime threshold, CancellationToken ct = default) =>
|
||||
_inner.GetPartitionBoundariesOlderThanAsync(threshold, ct);
|
||||
|
||||
@@ -51,6 +51,12 @@ public class AuditLogPurgeActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
|
||||
public DateTime? ThrowOnBoundary { get; set; }
|
||||
public Exception? BoundaryException { get; set; }
|
||||
|
||||
// M5.5 (T3): records every per-channel purge call as
|
||||
// (channel, threshold, batchSize) so tests can assert which channels the
|
||||
// actor chose to purge and with what window.
|
||||
public List<(string Channel, DateTime Threshold, int BatchSize)> ChannelPurges { get; } = new();
|
||||
public Func<string, long> RowsPerChannel { get; set; } = _ => 0L;
|
||||
|
||||
// The actor enumerator returns whichever list is configured here.
|
||||
// Mutating this between ticks lets tests simulate "no longer
|
||||
// eligible" boundaries on the second tick.
|
||||
@@ -80,6 +86,13 @@ public class AuditLogPurgeActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
|
||||
return Task.FromResult<IReadOnlyList<DateTime>>(Boundaries.ToArray());
|
||||
}
|
||||
|
||||
public Task<long> PurgeChannelOlderThanAsync(
|
||||
string channel, DateTime threshold, int batchSize, CancellationToken ct = default)
|
||||
{
|
||||
ChannelPurges.Add((channel, threshold, batchSize));
|
||||
return Task.FromResult(RowsPerChannel(channel));
|
||||
}
|
||||
|
||||
public Task<ZB.MOM.WW.ScadaBridge.Commons.Types.AuditLogKpiSnapshot> GetKpiSnapshotAsync(
|
||||
TimeSpan window, DateTime? nowUtc = null, CancellationToken ct = default) =>
|
||||
Task.FromResult(new ZB.MOM.WW.ScadaBridge.Commons.Types.AuditLogKpiSnapshot(0L, 0L, 0L, nowUtc ?? DateTime.UtcNow));
|
||||
@@ -381,4 +394,90 @@ public class AuditLogPurgeActorTests : TestKit, IClassFixture<MsSqlMigrationFixt
|
||||
Math.Abs((threshold - expected).TotalMinutes) < 1.0,
|
||||
$"threshold {threshold:o} should be within 1 minute of {expected:o}");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 8. PerChannelOverride_ShorterThanGlobal_TriggersChannelPurge (M5.5 T3)
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void PerChannelOverride_ShorterThanGlobal_TriggersChannelPurge()
|
||||
{
|
||||
// ApiOutbound has a 30-day override under a 365-day global window — strictly
|
||||
// shorter, so the actor must run a per-channel purge with a threshold of
|
||||
// ~today-30d and the configured batch size.
|
||||
var repo = new RecordingRepo { Boundaries = new List<DateTime>() };
|
||||
var purgeOptions = FastTickOptions();
|
||||
purgeOptions.ChannelPurgeBatchSizeConfigured = 1234;
|
||||
|
||||
// Build the options OUTSIDE the Props expression tree — a collection/dictionary
|
||||
// initializer is not legal inside an expression-tree lambda (CS8074).
|
||||
var auditOptions = Options.Create(new AuditLogOptions
|
||||
{
|
||||
RetentionDays = 365,
|
||||
PerChannelRetentionDays = new Dictionary<string, int> { ["ApiOutbound"] = 30 },
|
||||
});
|
||||
var purgeOptionsWrapped = Options.Create(purgeOptions);
|
||||
|
||||
var sp = BuildScopedProvider(repo);
|
||||
Sys.ActorOf(Props.Create(() => new AuditLogPurgeActor(
|
||||
sp,
|
||||
purgeOptionsWrapped,
|
||||
auditOptions,
|
||||
NullLogger<AuditLogPurgeActor>.Instance)));
|
||||
|
||||
AwaitAssert(
|
||||
() => Assert.Contains(repo.ChannelPurges, p => p.Channel == "ApiOutbound"),
|
||||
duration: TimeSpan.FromSeconds(3),
|
||||
interval: TimeSpan.FromMilliseconds(50));
|
||||
|
||||
var purge = repo.ChannelPurges.First(p => p.Channel == "ApiOutbound");
|
||||
Assert.Equal(1234, purge.BatchSize);
|
||||
|
||||
var expected = DateTime.UtcNow - TimeSpan.FromDays(30);
|
||||
Assert.True(
|
||||
Math.Abs((purge.Threshold - expected).TotalMinutes) < 1.0,
|
||||
$"channel threshold {purge.Threshold:o} should be within 1 minute of {expected:o}");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 9. PerChannelOverride_EqualOrLongerThanGlobal_SkipsChannelPurge (M5.5 T3)
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void PerChannelOverride_EqualOrLongerThanGlobal_SkipsChannelPurge()
|
||||
{
|
||||
// DbOutbound = 365 (== global) and Notification = 400 (> global, validator would
|
||||
// normally reject this but the actor must defensively skip it too). Neither is
|
||||
// SHORTER than the global window, so the actor must NOT issue a channel purge —
|
||||
// the global partition switch-out already governs those rows.
|
||||
var repo = new RecordingRepo { Boundaries = new List<DateTime>() };
|
||||
|
||||
// Build the options OUTSIDE the Props expression tree (CS8074).
|
||||
var auditOptions = Options.Create(new AuditLogOptions
|
||||
{
|
||||
RetentionDays = 365,
|
||||
PerChannelRetentionDays = new Dictionary<string, int>
|
||||
{
|
||||
["DbOutbound"] = 365,
|
||||
["Notification"] = 400,
|
||||
},
|
||||
});
|
||||
var purgeOptions = Options.Create(FastTickOptions());
|
||||
|
||||
var sp = BuildScopedProvider(repo);
|
||||
Sys.ActorOf(Props.Create(() => new AuditLogPurgeActor(
|
||||
sp,
|
||||
purgeOptions,
|
||||
auditOptions,
|
||||
NullLogger<AuditLogPurgeActor>.Instance)));
|
||||
|
||||
// Wait for at least one tick (visible via the enumerator call), then assert no
|
||||
// channel purge was issued.
|
||||
AwaitAssert(
|
||||
() => Assert.True(repo.ThresholdQueries.Count >= 1),
|
||||
duration: TimeSpan.FromSeconds(3),
|
||||
interval: TimeSpan.FromMilliseconds(50));
|
||||
|
||||
Assert.Empty(repo.ChannelPurges);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,6 +44,9 @@ public class CentralAuditWriteFailuresTests : TestKit
|
||||
Task.FromResult<IReadOnlyList<AuditEvent>>(Array.Empty<AuditEvent>());
|
||||
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
|
||||
Task.FromResult(0L);
|
||||
public Task<long> PurgeChannelOlderThanAsync(
|
||||
string channel, DateTime threshold, int batchSize, CancellationToken ct = default) =>
|
||||
Task.FromResult(0L);
|
||||
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
DateTime threshold, CancellationToken ct = default) =>
|
||||
Task.FromResult<IReadOnlyList<DateTime>>(Array.Empty<DateTime>());
|
||||
|
||||
+4
@@ -89,6 +89,10 @@ public class SiteAuditReconciliationActorTests : TestKit, IClassFixture<MsSqlMig
|
||||
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default) =>
|
||||
Task.FromResult(0L);
|
||||
|
||||
public Task<long> PurgeChannelOlderThanAsync(
|
||||
string channel, DateTime threshold, int batchSize, CancellationToken ct = default) =>
|
||||
Task.FromResult(0L);
|
||||
|
||||
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
DateTime threshold, CancellationToken ct = default) =>
|
||||
Task.FromResult<IReadOnlyList<DateTime>>(Array.Empty<DateTime>());
|
||||
|
||||
+103
@@ -50,4 +50,107 @@ public class AuditLogOptionsValidatorTests
|
||||
result.Failures!,
|
||||
f => f.Contains(nameof(AuditLogOptions.InboundMaxBytes), StringComparison.Ordinal));
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// M5.5 (T3) per-channel retention overrides
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
[Fact]
|
||||
public void Validate_PerChannelRetention_ShorterThanGlobal_Passes()
|
||||
{
|
||||
// A per-channel window strictly shorter than the global window is the
|
||||
// sanctioned case — the purge actor expires those rows earlier via the
|
||||
// maintenance-path row DELETE.
|
||||
var validator = new AuditLogOptionsValidator();
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
RetentionDays = 365,
|
||||
PerChannelRetentionDays = new Dictionary<string, int>
|
||||
{
|
||||
["ApiOutbound"] = 90,
|
||||
["Notification"] = 30, // floor (MinRetentionDays)
|
||||
},
|
||||
};
|
||||
|
||||
Assert.True(validator.Validate(null, opts).Succeeded);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_PerChannelRetention_EqualToGlobal_Passes()
|
||||
{
|
||||
// Equal to global is allowed (the bound is [Min, RetentionDays] inclusive);
|
||||
// the purge actor simply treats it as a no-op since it is not SHORTER.
|
||||
var validator = new AuditLogOptionsValidator();
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
RetentionDays = 200,
|
||||
PerChannelRetentionDays = new Dictionary<string, int> { ["DbOutbound"] = 200 },
|
||||
};
|
||||
|
||||
Assert.True(validator.Validate(null, opts).Succeeded);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_PerChannelRetention_LongerThanGlobal_Fails()
|
||||
{
|
||||
// A per-channel window LONGER than the global window is meaningless under
|
||||
// month-partition switch-out (governed by the global window) and is rejected.
|
||||
var validator = new AuditLogOptionsValidator();
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
RetentionDays = 100,
|
||||
PerChannelRetentionDays = new Dictionary<string, int> { ["ApiInbound"] = 200 },
|
||||
};
|
||||
|
||||
var result = validator.Validate(null, opts);
|
||||
Assert.False(result.Succeeded);
|
||||
Assert.Contains(
|
||||
result.Failures!,
|
||||
f => f.Contains(nameof(AuditLogOptions.PerChannelRetentionDays), StringComparison.Ordinal)
|
||||
&& f.Contains("ApiInbound", StringComparison.Ordinal));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_PerChannelRetention_BelowMinimum_Fails()
|
||||
{
|
||||
var validator = new AuditLogOptionsValidator();
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
RetentionDays = 365,
|
||||
PerChannelRetentionDays = new Dictionary<string, int> { ["ApiOutbound"] = 29 },
|
||||
};
|
||||
|
||||
var result = validator.Validate(null, opts);
|
||||
Assert.False(result.Succeeded);
|
||||
Assert.Contains(
|
||||
result.Failures!,
|
||||
f => f.Contains(nameof(AuditLogOptions.PerChannelRetentionDays), StringComparison.Ordinal));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_PerChannelRetention_UnknownChannelKey_Fails()
|
||||
{
|
||||
// Keys must be recognized AuditChannel names; a typo / unknown key is rejected
|
||||
// rather than silently ignored so a misconfiguration surfaces at boot.
|
||||
var validator = new AuditLogOptionsValidator();
|
||||
var opts = new AuditLogOptions
|
||||
{
|
||||
RetentionDays = 365,
|
||||
PerChannelRetentionDays = new Dictionary<string, int> { ["NotAChannel"] = 90 },
|
||||
};
|
||||
|
||||
var result = validator.Validate(null, opts);
|
||||
Assert.False(result.Succeeded);
|
||||
Assert.Contains(
|
||||
result.Failures!,
|
||||
f => f.Contains("NotAChannel", StringComparison.Ordinal));
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Validate_PerChannelRetention_DefaultEmpty_Passes()
|
||||
{
|
||||
// The default (no overrides) must pass — this is the common case.
|
||||
var validator = new AuditLogOptionsValidator();
|
||||
Assert.True(validator.Validate(null, new AuditLogOptions()).Succeeded);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,19 +67,25 @@ public class PartitionPurgeTests : TestKit, IClassFixture<MsSqlMigrationFixture>
|
||||
SqlConnection conn,
|
||||
Guid eventId,
|
||||
DateTime occurredAtUtc,
|
||||
string siteId)
|
||||
string siteId,
|
||||
string channel = "ApiOutbound",
|
||||
string kind = "ApiCall")
|
||||
{
|
||||
await using var cmd = conn.CreateCommand();
|
||||
// C5 (Task 2.5): dbo.AuditLog is now the 10 canonical columns + DetailsJson;
|
||||
// the ScadaBridge domain fields (channel/kind/status/sourceSiteId) ride in
|
||||
// DetailsJson and the SourceSiteId/Kind/Status computed columns auto-derive.
|
||||
// Action = "{channel}.{kind}", Category = channel name, Outcome = Success.
|
||||
// The channel/kind are parameterized so the M5.5 per-channel purge test can
|
||||
// seed multiple channels into the same partition.
|
||||
cmd.CommandText = @"
|
||||
INSERT INTO dbo.AuditLog
|
||||
(EventId, OccurredAtUtc, Actor, Action, Outcome, Category, Target, SourceNode, CorrelationId, DetailsJson)
|
||||
VALUES
|
||||
(@EventId, @OccurredAtUtc, NULL, 'ApiOutbound.ApiCall', 'Success', 'ApiOutbound', NULL, NULL, NULL,
|
||||
(@EventId, @OccurredAtUtc, NULL, @Action, 'Success', @Category, NULL, NULL, NULL,
|
||||
@DetailsJson);";
|
||||
cmd.Parameters.Add("@Action", System.Data.SqlDbType.VarChar, 64).Value = $"{channel}.{kind}";
|
||||
cmd.Parameters.Add("@Category", System.Data.SqlDbType.VarChar, 32).Value = channel;
|
||||
cmd.Parameters.Add("@EventId", System.Data.SqlDbType.UniqueIdentifier).Value = eventId;
|
||||
// SqlDbType.DateTime2 with explicit Scale 7 matches the
|
||||
// OccurredAtUtc column shape (datetime2(7)) and avoids the implicit
|
||||
@@ -97,7 +103,7 @@ VALUES
|
||||
// the computed SourceSiteId column the verify queries scope on. payloadTruncated
|
||||
// is always present (the codec always writes the bool).
|
||||
var detailsJson =
|
||||
"{\"channel\":\"ApiOutbound\",\"kind\":\"ApiCall\",\"status\":\"Delivered\"," +
|
||||
"{\"channel\":\"" + channel + "\",\"kind\":\"" + kind + "\",\"status\":\"Delivered\"," +
|
||||
"\"sourceSiteId\":\"" + siteId + "\",\"payloadTruncated\":false}";
|
||||
cmd.Parameters.Add("@DetailsJson", System.Data.SqlDbType.NVarChar, -1).Value = detailsJson;
|
||||
await cmd.ExecuteNonQueryAsync();
|
||||
@@ -354,4 +360,87 @@ WHERE name = 'UX_AuditLog_EventId'
|
||||
Assert.Single(rows);
|
||||
Assert.Equal(freshEventId, rows[0].EventId);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// 4. PerChannelOverride_DeletesOnlyOverriddenChannelsOldRows (M5.5 T3)
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
/// <summary>
|
||||
/// M5.5 (T3): exercises <see cref="IAuditLogRepository.PurgeChannelOlderThanAsync"/>
|
||||
/// directly against the real repository + fixture DB. Seeds, in the SAME partition,
|
||||
/// old + recent rows for an OVERRIDDEN channel (<c>ApiOutbound</c>) and old + recent
|
||||
/// rows for an UN-overridden channel (<c>DbOutbound</c>), then runs the per-channel
|
||||
/// purge for <c>ApiOutbound</c> only. Asserts:
|
||||
/// <list type="number">
|
||||
/// <item>The overridden channel's OLD rows are deleted.</item>
|
||||
/// <item>The overridden channel's RECENT rows (newer than the channel threshold) survive.</item>
|
||||
/// <item>The un-overridden channel's rows (old AND recent) are completely untouched
|
||||
/// — they follow the global window, which the channel purge never applies to them.</item>
|
||||
/// </list>
|
||||
/// This is the maintenance-path row DELETE; the fixture connects as <c>sa</c>, which
|
||||
/// the append-only writer-role DENYs do not bind (the role granularity is exercised
|
||||
/// in the repository/migration tests).
|
||||
/// </summary>
|
||||
[SkippableFact]
|
||||
public async Task PerChannelOverride_DeletesOnlyOverriddenChannelsOldRows()
|
||||
{
|
||||
Skip.IfNot(_fixture.Available, _fixture.SkipReason);
|
||||
|
||||
var siteId = "perchannel-" + Guid.NewGuid().ToString("N").Substring(0, 8);
|
||||
|
||||
// Two timestamps: one OLD (older than the channel threshold we will purge with)
|
||||
// and one RECENT (newer than it). Both sit comfortably inside the retention
|
||||
// window so the global partition purge would NOT touch either — isolating the
|
||||
// per-channel DELETE as the only force acting here.
|
||||
var oldOccurred = new DateTime(2026, 1, 15, 0, 0, 0, DateTimeKind.Utc);
|
||||
var recentOccurred = new DateTime(2026, 5, 15, 0, 0, 0, DateTimeKind.Utc);
|
||||
|
||||
var apiOldId = Guid.NewGuid(); // ApiOutbound, old → SHOULD be deleted
|
||||
var apiRecentId = Guid.NewGuid(); // ApiOutbound, recent→ SHOULD survive
|
||||
var dbOldId = Guid.NewGuid(); // DbOutbound, old → SHOULD survive (un-overridden)
|
||||
var dbRecentId = Guid.NewGuid(); // DbOutbound, recent → SHOULD survive
|
||||
|
||||
await using (var seedConn = _fixture.OpenConnection())
|
||||
{
|
||||
await DirectInsertAsync(seedConn, apiOldId, oldOccurred, siteId, channel: "ApiOutbound", kind: "ApiCall");
|
||||
await DirectInsertAsync(seedConn, apiRecentId, recentOccurred, siteId, channel: "ApiOutbound", kind: "ApiCall");
|
||||
await DirectInsertAsync(seedConn, dbOldId, oldOccurred, siteId, channel: "DbOutbound", kind: "DbWrite");
|
||||
await DirectInsertAsync(seedConn, dbRecentId, recentOccurred, siteId, channel: "DbOutbound", kind: "DbWrite");
|
||||
}
|
||||
|
||||
// Purge ApiOutbound rows older than a threshold that sits strictly between the
|
||||
// old (Jan 15) and recent (May 15) seeds — e.g. Mar 1. Only apiOldId qualifies.
|
||||
var channelThreshold = new DateTime(2026, 3, 1, 0, 0, 0, DateTimeKind.Utc);
|
||||
|
||||
await using (var ctx = CreateContext())
|
||||
{
|
||||
var repo = new AuditLogRepository(ctx);
|
||||
var deleted = await repo.PurgeChannelOlderThanAsync(
|
||||
channel: "ApiOutbound",
|
||||
threshold: channelThreshold,
|
||||
batchSize: 2);
|
||||
|
||||
Assert.Equal(1L, deleted);
|
||||
|
||||
// Idempotent: a second run deletes nothing (the eligible row is gone).
|
||||
var deletedAgain = await repo.PurgeChannelOlderThanAsync(
|
||||
channel: "ApiOutbound",
|
||||
threshold: channelThreshold,
|
||||
batchSize: 2);
|
||||
Assert.Equal(0L, deletedAgain);
|
||||
}
|
||||
|
||||
await using var verify = CreateContext();
|
||||
var rows = await verify.Set<AuditLogRow>()
|
||||
.Where(e => e.SourceSiteId == siteId)
|
||||
.ToListAsync();
|
||||
|
||||
// Overridden channel: old gone, recent kept.
|
||||
Assert.DoesNotContain(rows, r => r.EventId == apiOldId);
|
||||
Assert.Contains(rows, r => r.EventId == apiRecentId);
|
||||
|
||||
// Un-overridden channel: BOTH rows untouched (follow the global window).
|
||||
Assert.Contains(rows, r => r.EventId == dbOldId);
|
||||
Assert.Contains(rows, r => r.EventId == dbRecentId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,6 +89,10 @@ public class SiteAuditPushFlowTests : TestKit
|
||||
public Task<long> SwitchOutPartitionAsync(DateTime monthBoundary, CancellationToken ct = default)
|
||||
=> throw new NotSupportedException();
|
||||
|
||||
public Task<long> PurgeChannelOlderThanAsync(
|
||||
string channel, DateTime threshold, int batchSize, CancellationToken ct = default)
|
||||
=> throw new NotSupportedException();
|
||||
|
||||
public Task<IReadOnlyList<DateTime>> GetPartitionBoundariesOlderThanAsync(
|
||||
DateTime threshold, CancellationToken ct = default)
|
||||
=> throw new NotSupportedException();
|
||||
|
||||
Reference in New Issue
Block a user