fix(audit): robust central options binding + interval clamps + doc/contract fixes (review)

This commit is contained in:
Joseph Doherty
2026-06-15 10:11:49 -04:00
parent 36a08a4145
commit c092e89fd1
7 changed files with 153 additions and 40 deletions
@@ -17,8 +17,10 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// </para>
/// <para>
/// <see cref="IntervalOverride"/> exists for tests to drop the cadence to
/// milliseconds without polluting the production config surface; production
/// binds <see cref="IntervalHours"/> only.
/// milliseconds; production config is expected to set <see cref="IntervalHours"/>
/// only. Because this options class is <c>Bind</c>-ed wholesale, a config value
/// at <c>AuditLog:Purge:IntervalOverride</c> would bind if present (and would
/// bypass the <see cref="Interval"/> minimum clamp) — operators must not set it.
/// </para>
/// </remarks>
public sealed class AuditLogPurgeOptions
@@ -29,15 +31,44 @@ public sealed class AuditLogPurgeOptions
/// <summary>
/// Test-only override for finer control over the tick cadence than
/// whole-hour resolution allows. When non-null, takes precedence over
/// <see cref="IntervalHours"/>. Not bound from config — production
/// config exposes <see cref="IntervalHours"/> only.
/// <see cref="IntervalHours"/> AND bypasses the <see cref="Interval"/>
/// minimum clamp (so tests can use millisecond cadences). Production
/// config exposes <see cref="IntervalHours"/> only and never sets this
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
/// config value at <c>AuditLog:Purge:IntervalOverride</c> WOULD bind if
/// present; operators must not set it.
/// </summary>
public TimeSpan? IntervalOverride { get; set; }
/// <summary>
/// Resolves the effective tick interval, honouring the test override
/// when set. Falls back to <see cref="IntervalHours"/>.
/// Minimum interval the config-bound <see cref="IntervalHours"/> can
/// resolve to. Clamps a misconfigured <c>IntervalHours: 0</c> (or a
/// negative value) away from <see cref="TimeSpan.Zero"/> — a zero
/// interval would make Akka's <c>ScheduleTellRepeatedlyCancelable</c>
/// spin, looping the partition drop/rebuild dance into a sustained SQL
/// outage. The test-only <see cref="IntervalOverride"/> bypasses this
/// clamp so unit tests can still drop the cadence to milliseconds.
/// </summary>
public TimeSpan Interval =>
IntervalOverride ?? TimeSpan.FromHours(IntervalHours);
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromMinutes(1);
/// <summary>
/// Resolves the effective tick interval. A strictly positive
/// <see cref="IntervalOverride"/> takes precedence and is returned unclamped,
/// so tests can still run at millisecond cadences. Otherwise the interval is
/// derived from <see cref="IntervalHours"/> and raised to at least
/// <see cref="MinConfiguredInterval"/>, so a zero/negative config value can
/// never yield <see cref="TimeSpan.Zero"/>.
/// </summary>
/// <remarks>
/// A zero or negative <see cref="IntervalOverride"/> is ignored rather than
/// returned. The override is a settable property on a wholesale-bound options
/// class, so a stray value could otherwise reintroduce the non-positive
/// interval that the clamp exists to prevent.
/// </remarks>
public TimeSpan Interval
{
    get
    {
        // Only a usable (strictly positive) override may bypass the clamp.
        if (IntervalOverride is { } overrideValue && overrideValue > TimeSpan.Zero)
        {
            return overrideValue;
        }

        // Config path: whole hours, floored at the configured minimum.
        var resolved = TimeSpan.FromHours(IntervalHours);
        return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved;
    }
}
}
@@ -9,11 +9,12 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
/// <remarks>
/// The production implementation wraps <c>ISiteRepository.GetAllSitesAsync</c>
/// and projects each <c>Site</c> to a <see cref="SiteEntry"/> using the
/// site's configured <c>GrpcNodeAAddress</c> (falling back to
/// <c>GrpcNodeBAddress</c> when NodeA is unset). Sites with NO gRPC address
/// configured are silently skipped — the reconciliation pull cannot reach
/// them, but absence of an address is a configuration decision, not a runtime
/// error.
/// site's configured <c>GrpcNodeAAddress</c>. This is a NodeA-only first cut:
/// sites with a blank <c>GrpcNodeAAddress</c> are silently SKIPPED — the
/// reconciliation pull cannot reach them, but absence of an address is a
/// configuration decision, not a runtime error. NodeB-fallback endpoint
/// selection (dial NodeB when NodeA is unset/unreachable) is a follow-up
/// (mirrors the comment in <c>SiteEnumerator.cs</c>).
/// </remarks>
public interface ISiteEnumerator
{
@@ -182,6 +182,10 @@ public class SiteAuditReconciliationActor : ReceiveActor
IReadOnlyList<SiteEntry> sites;
try
{
// No ambient CancellationToken in a ReceiveActor message handler —
// CancellationToken.None (the EnumerateAsync default) is intentional.
// The work is bounded by the 5-min reconciliation tick plus the
// 10s graceful-stop drain on PhaseClusterLeave.
sites = await _sites.EnumerateAsync().ConfigureAwait(false);
}
catch (Exception ex)
@@ -31,18 +31,45 @@ public sealed class SiteAuditReconciliationOptions
/// <summary>
/// Test-only override for finer control over the tick cadence than
/// whole-second resolution allows. When non-null, takes precedence over
/// <see cref="ReconciliationIntervalSeconds"/>. Not bound from config —
/// production config exposes <see cref="ReconciliationIntervalSeconds"/>
/// only.
/// <see cref="ReconciliationIntervalSeconds"/> AND bypasses the
/// <see cref="ReconciliationInterval"/> minimum clamp (so tests can use
/// millisecond cadences). Production config exposes
/// <see cref="ReconciliationIntervalSeconds"/> only and never sets this
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
/// config value at <c>AuditLog:Reconciliation:ReconciliationIntervalOverride</c>
/// WOULD bind if present; operators must not set it.
/// </summary>
public TimeSpan? ReconciliationIntervalOverride { get; set; }
/// <summary>
/// Resolves the effective tick interval, honouring the test override when
/// set. Falls back to <see cref="ReconciliationIntervalSeconds"/>.
/// Minimum interval the config-bound <see cref="ReconciliationIntervalSeconds"/>
/// can resolve to. Clamps a misconfigured <c>ReconciliationIntervalSeconds: 0</c>
/// (or a negative value) away from <see cref="TimeSpan.Zero"/>, which would make
/// Akka's <c>ScheduleTellRepeatedlyCancelable</c> spin. The test-only
/// <see cref="ReconciliationIntervalOverride"/> bypasses this clamp so unit tests
/// can still drop the cadence to milliseconds.
/// </summary>
public TimeSpan ReconciliationInterval =>
ReconciliationIntervalOverride ?? TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromSeconds(1);
/// <summary>
/// Resolves the effective tick interval. A strictly positive
/// <see cref="ReconciliationIntervalOverride"/> takes precedence and is
/// returned unclamped, so tests can still run at millisecond cadences.
/// Otherwise the interval is derived from
/// <see cref="ReconciliationIntervalSeconds"/> and raised to at least
/// <see cref="MinConfiguredInterval"/>, so a zero/negative config value can
/// never yield <see cref="TimeSpan.Zero"/>.
/// </summary>
/// <remarks>
/// A zero or negative <see cref="ReconciliationIntervalOverride"/> is ignored
/// rather than returned. The override is a settable property on a
/// wholesale-bound options class, so a stray value could otherwise
/// reintroduce the non-positive interval that the clamp exists to prevent.
/// </remarks>
public TimeSpan ReconciliationInterval
{
    get
    {
        // Only a usable (strictly positive) override may bypass the clamp.
        if (ReconciliationIntervalOverride is { } overrideValue && overrideValue > TimeSpan.Zero)
        {
            return overrideValue;
        }

        // Config path: whole seconds, floored at the configured minimum.
        var resolved = TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
        return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved;
    }
}
/// <summary>
/// Maximum number of <see cref="ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit.AuditEvent"/>
@@ -67,7 +67,9 @@ public sealed class SiteEnumerator : ISiteEnumerator
continue;
}
entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress));
// The IsNullOrWhiteSpace guard above proves GrpcNodeAAddress is
// non-null here; explicit null-forgiving for clarity.
entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress!));
}
return entries;