fix(audit): robust central options binding + interval clamps + doc/contract fixes (review)
This commit is contained in:
@@ -17,8 +17,10 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <see cref="IntervalOverride"/> exists for tests to drop the cadence to
|
||||
/// milliseconds without polluting the production config surface; production
|
||||
/// binds <see cref="IntervalHours"/> only.
|
||||
/// milliseconds; production config is expected to set <see cref="IntervalHours"/>
|
||||
/// only. Because this options class is <c>Bind</c>-ed wholesale, a config value
|
||||
/// at <c>AuditLog:Purge:IntervalOverride</c> would bind if present (and would
|
||||
/// bypass the <see cref="Interval"/> minimum clamp) — operators must not set it.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class AuditLogPurgeOptions
|
||||
@@ -29,15 +31,44 @@ public sealed class AuditLogPurgeOptions
|
||||
/// <summary>
|
||||
/// Test-only override for finer control over the tick cadence than
|
||||
/// whole-hour resolution allows. When non-null, takes precedence over
|
||||
/// <see cref="IntervalHours"/>. Not bound from config — production
|
||||
/// config exposes <see cref="IntervalHours"/> only.
|
||||
/// <see cref="IntervalHours"/> AND bypasses the <see cref="Interval"/>
|
||||
/// minimum clamp (so tests can use millisecond cadences). Production
|
||||
/// config exposes <see cref="IntervalHours"/> only and never sets this
|
||||
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
|
||||
/// config value at <c>AuditLog:Purge:IntervalOverride</c> WOULD bind if
|
||||
/// present; operators must not set it.
|
||||
/// </summary>
|
||||
public TimeSpan? IntervalOverride { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the effective tick interval, honouring the test override
|
||||
/// when set. Falls back to <see cref="IntervalHours"/>.
|
||||
/// Minimum interval the config-bound <see cref="IntervalHours"/> can
|
||||
/// resolve to. Clamps a misconfigured <c>IntervalHours: 0</c> (or a
|
||||
/// negative value) away from <see cref="TimeSpan.Zero"/> — a zero
|
||||
/// interval would make Akka's <c>ScheduleTellRepeatedlyCancelable</c>
|
||||
/// spin, looping the partition drop/rebuild dance into a sustained SQL
|
||||
/// outage. The test-only <see cref="IntervalOverride"/> bypasses this
|
||||
/// clamp so unit tests can still drop the cadence to milliseconds.
|
||||
/// </summary>
|
||||
public TimeSpan Interval =>
|
||||
IntervalOverride ?? TimeSpan.FromHours(IntervalHours);
|
||||
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromMinutes(1);
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the effective tick interval, honouring the test override
|
||||
/// when set. Falls back to <see cref="IntervalHours"/>, clamped to at
|
||||
/// least <see cref="MinConfiguredInterval"/> so a zero/negative config
|
||||
/// value can never yield <see cref="TimeSpan.Zero"/> (which would spin
|
||||
/// the scheduler).
|
||||
/// </summary>
|
||||
public TimeSpan Interval
|
||||
{
|
||||
get
|
||||
{
|
||||
if (IntervalOverride is { } overrideValue)
|
||||
{
|
||||
return overrideValue;
|
||||
}
|
||||
|
||||
var resolved = TimeSpan.FromHours(IntervalHours);
|
||||
return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,11 +9,12 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
|
||||
/// <remarks>
|
||||
/// The production implementation wraps <c>ISiteRepository.GetAllSitesAsync</c>
|
||||
/// and projects each <c>Site</c> to a <see cref="SiteEntry"/> using the
|
||||
/// site's configured <c>GrpcNodeAAddress</c> (falling back to
|
||||
/// <c>GrpcNodeBAddress</c> when NodeA is unset). Sites with NO gRPC address
|
||||
/// configured are silently skipped — the reconciliation pull cannot reach
|
||||
/// them, but absence of an address is a configuration decision, not a runtime
|
||||
/// error.
|
||||
/// site's configured <c>GrpcNodeAAddress</c>. This is a NodeA-only first cut:
|
||||
/// sites with a blank <c>GrpcNodeAAddress</c> are silently SKIPPED — the
|
||||
/// reconciliation pull cannot reach them, but absence of an address is a
|
||||
/// configuration decision, not a runtime error. NodeB-fallback endpoint
|
||||
/// selection (dial NodeB when NodeA is unset/unreachable) is a follow-up
|
||||
/// (mirrors the comment in <c>SiteEnumerator.cs</c>).
|
||||
/// </remarks>
|
||||
public interface ISiteEnumerator
|
||||
{
|
||||
|
||||
@@ -182,6 +182,10 @@ public class SiteAuditReconciliationActor : ReceiveActor
|
||||
IReadOnlyList<SiteEntry> sites;
|
||||
try
|
||||
{
|
||||
// No ambient CancellationToken in a ReceiveActor message handler —
|
||||
// CancellationToken.None (the EnumerateAsync default) is intentional.
|
||||
// The work is bounded by the 5-min reconciliation tick plus the
|
||||
// 10s graceful-stop drain on PhaseClusterLeave.
|
||||
sites = await _sites.EnumerateAsync().ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
@@ -31,18 +31,45 @@ public sealed class SiteAuditReconciliationOptions
|
||||
/// <summary>
|
||||
/// Test-only override for finer control over the tick cadence than
|
||||
/// whole-second resolution allows. When non-null, takes precedence over
|
||||
/// <see cref="ReconciliationIntervalSeconds"/>. Not bound from config —
|
||||
/// production config exposes <see cref="ReconciliationIntervalSeconds"/>
|
||||
/// only.
|
||||
/// <see cref="ReconciliationIntervalSeconds"/> AND bypasses the
|
||||
/// <see cref="ReconciliationInterval"/> minimum clamp (so tests can use
|
||||
/// millisecond cadences). Production config exposes
|
||||
/// <see cref="ReconciliationIntervalSeconds"/> only and never sets this
|
||||
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
|
||||
/// config value at <c>AuditLog:Reconciliation:ReconciliationIntervalOverride</c>
|
||||
/// WOULD bind if present; operators must not set it.
|
||||
/// </summary>
|
||||
public TimeSpan? ReconciliationIntervalOverride { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the effective tick interval, honouring the test override when
|
||||
/// set. Falls back to <see cref="ReconciliationIntervalSeconds"/>.
|
||||
/// Minimum interval the config-bound <see cref="ReconciliationIntervalSeconds"/>
|
||||
/// can resolve to. Clamps a misconfigured <c>ReconciliationIntervalSeconds: 0</c>
|
||||
/// (or a negative value) away from <see cref="TimeSpan.Zero"/>, which would make
|
||||
/// Akka's <c>ScheduleTellRepeatedlyCancelable</c> spin. The test-only
|
||||
/// <see cref="ReconciliationIntervalOverride"/> bypasses this clamp so unit tests
|
||||
/// can still drop the cadence to milliseconds.
|
||||
/// </summary>
|
||||
public TimeSpan ReconciliationInterval =>
|
||||
ReconciliationIntervalOverride ?? TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
|
||||
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromSeconds(1);
|
||||
|
||||
/// <summary>
|
||||
/// Resolves the effective tick interval, honouring the test override when
|
||||
/// set. Falls back to <see cref="ReconciliationIntervalSeconds"/>, clamped to at
|
||||
/// least <see cref="MinConfiguredInterval"/> so a zero/negative config value can
|
||||
/// never yield <see cref="TimeSpan.Zero"/> (which would spin the scheduler).
|
||||
/// </summary>
|
||||
public TimeSpan ReconciliationInterval
|
||||
{
|
||||
get
|
||||
{
|
||||
if (ReconciliationIntervalOverride is { } overrideValue)
|
||||
{
|
||||
return overrideValue;
|
||||
}
|
||||
|
||||
var resolved = TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
|
||||
return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Maximum number of <see cref="ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit.AuditEvent"/>
|
||||
|
||||
@@ -67,7 +67,9 @@ public sealed class SiteEnumerator : ISiteEnumerator
|
||||
continue;
|
||||
}
|
||||
|
||||
entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress));
|
||||
// The IsNullOrWhiteSpace guard above proves GrpcNodeAAddress is
|
||||
// non-null here; explicit null-forgiving for clarity.
|
||||
entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress!));
|
||||
}
|
||||
|
||||
return entries;
|
||||
|
||||
@@ -333,6 +333,24 @@ public static class ServiceCollectionExtensions
|
||||
.Bind(config.GetSection(PartitionMaintenanceSectionName));
|
||||
services.AddHostedService<AuditLogPartitionMaintenanceService>();
|
||||
|
||||
// I1 (review): bind the two central-singleton options HERE rather than in
|
||||
// AddAuditLogCentralReconciliationClient. AkkaHostedService.RegisterCentralActors
|
||||
// resolves IOptions<AuditLogPurgeOptions> / <SiteAuditReconciliationOptions>
|
||||
// via GetRequiredService when it wires the AuditLogPurgeActor +
|
||||
// SiteAuditReconciliationActor singletons; AddAuditLogCentralMaintenance is
|
||||
// ALWAYS called on the central path (the reconciliation-client helper is the
|
||||
// one that could in principle be dropped), so binding the options here means
|
||||
// the singletons get a valid IOptions even if the gRPC-client helper is not
|
||||
// wired — instead of a cryptic InvalidOperationException at GetRequiredService.
|
||||
// Defaults are fine when the section is absent (24 h purge cadence /
|
||||
// 5 min reconciliation tick); production exposes IntervalHours /
|
||||
// ReconciliationIntervalSeconds only — the test-only *Override knobs are
|
||||
// not intended to be set from config (see the options classes' remarks).
|
||||
services.AddOptions<AuditLogPurgeOptions>()
|
||||
.Bind(config.GetSection(PurgeSectionName));
|
||||
services.AddOptions<SiteAuditReconciliationOptions>()
|
||||
.Bind(config.GetSection(ReconciliationSectionName));
|
||||
|
||||
// M6 Bundle E (T8 + T9): central health snapshot — a single object
|
||||
// that owns the CentralAuditWriteFailures + AuditRedactionFailure
|
||||
// Interlocked counters AND surfaces them on
|
||||
@@ -397,19 +415,21 @@ public static class ServiceCollectionExtensions
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// The production <see cref="ISiteEnumerator"/> (<see cref="SiteEnumerator"/>,
|
||||
/// wrapping the scoped <c>ISiteRepository</c>) IS registered here, alongside
|
||||
/// the <see cref="AuditLogPurgeOptions"/> + <see cref="SiteAuditReconciliationOptions"/>
|
||||
/// bindings — so the two central singletons wired in the Host
|
||||
/// (<see cref="AuditLogPurgeActor"/> + <see cref="SiteAuditReconciliationActor"/>)
|
||||
/// can resolve their collaborators + options from the same central-only
|
||||
/// helper. Keeping the enumerator + options on this central path preserves
|
||||
/// the "every <c>Add*</c> call is safe from any composition root" invariant:
|
||||
/// a site host never calls this helper, so it never registers a
|
||||
/// site-dialing enumerator.
|
||||
/// wrapping the scoped <c>ISiteRepository</c>) IS registered here — so the
|
||||
/// <see cref="SiteAuditReconciliationActor"/> singleton wired in the Host can
|
||||
/// resolve its enumerator + gRPC client from this central-only helper. Keeping
|
||||
/// the enumerator on this central path preserves the "every <c>Add*</c> call is
|
||||
/// safe from any composition root" invariant: a site host never calls this
|
||||
/// helper, so it never registers a site-dialing enumerator. The
|
||||
/// <see cref="AuditLogPurgeOptions"/> + <see cref="SiteAuditReconciliationOptions"/>
|
||||
/// bindings live in <see cref="AddAuditLogCentralMaintenance"/> instead (I1
|
||||
/// review fix) — that helper is unconditionally called on the central path, so
|
||||
/// the two maintenance singletons get a valid <c>IOptions</c> even if this
|
||||
/// gRPC-client helper is ever dropped.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
/// <param name="services">The service collection to register into.</param>
|
||||
/// <param name="config">Application configuration used to bind the purge + reconciliation options sections.</param>
|
||||
/// <param name="config">Application configuration used to bind the gRPC client's communication options (purge + reconciliation options are bound by <see cref="AddAuditLogCentralMaintenance"/>).</param>
|
||||
/// <returns>The same <see cref="IServiceCollection"/> for chaining.</returns>
|
||||
public static IServiceCollection AddAuditLogCentralReconciliationClient(
|
||||
this IServiceCollection services,
|
||||
@@ -425,15 +445,12 @@ public static class ServiceCollectionExtensions
|
||||
// in SiteAuditReconciliationActor / AuditLogPurgeActor).
|
||||
services.TryAddSingleton<ISiteEnumerator>(sp => new SiteEnumerator(sp));
|
||||
|
||||
// Bind the two central-singleton options to their config sections.
|
||||
// Defaults are fine when the section is absent (24 h purge cadence /
|
||||
// 5 min reconciliation tick); production exposes IntervalHours /
|
||||
// ReconciliationIntervalSeconds only — the test-only *Override knobs
|
||||
// are intentionally not bound.
|
||||
services.AddOptions<AuditLogPurgeOptions>()
|
||||
.Bind(config.GetSection(PurgeSectionName));
|
||||
services.AddOptions<SiteAuditReconciliationOptions>()
|
||||
.Bind(config.GetSection(ReconciliationSectionName));
|
||||
// I1 (review): the AuditLogPurgeOptions / SiteAuditReconciliationOptions
|
||||
// bindings moved to AddAuditLogCentralMaintenance — that helper is always
|
||||
// called on the central path, so the two maintenance singletons resolve a
|
||||
// valid IOptions even if this gRPC-client helper is ever dropped. Keep the
|
||||
// ISiteEnumerator + gRPC client registrations here (they dial sites and are
|
||||
// central-only by design).
|
||||
|
||||
// The invoker owns the per-endpoint GrpcChannel cache, so it must be a
|
||||
// singleton — a fresh invoker per resolution would leak channels.
|
||||
|
||||
@@ -7,6 +7,7 @@ using Microsoft.Extensions.DependencyInjection;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
using Microsoft.Extensions.Options;
|
||||
using ZB.MOM.WW.ScadaBridge.AuditLog;
|
||||
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
|
||||
using ZB.MOM.WW.ScadaBridge.AuditLog.Site;
|
||||
using ZB.MOM.WW.ScadaBridge.AuditLog.Site.Telemetry;
|
||||
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
|
||||
@@ -238,6 +239,36 @@ public class CentralAuditWiringTests : IDisposable
|
||||
Assert.NotNull(forwarder);
|
||||
Assert.IsType<CachedCallTelemetryForwarder>(forwarder);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// I4 (review): the central composition root must register the production
|
||||
/// reconciliation collaborators via
|
||||
/// <c>AddAuditLogCentralReconciliationClient</c>. Asserting the concrete
|
||||
/// implementations resolve here is a faster, clearer signal than a runtime
|
||||
/// "actor not found" / cryptic <c>GetRequiredService</c> throw in
|
||||
/// <c>AkkaHostedService.RegisterCentralActors</c> if that helper is ever
|
||||
/// dropped from <c>Program.cs</c>.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void Central_Resolves_ISiteEnumerator_AsSiteEnumerator()
|
||||
{
|
||||
var enumerator = _factory.Services.GetService<ISiteEnumerator>();
|
||||
Assert.NotNull(enumerator);
|
||||
Assert.IsType<SiteEnumerator>(enumerator);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// I4 (review): companion to <see cref="Central_Resolves_ISiteEnumerator_AsSiteEnumerator"/>
|
||||
/// — the production gRPC pull client must resolve on the central composition
|
||||
/// root so the SiteAuditReconciliationActor singleton can dial sites.
|
||||
/// </summary>
|
||||
[Fact]
|
||||
public void Central_Resolves_IPullAuditEventsClient_AsGrpcClient()
|
||||
{
|
||||
var client = _factory.Services.GetService<IPullAuditEventsClient>();
|
||||
Assert.NotNull(client);
|
||||
Assert.IsType<GrpcPullAuditEventsClient>(client);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
Reference in New Issue
Block a user