fix(audit): robust central options binding + interval clamps + doc/contract fixes (review)
This commit is contained in:
@@ -17,8 +17,10 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// <para>
|
/// <para>
|
||||||
/// <see cref="IntervalOverride"/> exists for tests to drop the cadence to
|
/// <see cref="IntervalOverride"/> exists for tests to drop the cadence to
|
||||||
/// milliseconds without polluting the production config surface; production
|
/// milliseconds; production config is expected to set <see cref="IntervalHours"/>
|
||||||
/// binds <see cref="IntervalHours"/> only.
|
/// only. Because this options class is <c>Bind</c>-ed wholesale, a config value
|
||||||
|
/// at <c>AuditLog:Purge:IntervalOverride</c> would bind if present (and would
|
||||||
|
/// bypass the <see cref="Interval"/> minimum clamp) — operators must not set it.
|
||||||
/// </para>
|
/// </para>
|
||||||
/// </remarks>
|
/// </remarks>
|
||||||
public sealed class AuditLogPurgeOptions
|
public sealed class AuditLogPurgeOptions
|
||||||
@@ -29,15 +31,44 @@ public sealed class AuditLogPurgeOptions
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Test-only override for finer control over the tick cadence than
|
/// Test-only override for finer control over the tick cadence than
|
||||||
/// whole-hour resolution allows. When non-null, takes precedence over
|
/// whole-hour resolution allows. When non-null, takes precedence over
|
||||||
/// <see cref="IntervalHours"/>. Not bound from config — production
|
/// <see cref="IntervalHours"/> AND bypasses the <see cref="Interval"/>
|
||||||
/// config exposes <see cref="IntervalHours"/> only.
|
/// minimum clamp (so tests can use millisecond cadences). Production
|
||||||
|
/// config exposes <see cref="IntervalHours"/> only and never sets this
|
||||||
|
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
|
||||||
|
/// config value at <c>AuditLog:Purge:IntervalOverride</c> WOULD bind if
|
||||||
|
/// present; operators must not set it.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public TimeSpan? IntervalOverride { get; set; }
|
public TimeSpan? IntervalOverride { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Resolves the effective tick interval, honouring the test override
|
/// Minimum interval the config-bound <see cref="IntervalHours"/> can
|
||||||
/// when set. Falls back to <see cref="IntervalHours"/>.
|
/// resolve to. Clamps a misconfigured <c>IntervalHours: 0</c> (or a
|
||||||
|
/// negative value) away from <see cref="TimeSpan.Zero"/> — a zero
|
||||||
|
/// interval would make Akka's <c>ScheduleTellRepeatedlyCancelable</c>
|
||||||
|
/// spin, looping the partition drop/rebuild dance into a sustained SQL
|
||||||
|
/// outage. The test-only <see cref="IntervalOverride"/> bypasses this
|
||||||
|
/// clamp so unit tests can still drop the cadence to milliseconds.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public TimeSpan Interval =>
|
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromMinutes(1);
|
||||||
IntervalOverride ?? TimeSpan.FromHours(IntervalHours);
|
|
||||||
|
/// <summary>
|
||||||
|
/// Resolves the effective tick interval, honouring the test override
|
||||||
|
/// when set. Falls back to <see cref="IntervalHours"/>, clamped to at
|
||||||
|
/// least <see cref="MinConfiguredInterval"/> so a zero/negative config
|
||||||
|
/// value can never yield <see cref="TimeSpan.Zero"/> (which would spin
|
||||||
|
/// the scheduler).
|
||||||
|
/// </summary>
|
||||||
|
public TimeSpan Interval
|
||||||
|
{
|
||||||
|
get
|
||||||
|
{
|
||||||
|
if (IntervalOverride is { } overrideValue)
|
||||||
|
{
|
||||||
|
return overrideValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var resolved = TimeSpan.FromHours(IntervalHours);
|
||||||
|
return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,11 +9,12 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central;
|
|||||||
/// <remarks>
|
/// <remarks>
|
||||||
/// The production implementation wraps <c>ISiteRepository.GetAllSitesAsync</c>
|
/// The production implementation wraps <c>ISiteRepository.GetAllSitesAsync</c>
|
||||||
/// and projects each <c>Site</c> to a <see cref="SiteEntry"/> using the
|
/// and projects each <c>Site</c> to a <see cref="SiteEntry"/> using the
|
||||||
/// site's configured <c>GrpcNodeAAddress</c> (falling back to
|
/// site's configured <c>GrpcNodeAAddress</c>. This is a NodeA-only first cut:
|
||||||
/// <c>GrpcNodeBAddress</c> when NodeA is unset). Sites with NO gRPC address
|
/// sites with a blank <c>GrpcNodeAAddress</c> are silently SKIPPED — the
|
||||||
/// configured are silently skipped — the reconciliation pull cannot reach
|
/// reconciliation pull cannot reach them, but absence of an address is a
|
||||||
/// them, but absence of an address is a configuration decision, not a runtime
|
/// configuration decision, not a runtime error. NodeB-fallback endpoint
|
||||||
/// error.
|
/// selection (dial NodeB when NodeA is unset/unreachable) is a follow-up
|
||||||
|
/// (mirrors the comment in <c>SiteEnumerator.cs</c>).
|
||||||
/// </remarks>
|
/// </remarks>
|
||||||
public interface ISiteEnumerator
|
public interface ISiteEnumerator
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -182,6 +182,10 @@ public class SiteAuditReconciliationActor : ReceiveActor
|
|||||||
IReadOnlyList<SiteEntry> sites;
|
IReadOnlyList<SiteEntry> sites;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
// No ambient CancellationToken in a ReceiveActor message handler —
|
||||||
|
// CancellationToken.None (the EnumerateAsync default) is intentional.
|
||||||
|
// The work is bounded by the 5-min reconciliation tick plus the
|
||||||
|
// 10s graceful-stop drain on PhaseClusterLeave.
|
||||||
sites = await _sites.EnumerateAsync().ConfigureAwait(false);
|
sites = await _sites.EnumerateAsync().ConfigureAwait(false);
|
||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
|
|||||||
@@ -31,18 +31,45 @@ public sealed class SiteAuditReconciliationOptions
|
|||||||
/// <summary>
|
/// <summary>
|
||||||
/// Test-only override for finer control over the tick cadence than
|
/// Test-only override for finer control over the tick cadence than
|
||||||
/// whole-second resolution allows. When non-null, takes precedence over
|
/// whole-second resolution allows. When non-null, takes precedence over
|
||||||
/// <see cref="ReconciliationIntervalSeconds"/>. Not bound from config —
|
/// <see cref="ReconciliationIntervalSeconds"/> AND bypasses the
|
||||||
/// production config exposes <see cref="ReconciliationIntervalSeconds"/>
|
/// <see cref="ReconciliationInterval"/> minimum clamp (so tests can use
|
||||||
/// only.
|
/// millisecond cadences). Production config exposes
|
||||||
|
/// <see cref="ReconciliationIntervalSeconds"/> only and never sets this
|
||||||
|
/// knob — but because the options class is <c>Bind</c>-ed wholesale, a
|
||||||
|
/// config value at <c>AuditLog:Reconciliation:ReconciliationIntervalOverride</c>
|
||||||
|
/// WOULD bind if present; operators must not set it.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public TimeSpan? ReconciliationIntervalOverride { get; set; }
|
public TimeSpan? ReconciliationIntervalOverride { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Resolves the effective tick interval, honouring the test override when
|
/// Minimum interval the config-bound <see cref="ReconciliationIntervalSeconds"/>
|
||||||
/// set. Falls back to <see cref="ReconciliationIntervalSeconds"/>.
|
/// can resolve to. Clamps a misconfigured <c>ReconciliationIntervalSeconds: 0</c>
|
||||||
|
/// (or a negative value) away from <see cref="TimeSpan.Zero"/>, which would make
|
||||||
|
/// Akka's <c>ScheduleTellRepeatedlyCancelable</c> spin. The test-only
|
||||||
|
/// <see cref="ReconciliationIntervalOverride"/> bypasses this clamp so unit tests
|
||||||
|
/// can still drop the cadence to milliseconds.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public TimeSpan ReconciliationInterval =>
|
private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromSeconds(1);
|
||||||
ReconciliationIntervalOverride ?? TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
|
|
||||||
|
/// <summary>
|
||||||
|
/// Resolves the effective tick interval, honouring the test override when
|
||||||
|
/// set. Falls back to <see cref="ReconciliationIntervalSeconds"/>, clamped to at
|
||||||
|
/// least <see cref="MinConfiguredInterval"/> so a zero/negative config value can
|
||||||
|
/// never yield <see cref="TimeSpan.Zero"/> (which would spin the scheduler).
|
||||||
|
/// </summary>
|
||||||
|
public TimeSpan ReconciliationInterval
|
||||||
|
{
|
||||||
|
get
|
||||||
|
{
|
||||||
|
if (ReconciliationIntervalOverride is { } overrideValue)
|
||||||
|
{
|
||||||
|
return overrideValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var resolved = TimeSpan.FromSeconds(ReconciliationIntervalSeconds);
|
||||||
|
return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Maximum number of <see cref="ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit.AuditEvent"/>
|
/// Maximum number of <see cref="ZB.MOM.WW.ScadaBridge.Commons.Entities.Audit.AuditEvent"/>
|
||||||
|
|||||||
@@ -67,7 +67,9 @@ public sealed class SiteEnumerator : ISiteEnumerator
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress));
|
// The IsNullOrWhiteSpace guard above proves GrpcNodeAAddress is
|
||||||
|
// non-null here; explicit null-forgiving for clarity.
|
||||||
|
entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress!));
|
||||||
}
|
}
|
||||||
|
|
||||||
return entries;
|
return entries;
|
||||||
|
|||||||
@@ -333,6 +333,24 @@ public static class ServiceCollectionExtensions
|
|||||||
.Bind(config.GetSection(PartitionMaintenanceSectionName));
|
.Bind(config.GetSection(PartitionMaintenanceSectionName));
|
||||||
services.AddHostedService<AuditLogPartitionMaintenanceService>();
|
services.AddHostedService<AuditLogPartitionMaintenanceService>();
|
||||||
|
|
||||||
|
// I1 (review): bind the two central-singleton options HERE rather than in
|
||||||
|
// AddAuditLogCentralReconciliationClient. AkkaHostedService.RegisterCentralActors
|
||||||
|
// resolves IOptions<AuditLogPurgeOptions> / <SiteAuditReconciliationOptions>
|
||||||
|
// via GetRequiredService when it wires the AuditLogPurgeActor +
|
||||||
|
// SiteAuditReconciliationActor singletons; AddAuditLogCentralMaintenance is
|
||||||
|
// ALWAYS called on the central path (the reconciliation-client helper is the
|
||||||
|
// one that could in principle be dropped), so binding the options here means
|
||||||
|
// the singletons get a valid IOptions even if the gRPC-client helper is not
|
||||||
|
// wired — instead of a cryptic InvalidOperationException at GetRequiredService.
|
||||||
|
// Defaults are fine when the section is absent (24 h purge cadence /
|
||||||
|
// 5 min reconciliation tick); production exposes IntervalHours /
|
||||||
|
// ReconciliationIntervalSeconds only — the test-only *Override knobs are
|
||||||
|
// not intended to be set from config (see the options classes' remarks).
|
||||||
|
services.AddOptions<AuditLogPurgeOptions>()
|
||||||
|
.Bind(config.GetSection(PurgeSectionName));
|
||||||
|
services.AddOptions<SiteAuditReconciliationOptions>()
|
||||||
|
.Bind(config.GetSection(ReconciliationSectionName));
|
||||||
|
|
||||||
// M6 Bundle E (T8 + T9): central health snapshot — a single object
|
// M6 Bundle E (T8 + T9): central health snapshot — a single object
|
||||||
// that owns the CentralAuditWriteFailures + AuditRedactionFailure
|
// that owns the CentralAuditWriteFailures + AuditRedactionFailure
|
||||||
// Interlocked counters AND surfaces them on
|
// Interlocked counters AND surfaces them on
|
||||||
@@ -397,19 +415,21 @@ public static class ServiceCollectionExtensions
|
|||||||
/// </para>
|
/// </para>
|
||||||
/// <para>
|
/// <para>
|
||||||
/// The production <see cref="ISiteEnumerator"/> (<see cref="SiteEnumerator"/>,
|
/// The production <see cref="ISiteEnumerator"/> (<see cref="SiteEnumerator"/>,
|
||||||
/// wrapping the scoped <c>ISiteRepository</c>) IS registered here, alongside
|
/// wrapping the scoped <c>ISiteRepository</c>) IS registered here — so the
|
||||||
/// the <see cref="AuditLogPurgeOptions"/> + <see cref="SiteAuditReconciliationOptions"/>
|
/// <see cref="SiteAuditReconciliationActor"/> singleton wired in the Host can
|
||||||
/// bindings — so the two central singletons wired in the Host
|
/// resolve its enumerator + gRPC client from this central-only helper. Keeping
|
||||||
/// (<see cref="AuditLogPurgeActor"/> + <see cref="SiteAuditReconciliationActor"/>)
|
/// the enumerator on this central path preserves the "every <c>Add*</c> call is
|
||||||
/// can resolve their collaborators + options from the same central-only
|
/// safe from any composition root" invariant: a site host never calls this
|
||||||
/// helper. Keeping the enumerator + options on this central path preserves
|
/// helper, so it never registers a site-dialing enumerator. The
|
||||||
/// the "every <c>Add*</c> call is safe from any composition root" invariant:
|
/// <see cref="AuditLogPurgeOptions"/> + <see cref="SiteAuditReconciliationOptions"/>
|
||||||
/// a site host never calls this helper, so it never registers a
|
/// bindings live in <see cref="AddAuditLogCentralMaintenance"/> instead (I1
|
||||||
/// site-dialing enumerator.
|
/// review fix) — that helper is unconditionally called on the central path, so
|
||||||
|
/// the two maintenance singletons get a valid <c>IOptions</c> even if this
|
||||||
|
/// gRPC-client helper is ever dropped.
|
||||||
/// </para>
|
/// </para>
|
||||||
/// </remarks>
|
/// </remarks>
|
||||||
/// <param name="services">The service collection to register into.</param>
|
/// <param name="services">The service collection to register into.</param>
|
||||||
/// <param name="config">Application configuration used to bind the purge + reconciliation options sections.</param>
|
/// <param name="config">Application configuration used to bind the gRPC client's communication options (purge + reconciliation options are bound by <see cref="AddAuditLogCentralMaintenance"/>).</param>
|
||||||
/// <returns>The same <see cref="IServiceCollection"/> for chaining.</returns>
|
/// <returns>The same <see cref="IServiceCollection"/> for chaining.</returns>
|
||||||
public static IServiceCollection AddAuditLogCentralReconciliationClient(
|
public static IServiceCollection AddAuditLogCentralReconciliationClient(
|
||||||
this IServiceCollection services,
|
this IServiceCollection services,
|
||||||
@@ -425,15 +445,12 @@ public static class ServiceCollectionExtensions
|
|||||||
// in SiteAuditReconciliationActor / AuditLogPurgeActor).
|
// in SiteAuditReconciliationActor / AuditLogPurgeActor).
|
||||||
services.TryAddSingleton<ISiteEnumerator>(sp => new SiteEnumerator(sp));
|
services.TryAddSingleton<ISiteEnumerator>(sp => new SiteEnumerator(sp));
|
||||||
|
|
||||||
// Bind the two central-singleton options to their config sections.
|
// I1 (review): the AuditLogPurgeOptions / SiteAuditReconciliationOptions
|
||||||
// Defaults are fine when the section is absent (24 h purge cadence /
|
// bindings moved to AddAuditLogCentralMaintenance — that helper is always
|
||||||
// 5 min reconciliation tick); production exposes IntervalHours /
|
// called on the central path, so the two maintenance singletons resolve a
|
||||||
// ReconciliationIntervalSeconds only — the test-only *Override knobs
|
// valid IOptions even if this gRPC-client helper is ever dropped. Keep the
|
||||||
// are intentionally not bound.
|
// ISiteEnumerator + gRPC client registrations here (they dial sites and are
|
||||||
services.AddOptions<AuditLogPurgeOptions>()
|
// central-only by design).
|
||||||
.Bind(config.GetSection(PurgeSectionName));
|
|
||||||
services.AddOptions<SiteAuditReconciliationOptions>()
|
|
||||||
.Bind(config.GetSection(ReconciliationSectionName));
|
|
||||||
|
|
||||||
// The invoker owns the per-endpoint GrpcChannel cache, so it must be a
|
// The invoker owns the per-endpoint GrpcChannel cache, so it must be a
|
||||||
// singleton — a fresh invoker per resolution would leak channels.
|
// singleton — a fresh invoker per resolution would leak channels.
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ using Microsoft.Extensions.DependencyInjection;
|
|||||||
using Microsoft.Extensions.Hosting;
|
using Microsoft.Extensions.Hosting;
|
||||||
using Microsoft.Extensions.Options;
|
using Microsoft.Extensions.Options;
|
||||||
using ZB.MOM.WW.ScadaBridge.AuditLog;
|
using ZB.MOM.WW.ScadaBridge.AuditLog;
|
||||||
|
using ZB.MOM.WW.ScadaBridge.AuditLog.Central;
|
||||||
using ZB.MOM.WW.ScadaBridge.AuditLog.Site;
|
using ZB.MOM.WW.ScadaBridge.AuditLog.Site;
|
||||||
using ZB.MOM.WW.ScadaBridge.AuditLog.Site.Telemetry;
|
using ZB.MOM.WW.ScadaBridge.AuditLog.Site.Telemetry;
|
||||||
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
|
using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure;
|
||||||
@@ -238,6 +239,36 @@ public class CentralAuditWiringTests : IDisposable
|
|||||||
Assert.NotNull(forwarder);
|
Assert.NotNull(forwarder);
|
||||||
Assert.IsType<CachedCallTelemetryForwarder>(forwarder);
|
Assert.IsType<CachedCallTelemetryForwarder>(forwarder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// I4 (review): the central composition root must register the production
|
||||||
|
/// reconciliation collaborators via
|
||||||
|
/// <c>AddAuditLogCentralReconciliationClient</c>. Asserting the concrete
|
||||||
|
/// implementations resolve here is a faster, clearer signal than a runtime
|
||||||
|
/// "actor not found" / cryptic <c>GetRequiredService</c> throw in
|
||||||
|
/// <c>AkkaHostedService.RegisterCentralActors</c> if that helper is ever
|
||||||
|
/// dropped from <c>Program.cs</c>.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public void Central_Resolves_ISiteEnumerator_AsSiteEnumerator()
|
||||||
|
{
|
||||||
|
var enumerator = _factory.Services.GetService<ISiteEnumerator>();
|
||||||
|
Assert.NotNull(enumerator);
|
||||||
|
Assert.IsType<SiteEnumerator>(enumerator);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// I4 (review): companion to <see cref="Central_Resolves_ISiteEnumerator_AsSiteEnumerator"/>
|
||||||
|
/// — the production gRPC pull client must resolve on the central composition
|
||||||
|
/// root so the SiteAuditReconciliationActor singleton can dial sites.
|
||||||
|
/// </summary>
|
||||||
|
[Fact]
|
||||||
|
public void Central_Resolves_IPullAuditEventsClient_AsGrpcClient()
|
||||||
|
{
|
||||||
|
var client = _factory.Services.GetService<IPullAuditEventsClient>();
|
||||||
|
Assert.NotNull(client);
|
||||||
|
Assert.IsType<GrpcPullAuditEventsClient>(client);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|||||||
Reference in New Issue
Block a user