diff --git a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/AuditLogPurgeOptions.cs b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/AuditLogPurgeOptions.cs index 43d5bc74..0ba1bc00 100644 --- a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/AuditLogPurgeOptions.cs +++ b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/AuditLogPurgeOptions.cs @@ -17,8 +17,10 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central; /// /// /// exists for tests to drop the cadence to -/// milliseconds without polluting the production config surface; production -/// binds only. +/// milliseconds; production config is expected to set +/// only. Because this options class is Bind-ed wholesale, a config value +/// at AuditLog:Purge:IntervalOverride would bind if present (and would +/// bypass the minimum clamp) — operators must not set it. /// /// public sealed class AuditLogPurgeOptions @@ -29,15 +31,44 @@ public sealed class AuditLogPurgeOptions /// /// Test-only override for finer control over the tick cadence than /// whole-hour resolution allows. When non-null, takes precedence over - /// . Not bound from config — production - /// config exposes only. + /// AND bypasses the + /// minimum clamp (so tests can use millisecond cadences). Production + /// config exposes only and never sets this + /// knob — but because the options class is Bind-ed wholesale, a + /// config value at AuditLog:Purge:IntervalOverride WOULD bind if + /// present; operators must not set it. /// public TimeSpan? IntervalOverride { get; set; } /// - /// Resolves the effective tick interval, honouring the test override - /// when set. Falls back to . + /// Minimum interval the config-bound can + /// resolve to. Clamps a misconfigured IntervalHours: 0 (or a + /// negative value) away from — a zero + /// interval would make Akka's ScheduleTellRepeatedlyCancelable + /// spin, looping the partition drop/rebuild dance into a sustained SQL + /// outage. The test-only bypasses this + /// clamp so unit tests can still drop the cadence to milliseconds. /// - public TimeSpan Interval => - IntervalOverride ?? TimeSpan.FromHours(IntervalHours); + private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromMinutes(1); + + /// + /// Resolves the effective tick interval, honouring the test override + /// when set. Falls back to , clamped to at + /// least so a zero/negative config + /// value can never yield (which would spin + /// the scheduler). + /// + public TimeSpan Interval + { + get + { + if (IntervalOverride is { } overrideValue) + { + return overrideValue; + } + + var resolved = TimeSpan.FromHours(IntervalHours); + return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved; + } + } } diff --git a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/ISiteEnumerator.cs b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/ISiteEnumerator.cs index cc8cae1f..25a5a4c7 100644 --- a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/ISiteEnumerator.cs +++ b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/ISiteEnumerator.cs @@ -9,11 +9,12 @@ namespace ZB.MOM.WW.ScadaBridge.AuditLog.Central; /// /// The production implementation wraps ISiteRepository.GetAllSitesAsync /// and projects each Site to a using the -/// site's configured GrpcNodeAAddress (falling back to -/// GrpcNodeBAddress when NodeA is unset). Sites with NO gRPC address -/// configured are silently skipped — the reconciliation pull cannot reach -/// them, but absence of an address is a configuration decision, not a runtime -/// error. +/// site's configured GrpcNodeAAddress. This is a NodeA-only first cut: +/// sites with a blank GrpcNodeAAddress are silently SKIPPED — the +/// reconciliation pull cannot reach them, but absence of an address is a +/// configuration decision, not a runtime error. NodeB-fallback endpoint +/// selection (dial NodeB when NodeA is unset/unreachable) is a follow-up +/// (mirrors the comment in SiteEnumerator.cs). /// public interface ISiteEnumerator { diff --git a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationActor.cs b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationActor.cs index fb08bc57..8c6a297b 100644 --- a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationActor.cs +++ b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationActor.cs @@ -182,6 +182,10 @@ public class SiteAuditReconciliationActor : ReceiveActor IReadOnlyList sites; try { + // No ambient CancellationToken in a ReceiveActor message handler — + // CancellationToken.None (the EnumerateAsync default) is intentional. + // The work is bounded by the 5-min reconciliation tick plus the + // 10s graceful-stop drain on PhaseClusterLeave. sites = await _sites.EnumerateAsync().ConfigureAwait(false); } catch (Exception ex) diff --git a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationOptions.cs b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationOptions.cs index 31796ad9..b58b3fb4 100644 --- a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationOptions.cs +++ b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteAuditReconciliationOptions.cs @@ -31,18 +31,45 @@ public sealed class SiteAuditReconciliationOptions /// /// Test-only override for finer control over the tick cadence than /// whole-second resolution allows. When non-null, takes precedence over - /// . Not bound from config — - /// production config exposes - /// only. + /// AND bypasses the + /// minimum clamp (so tests can use + /// millisecond cadences). Production config exposes + /// only and never sets this + /// knob — but because the options class is Bind-ed wholesale, a + /// config value at AuditLog:Reconciliation:ReconciliationIntervalOverride + /// WOULD bind if present; operators must not set it. /// public TimeSpan? ReconciliationIntervalOverride { get; set; } /// - /// Resolves the effective tick interval, honouring the test override when - /// set. Falls back to . + /// Minimum interval the config-bound + /// can resolve to. Clamps a misconfigured ReconciliationIntervalSeconds: 0 + /// (or a negative value) away from , which would make + /// Akka's ScheduleTellRepeatedlyCancelable spin. The test-only + /// bypasses this clamp so unit tests + /// can still drop the cadence to milliseconds. /// - public TimeSpan ReconciliationInterval => - ReconciliationIntervalOverride ?? TimeSpan.FromSeconds(ReconciliationIntervalSeconds); + private static readonly TimeSpan MinConfiguredInterval = TimeSpan.FromSeconds(1); + + /// + /// Resolves the effective tick interval, honouring the test override when + /// set. Falls back to , clamped to at + /// least so a zero/negative config value can + /// never yield (which would spin the scheduler). + /// + public TimeSpan ReconciliationInterval + { + get + { + if (ReconciliationIntervalOverride is { } overrideValue) + { + return overrideValue; + } + + var resolved = TimeSpan.FromSeconds(ReconciliationIntervalSeconds); + return resolved < MinConfiguredInterval ? MinConfiguredInterval : resolved; + } + } /// /// Maximum number of diff --git a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteEnumerator.cs b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteEnumerator.cs index 159b4ae1..357ee4ee 100644 --- a/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteEnumerator.cs +++ b/src/ZB.MOM.WW.ScadaBridge.AuditLog/Central/SiteEnumerator.cs @@ -67,7 +67,9 @@ public sealed class SiteEnumerator : ISiteEnumerator continue; } - entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress)); + // The IsNullOrWhiteSpace guard above proves GrpcNodeAAddress is + // non-null here; explicit null-forgiving for clarity. + entries.Add(new SiteEntry(site.SiteIdentifier, site.GrpcNodeAAddress!)); } return entries; diff --git a/src/ZB.MOM.WW.ScadaBridge.AuditLog/ServiceCollectionExtensions.cs b/src/ZB.MOM.WW.ScadaBridge.AuditLog/ServiceCollectionExtensions.cs index c1114c1e..632e2317 100644 --- a/src/ZB.MOM.WW.ScadaBridge.AuditLog/ServiceCollectionExtensions.cs +++ b/src/ZB.MOM.WW.ScadaBridge.AuditLog/ServiceCollectionExtensions.cs @@ -333,6 +333,24 @@ public static class ServiceCollectionExtensions .Bind(config.GetSection(PartitionMaintenanceSectionName)); services.AddHostedService(); + // I1 (review): bind the two central-singleton options HERE rather than in + // AddAuditLogCentralReconciliationClient. AkkaHostedService.RegisterCentralActors + // resolves IOptions / + // via GetRequiredService when it wires the AuditLogPurgeActor + + // SiteAuditReconciliationActor singletons; AddAuditLogCentralMaintenance is + // ALWAYS called on the central path (the reconciliation-client helper is the + // one that could in principle be dropped), so binding the options here means + // the singletons get a valid IOptions even if the gRPC-client helper is not + // wired — instead of a cryptic InvalidOperationException at GetRequiredService. + // Defaults are fine when the section is absent (24 h purge cadence / + // 5 min reconciliation tick); production exposes IntervalHours / + // ReconciliationIntervalSeconds only — the test-only *Override knobs are + // not intended to be set from config (see the options classes' remarks). + services.AddOptions() + .Bind(config.GetSection(PurgeSectionName)); + services.AddOptions() + .Bind(config.GetSection(ReconciliationSectionName)); + // M6 Bundle E (T8 + T9): central health snapshot — a single object // that owns the CentralAuditWriteFailures + AuditRedactionFailure // Interlocked counters AND surfaces them on @@ -397,19 +415,21 @@ public static class ServiceCollectionExtensions /// /// /// The production (, - /// wrapping the scoped ISiteRepository) IS registered here, alongside - /// the + - /// bindings — so the two central singletons wired in the Host - /// ( + ) - /// can resolve their collaborators + options from the same central-only - /// helper. Keeping the enumerator + options on this central path preserves - /// the "every Add* call is safe from any composition root" invariant: - /// a site host never calls this helper, so it never registers a - /// site-dialing enumerator. + /// wrapping the scoped ISiteRepository) IS registered here — so the + /// singleton wired in the Host can + /// resolve its enumerator + gRPC client from this central-only helper. Keeping + /// the enumerator on this central path preserves the "every Add* call is + /// safe from any composition root" invariant: a site host never calls this + /// helper, so it never registers a site-dialing enumerator. The + /// + + /// bindings live in instead (I1 + /// review fix) — that helper is unconditionally called on the central path, so + /// the two maintenance singletons get a valid IOptions even if this + /// gRPC-client helper is ever dropped. /// /// /// The service collection to register into. - /// Application configuration used to bind the purge + reconciliation options sections. + /// Application configuration used to bind the gRPC client's communication options (purge + reconciliation options are bound by ). /// The same for chaining. public static IServiceCollection AddAuditLogCentralReconciliationClient( this IServiceCollection services, @@ -425,15 +445,12 @@ public static class ServiceCollectionExtensions // in SiteAuditReconciliationActor / AuditLogPurgeActor). services.TryAddSingleton(sp => new SiteEnumerator(sp)); - // Bind the two central-singleton options to their config sections. - // Defaults are fine when the section is absent (24 h purge cadence / - // 5 min reconciliation tick); production exposes IntervalHours / - // ReconciliationIntervalSeconds only — the test-only *Override knobs - // are intentionally not bound. - services.AddOptions() - .Bind(config.GetSection(PurgeSectionName)); - services.AddOptions() - .Bind(config.GetSection(ReconciliationSectionName)); + // I1 (review): the AuditLogPurgeOptions / SiteAuditReconciliationOptions + // bindings moved to AddAuditLogCentralMaintenance — that helper is always + // called on the central path, so the two maintenance singletons resolve a + // valid IOptions even if this gRPC-client helper is ever dropped. Keep the + // ISiteEnumerator + gRPC client registrations here (they dial sites and are + // central-only by design). // The invoker owns the per-endpoint GrpcChannel cache, so it must be a // singleton — a fresh invoker per resolution would leak channels. diff --git a/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/AkkaHostedServiceAuditWiringTests.cs b/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/AkkaHostedServiceAuditWiringTests.cs index f0f101f7..7855d876 100644 --- a/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/AkkaHostedServiceAuditWiringTests.cs +++ b/tests/ZB.MOM.WW.ScadaBridge.Host.Tests/AkkaHostedServiceAuditWiringTests.cs @@ -7,6 +7,7 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Options; using ZB.MOM.WW.ScadaBridge.AuditLog; +using ZB.MOM.WW.ScadaBridge.AuditLog.Central; using ZB.MOM.WW.ScadaBridge.AuditLog.Site; using ZB.MOM.WW.ScadaBridge.AuditLog.Site.Telemetry; using ZB.MOM.WW.ScadaBridge.ClusterInfrastructure; @@ -238,6 +239,36 @@ public class CentralAuditWiringTests : IDisposable Assert.NotNull(forwarder); Assert.IsType(forwarder); } + + /// + /// I4 (review): the central composition root must register the production + /// reconciliation collaborators via + /// AddAuditLogCentralReconciliationClient. Asserting the concrete + /// implementations resolve here is a faster, clearer signal than a runtime + /// "actor not found" / cryptic GetRequiredService throw in + /// AkkaHostedService.RegisterCentralActors if that helper is ever + /// dropped from Program.cs. + /// + [Fact] + public void Central_Resolves_ISiteEnumerator_AsSiteEnumerator() + { + var enumerator = _factory.Services.GetService(); + Assert.NotNull(enumerator); + Assert.IsType(enumerator); + } + + /// + /// I4 (review): companion to + /// — the production gRPC pull client must resolve on the central composition + /// root so the SiteAuditReconciliationActor singleton can dial sites. + /// + [Fact] + public void Central_Resolves_IPullAuditEventsClient_AsGrpcClient() + { + var client = _factory.Services.GetService(); + Assert.NotNull(client); + Assert.IsType(client); + } } ///