diff --git a/docs/plans/2026-05-20-audit-log-code-roadmap.md b/docs/plans/2026-05-20-audit-log-code-roadmap.md index 9483a2b..488ac25 100644 --- a/docs/plans/2026-05-20-audit-log-code-roadmap.md +++ b/docs/plans/2026-05-20-audit-log-code-roadmap.md @@ -11,12 +11,17 @@ > > **Deferred to v1.x (out of scope, intentionally not implemented):** hash-chain tamper > evidence (`audit verify-chain` ships as a no-op stub), Parquet export (`format=parquet` -> returns HTTP 501), per-channel retention overrides. **Deferred follow-ups noted during -> implementation:** the real site→central gRPC push client (M6 wired the pull RPC + a mockable -> push seam; `NoOpSiteStreamAuditClient` remains the production binding); consolidation of the -> 4 DTO mapper copies; Site Calls UI page + its Audit drill-in; multi-value filter dimensions -> (`AuditLogQueryFilter` is single-value per dimension, so UI chips / CLI flags collapse to the -> first value); audit-results-grid drag resize/reorder UX. +> returns HTTP 501), per-channel retention overrides. **Follow-ups noted during +> implementation — now complete:** the five follow-ups deferred above (the real +> site→central push client; consolidation of the 4 DTO mapper copies; the Site Calls UI +> page + its Audit drill-in; multi-value filter dimensions; audit-results-grid drag +> resize/reorder UX) were all implemented on the `feature/audit-log-followups` branch +> per `docs/plans/2026-05-21-audit-log-followups.md`. The site→central transport shipped +> as a **ClusterClient-based push** (`ClusterClientSiteAuditClient`, reusing the same +> ClusterClient command/control transport notifications use) rather than the gRPC push +> originally sketched here — `ClusterClientSiteAuditClient` is now the production binding +> for site roles, with `NoOpSiteStreamAuditClient` retained only for central/test +> composition roots; and `AuditLogQueryFilter` is now multi-value per dimension. 
> > **For Claude:** REQUIRED SUB-SKILL FLOW per milestone: `brainstorming` → `writing-plans` → `subagent-driven-development`. Use `docs/requirements/Component-AuditLog.md` + `alog.md` as the spec; this document is the roadmap that sequences milestones and locks acceptance criteria for each. **M1 carries full TDD-level task detail; M2–M8 are milestone-shape detail and will be expanded into bite-sized plans by their own writing-plans pass when their turn comes.** diff --git a/src/ScadaLink.AuditLog/Site/Telemetry/CachedCallTelemetryForwarder.cs b/src/ScadaLink.AuditLog/Site/Telemetry/CachedCallTelemetryForwarder.cs index 7f45453..c2cea5b 100644 --- a/src/ScadaLink.AuditLog/Site/Telemetry/CachedCallTelemetryForwarder.cs +++ b/src/ScadaLink.AuditLog/Site/Telemetry/CachedCallTelemetryForwarder.cs @@ -34,15 +34,17 @@ namespace ScadaLink.AuditLog.Site.Telemetry; /// returns normally. /// /// -/// Wire push deferred to M6. M3 keeps this forwarder synchronous -/// against the local stores: there is no site→central gRPC channel yet, so -/// the RPC -/// is registered on the interface (Bundle E1) but the production binding -/// remains NoOpSiteStreamAuditClient. Once M6 wires a real client the -/// drain pattern from SiteAuditTelemetryActor can be reused — the -/// AuditEvent rows already live in SQLite tagged -/// , so a single drain loop sweeps -/// both M2 and M3 emissions. +/// Local-write only — the wire push is the drain actor's job. This +/// forwarder is deliberately synchronous against the two site-local SQLite +/// stores and never pushes to central itself. The site→central transport is +/// now live: ClusterClientSiteAuditClient is the production binding of +/// on site roles (with +/// NoOpSiteStreamAuditClient retained only for central/test composition +/// roots). The push happens out-of-band: +/// sweeps the AuditEvent rows this forwarder wrote — they live in SQLite +/// tagged — and drains them to central +/// via that client. 
A single drain loop therefore covers both the audit-only +/// emissions and the cached-call emissions this forwarder produces. /// /// public sealed class CachedCallTelemetryForwarder : ICachedCallTelemetryForwarder diff --git a/src/ScadaLink.Communication/CommunicationService.cs b/src/ScadaLink.Communication/CommunicationService.cs index e7cadf9..4ef5169 100644 --- a/src/ScadaLink.Communication/CommunicationService.cs +++ b/src/ScadaLink.Communication/CommunicationService.cs @@ -355,6 +355,14 @@ public class CommunicationService /// owning site and replies a carrying a /// distinct site-unreachable outcome. Central never mutates the central /// SiteCalls mirror row. + /// + /// This outer Ask uses + /// CommunicationOptions.QueryTimeout (default 30s), which must outlive the inner site relay Ask the + /// SiteCallAuditActor issues with SiteCallAuditOptions.RelayTimeout + /// (default 10s). The inner relay must time out first so its distinct + /// SiteUnreachable outcome reaches us; were this outer Ask to expire + /// first, that outcome would be lost to a generic Ask-timeout exception. + /// /// public async Task RetrySiteCallAsync( RetrySiteCallRequest request, CancellationToken cancellationToken = default) diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index 85934b4..3a9d7ea 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -681,9 +681,10 @@ akka {{ // Per Bundle E's brief: the SiteAuditTelemetryActor takes its // collaborators through its constructor, so we resolve them from DI // and pass them in via Props.Create rather than relying on a future - // FactoryProvider. This also lets the M6 follow-up swap the - // NoOpSiteStreamAuditClient registration for the real gRPC client - // without touching this site wiring. + // FactoryProvider. 
The real site→central client is constructed and + // wired immediately below: a ClusterClientSiteAuditClient (ClusterClient + // transport, not gRPC) replaces the DI-default NoOpSiteStreamAuditClient + // for site roles, without disturbing the rest of this wiring. var siteAuditOptions = _serviceProvider .GetRequiredService>(); var siteAuditQueue = _serviceProvider diff --git a/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs b/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs index 53fe6b9..d29b71d 100644 --- a/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs +++ b/src/ScadaLink.SiteCallAudit/SiteCallAuditOptions.cs @@ -32,6 +32,16 @@ public class SiteCallAuditOptions /// reports a SiteUnreachable outcome. Default 10 seconds: long enough /// to absorb a healthy cross-cluster round-trip, short enough that an /// operator clicking Retry on an offline site gets a fast, honest answer. + /// + /// Ordering invariant: RelayTimeout must stay below + /// CommunicationOptions.QueryTimeout (default 30s), the timeout the + /// outer CommunicationService.RetrySiteCallAsync/DiscardSiteCallAsync + /// Ask of the SiteCallAuditActor uses. The outer Ask must outlive this + /// inner site relay Ask so the inner relay times out first and yields the + /// distinct SiteUnreachable outcome; if the outer Ask expired first, + /// that outcome would be lost to a generic Ask-timeout exception. The + /// defaults (10s < 30s) satisfy this — keep the gap when tuning either. + /// /// public TimeSpan RelayTimeout { get; set; } = TimeSpan.FromSeconds(10); }