From 2ed5c6c3799a1d42e059a1904e7c4281ed749a40 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 28 May 2026 07:29:41 -0400 Subject: [PATCH] =?UTF-8?q?fix(concurrency/lifetime):=20close=20Theme=205?= =?UTF-8?q?=20=E2=80=94=2010=20concurrency=20/=20DI=20/=20scope=20findings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Concurrency hazards, DI lifetime hygiene, and one verify-only confirmation across 8 modules. Highlights: Concurrency: - CentralUI-030: SandboxConsoleCapture writes routed through WriteSynchronized locking on the captured StringWriter — intra-script Task fan-out can no longer corrupt the per-call buffer. - Commons-021: ExternalCallResult.Response now backed by Lazy (ExecutionAndPublication) — no more benign double-parse race. - CD-017: DeploymentManagerRepository.DeleteDeploymentRecordAsync now takes an expected RowVersion and seeds entry.OriginalValues so EF emits DELETE ... WHERE Id=@id AND RowVersion=@prior; stale RowVersion now throws DbUpdateConcurrencyException instead of silent overwrite. - Transport-009: AuditCorrelationContext.BundleImportId backed by AsyncLocal so concurrent imports get per-logical-call isolation (was a scoped instance shared via AuditService across runs). DI / lifetime: - AuditLog-003: All 3 AuditLog actor handlers switched to CreateAsyncScope + await using — async EF disposal no longer swallowed. - AuditLog-007: INodeIdentityProvider resolution standardised on GetRequiredService<>() (was mixed with GetService<>()). - AuditLog-011: AddAuditLogHealthMetricsBridge guarded by sentinel descriptor check — calling twice no longer double-registers the hosted service. Shutdown / supervision: - SiteCallAudit-002: AkkaHostedService adds a CoordinatedShutdown cluster-leave task (drain-site-call-audit-singleton) that issues a bounded GracefulStop(10s) so failover waits for in-flight upserts. Registration safety: - NS-020: AkkaHostedService now guards NotificationForwarder S&F registration with _notificationDeliveryHandlerRegistered + throws InvalidOperationException on double-register to make the regression loud. VERIFY-only closures: - NotifOutbox-005: Confirmed already closed by CD-015 fix (ac96b83) — NotificationOutboxRepository.InsertIfNotExistsAsync uses the same raw-SQL IF NOT EXISTS + 2601/2627 swallow pattern; race eliminated. 5+ new regression tests (CentralUI sandbox WhenAll, ExternalCallResult 64-reader Barrier, AuditLog DI idempotency, RowVersion stale-throw, SiteCallAudit-002 shutdown drain). Build clean; affected suites all green. README regenerated: 65 open (was 75). --- code-reviews/AuditLog/findings.md | 43 +++++++-- code-reviews/CentralUI/findings.md | 6 +- code-reviews/Commons/findings.md | 6 +- .../ConfigurationDatabase/findings.md | 14 ++- code-reviews/NotificationOutbox/findings.md | 10 +- code-reviews/NotificationService/findings.md | 6 +- code-reviews/README.md | 36 +++---- code-reviews/SiteCallAudit/findings.md | 10 +- code-reviews/Transport/findings.md | 13 ++- .../Central/AuditLogIngestActor.cs | 96 ++++++++++--------- .../Central/AuditLogPurgeActor.cs | 96 +++++++++---------- .../Central/SiteAuditReconciliationActor.cs | 42 ++++---- .../ServiceCollectionExtensions.cs | 51 ++++++---- .../ScriptAnalysis/SandboxConsoleCapture.cs | 42 ++++++-- .../IDeploymentManagerRepository.cs | 11 ++- .../Services/IExternalSystemClient.cs | 30 +++--- .../Transport/IAuditCorrelationContext.cs | 29 +++--- .../DeploymentManagerRepository.cs | 15 ++- .../Services/AuditCorrelationContext.cs | 31 +++++- .../Actors/AkkaHostedService.cs | 70 +++++++++++++- .../AddAuditLogTests.cs | 33 +++++++ .../SandboxConsoleCaptureTests.cs | 86 +++++++++++++++++ .../Services/ExternalCallResultTests.cs | 76 +++++++++++++++ .../ConcurrencyTests.cs | 83 ++++++++++++++++ .../RepositoryCoverageTests.cs | 3 +- 25 files changed, 699 insertions(+), 239 deletions(-) create mode 100644 tests/ScadaLink.CentralUI.Tests/ScriptAnalysis/SandboxConsoleCaptureTests.cs create mode 100644 tests/ScadaLink.Commons.Tests/Interfaces/Services/ExternalCallResultTests.cs diff --git a/code-reviews/AuditLog/findings.md b/code-reviews/AuditLog/findings.md index 4651e559..ce9b1923 100644 --- a/code-reviews/AuditLog/findings.md +++ b/code-reviews/AuditLog/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 6 | +| Open findings | 3 | ## Summary @@ -158,7 +158,7 @@ override as a children-only forward-compat placeholder, and state the actual |--|--| | Severity | Low | | Category | Concurrency & thread safety | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.AuditLog/Central/AuditLogIngestActor.cs:133`, `src/ScadaLink.AuditLog/Central/AuditLogPurgeActor.cs:139`, `src/ScadaLink.AuditLog/Central/SiteAuditReconciliationActor.cs:178` | **Description** @@ -184,9 +184,16 @@ pattern with `await using var scope = _services.CreateAsyncScope();`. The DI sco will dispose asynchronously and the EF Core context will be released without blocking the actor thread. -**Resolution** +**Resolution (2026-05-28):** -_Unresolved._ +All three handlers now use `CreateAsyncScope()` + `await using var scope = ...`. +`AuditLogIngestActor.OnIngestAsync` factored the per-batch loop into a shared +`IngestWithRepositoryAsync` helper so the injected-repository test ctor and +the scoped production path both reach the same body without duplicating the +per-row try/catch. `AuditLogPurgeActor.OnTickAsync` and +`SiteAuditReconciliationActor.OnTickAsync` dropped the `try/finally { scope.Dispose(); }` +pattern in favour of the `await using` lexical scope. EF Core DbContexts now +dispose asynchronously across every audit ingest path. ### AuditLog-004 — `SiteAuditReconciliationActor` advances cursor even on per-row insert failure, silently abandoning permanently-failing rows @@ -342,7 +349,7 @@ documents the choice. Behaviour for context-free callers is unchanged. |--|--| | Severity | Low | | Category | Code organization & conventions | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs:148-218` | **Description** @@ -376,9 +383,16 @@ inside `AddAuditLog` (with a sensible default — null node name returns ` d.ServiceType == typeof(INodeIdentityProvider))`). -**Resolution** +**Resolution (2026-05-28):** -_Unresolved._ +Took option (b) — standardized all three consumers on `GetRequiredService()`. +The Host (`SiteServiceRegistration.BindSharedOptions`) registers the provider on +both site and central paths per the InboundAPI-022 / Host registration sweep, +and the `AddAuditLogTests` fixture binds a `FakeNodeIdentityProvider`. A silent +`GetService()` returning null was masking a future composition root that forgot +the registration; the strict resolution surfaces that bug at first +`ICachedCallTelemetryForwarder` / `CachedCallLifecycleBridge` / `ICentralAuditWriter` +resolution instead. ### AuditLog-008 — Test composition roots that omit `IAuditPayloadFilter` silently pass UNREDACTED payloads through the writer chain @@ -510,7 +524,7 @@ existing top-level catch swallows the `OperationCanceledException`. |--|--| | Severity | Low | | Category | Code organization & conventions | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs:53-55, 263-276, 301-346` | **Description** @@ -535,6 +549,15 @@ or (b) explicitly document idempotency on the public surface of every helper and verify with a unit test in `AddAuditLogTests`. Option (a) matches the pattern other SDK extensions use and removes a foot-gun. -**Resolution** +**Resolution (2026-05-28):** -_Unresolved._ +Took option (a) for `AddAuditLogHealthMetricsBridge` — guarded by a sentinel +check on the `SiteAuditBacklogReporter` hosted-service descriptor (the helper's +exclusive contribution to the collection). A second call short-circuits before +any `Replace` / `AddHostedService` runs, so the hosted service registers +exactly once. New `AddAuditLogHealthMetricsBridge_IsIdempotent_DoesNotDoubleRegister_HostedService` +test in `AddAuditLogTests` calls the helper twice and asserts a single +`IHostedService` descriptor for `SiteAuditBacklogReporter`. The +`AddAuditLogCentralMaintenance` helper is left for a follow-up — it is only +ever called from the central composition root and the unit/integration +fixtures use disposable IServiceCollections, so the foot-gun is narrower. diff --git a/code-reviews/CentralUI/findings.md b/code-reviews/CentralUI/findings.md index f6b11932..05b95f2f 100644 --- a/code-reviews/CentralUI/findings.md +++ b/code-reviews/CentralUI/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 5 | +| Open findings | 4 | ## Summary @@ -1461,9 +1461,11 @@ plumbing CentralUI-027 will need. |--|--| | Severity | Low | | Category | Concurrency & thread safety | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.CentralUI/ScriptAnalysis/SandboxConsoleCapture.cs:31-118`; `src/ScadaLink.CentralUI/ScriptAnalysis/ScriptAnalysisService.cs:401-404` | +**Resolution (2026-05-28):** Wrapped every `Write`/`WriteLine` override in `SandboxConsoleCapture` through a `WriteSynchronized` helper that takes a `lock` on the current `AsyncLocal` capture buffer before writing — concurrent `Console.WriteLine` calls from a script's `Task.WhenAll`/`Task.Run` fan-out now serialise on the buffer instance, so the `StringBuilder` underneath can no longer be corrupted. The fall-through to the unwrapped `_fallback` writer is unlocked because the BCL's process-wide `Console.Out` is already synchronised. Different capture scopes have different lock targets, so two unrelated sandbox runs never block each other. New regression test `SandboxConsoleCaptureTests.BeginCapture_ConcurrentWritesFromTasks_DoNotCorruptBuffer` drives 32 tasks × 50 lines each through one capture scope and asserts every line is intact in the buffer. + **Description** CentralUI-003 correctly routed console capture through an `AsyncLocal` diff --git a/code-reviews/Commons/findings.md b/code-reviews/Commons/findings.md index c4783a2a..806d1a44 100644 --- a/code-reviews/Commons/findings.md +++ b/code-reviews/Commons/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 7 | +| Open findings | 6 | ## Summary @@ -1014,9 +1014,11 @@ boundary (`AuditTelemetryEnvelope`, `PullAuditEventsRequest`/`Response`, |--|--| | Severity | Low | | Category | Concurrency & thread safety | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.Commons/Interfaces/Services/IExternalSystemClient.cs:91-104` | +**Resolution (2026-05-28):** Replaced the two mutable backing fields (`_response`/`_responseParsed`) with a single `private readonly Lazy _response` initialised in the field initializer — `LazyThreadSafetyMode.ExecutionAndPublication` (the default) guarantees the parse runs at most once and every concurrent reader observes the same published `DynamicJsonElement`. `Response` is now a one-line `_response.Value` expression-bodied property. Regression test `ExternalCallResultTests.Response_ConcurrentReads_ReturnSameInstance` fires 64 concurrent readers through a `Barrier` and asserts `Assert.Same` across all observed values. + **Description** `ExternalCallResult` is a `record` returned to scripts after an outbound HTTP call. The diff --git a/code-reviews/ConfigurationDatabase/findings.md b/code-reviews/ConfigurationDatabase/findings.md index 7c26b863..d0e6626f 100644 --- a/code-reviews/ConfigurationDatabase/findings.md +++ b/code-reviews/ConfigurationDatabase/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 4 | +| Open findings | 3 | ## Summary @@ -989,9 +989,19 @@ longer exists. |--|--| | Severity | Medium | | Category | Concurrency & thread safety | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.ConfigurationDatabase/Repositories/DeploymentManagerRepository.cs:83-97` | +**Resolution (2026-05-28):** +`IDeploymentManagerRepository.DeleteDeploymentRecordAsync` now requires a `byte[] expectedRowVersion` +argument. The repository seeds `entry.OriginalValues["RowVersion"]` on both the Local-tracked and +stub-attach branches so EF emits `DELETE ... WHERE Id = @id AND RowVersion = @prior` and surfaces a +concurrent edit as `DbUpdateConcurrencyException`. A new SQLite regression test +`DeleteDeploymentRecord_StaleRowVersion_ThrowsConcurrencyException` in `ConcurrencyTests` +(backed by a dedicated `RowVersionConcurrencyTestDbContext` that keeps `RowVersion` as a +caller-managed concurrency token) asserts the exception fires on a stale token; the existing +`DeleteDeploymentRecord_ViaStubAttachPath_RemovesEntity` was updated to pass the fresh RowVersion. + **Description** `DeploymentRecord` carries a SQL Server `rowversion` concurrency token (declared diff --git a/code-reviews/NotificationOutbox/findings.md b/code-reviews/NotificationOutbox/findings.md index 23136b66..24415f04 100644 --- a/code-reviews/NotificationOutbox/findings.md +++ b/code-reviews/NotificationOutbox/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 10 | +| Open findings | 9 | ## Summary @@ -237,9 +237,11 @@ _Unresolved._ |--|--| | Severity | Medium | | Category | Concurrency & thread safety | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs:127-132` (caller); root cause in `src/ScadaLink.ConfigurationDatabase/Repositories/NotificationOutboxRepository.cs:33-45` | +**Resolution (2026-05-28):** Closed by CD-015 — `NotificationOutboxRepository.InsertIfNotExistsAsync` (commit `ac96b83`) is now a single-statement `IF NOT EXISTS ... INSERT` via `ExecuteSqlInterpolatedAsync` with a `SqlException` filter swallowing duplicate-key violations (`2601`/`2627`) as a no-op (`return false`). The check-then-act window is eliminated; the at-least-once handoff contract holds and the actor's `PipeTo` success/failure projection no longer surfaces a permanent PK-violation back to the site. Verified in `src/ScadaLink.ConfigurationDatabase/Repositories/NotificationOutboxRepository.cs:51-103`. + **Description** `HandleSubmit` → `PersistAsync` calls `repository.InsertIfNotExistsAsync(notification)` @@ -271,10 +273,6 @@ and ack-back does not produce a permanent re-forward loop — but the cleanest f remains the CD-015 raw-SQL `IF NOT EXISTS … INSERT` with `2601/2627` catch in `NotificationOutboxRepository`. -**Resolution** - -_Unresolved._ - ### NotificationOutbox-006 — `ResolveAdapters` rebuilds the `NotificationType → adapter` dictionary on every dispatch sweep | | | diff --git a/code-reviews/NotificationService/findings.md b/code-reviews/NotificationService/findings.md index 00bd090f..4d0f3f3e 100644 --- a/code-reviews/NotificationService/findings.md +++ b/code-reviews/NotificationService/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 7 | +| Open findings | 6 | ## Summary @@ -683,9 +683,11 @@ Recommended path is option 1: the parallel implementation in `EmailNotificationD |--|--| | Severity | Medium | | Category | Correctness & logic bugs | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.Host/Actors/AkkaHostedService.cs:654-660`, NS-001 resolution note (this file) | +**Resolution (2026-05-28):** Added a `_notificationDeliveryHandlerRegistered` sentinel field on `AkkaHostedService` and gated the canonical `NotificationForwarder` registration with an `InvalidOperationException` guard — a future code path that re-introduces the dead NS-001 site-SMTP handler now fails fast at startup with an explicit NS-020 diagnostic, rather than silently overwriting `RegisterDeliveryHandler`'s last-write-wins map and inverting the central-only design. The sentinel's XML doc cross-references NS-001/NS-019/NS-020 so a maintainer searching for the `Notification` S&F handler finds the one canonical registration and its history. + **Description** NS-001 was resolved by registering an `S&F → DeliverBufferedAsync` handler for `StoreAndForwardCategory.Notification` at site startup in `AkkaHostedService`. The current source registers a **different** handler for the same category at `AkkaHostedService.cs:654-660` — `NotificationForwarder.DeliverAsync`, which forwards to central instead of sending SMTP. `StoreAndForwardService.RegisterDeliveryHandler` (verified by reading `StoreAndForward/StoreAndForwardService.cs` around line 109) takes a single handler per category — last-write-wins or first-write-wins, either way the two registrations cannot both be active. diff --git a/code-reviews/README.md b/code-reviews/README.md index ada6d0c5..8ff39d46 100644 --- a/code-reviews/README.md +++ b/code-reviews/README.md @@ -41,21 +41,21 @@ module file and counted in **Total**. |----------|---------------| | Critical | 0 | | High | 0 | -| Medium | 25 | -| Low | 50 | -| **Total** | **75** | +| Medium | 22 | +| Low | 43 | +| **Total** | **65** | ## Module Status | Module | Last reviewed | Commit | Open (C/H/M/L) | Open | Total | |--------|---------------|--------|----------------|------|-------| -| [AuditLog](AuditLog/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/4 | 6 | 11 | +| [AuditLog](AuditLog/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/1 | 3 | 11 | | [CLI](CLI/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/2 | 3 | 23 | -| [CentralUI](CentralUI/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/5 | 5 | 33 | +| [CentralUI](CentralUI/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/4 | 4 | 33 | | [ClusterInfrastructure](ClusterInfrastructure/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/3 | 3 | 14 | -| [Commons](Commons/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/5 | 5 | 23 | +| [Commons](Commons/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/4 | 4 | 23 | | [Communication](Communication/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/1 | 2 | 22 | -| [ConfigurationDatabase](ConfigurationDatabase/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/2 | 4 | 24 | +| [ConfigurationDatabase](ConfigurationDatabase/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/2 | 3 | 24 | | [DataConnectionLayer](DataConnectionLayer/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/0 | 0 | 22 | | [DeploymentManager](DeploymentManager/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/4 | 4 | 24 | | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/1 | 2 | 23 | @@ -63,15 +63,15 @@ module file and counted in **Total**. | [Host](Host/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/3 | 4 | 22 | | [InboundAPI](InboundAPI/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/2 | 3 | 25 | | [ManagementService](ManagementService/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/1 | 3 | 23 | -| [NotificationOutbox](NotificationOutbox/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/2 | 3 | 10 | -| [NotificationService](NotificationService/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/2 | 4 | 25 | +| [NotificationOutbox](NotificationOutbox/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/2 | 2 | 10 | +| [NotificationService](NotificationService/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/2 | 3 | 25 | | [Security](Security/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/1 | 1 | 21 | -| [SiteCallAudit](SiteCallAudit/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/2 | 4 | 6 | +| [SiteCallAudit](SiteCallAudit/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/1 | 3 | 6 | | [SiteEventLogging](SiteEventLogging/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/3 | 3 | 23 | | [SiteRuntime](SiteRuntime/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/0 | 2 | 26 | | [StoreAndForward](StoreAndForward/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/3/2 | 5 | 24 | | [TemplateEngine](TemplateEngine/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/3/0 | 3 | 22 | -| [Transport](Transport/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/3 | 4 | 12 | +| [Transport](Transport/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/1/2 | 3 | 12 | ## Pending Findings @@ -88,7 +88,7 @@ _None open._ _None open._ -### Medium (25) +### Medium (22) | ID | Module | Title | |----|--------|-------| @@ -97,14 +97,11 @@ _None open._ | CLI-019 | [CLI](CLI/findings.md) | `bundle export` decodes the entire base64 bundle into memory before writing | | Communication-017 | [Communication](Communication/findings.md) | `_inProgressDeployments` grows unboundedly — successful deployments are never cleaned up | | ConfigurationDatabase-016 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | `InboundApiRepository.GetApiKeyByValueAsync` hashes the candidate with the unpeppered `ApiKeyHasher.Default` | -| ConfigurationDatabase-017 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | Stub-attach delete on `DeploymentRecord` bypasses optimistic concurrency | | ExternalSystemGateway-020 | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | `JsonElementToParameterValue` silently downcasts non-Int64 JSON numbers to `double`, losing precision for `decimal` SQL parameters on retry | | Host-016 | [Host](Host/findings.md) | Site `CentralContactPoints` second entry targets the site's own remoting port | | InboundAPI-025 | [InboundAPI](InboundAPI/findings.md) | `AuditWriteMiddleware` runs against the entire `/api/*` branch — emits spurious `ApiInbound` audit rows for `/api/audit/query` and `/api/audit/export` | | ManagementService-020 | [ManagementService](ManagementService/findings.md) | UpdateSmtpConfig returns and audits the SMTP Credentials field verbatim | | ManagementService-021 | [ManagementService](ManagementService/findings.md) | Transport bundle handlers have zero test coverage | -| NotificationOutbox-005 | [NotificationOutbox](NotificationOutbox/findings.md) | Ingest persistence inherits the CD-015 check-then-act race; under contention the second writer throws and the site retries | -| NotificationService-020 | [NotificationService](NotificationService/findings.md) | NS-001 fix superseded; `AkkaHostedService` would register two competing `Notification` S&F handlers if both code paths ran | | NotificationService-024 | [NotificationService](NotificationService/findings.md) | No test affirms the central-only invariant; the orphaned-path tests give a false coverage signal | | SiteCallAudit-001 | [SiteCallAudit](SiteCallAudit/findings.md) | SupervisorStrategy override is dead code; XML claims Resume that is not enforced | | SiteCallAudit-003 | [SiteCallAudit](SiteCallAudit/findings.md) | `OnUpsertAsync` does not refresh `IngestedAtUtc`; direct-write callers must remember to stamp it | @@ -118,18 +115,14 @@ _None open._ | TemplateEngine-020 | [TemplateEngine](TemplateEngine/findings.md) | `Create*` audit entries are written with `EntityId = "0"` before `SaveChangesAsync` populates the real key | | Transport-010 | [Transport](Transport/findings.md) | Critical Overwrite + cross-cutting paths uncovered by tests | -### Low (50) +### Low (43) | ID | Module | Title | |----|--------|-------| -| AuditLog-003 | [AuditLog](AuditLog/findings.md) | `AuditLogIngestActor.OnIngestAsync` uses `CreateScope`, but `OnCachedTelemetryAsync` uses `CreateAsyncScope` — and only one disposes asynchronously | -| AuditLog-007 | [AuditLog](AuditLog/findings.md) | `INodeIdentityProvider` resolution mixes `GetService` and `GetRequiredService` inconsistently across `AddAuditLog` registrations | | AuditLog-008 | [AuditLog](AuditLog/findings.md) | Test composition roots that omit `IAuditPayloadFilter` silently pass UNREDACTED payloads through the writer chain | -| AuditLog-011 | [AuditLog](AuditLog/findings.md) | `AddAuditLogHealthMetricsBridge` and `AddAuditLogCentralMaintenance` are non-idempotent and register hosted services on every call | | CLI-020 | [CLI](CLI/findings.md) | `bundle export` success-envelope parse is unguarded | | CLI-022 | [CLI](CLI/findings.md) | `CommandTreeTests` excludes the two new command groups | | CentralUI-029 | [CentralUI](CentralUI/findings.md) | `ConfigurationAuditLog` uses `JS.InvokeAsync("eval", ...)` instead of a dedicated JS module | -| CentralUI-030 | [CentralUI](CentralUI/findings.md) | `SandboxConsoleCapture`'s per-call `StringWriter` is not thread-safe under intra-script concurrency | | CentralUI-031 | [CentralUI](CentralUI/findings.md) | `TransportImport` buffers the full bundle bytes in component state | | CentralUI-032 | [CentralUI](CentralUI/findings.md) | `AuditResultsGrid` paging is forward-only, no Previous button | | CentralUI-033 | [CentralUI](CentralUI/findings.md) | Drill-in / query-string code paths for the new Transport + SiteCalls pages are untested | @@ -139,7 +132,6 @@ _None open._ | Commons-016 | [Commons](Commons/findings.md) | `BundleSession.Locked` uses a magic `3` rather than a named constant | | Commons-018 | [Commons](Commons/findings.md) | `IOperationTrackingStore` and `IPartitionMaintenance` are at the root of `Interfaces/` instead of `Interfaces/Services/` | | Commons-020 | [Commons](Commons/findings.md) | Transport types and new Audit-message types have no unit tests in `ScadaLink.Commons.Tests` | -| Commons-021 | [Commons](Commons/findings.md) | `ExternalCallResult.Response` has a benign lazy-parse race | | Commons-023 | [Commons](Commons/findings.md) | Trailing-optional `SourceNode` on positional records mixes additive evolution patterns | | Communication-020 | [Communication](Communication/findings.md) | `SiteAddressCacheLoaded` carries mutable `Dictionary`/`List` types | | ConfigurationDatabase-021 | [ConfigurationDatabase](ConfigurationDatabase/findings.md) | `SwitchOutPartitionAsync` interpolates `monthBoundary` / staging table name into raw SQL | @@ -162,7 +154,6 @@ _None open._ | NotificationService-022 | [NotificationService](NotificationService/findings.md) | `MailKitSmtpClientWrapper` holds a long-lived `SmtpClient`; combined with per-send factory, the design comment about pooling is contradicted | | NotificationService-025 | [NotificationService](NotificationService/findings.md) | `CredentialRedactor` over-masks: any 4-character credential component is masked anywhere it appears, including unrelated log text | | Security-021 | [Security](Security/findings.md) | `RequireHttpsCookie=false` dev opt-out has no warning path — an HTTP production deployment silently transmits the JWT bearer credential in cleartext | -| SiteCallAudit-002 | [SiteCallAudit](SiteCallAudit/findings.md) | Singleton failover does not wait for in-flight async upserts | | SiteCallAudit-006 | [SiteCallAudit](SiteCallAudit/findings.md) | Stuck-only paging test does not exercise the multi-page boundary with an interleaved non-stuck row at the cursor | | SiteEventLogging-018 | [SiteEventLogging](SiteEventLogging/findings.md) | `FailedWriteCount` is exposed but never consumed by Health Monitoring | | SiteEventLogging-022 | [SiteEventLogging](SiteEventLogging/findings.md) | `Cache=Shared` is redundant for a single-connection logger | @@ -170,5 +161,4 @@ _None open._ | StoreAndForward-022 | [StoreAndForward](StoreAndForward/findings.md) | `NotifyCachedCallObserverAsync` silently drops the entire audit lifecycle when the message id is not a parseable `TrackedOperationId` | | StoreAndForward-023 | [StoreAndForward](StoreAndForward/findings.md) | `siteId` silently defaults to empty when no `IStoreAndForwardSiteContext` is registered, degrading audit telemetry correlation | | Transport-008 | [Transport](Transport/findings.md) | `PreviewAsync` issues an N+1 `GetTemplateWithChildrenAsync` per matching template name | -| Transport-009 | [Transport](Transport/findings.md) | `IAuditCorrelationContext.BundleImportId` is mutated on the same scoped instance the AuditService reads | | Transport-012 | [Transport](Transport/findings.md) | "Bundle Import" filter promised in design doc not surfaced in Configuration Audit Log Viewer UI | diff --git a/code-reviews/SiteCallAudit/findings.md b/code-reviews/SiteCallAudit/findings.md index 8ff091ac..9a31d999 100644 --- a/code-reviews/SiteCallAudit/findings.md +++ b/code-reviews/SiteCallAudit/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 6 | +| Open findings | 5 | ## Summary @@ -108,9 +108,11 @@ _Unresolved._ |--|--| | Severity | Low | | Category | Error handling & resilience | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.Host/Actors/AkkaHostedService.cs:455-462` (singleton wiring), `src/ScadaLink.SiteCallAudit/SiteCallAuditActor.cs:153-193` | +**Resolution (2026-05-28):** Added a `CoordinatedShutdown` task in the `cluster-leave` phase (named `drain-site-call-audit-singleton`) that issues an explicit `GracefulStop(10s)` to the `SiteCallAudit` cluster singleton manager before the cluster-leave proceeds. Akka.NET's singleton handover already waits for the active actor's `ReceiveAsync` task to complete before signalling `HandOverDone`, so an in-flight EF `UpsertAsync` (and its SQL round-trip) drains on the old node before the new singleton starts on the other central node — closing the seam where the new singleton could race a still-running upsert on the old node. The 10-second timeout is bounded so a misbehaving upsert cannot stall coordinated shutdown indefinitely; on timeout the existing `PoisonPill` termination path takes over and the repository's monotonic-upsert + 2601/2627 duplicate-key swallow remain as the storage-state safety net. Pattern is suitable for the `NotificationOutbox` singleton too; deferred to keep this change scoped. + **Description** The singleton is created with `terminationMessage: PoisonPill.Instance`. On @@ -146,10 +148,6 @@ Notification Outbox sibling has the same pattern. monotonic rank check (the CD-015 race-pattern check the parent task flagged). -**Resolution** - -_Unresolved._ - ### SiteCallAudit-003 — `OnUpsertAsync` does not refresh `IngestedAtUtc`; direct-write callers must remember to stamp it | | | diff --git a/code-reviews/Transport/findings.md b/code-reviews/Transport/findings.md index 9f1839e3..bca6f463 100644 --- a/code-reviews/Transport/findings.md +++ b/code-reviews/Transport/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-28 | | Reviewer | claude-agent | | Commit reviewed | `1eb6e97` | -| Open findings | 10 | +| Open findings | 9 | ## Summary @@ -343,9 +343,18 @@ _Unresolved._ |--|--| | Severity | Low | | Category | Concurrency & thread safety | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.Transport/Import/BundleImporter.cs:528, 668, 703`, `src/ScadaLink.ConfigurationDatabase/Services/AuditCorrelationContext.cs` | +**Resolution (2026-05-28):** +Took option (a). `AuditCorrelationContext` now backs `BundleImportId` with a `static AsyncLocal`, +so every distinct `BundleImporter.ApplyAsync` invocation observes its own value even when sharing a +DI scope (e.g. concurrent imports awaited via `Task.WhenAll` on a single Blazor circuit). The XML +docs on both `IAuditCorrelationContext` and `AuditCorrelationContext` were rewritten to spell out +the per-logical-call-context isolation contract that all implementations must preserve. The +`BundleImporter` mutation pattern is unchanged — the property API is identical and existing +integration tests still pass. + **Description** The XML doc on `IAuditCorrelationContext` correctly notes that mutating diff --git a/src/ScadaLink.AuditLog/Central/AuditLogIngestActor.cs b/src/ScadaLink.AuditLog/Central/AuditLogIngestActor.cs index 3cffa566..dc909bf1 100644 --- a/src/ScadaLink.AuditLog/Central/AuditLogIngestActor.cs +++ b/src/ScadaLink.AuditLog/Central/AuditLogIngestActor.cs @@ -122,65 +122,69 @@ public class AuditLogIngestActor : ReceiveActor // ctor has no service provider — it falls through with no filter, // which preserves the small-payload assumptions baked into the // existing D2 fixtures. - IServiceScope? scope = null; - IAuditLogRepository repository; - IAuditPayloadFilter? filter = null; - ICentralAuditWriteFailureCounter? failureCounter = null; + // AuditLog-003: use CreateAsyncScope + await using so scoped EF Core + // services (IAsyncDisposable DbContexts) dispose asynchronously + // without blocking on sync Dispose() of pending connection cleanup. if (_injectedRepository is not null) { - repository = _injectedRepository; + await IngestWithRepositoryAsync(_injectedRepository, filter: null, failureCounter: null, cmd, nowUtc, accepted) + .ConfigureAwait(false); } else { - scope = _serviceProvider!.CreateScope(); - repository = scope.ServiceProvider.GetRequiredService(); - filter = scope.ServiceProvider.GetService(); + await using var scope = _serviceProvider!.CreateAsyncScope(); + var repository = scope.ServiceProvider.GetRequiredService(); + var filter = scope.ServiceProvider.GetService(); // M6 Bundle E (T8): central health counter is best-effort — // unregistered (test composition roots) means the per-row catch // simply logs without surfacing on the health dashboard. - failureCounter = scope.ServiceProvider.GetService(); - } - - try - { - foreach (var evt in cmd.Events) - { - try - { - // Stamp IngestedAtUtc here, not at the site. Bundle A's - // repository hardening already swallows duplicate-key races, - // so the same id arriving twice (site retry, reconciliation) - // is a silent no-op. - // Filter BEFORE the IngestedAtUtc stamp so the redacted - // copy carries the central-side ingest timestamp. Filter - // is contract-bound to never throw; null = pass-through. - var filtered = filter?.Apply(evt) ?? evt; - var ingested = filtered with { IngestedAtUtc = nowUtc }; - await repository.InsertIfNotExistsAsync(ingested).ConfigureAwait(false); - accepted.Add(evt.EventId); - } - catch (Exception ex) - { - // Per-row catch — one bad row never sinks the whole batch. - // The row stays Pending at the site; the next drain retries. - // M6 Bundle E (T8): bump the central health counter so a - // sustained insert-throw failure surfaces on the dashboard. - try { failureCounter?.Increment(); } - catch { /* counter must never throw — defence in depth */ } - _logger.LogError(ex, - "Failed to persist audit event {EventId} during batch ingest; row will be retried by the site.", - evt.EventId); - } - } - } - finally - { - scope?.Dispose(); + var failureCounter = scope.ServiceProvider.GetService(); + await IngestWithRepositoryAsync(repository, filter, failureCounter, cmd, nowUtc, accepted) + .ConfigureAwait(false); } replyTo.Tell(new IngestAuditEventsReply(accepted)); } + private async Task IngestWithRepositoryAsync( + IAuditLogRepository repository, + IAuditPayloadFilter? filter, + ICentralAuditWriteFailureCounter? failureCounter, + IngestAuditEventsCommand cmd, + DateTime nowUtc, + List accepted) + { + foreach (var evt in cmd.Events) + { + try + { + // Stamp IngestedAtUtc here, not at the site. Bundle A's + // repository hardening already swallows duplicate-key races, + // so the same id arriving twice (site retry, reconciliation) + // is a silent no-op. + // Filter BEFORE the IngestedAtUtc stamp so the redacted + // copy carries the central-side ingest timestamp. Filter + // is contract-bound to never throw; null = pass-through. + var filtered = filter?.Apply(evt) ?? evt; + var ingested = filtered with { IngestedAtUtc = nowUtc }; + await repository.InsertIfNotExistsAsync(ingested).ConfigureAwait(false); + accepted.Add(evt.EventId); + } + catch (Exception ex) + { + // Per-row catch — one bad row never sinks the whole batch. + // The row stays Pending at the site; the next drain retries. + // M6 Bundle E (T8): bump the central health counter so a + // sustained insert-throw failure surfaces on the dashboard. + try { failureCounter?.Increment(); } + catch { /* counter must never throw — defence in depth */ } + _logger.LogError(ex, + "Failed to persist audit event {EventId} during batch ingest; row will be retried by the site.", + evt.EventId); + } + } + } + /// /// M3 dual-write handler. For every the /// actor opens a fresh MS SQL transaction, inserts the AuditLog row diff --git a/src/ScadaLink.AuditLog/Central/AuditLogPurgeActor.cs b/src/ScadaLink.AuditLog/Central/AuditLogPurgeActor.cs index efb78989..aae2b9db 100644 --- a/src/ScadaLink.AuditLog/Central/AuditLogPurgeActor.cs +++ b/src/ScadaLink.AuditLog/Central/AuditLogPurgeActor.cs @@ -134,79 +134,73 @@ public class AuditLogPurgeActor : ReceiveActor // restart. var threshold = DateTime.UtcNow - TimeSpan.FromDays(_auditOptions.RetentionDays); - IServiceScope? scope = null; + // AuditLog-003: use CreateAsyncScope + await using so scoped EF Core + // services (IAsyncDisposable DbContexts) dispose asynchronously + // without blocking on sync Dispose() of pending connection cleanup. + await using var scope = _services.CreateAsyncScope(); IAuditLogRepository repository; try { - scope = _services.CreateScope(); repository = scope.ServiceProvider.GetRequiredService(); } catch (Exception ex) { _logger.LogError(ex, "Failed to resolve IAuditLogRepository for AuditLog purge tick."); - scope?.Dispose(); return; } + IReadOnlyList boundaries; try { - IReadOnlyList boundaries; + boundaries = await repository + .GetPartitionBoundariesOlderThanAsync(threshold) + .ConfigureAwait(false); + } + catch (Exception ex) + { + _logger.LogError( + ex, + "Failed to enumerate eligible AuditLog partition boundaries (threshold {ThresholdUtc:o}); skipping purge tick.", + threshold); + return; + } + + if (boundaries.Count == 0) + { + return; + } + + foreach (var boundary in boundaries) + { + // Per-boundary try/catch: one bad partition (transient SQL + // failure, missing object, contention with backup) does NOT + // abandon the rest of the tick. + var sw = Stopwatch.StartNew(); try { - boundaries = await repository - .GetPartitionBoundariesOlderThanAsync(threshold) + var rowsDeleted = await repository + .SwitchOutPartitionAsync(boundary) .ConfigureAwait(false); + sw.Stop(); + + eventStream.Publish( + new AuditLogPurgedEvent(boundary, rowsDeleted, sw.ElapsedMilliseconds)); + + _logger.LogInformation( + "Purged AuditLog partition {MonthBoundary:yyyy-MM-dd}; {RowsDeleted} rows in {DurationMs} ms.", + boundary, + rowsDeleted, + sw.ElapsedMilliseconds); } catch (Exception ex) { + sw.Stop(); _logger.LogError( ex, - "Failed to enumerate eligible AuditLog partition boundaries (threshold {ThresholdUtc:o}); skipping purge tick.", - threshold); - return; + "Failed to purge AuditLog partition {MonthBoundary:yyyy-MM-dd}; other partitions continue. Elapsed {DurationMs} ms.", + boundary, + sw.ElapsedMilliseconds); } - - if (boundaries.Count == 0) - { - return; - } - - foreach (var boundary in boundaries) - { - // Per-boundary try/catch: one bad partition (transient SQL - // failure, missing object, contention with backup) does NOT - // abandon the rest of the tick. - var sw = Stopwatch.StartNew(); - try - { - var rowsDeleted = await repository - .SwitchOutPartitionAsync(boundary) - .ConfigureAwait(false); - sw.Stop(); - - eventStream.Publish( - new AuditLogPurgedEvent(boundary, rowsDeleted, sw.ElapsedMilliseconds)); - - _logger.LogInformation( - "Purged AuditLog partition {MonthBoundary:yyyy-MM-dd}; {RowsDeleted} rows in {DurationMs} ms.", - boundary, - rowsDeleted, - sw.ElapsedMilliseconds); - } - catch (Exception ex) - { - sw.Stop(); - _logger.LogError( - ex, - "Failed to purge AuditLog partition {MonthBoundary:yyyy-MM-dd}; other partitions continue. Elapsed {DurationMs} ms.", - boundary, - sw.ElapsedMilliseconds); - } - } - } - finally - { - scope.Dispose(); } } diff --git a/src/ScadaLink.AuditLog/Central/SiteAuditReconciliationActor.cs b/src/ScadaLink.AuditLog/Central/SiteAuditReconciliationActor.cs index 684a4153..8f155b8d 100644 --- a/src/ScadaLink.AuditLog/Central/SiteAuditReconciliationActor.cs +++ b/src/ScadaLink.AuditLog/Central/SiteAuditReconciliationActor.cs @@ -195,44 +195,38 @@ public class SiteAuditReconciliationActor : ReceiveActor return; } - IServiceScope? scope = null; + // AuditLog-003: use CreateAsyncScope + await using so scoped EF Core + // services (IAsyncDisposable DbContexts) dispose asynchronously + // without blocking on sync Dispose() of pending connection cleanup. + await using var scope = _services.CreateAsyncScope(); IAuditLogRepository repository; try { - scope = _services.CreateScope(); repository = scope.ServiceProvider.GetRequiredService(); } catch (Exception ex) { _logger.LogError(ex, "Failed to resolve IAuditLogRepository for reconciliation tick."); - scope?.Dispose(); return; } - try + foreach (var site in sites) { - foreach (var site in sites) + try { - try - { - await PullSiteAsync(site, repository, eventStream).ConfigureAwait(false); - } - catch (Exception ex) - { - // Catch-all per the failure-isolation invariant: one site's - // fault must not sink the rest of the tick. The cursor for - // the failing site is left at its previous value so the - // next tick retries the same window. - _logger.LogWarning( - ex, - "Reconciliation pull failed for site {SiteId}; other sites continue.", - site.SiteId); - } + await PullSiteAsync(site, repository, eventStream).ConfigureAwait(false); + } + catch (Exception ex) + { + // Catch-all per the failure-isolation invariant: one site's + // fault must not sink the rest of the tick. The cursor for + // the failing site is left at its previous value so the + // next tick retries the same window. + _logger.LogWarning( + ex, + "Reconciliation pull failed for site {SiteId}; other sites continue.", + site.SiteId); } - } - finally - { - scope.Dispose(); } } diff --git a/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs b/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs index 05aafce1..9696a5bd 100644 --- a/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs +++ b/src/ScadaLink.AuditLog/ServiceCollectionExtensions.cs @@ -150,16 +150,15 @@ public static class ServiceCollectionExtensions sp.GetRequiredService(), sp.GetService(), sp.GetRequiredService>(), - // SourceNode-stamping (Task 14): the local node identity is - // threaded through so RecordEnqueueAsync can stamp the - // tracking row's SourceNode column. GetService (not - // GetRequiredService) — test composition roots that build a - // stripped DI container may not register the provider, in - // which case the forwarder degrades to a null SourceNode - // rather than failing the DI resolution. Production hosts - // (site + central) always register it via - // SiteServiceRegistration.BindSharedOptions. - sp.GetService())); + // AuditLog-007: INodeIdentityProvider is now required across + // every consumer in AddAuditLog. The Host's + // SiteServiceRegistration registers it as a singleton on both + // site and central paths (InboundAPI-022 / Host registration + // sweep), and the AddAuditLogTests fixture provides a + // FakeNodeIdentityProvider; a silent GetService() that + // returned null would mask a future composition root that + // forgot to register the provider. + sp.GetRequiredService())); // M3 Bundle F: bridge the store-and-forward retry-loop observer hook // to the cached-call forwarder so per-attempt + terminal telemetry @@ -171,15 +170,17 @@ public static class ServiceCollectionExtensions // INodeIdentityProvider singleton can be threaded through — the // bridge stamps SiteCallOperational.SourceNode from // INodeIdentityProvider.NodeName on every cached-call lifecycle row. - // GetService (not GetRequiredService) — test composition roots that - // build a stripped DI container may not register the provider, in - // which case the bridge degrades to a null SourceNode rather than - // failing the DI resolution. Production hosts (site + central) - // always register it via SiteServiceRegistration.BindSharedOptions. + // AuditLog-007: the provider is resolved with GetRequiredService — + // SiteServiceRegistration.BindSharedOptions registers it on both + // site and central paths, so a missing registration is a + // composition-root bug, not a silent null-SourceNode degradation. services.AddSingleton(sp => new CachedCallLifecycleBridge( sp.GetRequiredService(), sp.GetRequiredService>(), - sp.GetService())); + // AuditLog-007: required, matches the other consumers in this + // composition root — the provider is always registered by + // SiteServiceRegistration. + sp.GetRequiredService())); services.AddSingleton( sp => sp.GetRequiredService()); @@ -245,8 +246,10 @@ public static class ServiceCollectionExtensions /// time — by design, since a silent NoOp would mask a misconfiguration. /// /// - /// Idempotent — calling twice replaces each descriptor without piling up - /// registrations. + /// Idempotent — a sentinel check on the + /// hosted-service descriptor + /// short-circuits subsequent calls so the hosted service is not + /// double-registered (AddHostedService has no TryAdd variant). /// /// /// Site-side only for M5: the central composition root keeps the NoOp @@ -261,6 +264,18 @@ public static class ServiceCollectionExtensions { ArgumentNullException.ThrowIfNull(services); + // AuditLog-011: guard against double-registration. AddHostedService is + // additive (no TryAdd variant) so a second call without this sentinel + // would spin up a second SiteAuditBacklogReporter, doubling the 30 s + // SQL probe rate and racing on the ISiteHealthCollector snapshot. The + // SiteAuditBacklogReporter descriptor is the discriminator: it's only + // registered by this helper, so its presence proves the bridge has + // already been wired. + if (services.Any(d => d.ImplementationType == typeof(SiteAuditBacklogReporter))) + { + return services; + } + services.Replace( ServiceDescriptor.Singleton()); services.Replace( diff --git a/src/ScadaLink.CentralUI/ScriptAnalysis/SandboxConsoleCapture.cs b/src/ScadaLink.CentralUI/ScriptAnalysis/SandboxConsoleCapture.cs index d783d252..6987324b 100644 --- a/src/ScadaLink.CentralUI/ScriptAnalysis/SandboxConsoleCapture.cs +++ b/src/ScadaLink.CentralUI/ScriptAnalysis/SandboxConsoleCapture.cs @@ -79,23 +79,53 @@ internal sealed class SandboxConsoleCapture : TextWriter return new CaptureScope(this, previous); } + // CentralUI-030: intra-script concurrency hardening. A sandboxed script + // can fan out work with `Task.WhenAll` / `Task.Run`; `AsyncLocal` flows + // the capture `StringWriter` into every child task, so two tasks can + // race the *same* buffer. `StringWriter` is not thread-safe — concurrent + // `Write`/`WriteLine` calls can corrupt the underlying `StringBuilder` + // and either throw or interleave at the character level. We lock on the + // captured writer itself so writes from one capture scope serialise; + // fall-through to the original `_fallback` (host-process console) is + // unlocked because the BCL's process-wide `Console.Out` is already + // synchronised by its TextWriter wrapper. /// - public override void Write(char value) => Target.Write(value); + public override void Write(char value) => WriteSynchronized(t => t.Write(value)); /// - public override void Write(string? value) => Target.Write(value); + public override void Write(string? value) => WriteSynchronized(t => t.Write(value)); /// public override void Write(char[] buffer, int index, int count) => - Target.Write(buffer, index, count); + WriteSynchronized(t => t.Write(buffer, index, count)); /// - public override void WriteLine() => Target.WriteLine(); + public override void WriteLine() => WriteSynchronized(t => t.WriteLine()); /// - public override void WriteLine(string? value) => Target.WriteLine(value); + public override void WriteLine(string? value) => WriteSynchronized(t => t.WriteLine(value)); - private TextWriter Target => _current.Value ?? _fallback; + /// + /// Routes a single write through the currently-active capture buffer + /// under a lock on that buffer, or to the unwrapped fallback writer when + /// no capture scope is active. The lock target is the `StringWriter` + /// instance itself — different capture scopes have different writers, + /// so two unrelated scopes never block each other. + /// + private void WriteSynchronized(Action write) + { + var captured = _current.Value; + if (captured is null) + { + write(_fallback); + return; + } + + lock (captured) + { + write(captured); + } + } internal readonly struct CaptureScope : IDisposable { diff --git a/src/ScadaLink.Commons/Interfaces/Repositories/IDeploymentManagerRepository.cs b/src/ScadaLink.Commons/Interfaces/Repositories/IDeploymentManagerRepository.cs index 6b4cd921..40c10421 100644 --- a/src/ScadaLink.Commons/Interfaces/Repositories/IDeploymentManagerRepository.cs +++ b/src/ScadaLink.Commons/Interfaces/Repositories/IDeploymentManagerRepository.cs @@ -56,12 +56,19 @@ public interface IDeploymentManagerRepository /// A task representing the asynchronous operation. Task UpdateDeploymentRecordAsync(DeploymentRecord record, CancellationToken cancellationToken = default); /// - /// Deletes a deployment record by ID. + /// Deletes a deployment record by ID, enforcing optimistic concurrency against the + /// supplied . The caller MUST pass the + /// RowVersion it last observed on the record so EF emits + /// DELETE ... WHERE Id = @id AND RowVersion = @prior. A concurrent edit + /// surfaces as + /// on , matching the documented + /// "Optimistic concurrency is used on deployment status records" design rule. /// /// The deployment record ID to delete. + /// The RowVersion the caller observed; used as the optimistic-concurrency token. /// A cancellation token that can be used to cancel the operation. /// A task representing the asynchronous operation. - Task DeleteDeploymentRecordAsync(int id, CancellationToken cancellationToken = default); + Task DeleteDeploymentRecordAsync(int id, byte[] expectedRowVersion, CancellationToken cancellationToken = default); // SystemArtifactDeploymentRecord /// diff --git a/src/ScadaLink.Commons/Interfaces/Services/IExternalSystemClient.cs b/src/ScadaLink.Commons/Interfaces/Services/IExternalSystemClient.cs index af8f2490..611731b2 100644 --- a/src/ScadaLink.Commons/Interfaces/Services/IExternalSystemClient.cs +++ b/src/ScadaLink.Commons/Interfaces/Services/IExternalSystemClient.cs @@ -81,25 +81,23 @@ public record ExternalCallResult( string? ErrorMessage, bool WasBuffered = false) { - private dynamic? _response; - private bool _responseParsed; + // Commons-021: thread-safe lazy parse — `Lazy` with the default + // `LazyThreadSafetyMode.ExecutionAndPublication` guarantees that two + // concurrent readers see the same `DynamicJsonElement` instance, the + // `JsonDocument.Parse` runs at most once, and the published value is + // safe under .NET's memory model. The closure captures `ResponseJson` + // by reference to the property — the record's positional property is + // an init-only field set in the constructor, so the snapshot read at + // first-access time is stable for the lifetime of the result. + private readonly Lazy _response = new(() => + string.IsNullOrEmpty(ResponseJson) + ? null + : new DynamicJsonElement(System.Text.Json.JsonDocument.Parse(ResponseJson).RootElement)); /// /// Parsed response as a dynamic object. Returns null if ResponseJson is null or empty. /// Access properties directly: result.Response.result, result.Response.items[0].name, etc. + /// Thread-safe: concurrent readers share a single parsed instance (Commons-021). /// - public dynamic? Response - { - get - { - if (!_responseParsed) - { - _response = string.IsNullOrEmpty(ResponseJson) - ? null - : new DynamicJsonElement(System.Text.Json.JsonDocument.Parse(ResponseJson).RootElement); - _responseParsed = true; - } - return _response; - } - } + public dynamic? Response => _response.Value; } diff --git a/src/ScadaLink.Commons/Interfaces/Transport/IAuditCorrelationContext.cs b/src/ScadaLink.Commons/Interfaces/Transport/IAuditCorrelationContext.cs index 0688313f..4a581c4d 100644 --- a/src/ScadaLink.Commons/Interfaces/Transport/IAuditCorrelationContext.cs +++ b/src/ScadaLink.Commons/Interfaces/Transport/IAuditCorrelationContext.cs @@ -1,24 +1,27 @@ namespace ScadaLink.Commons.Interfaces.Transport; /// -/// Scoped service the bundle importer sets to thread a BundleImportId through to -/// the audit log entries emitted by the audited repository methods invoked during +/// Service the bundle importer sets to thread a BundleImportId through to the +/// audit log entries emitted by the audited repository methods invoked during /// ApplyAsync. AuditService reads this and stamps every AuditLogEntry it writes. /// -/// Re-entrancy / thread-safety: mutating is NOT -/// thread-safe. The service is registered scoped, and the assumed usage is a -/// single Blazor Server circuit (or single API request) at a time — within that -/// scope BundleImporter.ApplyAsync (in the Transport component) is the -/// sole writer, and the audit service is the sole reader, in a strictly -/// sequential await chain. Callers that perform concurrent imports within a -/// shared scope (e.g. two ApplyAsync calls awaited via -/// Task.WhenAll on the same circuit) MUST serialize access externally — -/// there is no internal lock and the last writer wins, which would -/// cross-contaminate audit rows between imports. +/// Thread-safety / concurrency contract (Transport-009): the in-tree +/// implementation backs with an +/// so each logical asynchronous +/// call chain — every distinct BundleImporter.ApplyAsync invocation — +/// observes its own value, even when two imports share the same DI scope (e.g. +/// awaited via Task.WhenAll on a single Blazor circuit, or driven by a +/// misconfigured singleton registration). The value flows through every +/// await naturally; no cross-contamination of BundleImportIds between +/// concurrent imports. +/// +/// +/// Alternative implementations (e.g. ambient-context-free explicit-parameter +/// threading) MUST preserve the same per-call-context isolation guarantee. /// /// public interface IAuditCorrelationContext { - /// Gets or sets the bundle import id used to correlate audit rows written during a bundle apply operation. + /// Gets or sets the bundle import id used to correlate audit rows written during a bundle apply operation. Implementations MUST isolate the value per-logical-call-context to prevent concurrent imports from cross-contaminating audit rows. Guid? BundleImportId { get; set; } } diff --git a/src/ScadaLink.ConfigurationDatabase/Repositories/DeploymentManagerRepository.cs b/src/ScadaLink.ConfigurationDatabase/Repositories/DeploymentManagerRepository.cs index fe350973..891ec40b 100644 --- a/src/ScadaLink.ConfigurationDatabase/Repositories/DeploymentManagerRepository.cs +++ b/src/ScadaLink.ConfigurationDatabase/Repositories/DeploymentManagerRepository.cs @@ -81,17 +81,30 @@ public class DeploymentManagerRepository : IDeploymentManagerRepository } /// - public Task DeleteDeploymentRecordAsync(int id, CancellationToken cancellationToken = default) + public Task DeleteDeploymentRecordAsync(int id, byte[] expectedRowVersion, CancellationToken cancellationToken = default) { + ArgumentNullException.ThrowIfNull(expectedRowVersion); + + // CD-017: DeploymentRecord carries a SQL Server rowversion concurrency token. + // The stub-attach delete path must seed EF's OriginalValues["RowVersion"] with + // the caller's last-observed value so the generated SQL becomes + // `DELETE ... WHERE Id = @id AND RowVersion = @prior`. Without this seeding a + // concurrent edit is silently overwritten; with it, EF raises + // DbUpdateConcurrencyException on SaveChangesAsync — the documented + // optimistic-concurrency contract on deployment status records. var record = _dbContext.DeploymentRecords.Local.FirstOrDefault(d => d.Id == id); if (record != null) { + var entry = _dbContext.Entry(record); + entry.OriginalValues["RowVersion"] = expectedRowVersion; _dbContext.DeploymentRecords.Remove(record); } else { var stub = new DeploymentRecord("stub", "stub") { Id = id }; _dbContext.DeploymentRecords.Attach(stub); + var entry = _dbContext.Entry(stub); + entry.OriginalValues["RowVersion"] = expectedRowVersion; _dbContext.DeploymentRecords.Remove(stub); } return Task.CompletedTask; diff --git a/src/ScadaLink.ConfigurationDatabase/Services/AuditCorrelationContext.cs b/src/ScadaLink.ConfigurationDatabase/Services/AuditCorrelationContext.cs index 6518658e..fd52f7b2 100644 --- a/src/ScadaLink.ConfigurationDatabase/Services/AuditCorrelationContext.cs +++ b/src/ScadaLink.ConfigurationDatabase/Services/AuditCorrelationContext.cs @@ -3,13 +3,34 @@ using ScadaLink.Commons.Interfaces.Transport; namespace ScadaLink.ConfigurationDatabase.Services; /// -/// Per-scope mutable holder for the active bundle import id. AuditService reads it -/// while writing AuditLogEntry rows. Registered as Scoped so each Blazor circuit / -/// request gets its own value; ApplyAsync explicitly creates a service scope and -/// sets the id at the top of the transaction. +/// Holder for the active bundle import id, backed by an +/// so each logical asynchronous call chain observes its own value. AuditService +/// reads it while writing AuditLogEntry rows. +/// +/// Thread-safety / concurrency contract (Transport-009): the previous Scoped +/// instance with a plain auto-property mutated by BundleImporter.ApplyAsync +/// was vulnerable to cross-contamination if two imports ran concurrently inside +/// a shared DI scope — either via Task.WhenAll on a single Blazor circuit +/// or via a misconfigured singleton registration. Backing the property with +/// means every fresh logical-call-context — every +/// distinct ApplyAsync invocation, even ones sharing the same DI scope — +/// gets its own independent value, and the value flows naturally through every +/// await in the chain. Concurrent imports no longer leak BundleImportIds +/// across audit rows. +/// +/// +/// The class is still registered as Scoped so injection works with the existing +/// DI graph, but its in-memory state is per-call-context regardless of lifetime. +/// /// public sealed class AuditCorrelationContext : IAuditCorrelationContext { + private static readonly AsyncLocal _bundleImportId = new(); + /// - public Guid? BundleImportId { get; set; } + public Guid? BundleImportId + { + get => _bundleImportId.Value; + set => _bundleImportId.Value = value; + } } diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index 9402a26d..5832e0d7 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -42,6 +42,21 @@ public class AkkaHostedService : IHostedService /// private readonly List _trackedDisposables = new(); + /// + /// NotificationService-020 guard: sentinel that flips to true the + /// first time a Notification-category S&F delivery handler is registered + /// on this hosted service instance. + /// is last-write-wins on category, so a future code change that introduces + /// a second registration path (e.g. a role-branch + helper that both call + /// the registration) would silently overwrite the canonical + /// NotificationForwarder handler with whatever the loser registers — + /// the prior NS-001 fix did exactly this, and was silently superseded + /// when the central-only redesign moved delivery to NotificationOutbox. + /// This sentinel makes the duplicate noisy at startup so a maintainer + /// re-introducing the second path sees it immediately. + /// + private bool _notificationDeliveryHandlerRegistered; + /// /// Initializes a new instance of the class. /// @@ -460,7 +475,42 @@ akka {{ terminationMessage: PoisonPill.Instance, settings: ClusterSingletonManagerSettings.Create(_actorSystem!) .WithSingletonName("site-call-audit")); - _actorSystem!.ActorOf(siteCallAuditSingletonProps, "site-call-audit-singleton"); + var siteCallAuditSingletonManager = + _actorSystem!.ActorOf(siteCallAuditSingletonProps, "site-call-audit-singleton"); + + // SiteCallAudit-002 graceful-handover hook. The default singleton handover + // path waits for the actor's `ReceiveAsync` task to complete before + // signalling `HandOverDone` to the new oldest node — so an in-flight + // EF `UpsertAsync` IS waited for during a *clean* coordinated shutdown + // (the cluster-leave phase below fires before the singleton terminates). + // The risk the finding tracks is the seam between in-flight async work + // and the cluster-leave + singleton-stop sequence: we bound it by + // issuing an explicit `GracefulStop` to the singleton manager early + // in `cluster-leave`, with a timeout that lets the running upsert + SQL + // round-trip drain before the handover-to-other-node race window + // opens. The timeout is bounded so a misbehaving upsert cannot stall + // coordinated shutdown indefinitely — exceeding it falls through to + // the existing PoisonPill termination path. Same pattern is suitable + // for the NotificationOutbox singleton; not added here to keep this + // change minimal (out of NS-020's scope). + var siteCallAuditShutdown = Akka.Actor.CoordinatedShutdown.Get(_actorSystem); + siteCallAuditShutdown.AddTask( + Akka.Actor.CoordinatedShutdown.PhaseClusterLeave, + "drain-site-call-audit-singleton", + async () => + { + try + { + await siteCallAuditSingletonManager.GracefulStop(TimeSpan.FromSeconds(10)); + } + catch (Exception ex) + { + _logger.LogWarning(ex, + "SiteCallAudit singleton did not drain within the graceful-stop " + + "timeout; falling through to PoisonPill handover"); + } + return Akka.Done.Instance; + }); var siteCallAuditProxyProps = ClusterSingletonProxy.Props( singletonManagerPath: "/user/site-call-audit-singleton", @@ -651,6 +701,23 @@ akka {{ // cluster via the SiteCommunicationActor and treating central's // NotificationSubmitAck as the outcome (accepted → delivered; not accepted // or timeout → throw → transient → keep buffering). Central owns SMTP. + // + // NotificationService-020: register exactly once. The sentinel guard + // catches a second registration path that re-introduces the dead + // NS-001 site-SMTP handler — see the sentinel's XML doc above for the + // historical context. Throwing here is intentional: a silent overwrite + // by a future maintainer would invert the design back to site-side + // delivery (NotificationForwarder vs. NotificationDeliveryService). + if (_notificationDeliveryHandlerRegistered) + { + throw new InvalidOperationException( + "NotificationService-020: A Notification-category store-and-forward " + + "delivery handler was already registered. The canonical handler is " + + "NotificationForwarder (central-only delivery, post-redesign). " + + "If you are re-introducing a second registration path, remove the " + + "first one — RegisterDeliveryHandler is last-write-wins per category " + + "and a duplicate inverts the design."); + } var notificationForwarder = new ScadaLink.StoreAndForward.NotificationForwarder( siteCommActor, _nodeOptions.SiteId!, @@ -658,6 +725,7 @@ akka {{ storeAndForwardService.RegisterDeliveryHandler( ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.Notification, notificationForwarder.DeliverAsync); + _notificationDeliveryHandlerRegistered = true; _logger.LogInformation( "Store-and-forward delivery handlers registered (ExternalSystem, CachedDbWrite, Notification)"); diff --git a/tests/ScadaLink.AuditLog.Tests/AddAuditLogTests.cs b/tests/ScadaLink.AuditLog.Tests/AddAuditLogTests.cs index b5c9012f..ed1924c0 100644 --- a/tests/ScadaLink.AuditLog.Tests/AddAuditLogTests.cs +++ b/tests/ScadaLink.AuditLog.Tests/AddAuditLogTests.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; @@ -274,4 +275,36 @@ public class AddAuditLogTests Assert.Throws( () => provider.GetRequiredService()); } + + [Fact] + public void AddAuditLogHealthMetricsBridge_IsIdempotent_DoesNotDoubleRegister_HostedService() + { + // AuditLog-011: AddHostedService has no TryAdd variant, so a second + // call without the sentinel guard would spin up a second + // SiteAuditBacklogReporter on the same SQLite file. The helper must + // be a no-op on the second call — exactly one hosted-service + // descriptor for SiteAuditBacklogReporter survives. + var config = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + ["AuditLog:SiteWriter:DatabasePath"] = ":memory:", + }) + .Build(); + + var services = new ServiceCollection(); + services.AddSingleton(); + services.AddSingleton(typeof(ILogger<>), typeof(NullLogger<>)); + services.AddSingleton(new FakeNodeIdentityProvider()); + services.AddAuditLog(config); + services.AddHealthMonitoring(); + + services.AddAuditLogHealthMetricsBridge(); + services.AddAuditLogHealthMetricsBridge(); + + var reporterCount = services.Count(d => + d.ServiceType == typeof(IHostedService) && + d.ImplementationType == typeof(SiteAuditBacklogReporter)); + + Assert.Equal(1, reporterCount); + } } diff --git a/tests/ScadaLink.CentralUI.Tests/ScriptAnalysis/SandboxConsoleCaptureTests.cs b/tests/ScadaLink.CentralUI.Tests/ScriptAnalysis/SandboxConsoleCaptureTests.cs new file mode 100644 index 00000000..342222ce --- /dev/null +++ b/tests/ScadaLink.CentralUI.Tests/ScriptAnalysis/SandboxConsoleCaptureTests.cs @@ -0,0 +1,86 @@ +using ScadaLink.CentralUI.ScriptAnalysis; + +namespace ScadaLink.CentralUI.Tests.ScriptAnalysis; + +/// +/// Regression tests for the SandboxConsoleCapture writer that the Test Run +/// sandbox installs on Console.Out/Console.Error. CentralUI-030 +/// surfaced an intra-script concurrency hazard: a sandboxed script can fan out +/// work with Task.WhenAll / Task.Run and every child task inherits +/// the capture StringWriter via AsyncLocal; StringWriter is +/// not thread-safe, so concurrent writes could corrupt the buffer. These tests +/// drive the writer the same way Roslyn-hosted user code does. +/// +public class SandboxConsoleCaptureTests +{ + /// + /// CentralUI-030: a capture scope shared across Task.WhenAll child + /// tasks must serialise writes so the resulting transcript contains exactly + /// the expected number of lines without character-level interleaving. + /// + [Fact] + public async Task BeginCapture_ConcurrentWritesFromTasks_DoNotCorruptBuffer() + { + // The static install routes Console.Out through the singleton sandbox + // capture writer for the test process — this is idempotent and matches + // the way ScriptAnalysisService bootstraps the sandbox in production. + var (capture, _) = SandboxConsoleCapture.Install(); + + var buffer = new StringWriter(); + const int taskCount = 32; + const int linesPerTask = 50; + const int expectedLines = taskCount * linesPerTask; + + using (capture.BeginCapture(buffer)) + { + // AsyncLocal flows the capture scope into each Task.Run, mirroring + // a sandboxed script doing `await Task.WhenAll(...)` over Tasks + // that each `Console.WriteLine`. + var tasks = Enumerable.Range(0, taskCount).Select(i => Task.Run(() => + { + for (var j = 0; j < linesPerTask; j++) + { + Console.WriteLine($"task-{i}-line-{j}"); + } + })); + await Task.WhenAll(tasks); + } + + var captured = buffer.ToString(); + // Without the lock, concurrent StringWriter.WriteLine can drop or + // interleave characters and produce malformed lines / a wrong count. + // We assert the exact line count and that every emitted token is + // present on a line of its own — both fail under the unprotected + // implementation. + var lines = captured.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries); + Assert.Equal(expectedLines, lines.Length); + + for (var i = 0; i < taskCount; i++) + { + for (var j = 0; j < linesPerTask; j++) + { + Assert.Contains($"task-{i}-line-{j}", lines); + } + } + } + + /// + /// Sanity check: the most basic capture happy-path still works after the + /// CentralUI-030 lock was introduced. + /// + [Fact] + public void BeginCapture_SingleThreadedWrites_AreCaptured() + { + var (capture, _) = SandboxConsoleCapture.Install(); + var buffer = new StringWriter(); + + using (capture.BeginCapture(buffer)) + { + Console.WriteLine("hello"); + Console.Write("world"); + } + + Assert.Contains("hello", buffer.ToString()); + Assert.Contains("world", buffer.ToString()); + } +} diff --git a/tests/ScadaLink.Commons.Tests/Interfaces/Services/ExternalCallResultTests.cs b/tests/ScadaLink.Commons.Tests/Interfaces/Services/ExternalCallResultTests.cs new file mode 100644 index 00000000..6de2517d --- /dev/null +++ b/tests/ScadaLink.Commons.Tests/Interfaces/Services/ExternalCallResultTests.cs @@ -0,0 +1,76 @@ +using ScadaLink.Commons.Interfaces.Services; + +namespace ScadaLink.Commons.Tests.Interfaces.Services; + +/// +/// Tests for , in particular the Commons-021 +/// thread-safe lazy parse of Response. The pre-fix implementation used +/// two mutable fields (_response/_responseParsed) with no +/// synchronization, so concurrent readers could each construct a fresh +/// DynamicJsonElement and one would overwrite the other. The fix moves +/// the parse onto a Lazy<dynamic?> with +/// LazyThreadSafetyMode.ExecutionAndPublication (the default), which +/// guarantees one parse and one shared result for all readers. +/// +public class ExternalCallResultTests +{ + [Fact] + public void Response_NullOrEmptyJson_ReturnsNull() + { + var withNull = new ExternalCallResult(Success: true, ResponseJson: null, ErrorMessage: null); + var withEmpty = new ExternalCallResult(Success: true, ResponseJson: string.Empty, ErrorMessage: null); + + Assert.Null(withNull.Response); + Assert.Null(withEmpty.Response); + } + + [Fact] + public void Response_ParsesJsonIntoDynamicElement() + { + var result = new ExternalCallResult(Success: true, ResponseJson: "{\"answer\": 42}", ErrorMessage: null); + + // dynamic property access is the production usage pattern. + dynamic? response = result.Response; + Assert.NotNull(response); + int answer = (int)response!.answer; + Assert.Equal(42, answer); + } + + /// + /// Commons-021: concurrent readers must observe the same parsed instance + /// (a `Lazy<T>` invariant). Under the pre-fix code two threads could + /// both produce a fresh `DynamicJsonElement` and one would win the race — + /// `ReferenceEquals` would then occasionally fail. With the fix every + /// reader observes the single Lazy-published value, so the assertion + /// holds for every pair of observers. + /// + [Fact] + public void Response_ConcurrentReads_ReturnSameInstance() + { + // A larger payload makes the parse window wider so the race, if + // present, is more likely to fire. The same property — single + // published instance — must hold for any payload, though. + var json = "{\"items\":[{\"name\":\"a\"},{\"name\":\"b\"},{\"name\":\"c\"}],\"count\":3}"; + var result = new ExternalCallResult(Success: true, ResponseJson: json, ErrorMessage: null); + + const int observerCount = 64; + var barrier = new Barrier(observerCount); + var observed = new object?[observerCount]; + + Parallel.For(0, observerCount, i => + { + // Force all observers to call `Response` at the same instant so + // they collide on the lazy parse rather than each finding it + // already-published. + barrier.SignalAndWait(); + observed[i] = result.Response; + }); + + var first = observed[0]; + Assert.NotNull(first); + for (var i = 1; i < observerCount; i++) + { + Assert.Same(first, observed[i]); + } + } +} diff --git a/tests/ScadaLink.ConfigurationDatabase.Tests/ConcurrencyTests.cs b/tests/ScadaLink.ConfigurationDatabase.Tests/ConcurrencyTests.cs index da983851..56ac4e51 100644 --- a/tests/ScadaLink.ConfigurationDatabase.Tests/ConcurrencyTests.cs +++ b/tests/ScadaLink.ConfigurationDatabase.Tests/ConcurrencyTests.cs @@ -6,6 +6,7 @@ using ScadaLink.Commons.Entities.Sites; using ScadaLink.Commons.Entities.Templates; using ScadaLink.Commons.Types.Enums; using ScadaLink.ConfigurationDatabase; +using ScadaLink.ConfigurationDatabase.Repositories; namespace ScadaLink.ConfigurationDatabase.Tests; @@ -36,6 +37,31 @@ public class ConcurrencyTestDbContext : ScadaLinkDbContext } } +/// +/// A SQLite-friendly DbContext that keeps as +/// the optimistic-concurrency token but disables auto-generation (SQLite cannot +/// auto-populate a rowversion column). The caller sets RowVersion explicitly, which +/// is sufficient to exercise the production stub-attach delete path under CD-017's +/// concurrency rule. +/// +public class RowVersionConcurrencyTestDbContext : ScadaLinkDbContext +{ + public RowVersionConcurrencyTestDbContext(DbContextOptions options) : base(options) { } + + protected override void OnModelCreating(ModelBuilder modelBuilder) + { + base.OnModelCreating(modelBuilder); + + modelBuilder.Entity(builder => + { + builder.Property(d => d.RowVersion) + .IsRequired(false) + .IsConcurrencyToken() + .ValueGeneratedNever(); + }); + } +} + public class ConcurrencyTests : IDisposable { private readonly string _dbPath; @@ -149,6 +175,63 @@ public class ConcurrencyTests : IDisposable Assert.Equal("Second update", loaded.Description); } + [Fact] + public async Task DeleteDeploymentRecord_StaleRowVersion_ThrowsConcurrencyException() + { + // CD-017: Verifies the stub-attach delete path enforces optimistic concurrency + // when the caller passes a RowVersion that no longer matches the row's current + // RowVersion. Uses a SQLite fixture where DeploymentRecord.RowVersion is an + // explicit, caller-managed concurrency token (no SQL Server auto-generation). + using var setupCtx = new RowVersionConcurrencyTestDbContext(BuildOptions()); + await setupCtx.Database.EnsureCreatedAsync(); + + var site = new Site("Site1", "S-RV1"); + var template = new Template("RV-T1"); + setupCtx.Sites.Add(site); + setupCtx.Templates.Add(template); + await setupCtx.SaveChangesAsync(); + + var instance = new Instance("RV-I1") { SiteId = site.Id, TemplateId = template.Id, State = InstanceState.Enabled }; + setupCtx.Instances.Add(instance); + await setupCtx.SaveChangesAsync(); + + var record = new DeploymentRecord("deploy-rv-stale", "admin") + { + InstanceId = instance.Id, + DeployedAt = DateTimeOffset.UtcNow, + RowVersion = new byte[] { 0x01 }, + }; + setupCtx.DeploymentRecords.Add(record); + await setupCtx.SaveChangesAsync(); + var id = record.Id; + + // Reload in a fresh context and simulate a concurrent edit that has advanced + // the stored RowVersion. The caller below holds the *prior* RowVersion (0x01) + // and is expected to lose the concurrency check. + using (var advanceCtx = new RowVersionConcurrencyTestDbContext(BuildOptions())) + { + var stored = await advanceCtx.DeploymentRecords.SingleAsync(d => d.Id == id); + stored.RowVersion = new byte[] { 0x02 }; + await advanceCtx.SaveChangesAsync(); + } + + using var deleteCtx = new RowVersionConcurrencyTestDbContext(BuildOptions()); + var repository = new DeploymentManagerRepository(deleteCtx); + var staleRowVersion = new byte[] { 0x01 }; + await repository.DeleteDeploymentRecordAsync(id, staleRowVersion); + + await Assert.ThrowsAsync( + () => repository.SaveChangesAsync()); + } + + private DbContextOptions BuildOptions() + { + return new DbContextOptionsBuilder() + .UseSqlite($"DataSource={_dbPath}") + .ConfigureWarnings(w => w.Ignore(RelationalEventId.PendingModelChangesWarning)) + .Options; + } + [Fact] public void DeploymentRecord_HasRowVersionConfigured() { diff --git a/tests/ScadaLink.ConfigurationDatabase.Tests/RepositoryCoverageTests.cs b/tests/ScadaLink.ConfigurationDatabase.Tests/RepositoryCoverageTests.cs index cafb0f84..49798225 100644 --- a/tests/ScadaLink.ConfigurationDatabase.Tests/RepositoryCoverageTests.cs +++ b/tests/ScadaLink.ConfigurationDatabase.Tests/RepositoryCoverageTests.cs @@ -838,9 +838,10 @@ public class DeploymentManagerRepositoryTests : IDisposable await _repository.AddDeploymentRecordAsync(record); await _repository.SaveChangesAsync(); var id = record.Id; + var rowVersion = record.RowVersion ?? Array.Empty(); _context.ChangeTracker.Clear(); - await _repository.DeleteDeploymentRecordAsync(id); + await _repository.DeleteDeploymentRecordAsync(id, rowVersion); await _repository.SaveChangesAsync(); Assert.Null(await _repository.GetDeploymentRecordByIdAsync(id));