From ac96b83b08982226e1261f745d6307cd40430de5 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Thu, 28 May 2026 05:40:15 -0400 Subject: [PATCH] fix(high-severity): close 9 of 10 open High findings across 8 modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comm-016: delete dead HandleConnectionStateChanged + _debugSubscriptions / _inProgressDeployments tracking + ConnectionStateChanged message record. Disconnect detection is owned by the transport layers (gRPC keepalive PING ~25s; Ask-timeout at CommunicationService). Updates the Component-Communication.md design doc to make that explicit. SnF-018: NotificationForwarder.DeliverAsync now discards a corrupt buffered payload (Warning log + return true) instead of returning false and parking the row — honoring the design's "notifications do not park" invariant. DM-018: reconciliation no longer force-sets Enabled, preserving an intentional Disabled state after central failover. ESG-018: DeliverBufferedAsync (both ExternalSystemClient + DatabaseGateway) catches JsonException and returns false, turning a corrupt buffered row into a parked operation instead of a retry-forever poison message. InboundAPI-022: register ActiveNodeGate as IActiveNodeGate in the Central DI branch so standby-node gating is actually wired up in production. NS-019: remove orphaned NotificationDeliveryService / INotificationDeliveryService / NotificationResult; central notification delivery now lives entirely in NotificationOutbox. SEL-016: normalise From/To filters to UTC before ISO-string compare so non-UTC DateTimeOffset clients no longer get spuriously excluded events. TE-017: include Description on attributes/alarms and a HashableConnections projection (protocol, endpoint JSON, failover count) in the revision hash and DiffService; staleness detection now catches description-only and connection-endpoint edits. Transport-001 and Transport-002 (also High) remain Open — they're being handled in a follow-up batch because both touch BundleImporter.cs and must serialise. --- code-reviews/Communication/findings.md | 18 +- code-reviews/DeploymentManager/findings.md | 4 +- .../ExternalSystemGateway/findings.md | 4 +- code-reviews/InboundAPI/findings.md | 4 +- code-reviews/NotificationService/findings.md | 4 +- code-reviews/SiteEventLogging/findings.md | 4 +- code-reviews/StoreAndForward/findings.md | 12 +- code-reviews/TemplateEngine/findings.md | 18 +- docs/requirements/Component-Communication.md | 6 +- .../Services/INotificationDeliveryService.cs | 32 - .../Communication/ConnectionStateChanged.cs | 6 - .../Actors/CentralCommunicationActor.cs | 95 +- .../DeploymentService.cs | 44 +- .../DatabaseGateway.cs | 21 +- .../ExternalSystemClient.cs | 21 +- src/ScadaLink.Host/Health/ActiveNodeGate.cs | 57 + src/ScadaLink.Host/Program.cs | 10 + src/ScadaLink.Host/SiteServiceRegistration.cs | 3 +- .../EmailNotificationDeliveryAdapter.cs | 27 +- .../MailKitSmtpClientWrapper.cs | 2 +- .../NotificationDeliveryService.cs | 448 ------- .../NotificationOptions.cs | 8 +- .../ServiceCollectionExtensions.cs | 11 +- .../SmtpErrorClassifier.cs | 8 +- .../NotificationForwarder.cs | 52 +- .../Flattening/DiffService.cs | 25 + .../Flattening/RevisionHashService.cs | 53 +- .../Messages/CompatibilityTests.cs | 15 +- .../CentralCommunicationActorTests.cs | 31 +- .../DeploymentServiceTests.cs | 50 + .../DatabaseGatewayTests.cs | 25 + .../ExternalSystemClientTests.cs | 26 + .../CompositionRootTests.cs | 35 +- .../IntegrationSurfaceTests.cs | 43 +- .../NotificationDeliveryServiceTests.cs | 1039 ----------------- .../NotificationForwarderTests.cs | 54 + .../Flattening/DiffServiceTests.cs | 118 ++ .../Flattening/RevisionHashServiceTests.cs | 148 +++ 38 files changed, 852 insertions(+), 1729 deletions(-) delete mode 100644 src/ScadaLink.Commons/Interfaces/Services/INotificationDeliveryService.cs delete mode 100644 src/ScadaLink.Commons/Messages/Communication/ConnectionStateChanged.cs create mode 100644 src/ScadaLink.Host/Health/ActiveNodeGate.cs delete mode 100644 src/ScadaLink.NotificationService/NotificationDeliveryService.cs delete mode 100644 tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs diff --git a/code-reviews/Communication/findings.md b/code-reviews/Communication/findings.md index 298b40e4..4327b7fa 100644 --- a/code-reviews/Communication/findings.md +++ b/code-reviews/Communication/findings.md @@ -774,9 +774,25 @@ than being masked by an endpoint-agnostic mock. |--|--| | Severity | High | | Category | Design-document adherence | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs:169`, `src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs:338-375` | +**Resolution** — deleted the dead code path in favour of the keepalive-based +detection that is the actual production behaviour: removed the +`Receive` handler, the `HandleConnectionStateChanged` +method, the `_debugSubscriptions` / `_inProgressDeployments` tracking dicts ++ the `TrackMessageForCleanup` helper that fed them, and the dead message +record `src/ScadaLink.Commons/Messages/Communication/ConnectionStateChanged.cs`. +The two dead tests (`ConnectionLost_DebugStreamsKilled` in +CentralCommunicationActorTests, `RoundTrip_ConnectionStateChanged_Succeeds` +in CompatibilityTests) were removed alongside. The design doc +`docs/requirements/Component-Communication.md` "Connection Failure Behavior" +section was updated to state explicitly that disconnect is detected at the +transport layer (gRPC keepalive PING ~25 s for debug streams; Ask-timeout +at the CommunicationService layer for command/control), with no +application-level signal. `DebugStreamTerminated` survives because +`DebugStreamBridgeActor` uses it for an unrelated intra-actor stop signal. + **Description** `CentralCommunicationActor.HandleConnectionStateChanged` is wired to diff --git a/code-reviews/DeploymentManager/findings.md b/code-reviews/DeploymentManager/findings.md index fccc4a78..3457eb28 100644 --- a/code-reviews/DeploymentManager/findings.md +++ b/code-reviews/DeploymentManager/findings.md @@ -912,9 +912,11 @@ would be meaningless). |--|--| | Severity | High | | Category | Correctness & logic bugs | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.DeploymentManager/DeploymentService.cs:675-682,721-748` | +**Resolution** — Added a `forceEnabledState` parameter to `ApplyPostSuccessSideEffectsAsync`. The normal deploy path passes `true` (fresh apply legitimately ends in `Enabled`); the reconciliation path passes `false`, so the helper only promotes `NotDeployed → Enabled` and leaves an existing `Disabled` (or `Enabled`) untouched. Regression test `DeployInstanceAsync_Reconciled_DisabledInstance_PreservesDisabledState` exercises the failover scenario and asserts the prior record still flips to `Success` while `Instance.State` stays `Disabled`. + **Description** `TryReconcileWithSiteAsync` calls `ApplyPostSuccessSideEffectsAsync` whenever diff --git a/code-reviews/ExternalSystemGateway/findings.md b/code-reviews/ExternalSystemGateway/findings.md index ac5e201a..9e75f03f 100644 --- a/code-reviews/ExternalSystemGateway/findings.md +++ b/code-reviews/ExternalSystemGateway/findings.md @@ -1003,9 +1003,11 @@ captured request URI has no trailing `?`; it was verified to fail before the fix |--|--| | Severity | High | | Category | Error handling & resilience | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs:176`, `src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs:151` | +**Resolution** — Wrapped the `JsonSerializer.Deserialize<...>(message.PayloadJson)` call in both `ExternalSystemClient.DeliverBufferedAsync` and `DatabaseGateway.DeliverBufferedAsync` in a `try`/`catch (JsonException)` block. A `JsonException` is by definition permanent (the same payload bytes always deserialize identically), so the catch branch logs at `LogError` and returns `false`, parking the message via the S&F engine instead of letting it throw and be retried as a transient failure. Regression tests `DeliverBuffered_MalformedJsonPayload_ReturnsFalseSoMessageParks` were added to both `ExternalSystemClientTests` and `DatabaseGatewayTests` — each feeds a truncated `PayloadJson` to the handler and asserts `delivered == false` and that no exception escapes. + **Description** Both `ExternalSystemClient.DeliverBufferedAsync` and `DatabaseGateway.DeliverBufferedAsync` diff --git a/code-reviews/InboundAPI/findings.md b/code-reviews/InboundAPI/findings.md index cd813f34..9c23c56d 100644 --- a/code-reviews/InboundAPI/findings.md +++ b/code-reviews/InboundAPI/findings.md @@ -1061,9 +1061,11 @@ that an attribute read/write carries the inherited `ParentExecutionId`. |--|--| | Severity | High | | Category | Security | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.InboundAPI/IActiveNodeGate.cs`, `src/ScadaLink.InboundAPI/InboundApiEndpointFilter.cs:52-60`; absent from `src/ScadaLink.Host/Program.cs` | +**Resolution** — Added `src/ScadaLink.Host/Health/ActiveNodeGate.cs`, a production `IActiveNodeGate` implementation backed by `AkkaHostedService` that mirrors `ActiveNodeHealthCheck`'s leadership probe (member status `Up` AND `Cluster.State.Leader == SelfAddress`), and registered it as a singleton in the central-role branch of `Program.cs`. A structural regression test (`CentralCompositionRootTests.Central_IActiveNodeGate_IsRegisteredAsActiveNodeGate`) reflects over the built `IServiceProvider` to assert the registration's existence and concrete type — failing on `main` and passing after the fix. The `InboundApiEndpointFilter`'s fall-through-to-allow behaviour is retained as the documented safe default for non-clustered hosts and tests. + **Description** InboundAPI-008's resolution adds `IActiveNodeGate` (lines 17–24 of diff --git a/code-reviews/NotificationService/findings.md b/code-reviews/NotificationService/findings.md index 3611ed93..a410dc35 100644 --- a/code-reviews/NotificationService/findings.md +++ b/code-reviews/NotificationService/findings.md @@ -647,9 +647,11 @@ Resolved 2026-05-17. All three issues confirmed against source. The hand-rolled |--|--| | Severity | High | | Category | Design-document adherence | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:18-442`, `src/ScadaLink.NotificationService/ServiceCollectionExtensions.cs:20-21`, `src/ScadaLink.Commons/Interfaces/Services/INotificationDeliveryService.cs:1-33`, `src/ScadaLink.Host/Program.cs:77` | +**Resolution** — Executed option 1. Deleted `src/ScadaLink.NotificationService/NotificationDeliveryService.cs`, `src/ScadaLink.Commons/Interfaces/Services/INotificationDeliveryService.cs` (also retires `NotificationResult` + `BufferedNotification`), and the orphaned `tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs` suite; reduced `AddNotificationService` to the shared SMTP primitives (`OAuth2TokenService`, `Func`, `NotificationOptions`), updated `CompositionRootTests` (assert the primitives instead of the dead types), and removed the `Notification_Send_MockSmtp_Delivers` assertion in `IntegrationSurfaceTests` (central delivery is covered by `EmailNotificationDeliveryAdapterTests`). Grep-verified `grep -rn "INotificationDeliveryService\|NotificationDeliveryService\|NotificationResult\|BufferedNotification\|DeliverBufferedAsync" --include="*.cs" src/ tests/` before delete: zero production callers (only XML-doc cross-references in NS, MailKit wrapper, NotificationOptions and `EmailNotificationDeliveryAdapter`, plus the dead test files); cross-reference comments updated to remove the stale class references. `dotnet build ScadaLink.slnx` succeeds (0 warnings, 0 errors); affected test projects all pass (`NotificationService.Tests` 52/52, `NotificationOutbox.Tests` 86/86 on rerun — one flaky timing-sensitive Akka.TestKit test unrelated to NS-019, `Host.Tests` 205/205); `IntegrationTests` 64/66 with two pre-existing failures in `NotificationOutboxFlowTests` (SQLite "near IF: syntax error", reproducible on pristine `main`, unrelated to NS-019). + **Description** The updated `Component-NotificationService.md` (re-read in full at this commit) makes the new design unambiguous: "The Notification Service is the central component that manages notification-list and SMTP definitions and provides the per-type delivery adapters used to send notifications. … Notification delivery has been inverted: a site script's notification is store-and-forwarded to the central cluster, and the central **Notification Outbox** owns dispatch and delivery, calling an `INotificationDeliveryAdapter` supplied by this component." The doc explicitly states the service is "central cluster only", "no longer present at site clusters", and "no longer delivers notifications from sites". diff --git a/code-reviews/SiteEventLogging/findings.md b/code-reviews/SiteEventLogging/findings.md index 806f45dd..219c6cd1 100644 --- a/code-reviews/SiteEventLogging/findings.md +++ b/code-reviews/SiteEventLogging/findings.md @@ -793,9 +793,11 @@ chosen policy on `ISiteEventLogger.LogEventAsync`. |--|--| | Severity | High | | Category | Correctness & logic bugs | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.SiteEventLogging/EventLogQueryService.cs:67-77`, `src/ScadaLink.SiteEventLogging/SiteEventLogger.cs:159`, `src/ScadaLink.SiteEventLogging/EventLogPurgeService.cs:72-78` | +**Resolution** — `EventLogQueryService.ExecuteQuery` now calls `.ToUniversalTime()` on `request.From`/`request.To` before `ToString("o")`, so the produced ISO 8601 string always ends in `+00:00` and lexicographically matches the UTC timestamps written by `SiteEventLogger`. `EventLogPurgeService.PurgeByRetention` was also made defensive with an explicit `.ToUniversalTime()` on the cutoff. A regression test (`Query_FiltersByTimeRange_HandlesNonUtcOffset`) constructs a `+05:00` `DateTimeOffset` and asserts the matching UTC-stored events are returned and out-of-range ones are excluded. + **Description** Event rows are persisted with `timestamp` = `DateTimeOffset.UtcNow.ToString("o")`, diff --git a/code-reviews/StoreAndForward/findings.md b/code-reviews/StoreAndForward/findings.md index 5477712a..413ac20e 100644 --- a/code-reviews/StoreAndForward/findings.md +++ b/code-reviews/StoreAndForward/findings.md @@ -991,9 +991,19 @@ the StoreAndForward-016 replication) — and pass it to `RaiseActivity` (falling |--|--| | Severity | High | | Category | Design-document adherence | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.StoreAndForward/NotificationForwarder.cs:62`–`:69`, `:105`–`:122`; `src/ScadaLink.StoreAndForward/StoreAndForwardService.cs:369`–`:397` | +**Resolution** — `NotificationForwarder.DeliverAsync` now discards a corrupt +buffered payload instead of returning false. The corrupt path logs a Warning +with the buffered row id + length-capped payload preview via an injected +`ILogger` (NullLogger by default for back-compat), +then returns true so the S&F engine clears the row via its standard +success-path cleanup — honoring the "notifications do not park" design +invariant. Two regression tests in `NotificationForwarderTests` cover the +two corrupt shapes (invalid JSON, `null` deserialisation) and pin that +nothing is forwarded to central in either case. + **Description** The Component design doc explicitly carves out notifications from the parking lifecycle: diff --git a/code-reviews/TemplateEngine/findings.md b/code-reviews/TemplateEngine/findings.md index a6d5eada..a0fad6c1 100644 --- a/code-reviews/TemplateEngine/findings.md +++ b/code-reviews/TemplateEngine/findings.md @@ -843,9 +843,11 @@ resolves against the real parent module. Regression test: |--|--| | Severity | High | | Category | Design-document adherence | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.TemplateEngine/Flattening/RevisionHashService.cs:128`, `src/ScadaLink.TemplateEngine/Flattening/RevisionHashService.cs:156`, `src/ScadaLink.TemplateEngine/Flattening/RevisionHashService.cs:42`, `src/ScadaLink.TemplateEngine/Flattening/DiffService.cs:110`, `src/ScadaLink.TemplateEngine/Flattening/DiffService.cs:118` | +**Resolution** — Added `Description` to `HashableAttribute` and `HashableAlarm` (placed alphabetically per the determinism contract) and introduced a `HashableConnection` projection plus a `SortedDictionary Connections` field on `HashableConfiguration` that captures protocol, primary/backup JSON, and failover retry count for every deployed connection. `DiffService.AttributesEqual` and `AlarmsEqual` now compare `Description`, and a new public `ConnectionsEqual` helper covers connection-endpoint drift so callers can detect the change in the same shape used by the other entity comparators. Regression tests `ComputeHash_AttributeDescriptionEdit_ChangesHash`, `ComputeHash_AlarmDescriptionEdit_ChangesHash`, `ComputeHash_ConnectionEndpointEdit_ChangesHash`, and `ConnectionsEqual_EndpointEdit_ReturnsFalse` lock the behaviour in. + **Description** The design states the revision hash is "computed from the resolved content" and @@ -891,7 +893,19 @@ it in `DiffService`. Add tests: **Resolution** -_Unresolved._ +Resolved (commit `pending`): `RevisionHashService` now folds `Description` into +the `HashableAttribute` / `HashableAlarm` projections (alphabetical placement +preserved) and adds a sorted `Connections` map of `HashableConnection` +(Protocol, ConfigurationJson, BackupConfigurationJson, FailoverRetryCount) on +`HashableConfiguration`. `DiffService.AttributesEqual` / `AlarmsEqual` compare +`Description`, and a public `ConnectionsEqual` helper covers connection drift +in the same shape as the other entity comparators. Regression tests: +`ComputeHash_AttributeDescriptionEdit_ChangesHash`, +`ComputeHash_AlarmDescriptionEdit_ChangesHash`, +`ComputeHash_ConnectionEndpointEdit_ChangesHash`, +`ConnectionsEqual_EndpointEdit_ReturnsFalse`. The diff-shape extension that +surfaces added/removed/changed connections in the UI remains tracked under +TemplateEngine-018. ### TemplateEngine-018 — `DiffService` reports no entries for added/removed/changed connections diff --git a/docs/requirements/Component-Communication.md b/docs/requirements/Component-Communication.md index 6c870a9a..a4195b5c 100644 --- a/docs/requirements/Component-Communication.md +++ b/docs/requirements/Component-Communication.md @@ -224,8 +224,10 @@ The ManagementActor is registered at the well-known path `/user/management` on c ## Connection Failure Behavior -- **In-flight messages**: When a connection drops while a request is in flight (e.g., deployment sent but no response received), the Akka ask pattern times out and the caller receives a failure. There is **no automatic retry or buffering at central** — the engineer sees the failure in the UI and re-initiates the action. This is consistent with the design principle that central does not buffer messages. -- **Debug streams**: Any gRPC stream interruption triggers reconnection logic in the `DebugStreamBridgeActor`. The bridge actor attempts to reconnect to the other site node endpoint (NodeB if NodeA failed, or vice versa), with up to 3 retries and 5-second backoff. If all retries fail, the consumer is notified via `OnStreamTerminated` and the bridge actor is stopped. Events during the reconnection gap are lost (acceptable for real-time debug view). On successful reconnection, the consumer can request a fresh snapshot to re-sync state. +Disconnect is detected at the **transport layer**, never via an application-level signal from central. There is no `ConnectionStateChanged`-style synchronous notification: the central coordinator does not maintain a model of "this site is up / down" because the two transports already report unavailability at their natural cadence. + +- **In-flight command/control messages (ClusterClient + Ask)**: When a connection drops while a request is in flight (e.g., a deployment sent but no response received), the Akka ask pattern times out and the caller receives a failure. There is **no automatic retry or buffering at central** — the engineer sees the failure in the UI and re-initiates the action. This is consistent with the design principle that central does not buffer messages. An in-progress deployment whose round-trip exceeds the Ask timeout (default 120 s at `CommunicationService.DeployInstanceAsync`) surfaces as `DeploymentStatus.Failed` to the caller. +- **Debug streams (gRPC)**: Any gRPC stream interruption is detected by the HTTP/2 keepalive PING (~25 s) and triggers reconnection logic in the `DebugStreamBridgeActor`. The bridge actor attempts to reconnect to the other site node endpoint (NodeB if NodeA failed, or vice versa), with up to 3 retries and 5-second backoff. If all retries fail, the consumer is notified via `OnStreamTerminated` and the bridge actor is stopped. Events during the reconnection gap are lost (acceptable for real-time debug view). On successful reconnection, the consumer can request a fresh snapshot to re-sync state. ## Failover Behavior diff --git a/src/ScadaLink.Commons/Interfaces/Services/INotificationDeliveryService.cs b/src/ScadaLink.Commons/Interfaces/Services/INotificationDeliveryService.cs deleted file mode 100644 index 863d363e..00000000 --- a/src/ScadaLink.Commons/Interfaces/Services/INotificationDeliveryService.cs +++ /dev/null @@ -1,32 +0,0 @@ -namespace ScadaLink.Commons.Interfaces.Services; - -/// -/// Interface for sending notifications. -/// Implemented by NotificationService, consumed by ScriptRuntimeContext. -/// -public interface INotificationDeliveryService -{ - /// - /// Sends a notification to a named list. Transient failures go to S&F. - /// Permanent failures returned to caller. - /// - /// Name of the notification list to deliver to. - /// Subject line of the notification. - /// Plain-text body of the notification. - /// Optional name of the instance that triggered the send. - /// Cancellation token for the async operation. - Task SendAsync( - string listName, - string subject, - string message, - string? originInstanceName = null, - CancellationToken cancellationToken = default); -} - -/// -/// Result of a notification send attempt. -/// -public record NotificationResult( - bool Success, - string? ErrorMessage, - bool WasBuffered = false); diff --git a/src/ScadaLink.Commons/Messages/Communication/ConnectionStateChanged.cs b/src/ScadaLink.Commons/Messages/Communication/ConnectionStateChanged.cs deleted file mode 100644 index ab067635..00000000 --- a/src/ScadaLink.Commons/Messages/Communication/ConnectionStateChanged.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace ScadaLink.Commons.Messages.Communication; - -public record ConnectionStateChanged( - string SiteId, - bool IsConnected, - DateTimeOffset Timestamp); diff --git a/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs b/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs index c1bf2519..68bb1a66 100644 --- a/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs +++ b/src/ScadaLink.Communication/Actors/CentralCommunicationActor.cs @@ -60,17 +60,18 @@ public class CentralCommunicationActor : ReceiveActor /// private readonly Dictionary ContactAddresses)> _siteClients = new(); - /// - /// Tracks active debug view subscriptions: correlationId → (siteId, subscriber). - /// Used to kill debug streams on site disconnection (WP-5). - /// - private readonly Dictionary _debugSubscriptions = new(); - - /// - /// Tracks in-progress deployments: deploymentId → siteId. - /// On central failover, in-progress deployments are treated as failed (WP-5). - /// - private readonly Dictionary _inProgressDeployments = new(); + // Communication-016: the previous _debugSubscriptions / _inProgressDeployments + // dictionaries existed solely to support a documented "synchronous kill streams + + // mark deployments failed on site disconnect" workflow triggered by + // ConnectionStateChanged. No production code ever emitted that message — only + // the unit test did — so the workflow was dead from end to end. Disconnect + // detection is owned by the underlying transports: the gRPC keepalive PING + // signals stream interruption in ~25s (handled by DebugStreamBridgeActor's own + // reconnection logic), and an Ask round-trip for a deploy times out at the + // CommunicationService layer (caller sees failure). The tracking dicts + + // ConnectionStateChanged record + HandleConnectionStateChanged handler are + // removed; see docs/requirements/Component-Communication.md "Connection + // Failure Behavior" for the keepalive-based contract that survives. private ICancelable? _refreshSchedule; @@ -165,9 +166,6 @@ public class CentralCommunicationActor : ReceiveActor Receive(r => ProcessLocally(r.Report)); Receive(_ => { /* DistributedPubSub subscribe confirmation */ }); - // Connection state changes - Receive(HandleConnectionStateChanged); - // Route enveloped messages to sites Receive(HandleSiteEnvelope); @@ -335,44 +333,10 @@ public class CentralCommunicationActor : ReceiveActor } } - private void HandleConnectionStateChanged(ConnectionStateChanged msg) - { - if (!msg.IsConnected) - { - _log.Warning("Site {0} disconnected at {1}", msg.SiteId, msg.Timestamp); - - // WP-5: Kill active debug streams for the disconnected site - var toRemove = _debugSubscriptions - .Where(kvp => kvp.Value.SiteId == msg.SiteId) - .ToList(); - - foreach (var kvp in toRemove) - { - _log.Info("Killing debug stream {0} for disconnected site {1}", kvp.Key, msg.SiteId); - kvp.Value.Subscriber.Tell(new DebugStreamTerminated(msg.SiteId, kvp.Key)); - _debugSubscriptions.Remove(kvp.Key); - } - - // WP-5: Mark in-progress deployments as failed - var failedDeployments = _inProgressDeployments - .Where(kvp => kvp.Value == msg.SiteId) - .Select(kvp => kvp.Key) - .ToList(); - - foreach (var deploymentId in failedDeployments) - { - _log.Warning("Deployment {0} to site {1} treated as failed due to disconnection", - deploymentId, msg.SiteId); - _inProgressDeployments.Remove(deploymentId); - } - - // Note: Do NOT stop the ClusterClient — it handles reconnection internally - } - else - { - _log.Info("Site {0} connected at {1}", msg.SiteId, msg.Timestamp); - } - } + // Communication-016: HandleConnectionStateChanged removed — no production + // caller emitted ConnectionStateChanged, so the workflow ran only in tests. + // Disconnect detection is owned by the transport layers (gRPC keepalive + + // ClusterClient/Ask timeout). private void HandleSiteEnvelope(SiteEnvelope envelope) { @@ -385,9 +349,6 @@ public class CentralCommunicationActor : ReceiveActor return; } - // Track debug subscriptions for cleanup on disconnect - TrackMessageForCleanup(envelope); - // Route via ClusterClient — Sender is preserved for Ask response routing entry.Client.Tell( new ClusterClient.Send("/user/site-communication", envelope.Message), @@ -485,23 +446,8 @@ public class CentralCommunicationActor : ReceiveActor _log.Info("Site ClusterClient cache refreshed with {0} site(s)", _siteClients.Count); } - private void TrackMessageForCleanup(SiteEnvelope envelope) - { - switch (envelope.Message) - { - case Commons.Messages.DebugView.SubscribeDebugViewRequest sub: - _debugSubscriptions[sub.CorrelationId] = (envelope.SiteId, Sender); - break; - - case Commons.Messages.DebugView.UnsubscribeDebugViewRequest unsub: - _debugSubscriptions.Remove(unsub.CorrelationId); - break; - - case Commons.Messages.Deployment.DeployInstanceCommand deploy: - _inProgressDeployments[deploy.DeploymentId] = envelope.SiteId; - break; - } - } + // Communication-016: TrackMessageForCleanup removed — the dicts it fed + // existed solely to support the dead ConnectionStateChanged workflow. /// protected override SupervisorStrategy SupervisorStrategy() @@ -547,11 +493,8 @@ public class CentralCommunicationActor : ReceiveActor /// protected override void PostStop() { - _log.Info("CentralCommunicationActor stopped. In-progress deployments treated as failed (WP-5)."); + _log.Info("CentralCommunicationActor stopped"); _refreshSchedule?.Cancel(); - // On central failover, all in-progress deployments are failed - _inProgressDeployments.Clear(); - _debugSubscriptions.Clear(); } } diff --git a/src/ScadaLink.DeploymentManager/DeploymentService.cs b/src/ScadaLink.DeploymentManager/DeploymentService.cs index 7d0eef39..d2922ff6 100644 --- a/src/ScadaLink.DeploymentManager/DeploymentService.cs +++ b/src/ScadaLink.DeploymentManager/DeploymentService.cs @@ -228,7 +228,8 @@ public class DeploymentService // logged loudly for operator reconciliation but must not flip // the already-committed Success record back to Failed. await ApplyPostSuccessSideEffectsAsync( - instance, deploymentId, revisionHash, configJson, cancellationToken); + instance, deploymentId, revisionHash, configJson, + forceEnabledState: true, cancellationToken); } // Audit log @@ -677,8 +678,22 @@ public class DeploymentService // the instance State to Enabled and store/refresh the deployed // config snapshot — otherwise the central state machine and the // deployed-snapshot invariant diverge from what the site is running. + // + // DeploymentManager-018: the reconciliation path runs only when the + // prior record is InProgress or timeout-Failed — exactly the cases + // that survive a central failover. The in-memory operation lock is + // lost on failover, so an operator may have legitimately invoked + // Disable on the instance between the original timed-out deploy and + // this redeploy. Disable does not change the deployed config, so the + // site still reports the target revision hash. Reconciliation must + // therefore PRESERVE an intentional Disabled state instead of + // silently flipping it back to Enabled — pass forceEnabledState: + // false so the helper only promotes NotDeployed → Enabled (the + // first-deploy-timed-out case) and leaves an explicit Disabled + // alone. await ApplyPostSuccessSideEffectsAsync( - instance, prior.DeploymentId, targetRevisionHash, configJson, cancellationToken); + instance, prior.DeploymentId, targetRevisionHash, configJson, + forceEnabledState: false, cancellationToken); await _auditService.LogAsync(prior.DeployedBy, "DeployReconciled", "Instance", instance.Id.ToString(), instance.UniqueName, @@ -713,6 +728,19 @@ public class DeploymentService /// deployed config snapshot (WP-8). Factored into one helper so the two /// paths cannot drift (DeploymentManager-015). /// + /// DeploymentManager-018: distinguishes + /// the two callers. The normal deploy path passes true — a fresh + /// successful apply legitimately puts the instance into + /// (the documented "Deploy on a Disabled instance also enables it" semantics + /// of ). The reconciliation path + /// passes false: it is reconciling a *prior* deployment that may + /// have completed before the current operator session (central failover + /// loses the in-memory operation lock, so an operator may have legitimately + /// Disabled the instance in between). On that path we only promote + /// + /// (the first-deploy-timed-out case) and leave an explicit Disabled alone, + /// so reconciliation never silently undoes a Disable. + /// /// Best-effort: the deployment record's terminal /// status is already committed by the caller before this runs. A failure /// here is logged loudly for operator reconciliation but is NOT propagated — @@ -723,12 +751,20 @@ public class DeploymentService string deploymentId, string revisionHash, string configJson, + bool forceEnabledState, CancellationToken cancellationToken) { try { - // WP-4: Update instance state to Enabled on successful deployment - instance.State = InstanceState.Enabled; + // WP-4: Update instance state to Enabled on successful deployment. + // DeploymentManager-018: on the reconciliation path + // (forceEnabledState=false) only promote NotDeployed → Enabled, + // preserving an intentional Disabled state set between the original + // timed-out deploy and the redeploy. + if (forceEnabledState || instance.State == InstanceState.NotDeployed) + { + instance.State = InstanceState.Enabled; + } await _repository.UpdateInstanceAsync(instance, cancellationToken); // WP-8: Store deployed config snapshot diff --git a/src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs b/src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs index 32576cab..78b28aca 100644 --- a/src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs +++ b/src/ScadaLink.ExternalSystemGateway/DatabaseGateway.cs @@ -148,7 +148,26 @@ public class DatabaseGateway : IDatabaseGateway public async Task DeliverBufferedAsync( StoreAndForwardMessage message, CancellationToken cancellationToken = default) { - var payload = JsonSerializer.Deserialize(message.PayloadJson); + // ExternalSystemGateway-018: a malformed (not just empty/null-fielded) + // PayloadJson would otherwise throw `JsonException` here, which the S&F + // engine treats as a transient failure and retries forever (poison + // message). Re-running the same deserialization against the same payload + // will throw deterministically, so JsonException is permanent — log, + // and return false so the S&F engine parks the message instead. + CachedWritePayload? payload; + try + { + payload = JsonSerializer.Deserialize(message.PayloadJson); + } + catch (JsonException ex) + { + _logger.LogError( + ex, + "Buffered CachedDbWrite message {Id} has malformed JSON payload; parking.", + message.Id); + return false; + } + if (payload == null || string.IsNullOrEmpty(payload.ConnectionName) || string.IsNullOrEmpty(payload.Sql)) { _logger.LogError("Buffered CachedDbWrite message {Id} has an unreadable payload; parking.", message.Id); diff --git a/src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs b/src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs index 231c143c..260c8c62 100644 --- a/src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs +++ b/src/ScadaLink.ExternalSystemGateway/ExternalSystemClient.cs @@ -173,7 +173,26 @@ public class ExternalSystemClient : IExternalSystemClient public async Task DeliverBufferedAsync( StoreAndForwardMessage message, CancellationToken cancellationToken = default) { - var payload = JsonSerializer.Deserialize(message.PayloadJson); + // ExternalSystemGateway-018: a malformed (not just empty/null-fielded) + // PayloadJson would otherwise throw `JsonException` here, which the S&F + // engine treats as a transient failure and retries forever (poison + // message). Re-running the same deserialization against the same payload + // will throw deterministically, so JsonException is permanent — log, + // and return false so the S&F engine parks the message instead. + CachedCallPayload? payload; + try + { + payload = JsonSerializer.Deserialize(message.PayloadJson); + } + catch (JsonException ex) + { + _logger.LogError( + ex, + "Buffered ExternalSystem message {Id} has malformed JSON payload; parking.", + message.Id); + return false; + } + if (payload == null || string.IsNullOrEmpty(payload.SystemName) || string.IsNullOrEmpty(payload.MethodName)) { _logger.LogError("Buffered ExternalSystem message {Id} has an unreadable payload; parking.", message.Id); diff --git a/src/ScadaLink.Host/Health/ActiveNodeGate.cs b/src/ScadaLink.Host/Health/ActiveNodeGate.cs new file mode 100644 index 00000000..421bcb7f --- /dev/null +++ b/src/ScadaLink.Host/Health/ActiveNodeGate.cs @@ -0,0 +1,57 @@ +using Akka.Cluster; +using ScadaLink.Host.Actors; +using ScadaLink.InboundAPI; + +namespace ScadaLink.Host.Health; + +/// +/// InboundAPI-008 / InboundAPI-022: production implementation of +/// backed by the running Akka.NET cluster. +/// +/// The inbound API is "Central cluster only (active node)" — a standby central +/// node must not execute method scripts or Route.To() calls. This gate +/// mirrors the leadership check in (the +/// node is the cluster leader, ), so +/// can return HTTP 503 on a standby. +/// +/// Registered only in the Central-role branch of Program.cs. The gate +/// is resolved per request from HttpContext.RequestServices; while the +/// AkkaHostedService is still warming up (ActorSystem == null) +/// or the node has not yet reached , this +/// implementation reports IsActiveNode == false — the safe-by-default +/// answer matching the standby case. +/// +public sealed class ActiveNodeGate : IActiveNodeGate +{ + private readonly AkkaHostedService _akkaService; + + /// Initializes a new bound to the given Akka hosted service. + /// The Akka hosted service exposing the cluster's . + public ActiveNodeGate(AkkaHostedService akkaService) + { + _akkaService = akkaService; + } + + /// + /// true only when this node has joined the cluster () + /// AND is the current cluster leader; false in every other state + /// (actor system not yet started, node still joining, node is a standby). + /// + public bool IsActiveNode + { + get + { + var system = _akkaService.ActorSystem; + if (system == null) + return false; + + var cluster = Cluster.Get(system); + var self = cluster.SelfMember; + if (self.Status != MemberStatus.Up) + return false; + + var leader = cluster.State.Leader; + return leader != null && leader == self.Address; + } + } +} diff --git a/src/ScadaLink.Host/Program.cs b/src/ScadaLink.Host/Program.cs index 33f919fd..9651925a 100644 --- a/src/ScadaLink.Host/Program.cs +++ b/src/ScadaLink.Host/Program.cs @@ -120,6 +120,16 @@ try builder.Services.AddSingleton(); builder.Services.AddHostedService(sp => sp.GetRequiredService()); + // InboundAPI-022: register the production IActiveNodeGate implementation so + // standby-node gating is actually enforced (the InboundApiEndpointFilter + // consults IActiveNodeGate and defaults to "allow" when none is registered, + // which leaves the design's "central cluster only (active node)" guarantee + // unenforced in deployed binaries). The gate is backed by the same Akka + // cluster-leadership check as ActiveNodeHealthCheck above, so the inbound + // API and the /health/active endpoint Traefik routes against agree on + // which node is active. + builder.Services.AddSingleton(); + // Cluster node status provider scoped to the Central role — feeds the // CentralHealthReportLoop so the central cluster appears on /monitoring/health. builder.Services.AddSingleton(sp => diff --git a/src/ScadaLink.Host/SiteServiceRegistration.cs b/src/ScadaLink.Host/SiteServiceRegistration.cs index 17409bca..7746ea56 100644 --- a/src/ScadaLink.Host/SiteServiceRegistration.cs +++ b/src/ScadaLink.Host/SiteServiceRegistration.cs @@ -34,8 +34,7 @@ public static class SiteServiceRegistration // Sites no longer deliver notifications over SMTP — a buffered notification is // forwarded to the central cluster (via NotificationForwarder / SiteCommunicationActor), // and central owns SMTP delivery through the Notification Outbox. The SMTP machinery - // (OAuth2TokenService, ISmtpClientWrapper, INotificationDeliveryService) has no - // consumer on a site node. + // (OAuth2TokenService, ISmtpClientWrapper) has no consumer on a site node. // Health report transport: sends SiteHealthReport to SiteCommunicationActor via Akka services.AddSingleton(); diff --git a/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs b/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs index ef351b76..7395b904 100644 --- a/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs +++ b/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs @@ -10,14 +10,14 @@ namespace ScadaLink.NotificationOutbox.Delivery; /// /// Task 12: Email channel delivery adapter for the central notification outbox. /// -/// Reuses the SMTP machinery — +/// Reuses the SMTP primitives — /// , , /// and the typed . -/// The connect/auth/send/disconnect sequence and error classification mirror -/// NotificationDeliveryService.DeliverAsync; this adapter, however, maps the -/// result to the outbox's three-way (Success / Permanent -/// / Transient) rather than the S&F-coupled NotificationResult, which cannot -/// distinguish a permanent failure from a buffered transient one. +/// This adapter owns the full connect/auth/send/disconnect sequence and maps the +/// outcome to the outbox's three-way (Success / Permanent / +/// Transient) — the canonical central-side email delivery path. NS-019: the prior +/// site-shaped NotificationDeliveryService was deleted with sites no longer +/// delivering notifications. /// public sealed class EmailNotificationDeliveryAdapter : INotificationDeliveryAdapter { @@ -44,9 +44,8 @@ public sealed class EmailNotificationDeliveryAdapter : INotificationDeliveryAdap _smtpClientFactory = smtpClientFactory; _logger = logger; _tokenService = tokenService; - // Mirrors NotificationDeliveryService: NotificationOptions supplies the - // documented fallback values used when a deployed SmtpConfiguration row - // leaves a field unset (non-positive). + // NotificationOptions supplies the documented fallback values used when a + // deployed SmtpConfiguration row leaves a field unset (non-positive). _options = options?.Value ?? new NotificationOptions(); } @@ -81,7 +80,7 @@ public sealed class EmailNotificationDeliveryAdapter : INotificationDeliveryAdap } // An unknown TLS mode is a configuration error that retrying cannot fix — - // surface it as a permanent failure (mirrors NS-005 in NotificationDeliveryService). + // surface it as a permanent failure (NS-005 SMTP TLS validation policy). SmtpTlsMode tlsMode; try { @@ -154,11 +153,9 @@ public sealed class EmailNotificationDeliveryAdapter : INotificationDeliveryAdap } /// - /// Delivers the plain-text BCC email via SMTP. Mirrors the connect/auth/send/ - /// disconnect sequence of NotificationDeliveryService.DeliverAsync: a - /// permanent failure surfaces as ; transient - /// failures propagate for the caller's classifier; the connection is always torn - /// down in the finally block. + /// Delivers the plain-text BCC email via SMTP. A permanent failure surfaces as + /// ; transient failures propagate for the + /// caller's classifier; the connection is always torn down in the finally block. /// private async Task SendAsync( SmtpConfiguration config, diff --git a/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs b/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs index 74aa782b..42f47f84 100644 --- a/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs +++ b/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs @@ -57,7 +57,7 @@ public class MailKitSmtpClientWrapper : ISmtpClientWrapper, IDisposable // worst, sending where authentication was required. Authentication being // skipped must never be silent: each of these is a permanent configuration // fault, surfaced as SmtpPermanentException so SendAsync returns a clean - // failure and DeliverBufferedAsync parks the buffered message. + // failure that the central Notification Outbox dispatcher classifies as permanent. if (string.IsNullOrEmpty(credentials)) { throw new SmtpPermanentException( diff --git a/src/ScadaLink.NotificationService/NotificationDeliveryService.cs b/src/ScadaLink.NotificationService/NotificationDeliveryService.cs deleted file mode 100644 index f11f881f..00000000 --- a/src/ScadaLink.NotificationService/NotificationDeliveryService.cs +++ /dev/null @@ -1,448 +0,0 @@ -using System.Text.Json; -using Microsoft.Extensions.Logging; -using Microsoft.Extensions.Options; -using ScadaLink.Commons.Entities.Notifications; -using ScadaLink.Commons.Interfaces.Repositories; -using ScadaLink.Commons.Interfaces.Services; -using ScadaLink.Commons.Types.Enums; -using ScadaLink.StoreAndForward; - -namespace ScadaLink.NotificationService; - -/// -/// WP-11: Notification delivery via SMTP. -/// WP-12: Error classification and S&F integration. -/// Transient: connection refused, timeout, SMTP 4xx → hand to S&F. -/// Permanent: SMTP 5xx → returned to script. -/// -public class NotificationDeliveryService : INotificationDeliveryService, IDisposable -{ - private readonly INotificationRepository _repository; - private readonly Func _smtpClientFactory; - private readonly OAuth2TokenService? _tokenService; - private readonly StoreAndForwardService? _storeAndForward; - private readonly ILogger _logger; - private readonly NotificationOptions _options; - - /// - /// Initializes a new instance of the NotificationDeliveryService with the specified dependencies. - /// - /// The notification repository for data access. - /// Factory for creating SMTP client instances. - /// Logger for diagnostic messages. - /// Optional OAuth2 token service for authentication. - /// Optional store-and-forward service for handling transient failures. - /// Optional notification options with fallback values. - public NotificationDeliveryService( - INotificationRepository repository, - Func smtpClientFactory, - ILogger logger, - OAuth2TokenService? tokenService = null, - StoreAndForwardService? storeAndForward = null, - IOptions? options = null) - { - _repository = repository; - _smtpClientFactory = smtpClientFactory; - _logger = logger; - _tokenService = tokenService; - _storeAndForward = storeAndForward; - // NS-017: NotificationOptions supplies the documented fallback values used - // when a deployed SmtpConfiguration row leaves a field unset (non-positive). - _options = options?.Value ?? new NotificationOptions(); - } - - /// - public async Task SendAsync( - string listName, - string subject, - string message, - string? originInstanceName = null, - CancellationToken cancellationToken = default) - { - ObjectDisposedException.ThrowIf(_disposed, this); - - var list = await _repository.GetListByNameAsync(listName, cancellationToken); - if (list == null) - { - return new NotificationResult(false, $"Notification list '{listName}' not found"); - } - - var recipients = await _repository.GetRecipientsByListIdAsync(list.Id, cancellationToken); - if (recipients.Count == 0) - { - return new NotificationResult(false, $"Notification list '{listName}' has no recipients"); - } - - var smtpConfigs = await _repository.GetAllSmtpConfigurationsAsync(cancellationToken); - var smtpConfig = smtpConfigs.FirstOrDefault(); - if (smtpConfig == null) - { - return new NotificationResult(false, "No SMTP configuration available"); - } - - // NS-005: validate the configured TLS mode up front — an unknown value is a - // configuration error and must surface as a clean result, not a silent - // fallback to opportunistic TLS negotiation. - try - { - SmtpTlsModeParser.Parse(smtpConfig.TlsMode); - } - catch (ArgumentException ex) - { - _logger.LogError("Invalid SMTP TLS mode for list {List}: {Reason}", listName, ex.Message); - return new NotificationResult(false, ex.Message); - } - - // NS-008: validate every email address before attempting delivery. A single - // malformed address previously caused MailboxAddress.Parse to throw a - // ParseException that escaped SendAsync unhandled; it must instead produce a - // clean NotificationResult the calling script can handle. - var addressError = EmailAddressValidator.ValidateAddresses(smtpConfig.FromAddress, recipients); - if (addressError != null) - { - _logger.LogWarning("Notification to list {List} has invalid addresses: {Reason}", listName, addressError); - return new NotificationResult(false, addressError); - } - - try - { - await DeliverAsync(smtpConfig, recipients, subject, message, cancellationToken); - return new NotificationResult(true, null); - } - catch (SmtpPermanentException ex) - { - // WP-12: Permanent SMTP failure — returned to script. - // NS-009: scrub credential fragments out of the server-supplied message - // before logging or returning it. - var detail = CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials); - _logger.LogError( - "Permanent SMTP failure sending to list {List}: {Detail}", listName, detail); - return new NotificationResult(false, $"Permanent SMTP error: {detail}"); - } - catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) - { - // NS-002: a caller-requested cancellation propagates; it is not buffered. - throw; - } - catch (Exception ex) when (SmtpErrorClassifier.IsTransient(ex, cancellationToken)) - { - // WP-12: Transient SMTP failure — hand to S&F. - // NS-009: scrub credential fragments before logging. - _logger.LogWarning( - "Transient SMTP failure sending to list {List} ({ExceptionType}): {Detail}; buffering for retry", - listName, ex.GetType().Name, CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials)); - - if (_storeAndForward == null) - { - return new NotificationResult(false, "Transient SMTP error and store-and-forward not available"); - } - - var payload = JsonSerializer.Serialize(new - { - ListName = listName, - Subject = subject, - Message = message - }); - - // attemptImmediateDelivery: false — DeliverAsync was already attempted - // above; letting EnqueueAsync re-invoke the handler would send twice. - await _storeAndForward.EnqueueAsync( - StoreAndForwardCategory.Notification, - listName, - payload, - originInstanceName, - smtpConfig.MaxRetries > 0 ? smtpConfig.MaxRetries : null, - smtpConfig.RetryDelay > TimeSpan.Zero ? smtpConfig.RetryDelay : null, - attemptImmediateDelivery: false); - - return new NotificationResult(true, null, WasBuffered: true); - } - catch (Exception ex) - { - // NS-015: a failure that SmtpErrorClassifier does not recognise (Unknown) — - // most importantly an OAuth2 token-fetch failure (HttpRequestException - // from EnsureSuccessStatusCode, or InvalidOperationException from a - // malformed credential triple) — used to fall through all the catch - // clauses above and escape SendAsync as a raw exception to the calling - // script, which the INotificationDeliveryService contract never - // advertises. Convert any otherwise-unhandled exception into a clean, - // credential-scrubbed permanent NotificationResult: returning control to - // the script is the safe default. (A caller-requested cancellation is - // already re-thrown by the filter above and never reaches here.) - var detail = CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials); - _logger.LogError( - "Unclassified failure sending to list {List} ({ExceptionType}): {Detail}", - listName, ex.GetType().Name, detail); - return new NotificationResult(false, $"Notification delivery failed: {detail}"); - } - } - - /// - /// WP-11/12: Delivers a buffered notification during a store-and-forward retry - /// sweep — re-resolves the list, recipients and SMTP config and re-attempts - /// delivery. Returns true on success, false on permanent failure (the message - /// is parked); throws on a transient failure so the engine retries. - /// - /// The buffered store-and-forward message to deliver. - /// Cancellation token for the delivery attempt. - public async Task DeliverBufferedAsync( - StoreAndForwardMessage message, CancellationToken cancellationToken = default) - { - var payload = JsonSerializer.Deserialize(message.PayloadJson); - if (payload == null || string.IsNullOrEmpty(payload.ListName)) - { - _logger.LogError("Buffered notification message {Id} has an unreadable payload; parking.", message.Id); - return false; - } - - var list = await _repository.GetListByNameAsync(payload.ListName, cancellationToken); - if (list == null) - { - _logger.LogError( - "Buffered notification to list '{List}' cannot be delivered — the list no longer exists; parking.", - payload.ListName); - return false; - } - - var recipients = await _repository.GetRecipientsByListIdAsync(list.Id, cancellationToken); - if (recipients.Count == 0) - { - _logger.LogError("Buffered notification to list '{List}' has no recipients; parking.", payload.ListName); - return false; - } - - var smtpConfig = (await _repository.GetAllSmtpConfigurationsAsync(cancellationToken)).FirstOrDefault(); - if (smtpConfig == null) - { - _logger.LogError("Buffered notification cannot be delivered — no SMTP configuration available; parking."); - return false; - } - - // NS-005: an unknown TLS mode is a configuration error that retrying cannot - // fix — park the buffered message rather than throwing on every sweep. - try - { - SmtpTlsModeParser.Parse(smtpConfig.TlsMode); - } - catch (ArgumentException ex) - { - _logger.LogError( - "Buffered notification to list '{List}' cannot be delivered — {Reason}; parking.", - payload.ListName, ex.Message); - return false; - } - - // NS-008: a malformed address cannot be fixed by retrying — park it. - var addressError = EmailAddressValidator.ValidateAddresses(smtpConfig.FromAddress, recipients); - if (addressError != null) - { - _logger.LogError( - "Buffered notification to list '{List}' has invalid addresses ({Reason}); parking.", - payload.ListName, addressError); - return false; - } - - try - { - await DeliverAsync(smtpConfig, recipients, payload.Subject, payload.Message, cancellationToken); - return true; - } - catch (SmtpPermanentException ex) - { - // NS-009: scrub credential fragments out of the message before logging. - _logger.LogError( - "Buffered notification to list '{List}' failed permanently ({Detail}); parking.", - payload.ListName, CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials)); - return false; - } - catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) - { - // A handler shutdown cancellation is neither a delivery success nor a - // permanent failure — let it propagate so the engine does not park. - throw; - } - catch (Exception ex) when (SmtpErrorClassifier.IsTransient(ex, cancellationToken)) - { - // A typed transient SMTP error: re-throw so the S&F engine retries. - throw; - } - catch (Exception ex) - { - // NS-014: an exception SmtpErrorClassifier does not recognise (Unknown) — - // chiefly an OAuth2 token-fetch failure — used to escape this handler. - // The S&F engine treats ANY thrown exception as transient, so a - // permanently-broken config (bad client secret, malformed credential - // triple) was retried on every sweep until MaxRetries, burning token - // endpoint calls. Decide deliberately rather than letting it leak: - // - an HttpRequestException with a 5xx token-endpoint status is a - // transient outage → re-throw so the engine retries; - // - everything else (a 4xx/401 token rejection, a malformed credential - // InvalidOperationException, any other unclassified fault) is not - // fixable by retrying → return false so the message is parked. - if (ex is HttpRequestException { StatusCode: { } status } && (int)status is >= 500 and < 600) - { - _logger.LogWarning( - "Buffered notification to list '{List}' hit a transient OAuth2 token-endpoint error ({Status}); will retry.", - payload.ListName, (int)status); - throw; - } - - _logger.LogError( - "Buffered notification to list '{List}' failed with a non-retryable error ({ExceptionType}: {Detail}); parking.", - payload.ListName, ex.GetType().Name, - CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials)); - return false; - } - } - - private sealed record BufferedNotification(string ListName, string Subject, string Message); - - /// - /// NS-007: throttles concurrent SMTP deliveries to the configured - /// MaxConcurrentConnections. One SMTP config is deployed per site, so the - /// limit is a stable per-site invariant; it is captured lazily on first use. - /// NS-018: a replaces the hand-rolled double-checked - /// init — its publication is correctly synchronised (no lock-free read of a - /// non-volatile field) and it is disposed in . - /// - private Lazy? _concurrencyLimiter; - private readonly object _limiterLock = new(); - private bool _disposed; - - private SemaphoreSlim GetConcurrencyLimiter(SmtpConfiguration config) - { - // NS-018: the limiter is sized once; capture the size now so the Lazy - // factory does not close over a value that could change between calls. - var configured = config.MaxConcurrentConnections > 0 - ? config.MaxConcurrentConnections - // NS-017: fall back to the NotificationOptions value, then the - // design-doc default of 5, when the deployed row leaves it unset. - : _options.MaxConcurrentConnections > 0 ? _options.MaxConcurrentConnections : 5; - - lock (_limiterLock) - { - ObjectDisposedException.ThrowIf(_disposed, this); - _concurrencyLimiter ??= new Lazy( - () => new SemaphoreSlim(configured, configured)); - return _concurrencyLimiter.Value; - } - } - - /// - /// NS-018: disposes the lazily-created concurrency limiter. The service is a - /// scoped DI service; without this the leaked a - /// handle per scope. - /// - public void Dispose() - { - lock (_limiterLock) - { - if (_disposed) - { - return; - } - - _disposed = true; - if (_concurrencyLimiter is { IsValueCreated: true } limiter) - { - limiter.Value.Dispose(); - } - } - - GC.SuppressFinalize(this); - } - - /// - /// Delivers an email via SMTP. Throws on failure (transient errors and - /// propagate; the caller classifies them). - /// - /// The SMTP configuration to use for the connection. - /// The list of recipients to deliver to. - /// The email subject line. - /// The plain-text email body. - /// Cancellation token for the delivery. - internal async Task DeliverAsync( - SmtpConfiguration config, - IReadOnlyList recipients, - string subject, - string body, - CancellationToken cancellationToken) - { - var tlsMode = SmtpTlsModeParser.Parse(config.TlsMode); - - // NS-007: bound the number of concurrent SMTP connections per site. - var limiter = GetConcurrencyLimiter(config); - await limiter.WaitAsync(cancellationToken); - - // NS-004: create exactly one client and dispose the one actually used. - var smtp = _smtpClientFactory(); - using var disposable = smtp as IDisposable; - - try - { - // NS-005/NS-007: explicit TLS mode and the configured connection timeout. - // NS-017: when the deployed SmtpConfiguration row leaves the timeout - // unset (non-positive), fall back to the NotificationOptions value. - var timeoutSeconds = config.ConnectionTimeoutSeconds > 0 - ? config.ConnectionTimeoutSeconds - : _options.ConnectionTimeoutSeconds; - await smtp.ConnectAsync( - config.Host, config.Port, tlsMode, timeoutSeconds, cancellationToken); - - // Resolve credentials (OAuth2 token fetched/cached by the token service). - var credentials = config.Credentials; - if (config.AuthType.Equals("oauth2", StringComparison.OrdinalIgnoreCase) && _tokenService != null && credentials != null) - { - var token = await _tokenService.GetTokenAsync(credentials, cancellationToken); - credentials = token; - } - - // NS-021: OAuth2 XOAUTH2 requires the user identity (FromAddress) to be - // sent alongside the access token; an empty user is rejected by M365. - await smtp.AuthenticateAsync( - config.AuthType, - credentials, - oauth2UserName: config.FromAddress, - cancellationToken: cancellationToken); - - var bccAddresses = recipients.Select(r => r.EmailAddress).ToList(); - await smtp.SendAsync(config.FromAddress, bccAddresses, subject, body, cancellationToken); - } - catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) - { - // NS-002: A deliberately cancelled token must propagate as a cancellation, - // not be misclassified as a transient SMTP failure and buffered for retry. - throw; - } - catch (Exception ex) when (SmtpErrorClassifier.Classify(ex, cancellationToken) == SmtpErrorClass.Permanent - && ex is not SmtpPermanentException) - { - // NS-003: Permanent SMTP failure (5xx) — surface a typed permanent exception. - throw new SmtpPermanentException(ex.Message, ex); - } - // Transient and SmtpPermanentException both propagate unchanged: SendAsync's - // catch filters (SmtpPermanentException / SmtpErrorClassifier.IsTransient) handle them. - finally - { - // NS-010: always tear the connection down, regardless of outcome. The - // SMTP QUIT used to run only on the success path inside the try block, - // so a failed Connect/Authenticate/Send left an open, authenticated - // connection until finalization reclaimed the socket — exhausting the - // server's connection slots under sustained transient failures. - // Disconnect is best-effort: a disconnect failure (e.g. the connection - // is already dead) must not mask the original delivery exception. - try - { - await smtp.DisconnectAsync(cancellationToken); - } - catch (Exception disconnectEx) - { - _logger.LogDebug( - "Ignoring SMTP disconnect failure during cleanup: {Reason}", disconnectEx.Message); - } - - // NS-007: always release the concurrency slot, even on failure. - limiter.Release(); - } - } -} diff --git a/src/ScadaLink.NotificationService/NotificationOptions.cs b/src/ScadaLink.NotificationService/NotificationOptions.cs index 0cc61865..6663778c 100644 --- a/src/ScadaLink.NotificationService/NotificationOptions.cs +++ b/src/ScadaLink.NotificationService/NotificationOptions.cs @@ -5,10 +5,10 @@ namespace ScadaLink.NotificationService; /// ScadaLink:Notification configuration section. /// /// SMTP settings are primarily carried by the deployed SmtpConfiguration -/// entity. NS-017: these values are the fallback used by -/// when the corresponding -/// SmtpConfiguration field is left unset (non-positive) on a partially -/// deployed row — a value present on the row always takes precedence. +/// entity. NS-017: these values are the fallback used by the central +/// Notification Outbox's EmailNotificationDeliveryAdapter when the +/// corresponding SmtpConfiguration field is left unset (non-positive) on a +/// partially deployed row — a value present on the row always takes precedence. /// public class NotificationOptions { diff --git a/src/ScadaLink.NotificationService/ServiceCollectionExtensions.cs b/src/ScadaLink.NotificationService/ServiceCollectionExtensions.cs index deafa776..87e9da3e 100644 --- a/src/ScadaLink.NotificationService/ServiceCollectionExtensions.cs +++ b/src/ScadaLink.NotificationService/ServiceCollectionExtensions.cs @@ -1,12 +1,17 @@ using Microsoft.Extensions.DependencyInjection; -using ScadaLink.Commons.Interfaces.Services; namespace ScadaLink.NotificationService; public static class ServiceCollectionExtensions { /// - /// Registers the notification delivery services (SMTP, OAuth2 token, delivery adapter). + /// Registers the shared SMTP delivery primitives consumed by the central Notification + /// Outbox's EmailNotificationDeliveryAdapter: , + /// , and the factory. + /// Central-only — sites no longer deliver notifications (see + /// Component-NotificationService.md), and the orphaned site-shaped + /// NotificationDeliveryService + INotificationDeliveryService contract + /// was removed (NS-019). Notification dispatch lives in ScadaLink.NotificationOutbox. /// /// The service collection to register into. public static IServiceCollection AddNotificationService(this IServiceCollection services) @@ -17,8 +22,6 @@ public static class ServiceCollectionExtensions services.AddHttpClient(); services.AddSingleton(); services.AddSingleton>(_ => () => new MailKitSmtpClientWrapper()); - services.AddScoped(); - services.AddScoped(sp => sp.GetRequiredService()); return services; } diff --git a/src/ScadaLink.NotificationService/SmtpErrorClassifier.cs b/src/ScadaLink.NotificationService/SmtpErrorClassifier.cs index ba632640..ff5b09b0 100644 --- a/src/ScadaLink.NotificationService/SmtpErrorClassifier.cs +++ b/src/ScadaLink.NotificationService/SmtpErrorClassifier.cs @@ -26,10 +26,10 @@ public enum SmtpErrorClass /// the numeric rather than locale-dependent substring /// matching on the exception message. /// -/// Public and shared: both (store-and-forward -/// delivery) and the central Notification Outbox's EmailNotificationDeliveryAdapter -/// route every SMTP failure through this single policy, so a transient/permanent -/// boundary change cannot diverge between the two delivery paths. +/// Public and shared: the central Notification Outbox's EmailNotificationDeliveryAdapter +/// routes every SMTP failure through this single policy. (NS-019: the orphaned site-side +/// NotificationDeliveryService that previously co-used this classifier was removed +/// when sites stopped delivering notifications.) /// /// public static class SmtpErrorClassifier diff --git a/src/ScadaLink.StoreAndForward/NotificationForwarder.cs b/src/ScadaLink.StoreAndForward/NotificationForwarder.cs index 31b98e5a..61d968a6 100644 --- a/src/ScadaLink.StoreAndForward/NotificationForwarder.cs +++ b/src/ScadaLink.StoreAndForward/NotificationForwarder.cs @@ -1,5 +1,7 @@ using System.Text.Json; using Akka.Actor; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; using ScadaLink.Commons.Messages.Notification; namespace ScadaLink.StoreAndForward; @@ -31,6 +33,7 @@ public sealed class NotificationForwarder private readonly IActorRef _siteCommunicationActor; private readonly string _sourceSiteId; private readonly TimeSpan _forwardTimeout; + private readonly ILogger _logger; /// /// The site communication actor. It forwards a to @@ -42,14 +45,21 @@ public sealed class NotificationForwarder /// How long to wait for central's ack before treating the forward as a transient /// failure. Sourced from host configuration. /// + /// + /// Optional logger. StoreAndForward-018: a corrupt buffered payload is logged at + /// Warning before being discarded so an operator has a forensic trail of the row + /// that vanished from the buffer. + /// public NotificationForwarder( IActorRef siteCommunicationActor, string sourceSiteId, - TimeSpan forwardTimeout) + TimeSpan forwardTimeout, + ILogger? logger = null) { _siteCommunicationActor = siteCommunicationActor; _sourceSiteId = sourceSiteId; _forwardTimeout = forwardTimeout; + _logger = logger ?? NullLogger.Instance; } /// @@ -61,11 +71,26 @@ public sealed class NotificationForwarder /// The buffered store-and-forward message to deliver to central. public async Task DeliverAsync(StoreAndForwardMessage message) { - // An unreadable payload cannot be fixed by retrying — park it (return false), - // mirroring how the former SMTP handler treated a corrupt buffered payload. + // StoreAndForward-018: an unreadable payload cannot be fixed by retrying. + // The design doc explicitly forbids parking notifications ("notifications do + // not park — they are retried at the fixed forward interval until central + // acks"; Component-StoreAndForward.md). The earlier behaviour returned false + // here, which the S&F engine interprets as a permanent failure and parks + // the row — contradicting the invariant and surfacing the row in the + // central UI's parked-message list. The correct outcome for a corrupt-payload + // notification is to DISCARD: log a Warning with the buffered row id + + // payload preview for forensics, then return true so the engine clears the + // buffer via its standard success-path cleanup. The buffered row is + // unrecoverable; retrying or parking would both make the queue worse, not + // better. if (!TryBuildSubmit(message, out var submit)) { - return false; + _logger.LogWarning( + "Discarding corrupt buffered notification {NotificationId} (payload is not deserialisable as NotificationSubmit). " + + "Payload preview: {PayloadPreview}", + message.Id, + PreviewPayload(message.PayloadJson)); + return true; } // The reply may legitimately be a non-accepted ack, so it is not requested as @@ -140,6 +165,25 @@ public sealed class NotificationForwarder }; return true; } + + private const int CorruptPayloadPreviewMaxLength = 200; + + /// + /// Returns a length-capped preview of a corrupt buffered payload for the Warning + /// log line emitted on discard. The full payload may be megabytes and is not + /// suitable for the structured log; the preview retains the leading characters, + /// which is what an operator typically uses to identify the producing script. + /// + private static string PreviewPayload(string? payloadJson) + { + if (string.IsNullOrEmpty(payloadJson)) + { + return ""; + } + return payloadJson.Length <= CorruptPayloadPreviewMaxLength + ? payloadJson + : payloadJson.Substring(0, CorruptPayloadPreviewMaxLength) + "…"; + } } /// diff --git a/src/ScadaLink.TemplateEngine/Flattening/DiffService.cs b/src/ScadaLink.TemplateEngine/Flattening/DiffService.cs index f7c667e8..731907aa 100644 --- a/src/ScadaLink.TemplateEngine/Flattening/DiffService.cs +++ b/src/ScadaLink.TemplateEngine/Flattening/DiffService.cs @@ -111,12 +111,14 @@ public class DiffService a.CanonicalName == b.CanonicalName && a.Value == b.Value && a.DataType == b.DataType && + a.Description == b.Description && a.IsLocked == b.IsLocked && a.DataSourceReference == b.DataSourceReference && a.BoundDataConnectionId == b.BoundDataConnectionId; private static bool AlarmsEqual(ResolvedAlarm a, ResolvedAlarm b) => a.CanonicalName == b.CanonicalName && + a.Description == b.Description && a.PriorityLevel == b.PriorityLevel && a.IsLocked == b.IsLocked && a.TriggerType == b.TriggerType && @@ -132,4 +134,27 @@ public class DiffService a.ParameterDefinitions == b.ParameterDefinitions && a.ReturnDefinition == b.ReturnDefinition && a.MinTimeBetweenRuns == b.MinTimeBetweenRuns; + + /// + /// Compares two instances for equality across + /// the fields that travel in the deployment package: protocol, primary and + /// backup configuration JSON, and failover retry count. Used by callers that + /// need to detect connection-endpoint drift; the public diff shape only + /// exposes attribute / alarm / script changes today (see TemplateEngine-018 + /// for the diff-shape extension that surfaces added / removed / changed + /// connections in the UI). + /// + /// First connection configuration. + /// Second connection configuration. + /// True when both configurations are equal. + public static bool ConnectionsEqual(ConnectionConfig a, ConnectionConfig b) + { + ArgumentNullException.ThrowIfNull(a); + ArgumentNullException.ThrowIfNull(b); + + return a.Protocol == b.Protocol && + a.ConfigurationJson == b.ConfigurationJson && + a.BackupConfigurationJson == b.BackupConfigurationJson && + a.FailoverRetryCount == b.FailoverRetryCount; + } } diff --git a/src/ScadaLink.TemplateEngine/Flattening/RevisionHashService.cs b/src/ScadaLink.TemplateEngine/Flattening/RevisionHashService.cs index b07a7c8e..a11f635e 100644 --- a/src/ScadaLink.TemplateEngine/Flattening/RevisionHashService.cs +++ b/src/ScadaLink.TemplateEngine/Flattening/RevisionHashService.cs @@ -52,6 +52,7 @@ public class RevisionHashService CanonicalName = a.CanonicalName, Value = a.Value, DataType = a.DataType, + Description = a.Description, IsLocked = a.IsLocked, DataSourceReference = a.DataSourceReference, BoundDataConnectionId = a.BoundDataConnectionId @@ -62,6 +63,7 @@ public class RevisionHashService .Select(a => new HashableAlarm { CanonicalName = a.CanonicalName, + Description = a.Description, PriorityLevel = a.PriorityLevel, IsLocked = a.IsLocked, TriggerType = a.TriggerType, @@ -82,7 +84,20 @@ public class RevisionHashService ReturnDefinition = s.ReturnDefinition, MinTimeBetweenRunsTicks = s.MinTimeBetweenRuns?.Ticks }) - .ToList() + .ToList(), + Connections = configuration.Connections is { Count: > 0 } + ? new SortedDictionary( + configuration.Connections.ToDictionary( + kvp => kvp.Key, + kvp => new HashableConnection + { + BackupConfigurationJson = kvp.Value.BackupConfigurationJson, + ConfigurationJson = kvp.Value.ConfigurationJson, + FailoverRetryCount = kvp.Value.FailoverRetryCount, + Protocol = kvp.Value.Protocol + }), + StringComparer.Ordinal) + : null }; var json = JsonSerializer.Serialize(hashInput, CanonicalJsonOptions); @@ -108,6 +123,12 @@ public class RevisionHashService /// public List Attributes { get; init; } = []; /// + /// Data connection configurations keyed by connection name. Sorted by key + /// (ordinal) to keep serialization deterministic. Null when the deployment + /// package carries no connections. + /// + public SortedDictionary? Connections { get; init; } + /// /// The unique instance name. /// public string InstanceUniqueName { get; init; } = string.Empty; @@ -144,6 +165,11 @@ public class RevisionHashService /// public string DataType { get; init; } = string.Empty; /// + /// The attribute description (authoring-time documentation that still + /// travels with the deployed payload). + /// + public string? Description { get; init; } + /// /// Whether the attribute is locked. /// public bool IsLocked { get; init; } @@ -160,6 +186,11 @@ public class RevisionHashService /// public string CanonicalName { get; init; } = string.Empty; /// + /// The alarm description (authoring-time documentation that still + /// travels with the deployed payload). + /// + public string? Description { get; init; } + /// /// Whether the alarm is locked. /// public bool IsLocked { get; init; } @@ -181,6 +212,26 @@ public class RevisionHashService public string TriggerType { get; init; } = string.Empty; } + private sealed record HashableConnection + { + /// + /// Backup connection configuration JSON, if any. + /// + public string? BackupConfigurationJson { get; init; } + /// + /// Primary connection configuration JSON. + /// + public string? ConfigurationJson { get; init; } + /// + /// Number of failover retries before giving up. + /// + public int FailoverRetryCount { get; init; } + /// + /// Protocol name (e.g. "OpcUa"). + /// + public string Protocol { get; init; } = string.Empty; + } + private sealed record HashableScript { /// diff --git a/tests/ScadaLink.Commons.Tests/Messages/CompatibilityTests.cs b/tests/ScadaLink.Commons.Tests/Messages/CompatibilityTests.cs index 36287b40..0c3f5598 100644 --- a/tests/ScadaLink.Commons.Tests/Messages/CompatibilityTests.cs +++ b/tests/ScadaLink.Commons.Tests/Messages/CompatibilityTests.cs @@ -230,17 +230,10 @@ public class CompatibilityTests // ── Round-trip serialization for all key message types ── - [Fact] - public void RoundTrip_ConnectionStateChanged_Succeeds() - { - var msg = new ConnectionStateChanged("site-01", true, DateTimeOffset.UtcNow); - var json = JsonSerializer.Serialize(msg); - var deserialized = JsonSerializer.Deserialize(json, Options); - - Assert.NotNull(deserialized); - Assert.Equal("site-01", deserialized!.SiteId); - Assert.True(deserialized.IsConnected); - } + // Communication-016: RoundTrip_ConnectionStateChanged_Succeeds removed + // alongside the dead ConnectionStateChanged message record. No production + // code emits or receives this message — disconnect detection is owned by + // the gRPC keepalive and the Ask-timeout path. [Fact] public void RoundTrip_AlarmStateChanged_Succeeds() diff --git a/tests/ScadaLink.Communication.Tests/CentralCommunicationActorTests.cs b/tests/ScadaLink.Communication.Tests/CentralCommunicationActorTests.cs index 60480d00..82492861 100644 --- a/tests/ScadaLink.Communication.Tests/CentralCommunicationActorTests.cs +++ b/tests/ScadaLink.Communication.Tests/CentralCommunicationActorTests.cs @@ -116,30 +116,13 @@ public class CentralCommunicationActorTests : TestKit ExpectNoMsg(TimeSpan.FromMilliseconds(200)); } - [Fact] - public void ConnectionLost_DebugStreamsKilled() - { - var site = CreateSite("site1", "akka.tcp://scadalink@host:8082"); - var (actor, _, siteProbes) = CreateActorWithMockRepo(new[] { site }); - - // Wait for auto-refresh - Thread.Sleep(1000); - - // Subscribe to debug view (tracks the subscription) - var subscriberProbe = CreateTestProbe(); - var subRequest = new SubscribeDebugViewRequest("inst1", "corr-123"); - actor.Tell(new SiteEnvelope("site1", subRequest), subscriberProbe.Ref); - - // The ClusterClient probe receives the routed message - siteProbes["site1"].ExpectMsg(); - - // Simulate site disconnection - actor.Tell(new ConnectionStateChanged("site1", false, DateTimeOffset.UtcNow)); - - // The subscriber should receive a DebugStreamTerminated notification - subscriberProbe.ExpectMsg( - msg => msg.SiteId == "site1" && msg.CorrelationId == "corr-123"); - } + // Communication-016: the prior `ConnectionLost_DebugStreamsKilled` test was + // removed alongside the dead HandleConnectionStateChanged handler. No + // production code ever emitted ConnectionStateChanged, so the test was + // exercising a workflow that never ran. Disconnect detection is owned by + // the gRPC keepalive (DebugStreamBridgeActor self-terminates) and by the + // Ask-timeout path at the CommunicationService layer (deploy callers see + // a failure). [Fact] public void Heartbeat_BumpsAggregatorTimestamp() diff --git a/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs b/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs index 8b06a328..d96a51e6 100644 --- a/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs +++ b/tests/ScadaLink.DeploymentManager.Tests/DeploymentServiceTests.cs @@ -820,6 +820,56 @@ public class DeploymentServiceTests : TestKit Assert.Equal("sha256:target", storedSnapshot.RevisionHash); } + // ── DeploymentManager-018: reconciliation must preserve an intentional Disabled state ── + + [Fact] + public async Task DeployInstanceAsync_Reconciled_DisabledInstance_PreservesDisabledState() + { + // DeploymentManager-018: after a central failover, the in-memory + // OperationLockManager is lost (by design — in-progress treated as + // failed). The prior deployment record remains InProgress in the DB. + // The operator can legitimately invoke Disable on the instance between + // the timed-out deploy and the redeploy. Disable does not change the + // deployed config, so the site still reports the target revision hash. + // When the operator retries the deploy, the reconciliation branch must + // NOT silently overwrite Instance.State back to Enabled — that would + // undo the explicit operator action with no audit trail. + var instance = new Instance("ReconcileDisabled") + { + Id = 72, SiteId = 1, State = InstanceState.Disabled + }; + _repo.GetInstanceByIdAsync(72, Arg.Any()).Returns(instance); + SetupValidPipeline(72, "ReconcileDisabled", "sha256:target"); + + var prior = new DeploymentRecord("dep-prior-72", "admin") + { + InstanceId = 72, + Status = DeploymentStatus.InProgress, + RevisionHash = "sha256:target" + }; + _repo.GetCurrentDeploymentStatusAsync(72, Arg.Any()).Returns(prior); + _repo.GetDeployedSnapshotByInstanceIdAsync(72, Arg.Any()) + .Returns((DeployedConfigSnapshot?)null); + + var commActor = Sys.ActorOf(Props.Create(() => + new ReconcileProbeActor(siteHash: "sha256:target", failQuery: false))); + var service = CreateServiceWithCommActor(commActor); + + var result = await service.DeployInstanceAsync(72, "admin"); + + // The reconciliation still succeeds and the prior record is marked + // Success — central and site agree on the applied config. + Assert.True(result.IsSuccess); + Assert.Equal(DeploymentStatus.Success, prior.Status); + Assert.Equal(1, ReconcileProbeActor.QueryCount); + Assert.Equal(0, ReconcileProbeActor.DeployCount); + + // DeploymentManager-018: the operator's explicit Disable must survive + // the reconciliation — Instance.State stays Disabled, not silently + // flipped to Enabled. + Assert.Equal(InstanceState.Disabled, instance.State); + } + // ── DeploymentManager-016: reconciled record must carry the target revision hash ── [Fact] diff --git a/tests/ScadaLink.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs b/tests/ScadaLink.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs index 36fbd9cc..221d70c4 100644 --- a/tests/ScadaLink.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs +++ b/tests/ScadaLink.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs @@ -207,6 +207,31 @@ public class DatabaseGatewayTests Assert.False(delivered); // permanent — the S&F engine parks the message } + // ── ExternalSystemGateway-018: malformed JSON payload must park, not retry-forever ── + + [Fact] + public async Task DeliverBuffered_MalformedJsonPayload_ReturnsFalseSoMessageParks() + { + // No connection stub needed — deserialization fails before any + // resolution or SQL execution. If the JsonException were to escape (the + // pre-018 behaviour) the S&F engine would treat it as transient and + // retry the same poison row forever. + var gateway = new DatabaseGateway(_repository, NullLogger.Instance); + + var poisonMessage = new ScadaLink.StoreAndForward.StoreAndForwardMessage + { + Id = Guid.NewGuid().ToString("N"), + Category = ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.CachedDbWrite, + Target = "someDb", + // Truncated mid-write — `{` opens an object that never closes. + PayloadJson = "{\"ConnectionName\":\"someDb\",\"Sql\":\"INSERT", + }; + + var delivered = await gateway.DeliverBufferedAsync(poisonMessage); + + Assert.False(delivered); // permanent — the S&F engine parks the message + } + // ── ExternalSystemGateway-010: SqlConnection must not leak when OpenAsync fails ── [Fact] diff --git a/tests/ScadaLink.ExternalSystemGateway.Tests/ExternalSystemClientTests.cs b/tests/ScadaLink.ExternalSystemGateway.Tests/ExternalSystemClientTests.cs index 3898456f..882fb6be 100644 --- a/tests/ScadaLink.ExternalSystemGateway.Tests/ExternalSystemClientTests.cs +++ b/tests/ScadaLink.ExternalSystemGateway.Tests/ExternalSystemClientTests.cs @@ -234,6 +234,32 @@ public class ExternalSystemClientTests () => client.DeliverBufferedAsync(BufferedCall("TestAPI", "failMethod"))); } + // ── ExternalSystemGateway-018: malformed JSON payload must park, not retry-forever ── + + [Fact] + public async Task DeliverBuffered_MalformedJsonPayload_ReturnsFalseSoMessageParks() + { + // No repository / HTTP stubs needed — deserialization fails before any + // resolution or HTTP call. If the JsonException were to escape (the + // pre-018 behaviour) the S&F engine would treat it as transient and + // retry the same poison row forever. + var client = new ExternalSystemClient( + _httpClientFactory, _repository, NullLogger.Instance); + + var poisonMessage = new StoreAndForwardMessage + { + Id = Guid.NewGuid().ToString("N"), + Category = ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.ExternalSystem, + Target = "TestAPI", + // Truncated mid-write — `{` opens an object that never closes. + PayloadJson = "{\"SystemName\":\"TestAPI\",\"MethodName\":\"get", + }; + + var delivered = await client.DeliverBufferedAsync(poisonMessage); + + Assert.False(delivered); // permanent — the S&F engine parks the message + } + // ── ExternalSystemGateway-003: CachedCall must not double-dispatch ── [Fact] diff --git a/tests/ScadaLink.Host.Tests/CompositionRootTests.cs b/tests/ScadaLink.Host.Tests/CompositionRootTests.cs index 92f2a8f1..1d4c6930 100644 --- a/tests/ScadaLink.Host.Tests/CompositionRootTests.cs +++ b/tests/ScadaLink.Host.Tests/CompositionRootTests.cs @@ -17,6 +17,7 @@ using ScadaLink.ExternalSystemGateway; using ScadaLink.HealthMonitoring; using ScadaLink.Host; using ScadaLink.Host.Actors; +using ScadaLink.Host.Health; using ScadaLink.InboundAPI; using ScadaLink.ManagementService; using ScadaLink.NotificationService; @@ -204,9 +205,13 @@ public class CentralCompositionRootTests : IDisposable new object[] { typeof(IExternalSystemClient) }, new object[] { typeof(DatabaseGateway) }, new object[] { typeof(IDatabaseGateway) }, - // NotificationService - new object[] { typeof(NotificationDeliveryService) }, - new object[] { typeof(INotificationDeliveryService) }, + // NotificationService — central-only SMTP primitives. The orphan + // NotificationDeliveryService + INotificationDeliveryService were removed + // (NS-019) when sites stopped delivering notifications; the central + // EmailNotificationDeliveryAdapter is now the only resolver of these + // primitives. + new object[] { typeof(Func) }, + new object[] { typeof(OAuth2TokenService) }, // ConfigurationDatabase repositories new object[] { typeof(ScadaLinkDbContext) }, new object[] { typeof(ISecurityRepository) }, @@ -277,6 +282,30 @@ public class CentralCompositionRootTests : IDisposable var hostedServices = _factory.Services.GetServices(); Assert.Contains(hostedServices, s => s.GetType() == typeof(CentralHealthAggregator)); } + + // --- InboundAPI-022 regression --- + + /// + /// InboundAPI-022 regression: the Central composition root MUST register a + /// concrete implementation. Without it, + /// falls through to "allow" and a + /// standby central node continues to serve the inbound API, racing the + /// active node and executing scripts against stale singleton state. The + /// design's "central cluster only (active node)" guarantee is enforced only + /// when the production gate is wired here. + /// + /// Structural check on the built provider (not just ) + /// — a registration the framework cannot resolve to a concrete instance is + /// indistinguishable from "missing" at runtime, which is the failure mode + /// the finding describes. + /// + [Fact] + public void Central_IActiveNodeGate_IsRegisteredAsActiveNodeGate() + { + var gate = _factory.Services.GetService(); + Assert.NotNull(gate); + Assert.IsType(gate); + } } /// diff --git a/tests/ScadaLink.IntegrationTests/IntegrationSurfaceTests.cs b/tests/ScadaLink.IntegrationTests/IntegrationSurfaceTests.cs index 279e5b06..bc3f23f6 100644 --- a/tests/ScadaLink.IntegrationTests/IntegrationSurfaceTests.cs +++ b/tests/ScadaLink.IntegrationTests/IntegrationSurfaceTests.cs @@ -5,11 +5,9 @@ using System.Text.Json; using Microsoft.Extensions.DependencyInjection; using NSubstitute; using ScadaLink.Commons.Entities.InboundApi; -using ScadaLink.Commons.Entities.Notifications; using ScadaLink.Commons.Interfaces.Repositories; using ScadaLink.Commons.Interfaces.Services; using ScadaLink.InboundAPI; -using ScadaLink.NotificationService; namespace ScadaLink.IntegrationTests; @@ -98,42 +96,11 @@ public class IntegrationSurfaceTests } // ── Notification: mock SMTP delivery ── - - [Fact] - public async Task Notification_Send_MockSmtp_Delivers() - { - var repository = Substitute.For(); - var smtpClient = Substitute.For(); - - var list = new NotificationList("alerts") { Id = 1 }; - var recipients = new List - { - new("Admin", "admin@example.com") { Id = 1, NotificationListId = 1 } - }; - var smtpConfig = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass" - }; - - repository.GetListByNameAsync("alerts").Returns(list); - repository.GetRecipientsByListIdAsync(1).Returns(recipients); - repository.GetAllSmtpConfigurationsAsync().Returns(new List { smtpConfig }); - - var service = new NotificationDeliveryService( - repository, - () => smtpClient, - Microsoft.Extensions.Logging.Abstractions.NullLogger.Instance); - - var result = await service.SendAsync("alerts", "Test Alert", "Something happened"); - - Assert.True(result.Success); - await smtpClient.Received(1).SendAsync( - "noreply@example.com", - Arg.Is>(r => r.Contains("admin@example.com")), - "Test Alert", - "Something happened", - Arg.Any()); - } + // NS-019: the site-shaped NotificationDeliveryService that this case exercised + // was removed when sites stopped delivering notifications. The central SMTP + // delivery path is now covered end-to-end by + // ScadaLink.NotificationOutbox.Tests.Delivery.EmailNotificationDeliveryAdapterTests; + // no equivalent integration-surface assertion is needed here. // ── Script Context: integration API wiring ── diff --git a/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs b/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs deleted file mode 100644 index 8a743629..00000000 --- a/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs +++ /dev/null @@ -1,1039 +0,0 @@ -using System.Net; -using System.Text.Json; -using MailKit; -using MailKit.Net.Smtp; -using Microsoft.Extensions.Logging.Abstractions; -using NSubstitute; -using NSubstitute.ExceptionExtensions; -using ScadaLink.Commons.Entities.Notifications; -using ScadaLink.Commons.Interfaces.Repositories; -using ScadaLink.StoreAndForward; - -namespace ScadaLink.NotificationService.Tests; - -/// -/// WP-11/12: Tests for notification delivery — SMTP delivery, error classification, S&F integration. -/// -public class NotificationDeliveryServiceTests -{ - private readonly INotificationRepository _repository = Substitute.For(); - private readonly ISmtpClientWrapper _smtpClient = Substitute.For(); - - private NotificationDeliveryService CreateService(StoreAndForward.StoreAndForwardService? sf = null) - { - return new NotificationDeliveryService( - _repository, - () => _smtpClient, - NullLogger.Instance, - tokenService: null, - storeAndForward: sf); - } - - private void SetupHappyPath() - { - var list = new NotificationList("ops-team") { Id = 1 }; - var recipients = new List - { - new("Alice", "alice@example.com") { Id = 1, NotificationListId = 1 }, - new("Bob", "bob@example.com") { Id = 2, NotificationListId = 1 } - }; - var smtpConfig = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls" - }; - - _repository.GetListByNameAsync("ops-team").Returns(list); - _repository.GetRecipientsByListIdAsync(1).Returns(recipients); - _repository.GetAllSmtpConfigurationsAsync().Returns(new List { smtpConfig }); - } - - [Fact] - public async Task Send_ListNotFound_ReturnsError() - { - _repository.GetListByNameAsync("nonexistent").Returns((NotificationList?)null); - var service = CreateService(); - - var result = await service.SendAsync("nonexistent", "Subject", "Body"); - - Assert.False(result.Success); - Assert.Contains("not found", result.ErrorMessage); - } - - [Fact] - public async Task Send_NoRecipients_ReturnsError() - { - var list = new NotificationList("empty-list") { Id = 1 }; - _repository.GetListByNameAsync("empty-list").Returns(list); - _repository.GetRecipientsByListIdAsync(1).Returns(new List()); - - var service = CreateService(); - var result = await service.SendAsync("empty-list", "Subject", "Body"); - - Assert.False(result.Success); - Assert.Contains("no recipients", result.ErrorMessage); - } - - [Fact] - public async Task Send_NoSmtpConfig_ReturnsError() - { - var list = new NotificationList("test") { Id = 1 }; - var recipients = new List - { - new("Alice", "alice@example.com") { Id = 1, NotificationListId = 1 } - }; - _repository.GetListByNameAsync("test").Returns(list); - _repository.GetRecipientsByListIdAsync(1).Returns(recipients); - _repository.GetAllSmtpConfigurationsAsync().Returns(new List()); - - var service = CreateService(); - var result = await service.SendAsync("test", "Subject", "Body"); - - Assert.False(result.Success); - Assert.Contains("No SMTP configuration", result.ErrorMessage); - } - - [Fact] - public async Task Send_Successful_ReturnsSuccess() - { - SetupHappyPath(); - var service = CreateService(); - - var result = await service.SendAsync("ops-team", "Alert", "Something happened"); - - Assert.True(result.Success); - Assert.Null(result.ErrorMessage); - Assert.False(result.WasBuffered); - } - - [Fact] - public async Task Send_SmtpConnectsWithCorrectParams() - { - SetupHappyPath(); - var service = CreateService(); - - await service.SendAsync("ops-team", "Alert", "Body"); - - await _smtpClient.Received().ConnectAsync( - "smtp.example.com", 587, SmtpTlsMode.StartTls, Arg.Any(), Arg.Any()); - await _smtpClient.Received().AuthenticateAsync( - "basic", "user:pass", Arg.Any(), Arg.Any()); - await _smtpClient.Received().SendAsync( - "noreply@example.com", - Arg.Is>(bcc => bcc.Count() == 2), - "Alert", - "Body", - Arg.Any()); - } - - [Fact] - public async Task Send_PermanentSmtpError_ReturnsErrorDirectly() - { - SetupHappyPath(); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new SmtpPermanentException("550 Mailbox not found")); - - var service = CreateService(); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.Contains("Permanent SMTP error", result.ErrorMessage); - } - - [Fact] - public async Task Send_TransientError_NoStoreAndForward_ReturnsError() - { - SetupHappyPath(); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new TimeoutException("Connection timed out")); - - var service = CreateService(sf: null); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.Contains("store-and-forward not available", result.ErrorMessage); - } - - [Fact] - public async Task Send_UsesBccDelivery_AllRecipientsInBcc() - { - SetupHappyPath(); - IEnumerable? capturedBcc = null; - _smtpClient.SendAsync( - Arg.Any(), - Arg.Do>(bcc => capturedBcc = bcc), - Arg.Any(), - Arg.Any(), - Arg.Any()) - .Returns(Task.CompletedTask); - - var service = CreateService(); - await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.NotNull(capturedBcc); - var bccList = capturedBcc!.ToList(); - Assert.Equal(2, bccList.Count); - Assert.Contains("alice@example.com", bccList); - Assert.Contains("bob@example.com", bccList); - } - - [Fact] - public async Task Send_TransientError_WithStoreAndForward_BuffersMessage() - { - SetupHappyPath(); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new TimeoutException("Connection timed out")); - - var dbName = $"file:sf_test_{Guid.NewGuid():N}?mode=memory&cache=shared"; - var storage = new StoreAndForward.StoreAndForwardStorage( - $"Data Source={dbName}", NullLogger.Instance); - await storage.InitializeAsync(); - - var sfOptions = new StoreAndForward.StoreAndForwardOptions(); - var sfService = new StoreAndForward.StoreAndForwardService( - storage, sfOptions, NullLogger.Instance); - - var service = CreateService(sf: sfService); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(result.Success); - Assert.True(result.WasBuffered); - } - - // ── NotificationService-001: buffered-notification delivery handler ── - - private static StoreAndForward.StoreAndForwardMessage BufferedNotification(string listName) => - new() - { - Id = Guid.NewGuid().ToString("N"), - Category = ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.Notification, - Target = listName, - PayloadJson = $$"""{"ListName":"{{listName}}","Subject":"Alert","Message":"Body"}""", - }; - - [Fact] - public async Task DeliverBuffered_HappyPath_ReturnsTrue() - { - SetupHappyPath(); - var service = CreateService(); - - var delivered = await service.DeliverBufferedAsync(BufferedNotification("ops-team")); - - Assert.True(delivered); - } - - [Fact] - public async Task DeliverBuffered_ListNoLongerExists_ReturnsFalseSoMessageParks() - { - _repository.GetListByNameAsync("gone-list").Returns((NotificationList?)null); - var service = CreateService(); - - var delivered = await service.DeliverBufferedAsync(BufferedNotification("gone-list")); - - Assert.False(delivered); // permanent — the S&F engine parks the message - } - - // ── NotificationService-002: cancellation must not be misclassified as transient ── - - /// - /// Like but matches any , - /// so tests that pass an already-cancelled token still resolve the list/recipients. - /// - private void SetupHappyPathAnyToken() - { - var list = new NotificationList("ops-team") { Id = 1 }; - var recipients = new List - { - new("Alice", "alice@example.com") { Id = 1, NotificationListId = 1 } - }; - var smtpConfig = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls" - }; - - _repository.GetListByNameAsync("ops-team", Arg.Any()).Returns(list); - _repository.GetRecipientsByListIdAsync(1, Arg.Any()).Returns(recipients); - _repository.GetAllSmtpConfigurationsAsync(Arg.Any()) - .Returns(new List { smtpConfig }); - } - - [Fact] - public async Task Send_CancellationRequested_PropagatesAndDoesNotBuffer() - { - SetupHappyPathAnyToken(); - using var cts = new CancellationTokenSource(); - cts.Cancel(); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new OperationCanceledException(cts.Token)); - - var sfService = await CreateSfServiceAsync(); - var service = CreateService(sf: sfService); - - await Assert.ThrowsAnyAsync( - () => service.SendAsync("ops-team", "Alert", "Body", cancellationToken: cts.Token)); - - // The cancellation propagated instead of being buffered for retry. - var depth = await sfService.GetBufferDepthAsync(); - depth.TryGetValue(ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.Notification, out var count); - Assert.Equal(0, count); - } - - [Fact] - public async Task Send_TaskCanceledException_WithCancellation_Propagates() - { - SetupHappyPathAnyToken(); - using var cts = new CancellationTokenSource(); - cts.Cancel(); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new TaskCanceledException()); - - var service = CreateService(sf: null); - - await Assert.ThrowsAnyAsync( - () => service.SendAsync("ops-team", "Alert", "Body", cancellationToken: cts.Token)); - } - - // ── NotificationService-003: classify on MailKit typed exceptions / status codes ── - - [Fact] - public async Task Send_Smtp5xxCommandException_ClassifiedPermanent() - { - SetupHappyPath(); - // 550 MailboxUnavailable — a real permanent rejection. - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new SmtpCommandException( - SmtpErrorCode.RecipientNotAccepted, SmtpStatusCode.MailboxUnavailable, "rejected")); - - var service = CreateService(); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.Contains("Permanent SMTP error", result.ErrorMessage); - } - - [Fact] - public async Task Send_Smtp4xxCommandException_ClassifiedTransientAndBuffered() - { - SetupHappyPath(); - // 450 MailboxBusy — a real transient failure. - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new SmtpCommandException( - SmtpErrorCode.MessageNotAccepted, SmtpStatusCode.MailboxBusy, "try again")); - - var sfService = await CreateSfServiceAsync(); - var service = CreateService(sf: sfService); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(result.Success); - Assert.True(result.WasBuffered); - } - - [Fact] - public async Task Send_NonSmtpExceptionWith5xxLookalikeText_NotClassifiedAsPermanentSmtpError() - { - // NS-003: the old classifier promoted ANY exception whose message contained - // "5." / "550" / etc. to a permanent SMTP error — so an unrelated failure - // referencing a host like "smtp5.example.com" was silently swallowed as a - // clean "Permanent SMTP error" result. Classification now uses MailKit's - // typed exceptions only, so a non-SMTP exception is no longer misclassified. - // NS-015: that unclassified exception no longer escapes SendAsync either — it - // returns a clean generic "delivery failed" result (NOT "Permanent SMTP error"). - SetupHappyPath(); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new InvalidOperationException("internal error talking to smtp5.example.com")); - - var service = CreateService(); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - // It is reported as a generic delivery failure, not mistaken for a 5xx rejection. - Assert.DoesNotContain("Permanent SMTP error", result.ErrorMessage); - } - - [Fact] - public async Task Send_SmtpProtocolException_ClassifiedTransient() - { - SetupHappyPath(); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new SmtpProtocolException("protocol error")); - - var sfService = await CreateSfServiceAsync(); - var service = CreateService(sf: sfService); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(result.Success); - Assert.True(result.WasBuffered); - } - - // ── NotificationService-004: DeliverAsync must create exactly one client and dispose it ── - - private sealed class TrackingSmtpClient : ISmtpClientWrapper, IDisposable - { - public bool Disposed { get; private set; } - public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task DisconnectAsync(CancellationToken cancellationToken = default) - => Task.CompletedTask; - public void Dispose() => Disposed = true; - } - - [Fact] - public async Task Send_CreatesExactlyOneSmtpClient_AndDisposesIt() - { - SetupHappyPath(); - var created = new List(); - var service = new NotificationDeliveryService( - _repository, - () => - { - var c = new TrackingSmtpClient(); - created.Add(c); - return c; - }, - NullLogger.Instance); - - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(result.Success); - Assert.Single(created); // NS-004: factory invoked once, not twice - Assert.True(created[0].Disposed); // the client actually used is disposed - } - - private static async Task CreateSfServiceAsync() - { - var dbName = $"file:sf_test_{Guid.NewGuid():N}?mode=memory&cache=shared"; - var storage = new StoreAndForwardStorage( - $"Data Source={dbName}", NullLogger.Instance); - await storage.InitializeAsync(); - return new StoreAndForwardService( - storage, new StoreAndForwardOptions(), NullLogger.Instance); - } - - // ── NotificationService-010: SMTP client is disconnected on the failure path ── - - /// - /// An SMTP wrapper that records whether ran and - /// can be told to fail at a chosen stage of the delivery sequence. - /// - private sealed class DisconnectTrackingClient : ISmtpClientWrapper, IDisposable - { - private readonly Func? _failOnSend; - private readonly Func? _failOnAuthenticate; - - public DisconnectTrackingClient( - Func? failOnSend = null, Func? failOnAuthenticate = null) - { - _failOnSend = failOnSend; - _failOnAuthenticate = failOnAuthenticate; - } - - public bool Disconnected { get; private set; } - public bool Disposed { get; private set; } - - public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default) - => Task.CompletedTask; - - public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default) - => _failOnAuthenticate != null ? Task.FromException(_failOnAuthenticate()) : Task.CompletedTask; - - public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default) - => _failOnSend != null ? Task.FromException(_failOnSend()) : Task.CompletedTask; - - public Task DisconnectAsync(CancellationToken cancellationToken = default) - { - Disconnected = true; - return Task.CompletedTask; - } - - public void Dispose() => Disposed = true; - } - - [Fact] - public async Task Send_TransientFailureDuringSend_StillDisconnectsClient() - { - // NS-010: DisconnectAsync used to run only on the success path inside the - // try block. A failure in SendAsync left the authenticated connection open - // (the SMTP QUIT was never issued), leaking server connection slots under - // sustained transient failures. - SetupHappyPath(); - var tracking = new DisconnectTrackingClient( - failOnSend: () => new SmtpProtocolException("protocol error")); - var service = new NotificationDeliveryService( - _repository, () => tracking, NullLogger.Instance); - - await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(tracking.Disconnected, "DeliverAsync must disconnect the SMTP client even when the send fails"); - } - - [Fact] - public async Task Send_FailureDuringAuthenticate_StillDisconnectsClient() - { - // NS-010: an AuthenticateAsync failure must also tear the connection down. - SetupHappyPath(); - var tracking = new DisconnectTrackingClient( - failOnAuthenticate: () => new SmtpProtocolException("auth handshake failed")); - var service = new NotificationDeliveryService( - _repository, () => tracking, NullLogger.Instance); - - await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(tracking.Disconnected, "DeliverAsync must disconnect the SMTP client even when authentication fails"); - } - - // ── NotificationService-005: explicit TLS mode passed through to the wrapper ── - - /// An SMTP wrapper that records the TLS mode and timeout it was connected with. - private sealed class RecordingTlsClient : ISmtpClientWrapper - { - public SmtpTlsMode? TlsMode { get; private set; } - public int ConnectionTimeoutSeconds { get; private set; } - public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default) - { - TlsMode = tlsMode; - ConnectionTimeoutSeconds = connectionTimeoutSeconds; - return Task.CompletedTask; - } - public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task DisconnectAsync(CancellationToken cancellationToken = default) => Task.CompletedTask; - } - - private void SetupHappyPathWithSmtp(SmtpConfiguration smtpConfig) - { - var list = new NotificationList("ops-team") { Id = 1 }; - var recipients = new List - { - new("Alice", "alice@example.com") { Id = 1, NotificationListId = 1 } - }; - _repository.GetListByNameAsync("ops-team").Returns(list); - _repository.GetRecipientsByListIdAsync(1).Returns(recipients); - _repository.GetAllSmtpConfigurationsAsync().Returns(new List { smtpConfig }); - } - - [Fact] - public async Task Send_TlsModeNone_DoesNotNegotiateTls() - { - // NS-005: TlsMode "none" must connect with SmtpTlsMode.None, not the old - // SecureSocketOptions.Auto (which let MailKit opportunistically negotiate TLS). - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 25, Credentials = "user:pass", TlsMode = "none" - }; - SetupHappyPathWithSmtp(cfg); - var recording = new RecordingTlsClient(); - var service = new NotificationDeliveryService( - _repository, () => recording, NullLogger.Instance); - - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(result.Success); - Assert.Equal(SmtpTlsMode.None, recording.TlsMode); - } - - [Fact] - public async Task Send_TlsModeSsl_UsesImplicitSsl() - { - // NS-005: TlsMode "ssl" (port 465 implicit TLS) must be honoured, not - // collapsed into the same path as "none". - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 465, Credentials = "user:pass", TlsMode = "ssl" - }; - SetupHappyPathWithSmtp(cfg); - var recording = new RecordingTlsClient(); - var service = new NotificationDeliveryService( - _repository, () => recording, NullLogger.Instance); - - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(result.Success); - Assert.Equal(SmtpTlsMode.Ssl, recording.TlsMode); - } - - [Fact] - public async Task Send_UnknownTlsMode_ReturnsErrorNotSilentFallback() - { - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "bogus" - }; - SetupHappyPathWithSmtp(cfg); - var service = new NotificationDeliveryService( - _repository, () => new RecordingTlsClient(), NullLogger.Instance); - - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.Contains("TLS mode", result.ErrorMessage); - } - - // ── NotificationService-007: connection timeout passed through to the wrapper ── - - [Fact] - public async Task Send_PassesConfiguredConnectionTimeoutToClient() - { - // NS-007: SmtpConfiguration.ConnectionTimeoutSeconds must reach the wrapper - // so SmtpClient.Timeout is set; it was previously dead configuration. - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls", - ConnectionTimeoutSeconds = 17 - }; - SetupHappyPathWithSmtp(cfg); - var recording = new RecordingTlsClient(); - var service = new NotificationDeliveryService( - _repository, () => recording, NullLogger.Instance); - - await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.Equal(17, recording.ConnectionTimeoutSeconds); - } - - [Fact] - public async Task Send_MaxConcurrentConnections_LimitsConcurrentDeliveries() - { - // NS-007: MaxConcurrentConnections must throttle concurrent SMTP deliveries. - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls", - MaxConcurrentConnections = 2 - }; - SetupHappyPathWithSmtp(cfg); - - var inFlight = 0; - var maxObserved = 0; - var gate = new SemaphoreSlim(0); - var sync = new object(); - - var service = new NotificationDeliveryService( - _repository, - () => new BlockingSmtpClient( - onSend: async () => - { - lock (sync) - { - inFlight++; - if (inFlight > maxObserved) maxObserved = inFlight; - } - await gate.WaitAsync(); - lock (sync) { inFlight--; } - }), - NullLogger.Instance); - - var sends = Enumerable.Range(0, 6) - .Select(_ => service.SendAsync("ops-team", "Alert", "Body")) - .ToList(); - - // Give the throttled sends time to reach the SMTP send call. - await Task.Delay(200); - gate.Release(6); - await Task.WhenAll(sends); - - Assert.True(maxObserved <= 2, $"Expected at most 2 concurrent deliveries, observed {maxObserved}"); - } - - private sealed class BlockingSmtpClient : ISmtpClientWrapper, IDisposable - { - private readonly Func _onSend; - public BlockingSmtpClient(Func onSend) => _onSend = onSend; - public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default) - => _onSend(); - public Task DisconnectAsync(CancellationToken cancellationToken = default) => Task.CompletedTask; - public void Dispose() { } - } - - // ── NotificationService-008: recipient address validation ── - - [Fact] - public async Task Send_MalformedRecipientAddress_ReturnsCleanError_DoesNotThrow() - { - // NS-008: a malformed recipient address previously caused MailboxAddress.Parse - // to throw ParseException, which escaped SendAsync unhandled. It must now - // produce a clean NotificationResult failure. - var list = new NotificationList("ops-team") { Id = 1 }; - var recipients = new List - { - new("Alice", "alice@example.com") { Id = 1, NotificationListId = 1 }, - new("Bad", "not a valid address @@") { Id = 2, NotificationListId = 1 } - }; - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls" - }; - _repository.GetListByNameAsync("ops-team").Returns(list); - _repository.GetRecipientsByListIdAsync(1).Returns(recipients); - _repository.GetAllSmtpConfigurationsAsync().Returns(new List { cfg }); - - var service = CreateService(); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.Contains("address", result.ErrorMessage, StringComparison.OrdinalIgnoreCase); - Assert.Contains("not a valid address @@", result.ErrorMessage); - } - - // ── NotificationService-009: credential secrets scrubbed from logs/results ── - - [Fact] - public async Task Send_PermanentError_RedactsCredentialFromResultMessage() - { - // NS-009: a permanent-failure message echoing a credential fragment must be - // scrubbed before it reaches the caller-facing NotificationResult. - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "svcuser:Hunter2Secret", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new SmtpPermanentException("550 rejected — password Hunter2Secret is invalid")); - - var service = CreateService(); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.DoesNotContain("Hunter2Secret", result.ErrorMessage); - } - - [Fact] - public async Task Send_MalformedFromAddress_ReturnsCleanError_DoesNotThrow() - { - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "@@bad-from@@") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - - var service = CreateService(); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.Contains("address", result.ErrorMessage, StringComparison.OrdinalIgnoreCase); - } - - // ── NotificationService-012: OAuth2 delivery path coverage ── - - /// An SMTP wrapper that records the auth type, credentials, and OAuth2 user identity it received. - private sealed class RecordingAuthClient : ISmtpClientWrapper - { - public string? AuthType { get; private set; } - public string? Credentials { get; private set; } - public string? OAuth2UserName { get; private set; } - public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default) - { - AuthType = authType; - Credentials = credentials; - OAuth2UserName = oauth2UserName; - return Task.CompletedTask; - } - public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default) - => Task.CompletedTask; - public Task DisconnectAsync(CancellationToken cancellationToken = default) => Task.CompletedTask; - } - - private static OAuth2TokenService CreateTokenService(string accessToken, int expiresIn = 3600) - { - var json = JsonSerializer.Serialize(new - { - access_token = accessToken, - expires_in = expiresIn, - token_type = "Bearer" - }); - var factory = Substitute.For(); - factory.CreateClient(Arg.Any()) - .Returns(_ => new HttpClient(new StubHttpHandler(json))); - return new OAuth2TokenService(factory, NullLogger.Instance); - } - - private sealed class StubHttpHandler : HttpMessageHandler - { - private readonly string _json; - public StubHttpHandler(string json) => _json = json; - protected override Task SendAsync( - HttpRequestMessage request, CancellationToken cancellationToken) - => Task.FromResult(new HttpResponseMessage(System.Net.HttpStatusCode.OK) - { - Content = new StringContent(_json) - }); - } - - [Fact] - public async Task Send_OAuth2Config_AuthenticatesWithResolvedAccessToken() - { - // NS-012: the OAuth2 delivery branch in DeliverAsync (token resolution during - // a send) was never exercised — every other test uses Basic Auth and a null - // token service. The credentials reaching the SMTP client must be the access - // token from OAuth2TokenService, not the raw tenant:client:secret triple. - var cfg = new SmtpConfiguration("smtp.office365.com", "oauth2", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "tenant1:client1:secret1", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - - var recording = new RecordingAuthClient(); - var service = new NotificationDeliveryService( - _repository, - () => recording, - NullLogger.Instance, - tokenService: CreateTokenService("oauth2-access-token-xyz")); - - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.True(result.Success); - Assert.Equal("oauth2", recording.AuthType); - Assert.Equal("oauth2-access-token-xyz", recording.Credentials); - // NS-021: OAuth2 SASL must carry the FromAddress as the user identity so - // the M365 XOAUTH2 handshake's `user=` field matches the token's mailbox. - Assert.Equal("noreply@example.com", recording.OAuth2UserName); - } - - // ── NotificationService-015: unclassified exceptions must not escape SendAsync ── - - /// - /// An whose token endpoint returns a non-success - /// HTTP status, so GetTokenAsync throws . - /// - private static OAuth2TokenService CreateFailingTokenService( - HttpStatusCode status = HttpStatusCode.Unauthorized) - { - var factory = Substitute.For(); - factory.CreateClient(Arg.Any()) - .Returns(_ => new HttpClient(new FailingHttpHandler(status))); - return new OAuth2TokenService(factory, NullLogger.Instance); - } - - private sealed class FailingHttpHandler : HttpMessageHandler - { - private readonly System.Net.HttpStatusCode _status; - public FailingHttpHandler(System.Net.HttpStatusCode status) => _status = status; - protected override Task SendAsync( - HttpRequestMessage request, CancellationToken cancellationToken) - => Task.FromResult(new HttpResponseMessage(_status) - { - Content = new StringContent("error") - }); - } - - [Fact] - public async Task Send_OAuth2TokenFetchFails_ReturnsCleanError_DoesNotThrow() - { - // NS-015: an OAuth2 token-fetch failure (HttpRequestException from - // EnsureSuccessStatusCode) is classified Unknown — it fell through all three - // catch clauses and escaped SendAsync as a raw exception to the calling - // script. It must instead produce a clean NotificationResult failure. - var cfg = new SmtpConfiguration("smtp.office365.com", "oauth2", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "tenant1:client1:secret1", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - - var service = new NotificationDeliveryService( - _repository, - () => new RecordingAuthClient(), - NullLogger.Instance, - tokenService: CreateFailingTokenService()); - - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.NotNull(result.ErrorMessage); - } - - [Fact] - public async Task Send_OAuth2MalformedCredentials_ReturnsCleanError_DoesNotThrow() - { - // NS-015: a malformed tenant:client:secret triple makes GetTokenAsync throw - // InvalidOperationException — also Unknown-classified, also escaped SendAsync. - var cfg = new SmtpConfiguration("smtp.office365.com", "oauth2", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "no-colons-here", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - - var service = new NotificationDeliveryService( - _repository, - () => new RecordingAuthClient(), - NullLogger.Instance, - tokenService: CreateTokenService("unused")); - - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.NotNull(result.ErrorMessage); - } - - [Fact] - public async Task Send_UnclassifiedException_RedactsCredentialFromResult() - { - // NS-015: the catch-all result, like the permanent-error path (NS-009), must - // not leak credential fragments echoed in an exception message. - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "svcuser:Hunter2Secret", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) - .Throws(new InvalidOperationException("internal failure exposing Hunter2Secret")); - - var service = CreateService(); - var result = await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.False(result.Success); - Assert.DoesNotContain("Hunter2Secret", result.ErrorMessage); - } - - // ── NotificationService-014: unclassified exceptions must not escape DeliverBufferedAsync ── - - [Fact] - public async Task DeliverBuffered_OAuth2MalformedCredentials_ReturnsFalseSoMessageParks() - { - // NS-014: DeliverBufferedAsync caught only SmtpPermanentException. An OAuth2 - // InvalidOperationException (a permanent, unfixable misconfiguration) escaped - // the handler; the S&F engine reinterprets any thrown exception as transient - // and retries forever. A non-retryable cause must park (return false). - var cfg = new SmtpConfiguration("smtp.office365.com", "oauth2", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "no-colons-here", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - - var service = new NotificationDeliveryService( - _repository, - () => new RecordingAuthClient(), - NullLogger.Instance, - tokenService: CreateTokenService("unused")); - - var delivered = await service.DeliverBufferedAsync(BufferedNotification("ops-team")); - - Assert.False(delivered); // parked — retrying cannot fix a malformed credential - } - - [Fact] - public async Task DeliverBuffered_OAuth2TokenEndpoint401_ReturnsFalseSoMessageParks() - { - // NS-014: a 401 from the OAuth2 token endpoint is a permanent credential - // failure — it must park, not be retried on every sweep. - var cfg = new SmtpConfiguration("smtp.office365.com", "oauth2", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "tenant1:client1:secret1", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - - var service = new NotificationDeliveryService( - _repository, - () => new RecordingAuthClient(), - NullLogger.Instance, - tokenService: CreateFailingTokenService(HttpStatusCode.Unauthorized)); - - var delivered = await service.DeliverBufferedAsync(BufferedNotification("ops-team")); - - Assert.False(delivered); // parked — a 401 is a permanent credential failure - } - - [Fact] - public async Task DeliverBuffered_OAuth2TokenEndpoint503_ThrowsSoEngineRetries() - { - // NS-014: a 5xx from the OAuth2 token endpoint is a transient outage — the - // handler must throw so the S&F engine retries on the next sweep. - var cfg = new SmtpConfiguration("smtp.office365.com", "oauth2", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "tenant1:client1:secret1", TlsMode = "starttls" - }; - SetupHappyPathWithSmtp(cfg); - - var service = new NotificationDeliveryService( - _repository, - () => new RecordingAuthClient(), - NullLogger.Instance, - tokenService: CreateFailingTokenService(HttpStatusCode.ServiceUnavailable)); - - await Assert.ThrowsAnyAsync( - () => service.DeliverBufferedAsync(BufferedNotification("ops-team"))); - } - - // ── NotificationService-017: NotificationOptions used as fallback for unset SmtpConfiguration fields ── - - [Fact] - public async Task Send_SmtpConfigTimeoutUnset_FallsBackToNotificationOptions() - { - // NS-017: NotificationOptions was bound from configuration but never read. - // It is now the documented fallback: when SmtpConfiguration.ConnectionTimeoutSeconds - // is non-positive (0 from a partially-deployed row) the NotificationOptions - // value is used instead of leaving the timeout unbounded. - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls", - ConnectionTimeoutSeconds = 0 // not configured on the row - }; - SetupHappyPathWithSmtp(cfg); - var recording = new RecordingTlsClient(); - - var options = Microsoft.Extensions.Options.Options.Create( - new NotificationOptions { ConnectionTimeoutSeconds = 42, MaxConcurrentConnections = 5 }); - var service = new NotificationDeliveryService( - _repository, () => recording, NullLogger.Instance, - options: options); - - await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.Equal(42, recording.ConnectionTimeoutSeconds); - } - - [Fact] - public async Task Send_SmtpConfigTimeoutSet_OverridesNotificationOptions() - { - // NS-017: a value present on the SmtpConfiguration row still wins over the - // NotificationOptions fallback. - var cfg = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") - { - Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls", - ConnectionTimeoutSeconds = 19 - }; - SetupHappyPathWithSmtp(cfg); - var recording = new RecordingTlsClient(); - - var options = Microsoft.Extensions.Options.Options.Create( - new NotificationOptions { ConnectionTimeoutSeconds = 42 }); - var service = new NotificationDeliveryService( - _repository, () => recording, NullLogger.Instance, - options: options); - - await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.Equal(19, recording.ConnectionTimeoutSeconds); - } - - // ── NotificationService-018: concurrency limiter disposal ── - - [Fact] - public async Task Service_Dispose_DisposesConcurrencyLimiter() - { - // NS-018: the lazily-created SemaphoreSlim was never disposed and the service - // did not implement IDisposable — a slow handle leak per scope. Disposing the - // service must dispose the limiter; using it afterwards must fault. - SetupHappyPath(); - var service = CreateService(); - - // A send creates the limiter. - await service.SendAsync("ops-team", "Alert", "Body"); - - Assert.IsAssignableFrom(service); - ((IDisposable)service).Dispose(); - - // A second send after disposal must fail fast on the disposed semaphore - // rather than silently using a disposed object. - await Assert.ThrowsAnyAsync( - () => service.SendAsync("ops-team", "Alert", "Body")); - } -} diff --git a/tests/ScadaLink.StoreAndForward.Tests/NotificationForwarderTests.cs b/tests/ScadaLink.StoreAndForward.Tests/NotificationForwarderTests.cs index 5fa79f45..b3f34a45 100644 --- a/tests/ScadaLink.StoreAndForward.Tests/NotificationForwarderTests.cs +++ b/tests/ScadaLink.StoreAndForward.Tests/NotificationForwarderTests.cs @@ -197,4 +197,58 @@ public class NotificationForwarderTests : TestKit Assert.Equal(submit1.NotificationId, submit2.NotificationId); Assert.Equal("stable-msg-id", submit1.NotificationId); } + + [Fact] + public async Task Deliver_CorruptJsonPayload_ReturnsTrue_AndDoesNotForwardAnything() + { + // Regression test for StoreAndForward-018. The design doc forbids parking + // notifications ("notifications do not park — they are retried at the fixed + // forward interval until central acks"; Component-StoreAndForward.md). The + // previous implementation returned false on a corrupt payload, which the S&F + // engine interprets as a permanent failure and parks the row — contradicting + // the invariant. The fix: discard a corrupt buffered notification by + // returning true (engine clears the buffer via its normal success path), + // with a Warning log line carrying the row id and a payload preview. + var centralProbe = CreateTestProbe(); + var forwarder = new NotificationForwarder( + centralProbe.Ref, "site-7", ForwardTimeout); + + var corrupt = new StoreAndForwardMessage + { + Id = "msg-corrupt", + Category = StoreAndForwardCategory.Notification, + Target = "Operators", + PayloadJson = "{not-valid-json", + OriginInstanceName = "Plant.Pump3", + }; + + Assert.True(await forwarder.DeliverAsync(corrupt)); + + // The corrupt-payload path must NOT round-trip to central — no + // NotificationSubmit / no Ask. ExpectNoMsg confirms nothing was forwarded. + centralProbe.ExpectNoMsg(TimeSpan.FromMilliseconds(200)); + } + + [Fact] + public async Task Deliver_NullDeserializedPayload_ReturnsTrue_AndDoesNotForwardAnything() + { + // The companion case to corrupt JSON: the payload is valid JSON but + // deserialises to null (e.g. "null"). Same treatment per StoreAndForward-018 + // — discard rather than park. + var centralProbe = CreateTestProbe(); + var forwarder = new NotificationForwarder( + centralProbe.Ref, "site-7", ForwardTimeout); + + var nullPayload = new StoreAndForwardMessage + { + Id = "msg-null", + Category = StoreAndForwardCategory.Notification, + Target = "Operators", + PayloadJson = "null", + OriginInstanceName = "Plant.Pump3", + }; + + Assert.True(await forwarder.DeliverAsync(nullPayload)); + centralProbe.ExpectNoMsg(TimeSpan.FromMilliseconds(200)); + } } diff --git a/tests/ScadaLink.TemplateEngine.Tests/Flattening/DiffServiceTests.cs b/tests/ScadaLink.TemplateEngine.Tests/Flattening/DiffServiceTests.cs index aa7088d0..dc132fac 100644 --- a/tests/ScadaLink.TemplateEngine.Tests/Flattening/DiffServiceTests.cs +++ b/tests/ScadaLink.TemplateEngine.Tests/Flattening/DiffServiceTests.cs @@ -133,4 +133,122 @@ public class DiffServiceTests Assert.Single(diff.ScriptChanges); Assert.Equal(DiffChangeType.Changed, diff.ScriptChanges[0].ChangeType); } + + [Fact] + public void ComputeDiff_AttributeDescriptionChange_DetectedAsChanged() + { + // TemplateEngine-017: AttributesEqual must compare Description. + var oldConfig = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + Attributes = + [ + new ResolvedAttribute { CanonicalName = "Temp", Value = "25", DataType = "Double", Description = "Original" } + ] + }; + var newConfig = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + Attributes = + [ + new ResolvedAttribute { CanonicalName = "Temp", Value = "25", DataType = "Double", Description = "Updated" } + ] + }; + + var diff = _sut.ComputeDiff(oldConfig, newConfig); + + Assert.True(diff.HasChanges); + Assert.Single(diff.AttributeChanges); + Assert.Equal(DiffChangeType.Changed, diff.AttributeChanges[0].ChangeType); + } + + [Fact] + public void ComputeDiff_AlarmDescriptionChange_DetectedAsChanged() + { + // TemplateEngine-017: AlarmsEqual must compare Description. + var oldConfig = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + Alarms = + [ + new ResolvedAlarm { CanonicalName = "HighTemp", TriggerType = "RangeViolation", Description = "Original" } + ] + }; + var newConfig = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + Alarms = + [ + new ResolvedAlarm { CanonicalName = "HighTemp", TriggerType = "RangeViolation", Description = "Updated" } + ] + }; + + var diff = _sut.ComputeDiff(oldConfig, newConfig); + + Assert.True(diff.HasChanges); + Assert.Single(diff.AlarmChanges); + Assert.Equal(DiffChangeType.Changed, diff.AlarmChanges[0].ChangeType); + } + + [Fact] + public void ConnectionsEqual_IdenticalConfigs_ReturnsTrue() + { + // TemplateEngine-017: ConnectionsEqual is the comparator callers use + // to detect connection-endpoint drift (the diff-view extension that + // surfaces this in the UI is tracked under TemplateEngine-018). + var a = new ConnectionConfig + { + Protocol = "OpcUa", + ConfigurationJson = "{\"endpoint\":\"opc.tcp://host-a\"}", + BackupConfigurationJson = "{\"endpoint\":\"opc.tcp://host-b\"}", + FailoverRetryCount = 3 + }; + var b = a with { }; + + Assert.True(DiffService.ConnectionsEqual(a, b)); + } + + [Fact] + public void ConnectionsEqual_EndpointEdit_ReturnsFalse() + { + // TemplateEngine-017: primary endpoint JSON edit must surface as a + // change. Without this, deployment redeploys ship a different + // ConnectionConfig with no visible drift signal. + var a = new ConnectionConfig + { + Protocol = "OpcUa", + ConfigurationJson = "{\"endpoint\":\"opc.tcp://host-a:4840\"}", + FailoverRetryCount = 3 + }; + var b = a with { ConfigurationJson = "{\"endpoint\":\"opc.tcp://host-b:4840\"}" }; + + Assert.False(DiffService.ConnectionsEqual(a, b)); + } + + [Fact] + public void ConnectionsEqual_BackupConfigurationEdit_ReturnsFalse() + { + var a = new ConnectionConfig { Protocol = "OpcUa", ConfigurationJson = "{}", BackupConfigurationJson = null, FailoverRetryCount = 3 }; + var b = a with { BackupConfigurationJson = "{\"endpoint\":\"opc.tcp://backup\"}" }; + + Assert.False(DiffService.ConnectionsEqual(a, b)); + } + + [Fact] + public void ConnectionsEqual_FailoverRetryCountEdit_ReturnsFalse() + { + var a = new ConnectionConfig { Protocol = "OpcUa", ConfigurationJson = "{}", FailoverRetryCount = 3 }; + var b = a with { FailoverRetryCount = 5 }; + + Assert.False(DiffService.ConnectionsEqual(a, b)); + } + + [Fact] + public void ConnectionsEqual_ProtocolEdit_ReturnsFalse() + { + var a = new ConnectionConfig { Protocol = "OpcUa", ConfigurationJson = "{}", FailoverRetryCount = 3 }; + var b = a with { Protocol = "Modbus" }; + + Assert.False(DiffService.ConnectionsEqual(a, b)); + } } diff --git a/tests/ScadaLink.TemplateEngine.Tests/Flattening/RevisionHashServiceTests.cs b/tests/ScadaLink.TemplateEngine.Tests/Flattening/RevisionHashServiceTests.cs index 5ab851e9..0ed2d176 100644 --- a/tests/ScadaLink.TemplateEngine.Tests/Flattening/RevisionHashServiceTests.cs +++ b/tests/ScadaLink.TemplateEngine.Tests/Flattening/RevisionHashServiceTests.cs @@ -131,6 +131,154 @@ public class RevisionHashServiceTests } } + [Fact] + public void ComputeHash_AttributeDescriptionEdit_ChangesHash() + { + // TemplateEngine-017: Description must be folded into the hash so that + // edits to authoring-time documentation (which still travels in the + // deployed payload) flow through the staleness indicator. + var baseAttr = new ResolvedAttribute + { + CanonicalName = "Temperature", + Value = "25", + DataType = "Double", + Description = "Original description" + }; + var editedAttr = baseAttr with { Description = "Updated description" }; + + var configBefore = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + TemplateId = 1, + SiteId = 1, + Attributes = [baseAttr] + }; + var configAfter = configBefore with { Attributes = [editedAttr] }; + + Assert.NotEqual(_sut.ComputeHash(configBefore), _sut.ComputeHash(configAfter)); + } + + [Fact] + public void ComputeHash_AlarmDescriptionEdit_ChangesHash() + { + // TemplateEngine-017: same Description contract applies to alarms. + var baseAlarm = new ResolvedAlarm + { + CanonicalName = "HighTemp", + TriggerType = "RangeViolation", + Description = "Original" + }; + var editedAlarm = baseAlarm with { Description = "Updated" }; + + var configBefore = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + TemplateId = 1, + SiteId = 1, + Alarms = [baseAlarm] + }; + var configAfter = configBefore with { Alarms = [editedAlarm] }; + + Assert.NotEqual(_sut.ComputeHash(configBefore), _sut.ComputeHash(configAfter)); + } + + [Fact] + public void ComputeHash_ConnectionEndpointEdit_ChangesHash() + { + // TemplateEngine-017: a Deployment user editing the primary endpoint + // JSON of a data connection bound to an instance must produce a + // different revision hash. The connection's protocol, primary/backup + // configuration JSON, and failover retry count are all part of the + // deployment package and therefore part of the hash input. + var connectionsBefore = new Dictionary + { + ["plc1"] = new ConnectionConfig + { + Protocol = "OpcUa", + ConfigurationJson = "{\"endpoint\":\"opc.tcp://host-a:4840\"}", + BackupConfigurationJson = null, + FailoverRetryCount = 3 + } + }; + var connectionsAfter = new Dictionary + { + ["plc1"] = connectionsBefore["plc1"] with + { + ConfigurationJson = "{\"endpoint\":\"opc.tcp://host-b:4840\"}" + } + }; + + var configBefore = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + TemplateId = 1, + SiteId = 1, + Connections = connectionsBefore + }; + var configAfter = configBefore with { Connections = connectionsAfter }; + + Assert.NotEqual(_sut.ComputeHash(configBefore), _sut.ComputeHash(configAfter)); + } + + [Fact] + public void ComputeHash_ConnectionProtocolEdit_ChangesHash() + { + // TemplateEngine-017: changing protocol must change the hash. + var connectionsBefore = new Dictionary + { + ["plc1"] = new ConnectionConfig + { + Protocol = "OpcUa", + ConfigurationJson = "{}", + FailoverRetryCount = 3 + } + }; + var connectionsAfter = new Dictionary + { + ["plc1"] = connectionsBefore["plc1"] with { Protocol = "Modbus" } + }; + + var configBefore = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + TemplateId = 1, + SiteId = 1, + Connections = connectionsBefore + }; + var configAfter = configBefore with { Connections = connectionsAfter }; + + Assert.NotEqual(_sut.ComputeHash(configBefore), _sut.ComputeHash(configAfter)); + } + + [Fact] + public void ComputeHash_ConnectionsSameContent_SameHash() + { + // TemplateEngine-017: equal Connections maps must yield the same hash, + // regardless of dictionary iteration order (the SortedDictionary + // projection guards this). + var connections1 = new Dictionary + { + ["b"] = new ConnectionConfig { Protocol = "OpcUa", ConfigurationJson = "{\"k\":2}", FailoverRetryCount = 3 }, + ["a"] = new ConnectionConfig { Protocol = "OpcUa", ConfigurationJson = "{\"k\":1}", FailoverRetryCount = 3 } + }; + var connections2 = new Dictionary + { + ["a"] = new ConnectionConfig { Protocol = "OpcUa", ConfigurationJson = "{\"k\":1}", FailoverRetryCount = 3 }, + ["b"] = new ConnectionConfig { Protocol = "OpcUa", ConfigurationJson = "{\"k\":2}", FailoverRetryCount = 3 } + }; + + var config1 = new FlattenedConfiguration + { + InstanceUniqueName = "Instance1", + TemplateId = 1, + SiteId = 1, + Connections = connections1 + }; + var config2 = config1 with { Connections = connections2 }; + + Assert.Equal(_sut.ComputeHash(config1), _sut.ComputeHash(config2)); + } + private static FlattenedConfiguration CreateConfig(string instanceName, string tempValue) { return new FlattenedConfiguration