From 393172f169ac4582ec4fa5b79a40b4b81a76b714 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Sat, 16 May 2026 19:47:17 -0400 Subject: [PATCH] =?UTF-8?q?fix(notification-service):=20resolve=20Notifica?= =?UTF-8?q?tionService-002/003/004=20=E2=80=94=20error=20classification=20?= =?UTF-8?q?by=20SMTP=20status=20code,=20single=20SMTP=20client?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code-reviews/NotificationService/findings.md | 35 +++- .../NotificationDeliveryService.cs | 101 +++++++--- .../NotificationDeliveryServiceTests.cs | 180 ++++++++++++++++++ ...ScadaLink.NotificationService.Tests.csproj | 1 + 4 files changed, 288 insertions(+), 29 deletions(-) diff --git a/code-reviews/NotificationService/findings.md b/code-reviews/NotificationService/findings.md index ead577d..39b697f 100644 --- a/code-reviews/NotificationService/findings.md +++ b/code-reviews/NotificationService/findings.md @@ -8,7 +8,7 @@ | Last reviewed | 2026-05-16 | | Reviewer | claude-agent | | Commit reviewed | `9c60592` | -| Open findings | 11 | +| Open findings | 8 | ## Summary @@ -82,7 +82,7 @@ path. Fixed by the commit whose message references `NotificationService-001`. |--|--| | Severity | High | | Category | Error handling & resilience | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:157-167` | **Description** @@ -95,7 +95,14 @@ Re-throw `OperationCanceledException`/`TaskCanceledException` when `cancellation **Resolution** -_Unresolved._ +Resolved 2026-05-16 (commit ``). Classification was rewritten around a typed +`ClassifySmtpError` helper: a caller-requested cancellation (`OperationCanceledException`/ +`TaskCanceledException` while `cancellationToken.IsCancellationRequested`) now propagates +out of both `SendAsync` and `DeliverAsync` via dedicated `catch` filters instead of being +buffered. The broad `IOException` catch-all was dropped — only MailKit's typed exceptions +plus `SocketException`/`TimeoutException` are treated as transient. Regression tests +`Send_CancellationRequested_PropagatesAndDoesNotBuffer` and +`Send_TaskCanceledException_WithCancellation_Propagates`. ### NotificationService-003 — Error classification by substring matching on exception messages is fragile @@ -103,7 +110,7 @@ _Unresolved._ |--|--| | Severity | High | | Category | Error handling & resilience | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:144-147`, `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:163-166` | **Description** @@ -116,7 +123,16 @@ Classify on MailKit's typed exceptions and `SmtpCommandException.StatusCode` (4x **Resolution** -_Unresolved._ +Resolved 2026-05-16 (commit ``). All `ex.Message.Contains(...)` checks were +removed. The new `ClassifySmtpError` helper inspects `SmtpCommandException.StatusCode` +(numeric SMTP code: 4xx → transient, 5xx → permanent) and treats `SmtpProtocolException`, +`ServiceNotConnectedException`, `SocketException` and `TimeoutException` as transient; +anything else is `Unknown` and propagates unclassified rather than being guessed. The +permanent-promotion `catch` block in `DeliverAsync` now keys off this classification. +Regression tests `Send_Smtp5xxCommandException_ClassifiedPermanent`, +`Send_Smtp4xxCommandException_ClassifiedTransientAndBuffered`, +`Send_SmtpProtocolException_ClassifiedTransient`, and +`Send_NonSmtpExceptionWith5xxLookalikeText_NotPromotedToPermanent`. ### NotificationService-004 — `DeliverAsync` constructs two SMTP clients and leaks the used one @@ -124,7 +140,7 @@ _Unresolved._ |--|--| | Severity | High | | Category | Performance & resource management | -| Status | Open | +| Status | Resolved | | Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:118-119` | **Description** @@ -143,7 +159,12 @@ Create exactly one client and dispose the one that is actually used: **Resolution** -_Unresolved._ +Resolved 2026-05-16 (commit ``). `DeliverAsync` now invokes `_smtpClientFactory()` +exactly once and disposes the client actually used via `using var disposable = smtp as +IDisposable;`. The previous code created two `MailKitSmtpClientWrapper` instances per send +and disposed the unused one while leaking the connected one. Regression test +`Send_CreatesExactlyOneSmtpClient_AndDisposesIt` verifies the factory is invoked once and +the resulting client is disposed. ### NotificationService-005 — Non-TLS path uses `SecureSocketOptions.Auto`, contradicting the requested mode diff --git a/src/ScadaLink.NotificationService/NotificationDeliveryService.cs b/src/ScadaLink.NotificationService/NotificationDeliveryService.cs index 3e1934e..68efd2b 100644 --- a/src/ScadaLink.NotificationService/NotificationDeliveryService.cs +++ b/src/ScadaLink.NotificationService/NotificationDeliveryService.cs @@ -1,4 +1,7 @@ +using System.Net.Sockets; using System.Text.Json; +using MailKit; +using MailKit.Net.Smtp; using Microsoft.Extensions.Logging; using ScadaLink.Commons.Entities.Notifications; using ScadaLink.Commons.Interfaces.Repositories; @@ -76,7 +79,12 @@ public class NotificationDeliveryService : INotificationDeliveryService _logger.LogError(ex, "Permanent SMTP failure sending to list {List}", listName); return new NotificationResult(false, $"Permanent SMTP error: {ex.Message}"); } - catch (Exception ex) when (IsTransientSmtpError(ex)) + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + // NS-002: a caller-requested cancellation propagates; it is not buffered. + throw; + } + catch (Exception ex) when (IsTransientSmtpError(ex, cancellationToken)) { // WP-12: Transient SMTP failure — hand to S&F _logger.LogWarning(ex, "Transient SMTP failure sending to list {List}, buffering for retry", listName); @@ -172,8 +180,9 @@ public class NotificationDeliveryService : INotificationDeliveryService string body, CancellationToken cancellationToken) { - using var client = _smtpClientFactory() as IDisposable; + // NS-004: create exactly one client and dispose the one actually used. var smtp = _smtpClientFactory(); + using var disposable = smtp as IDisposable; try { @@ -195,32 +204,80 @@ public class NotificationDeliveryService : INotificationDeliveryService await smtp.DisconnectAsync(cancellationToken); } - catch (Exception ex) when (ex is not SmtpPermanentException && !IsTransientSmtpError(ex)) + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { - // Classify unrecognized SMTP exceptions - if (ex.Message.Contains("5.", StringComparison.Ordinal) || - ex.Message.Contains("550", StringComparison.Ordinal) || - ex.Message.Contains("553", StringComparison.Ordinal) || - ex.Message.Contains("554", StringComparison.Ordinal)) - { - throw new SmtpPermanentException(ex.Message, ex); - } - - // Default: treat as transient + // NS-002: A deliberately cancelled token must propagate as a cancellation, + // not be misclassified as a transient SMTP failure and buffered for retry. throw; } + catch (Exception ex) when (ClassifySmtpError(ex, cancellationToken) == SmtpErrorClass.Permanent + && ex is not SmtpPermanentException) + { + // NS-003: Permanent SMTP failure (5xx) — surface a typed permanent exception. + throw new SmtpPermanentException(ex.Message, ex); + } + // Transient and SmtpPermanentException both propagate unchanged: SendAsync's + // catch filters (SmtpPermanentException / IsTransientSmtpError) handle them. } - private static bool IsTransientSmtpError(Exception ex) + private enum SmtpErrorClass { - return ex is TimeoutException - or OperationCanceledException - or System.Net.Sockets.SocketException - or IOException - || ex.Message.Contains("4.", StringComparison.Ordinal) - || ex.Message.Contains("421", StringComparison.Ordinal) - || ex.Message.Contains("450", StringComparison.Ordinal) - || ex.Message.Contains("451", StringComparison.Ordinal); + /// Cancellation or an unrecognised exception — caller decides. + Unknown, + /// Retryable failure (4xx, connection/socket/protocol error, timeout). + Transient, + /// Non-retryable failure (5xx) — must be returned to the script. + Permanent, + } + + /// + /// NS-002/NS-003: Classifies an SMTP failure using MailKit's typed exceptions and + /// the numeric rather than locale-dependent substring + /// matching on the exception message. A cancellation requested by the caller is + /// never treated as a transient SMTP error. + /// + private static SmtpErrorClass ClassifySmtpError(Exception ex, CancellationToken cancellationToken) + { + // A deliberate cancellation is not an SMTP error at all. + if (ex is OperationCanceledException && cancellationToken.IsCancellationRequested) + { + return SmtpErrorClass.Unknown; + } + + // MailKit reports SMTP command failures with the real status code; the + // SmtpStatusCode enum's underlying value is the numeric SMTP reply code. + if (ex is SmtpCommandException command) + { + var code = (int)command.StatusCode; + if (code >= 400 && code < 500) + { + return SmtpErrorClass.Transient; + } + + if (code >= 500 && code < 600) + { + return SmtpErrorClass.Permanent; + } + + return SmtpErrorClass.Unknown; + } + + // Protocol errors, a dropped/unavailable service, socket failures and + // timeouts are all retryable — the message has not been rejected. + if (ex is SmtpProtocolException + or ServiceNotConnectedException + or SocketException + or TimeoutException) + { + return SmtpErrorClass.Transient; + } + + return SmtpErrorClass.Unknown; + } + + private static bool IsTransientSmtpError(Exception ex, CancellationToken cancellationToken) + { + return ClassifySmtpError(ex, cancellationToken) == SmtpErrorClass.Transient; } } diff --git a/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs b/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs index 7b938ec..063680e 100644 --- a/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs +++ b/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs @@ -1,3 +1,5 @@ +using MailKit; +using MailKit.Net.Smtp; using Microsoft.Extensions.Logging.Abstractions; using NSubstitute; using NSubstitute.ExceptionExtensions; @@ -225,4 +227,182 @@ public class NotificationDeliveryServiceTests Assert.False(delivered); // permanent — the S&F engine parks the message } + + // ── NotificationService-002: cancellation must not be misclassified as transient ── + + /// + /// Like but matches any , + /// so tests that pass an already-cancelled token still resolve the list/recipients. + /// + private void SetupHappyPathAnyToken() + { + var list = new NotificationList("ops-team") { Id = 1 }; + var recipients = new List + { + new("Alice", "alice@example.com") { Id = 1, NotificationListId = 1 } + }; + var smtpConfig = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com") + { + Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls" + }; + + _repository.GetListByNameAsync("ops-team", Arg.Any()).Returns(list); + _repository.GetRecipientsByListIdAsync(1, Arg.Any()).Returns(recipients); + _repository.GetAllSmtpConfigurationsAsync(Arg.Any()) + .Returns(new List { smtpConfig }); + } + + [Fact] + public async Task Send_CancellationRequested_PropagatesAndDoesNotBuffer() + { + SetupHappyPathAnyToken(); + using var cts = new CancellationTokenSource(); + cts.Cancel(); + _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) + .Throws(new OperationCanceledException(cts.Token)); + + var sfService = await CreateSfServiceAsync(); + var service = CreateService(sf: sfService); + + await Assert.ThrowsAnyAsync( + () => service.SendAsync("ops-team", "Alert", "Body", cancellationToken: cts.Token)); + + // The cancellation propagated instead of being buffered for retry. + var depth = await sfService.GetBufferDepthAsync(); + depth.TryGetValue(ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.Notification, out var count); + Assert.Equal(0, count); + } + + [Fact] + public async Task Send_TaskCanceledException_WithCancellation_Propagates() + { + SetupHappyPathAnyToken(); + using var cts = new CancellationTokenSource(); + cts.Cancel(); + _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) + .Throws(new TaskCanceledException()); + + var service = CreateService(sf: null); + + await Assert.ThrowsAnyAsync( + () => service.SendAsync("ops-team", "Alert", "Body", cancellationToken: cts.Token)); + } + + // ── NotificationService-003: classify on MailKit typed exceptions / status codes ── + + [Fact] + public async Task Send_Smtp5xxCommandException_ClassifiedPermanent() + { + SetupHappyPath(); + // 550 MailboxUnavailable — a real permanent rejection. + _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) + .Throws(new SmtpCommandException( + SmtpErrorCode.RecipientNotAccepted, SmtpStatusCode.MailboxUnavailable, "rejected")); + + var service = CreateService(); + var result = await service.SendAsync("ops-team", "Alert", "Body"); + + Assert.False(result.Success); + Assert.Contains("Permanent SMTP error", result.ErrorMessage); + } + + [Fact] + public async Task Send_Smtp4xxCommandException_ClassifiedTransientAndBuffered() + { + SetupHappyPath(); + // 450 MailboxBusy — a real transient failure. + _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) + .Throws(new SmtpCommandException( + SmtpErrorCode.MessageNotAccepted, SmtpStatusCode.MailboxBusy, "try again")); + + var sfService = await CreateSfServiceAsync(); + var service = CreateService(sf: sfService); + var result = await service.SendAsync("ops-team", "Alert", "Body"); + + Assert.True(result.Success); + Assert.True(result.WasBuffered); + } + + [Fact] + public async Task Send_NonSmtpExceptionWith5xxLookalikeText_NotPromotedToPermanent() + { + // NS-003: the old classifier promoted ANY exception whose message contained + // "5." / "550" / etc. to a permanent SMTP error — so an unrelated failure + // referencing a host like "smtp5.example.com" was silently swallowed as a + // clean permanent NotificationResult. Classification now uses MailKit's + // typed exceptions only, so a non-SMTP exception is no longer misclassified. + SetupHappyPath(); + _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) + .Throws(new InvalidOperationException("internal error talking to smtp5.example.com")); + + var service = CreateService(); + + // The exception is not classified at all (not a typed SMTP failure); it + // surfaces rather than being mistaken for a permanent 5xx rejection. + await Assert.ThrowsAsync( + () => service.SendAsync("ops-team", "Alert", "Body")); + } + + [Fact] + public async Task Send_SmtpProtocolException_ClassifiedTransient() + { + SetupHappyPath(); + _smtpClient.SendAsync(Arg.Any(), Arg.Any>(), Arg.Any(), Arg.Any(), Arg.Any()) + .Throws(new SmtpProtocolException("protocol error")); + + var sfService = await CreateSfServiceAsync(); + var service = CreateService(sf: sfService); + var result = await service.SendAsync("ops-team", "Alert", "Body"); + + Assert.True(result.Success); + Assert.True(result.WasBuffered); + } + + // ── NotificationService-004: DeliverAsync must create exactly one client and dispose it ── + + private sealed class TrackingSmtpClient : ISmtpClientWrapper, IDisposable + { + public bool Disposed { get; private set; } + public Task ConnectAsync(string host, int port, bool useTls, CancellationToken cancellationToken = default) + => Task.CompletedTask; + public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default) + => Task.CompletedTask; + public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default) + => Task.CompletedTask; + public Task DisconnectAsync(CancellationToken cancellationToken = default) + => Task.CompletedTask; + public void Dispose() => Disposed = true; + } + + [Fact] + public async Task Send_CreatesExactlyOneSmtpClient_AndDisposesIt() + { + SetupHappyPath(); + var created = new List(); + var service = new NotificationDeliveryService( + _repository, + () => + { + var c = new TrackingSmtpClient(); + created.Add(c); + return c; + }, + NullLogger.Instance); + + var result = await service.SendAsync("ops-team", "Alert", "Body"); + + Assert.True(result.Success); + Assert.Single(created); // NS-004: factory invoked once, not twice + Assert.True(created[0].Disposed); // the client actually used is disposed + } + + private static async Task CreateSfServiceAsync() + { + var dbName = $"file:sf_test_{Guid.NewGuid():N}?mode=memory&cache=shared"; + var storage = new StoreAndForwardStorage( + $"Data Source={dbName}", NullLogger.Instance); + await storage.InitializeAsync(); + return new StoreAndForwardService( + storage, new StoreAndForwardOptions(), NullLogger.Instance); + } } diff --git a/tests/ScadaLink.NotificationService.Tests/ScadaLink.NotificationService.Tests.csproj b/tests/ScadaLink.NotificationService.Tests/ScadaLink.NotificationService.Tests.csproj index 58132bd..62a0803 100644 --- a/tests/ScadaLink.NotificationService.Tests/ScadaLink.NotificationService.Tests.csproj +++ b/tests/ScadaLink.NotificationService.Tests/ScadaLink.NotificationService.Tests.csproj @@ -10,6 +10,7 @@ +