fix(notification-service): resolve NotificationService-002/003/004 — error classification by SMTP status code, single SMTP client

This commit is contained in:
Joseph Doherty
2026-05-16 19:47:17 -04:00
parent b249ca3bf7
commit 393172f169
4 changed files with 288 additions and 29 deletions

View File

@@ -8,7 +8,7 @@
| Last reviewed | 2026-05-16 |
| Reviewer | claude-agent |
| Commit reviewed | `9c60592` |
| Open findings | 11 |
| Open findings | 8 |
## Summary
@@ -82,7 +82,7 @@ path. Fixed by the commit whose message references `NotificationService-001`.
|--|--|
| Severity | High |
| Category | Error handling & resilience |
| Status | Open |
| Status | Resolved |
| Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:157-167` |
**Description**
@@ -95,7 +95,14 @@ Re-throw `OperationCanceledException`/`TaskCanceledException` when `cancellation
**Resolution**
_Unresolved._
Resolved 2026-05-16 (commit `<pending>`). Classification was rewritten around a typed
`ClassifySmtpError` helper: a caller-requested cancellation (`OperationCanceledException`/
`TaskCanceledException` while `cancellationToken.IsCancellationRequested`) now propagates
out of both `SendAsync` and `DeliverAsync` via dedicated `catch` filters instead of being
buffered. The broad `IOException` catch-all was dropped — only MailKit's typed exceptions
plus `SocketException`/`TimeoutException` are treated as transient. Regression tests
`Send_CancellationRequested_PropagatesAndDoesNotBuffer` and
`Send_TaskCanceledException_WithCancellation_Propagates`.
### NotificationService-003 — Error classification by substring matching on exception messages is fragile
@@ -103,7 +110,7 @@ _Unresolved._
|--|--|
| Severity | High |
| Category | Error handling & resilience |
| Status | Open |
| Status | Resolved |
| Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:144-147`, `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:163-166` |
**Description**
@@ -116,7 +123,16 @@ Classify on MailKit's typed exceptions and `SmtpCommandException.StatusCode` (4x
**Resolution**
_Unresolved._
Resolved 2026-05-16 (commit `<pending>`). All `ex.Message.Contains(...)` checks were
removed. The new `ClassifySmtpError` helper inspects `SmtpCommandException.StatusCode`
(numeric SMTP code: 4xx → transient, 5xx → permanent) and treats `SmtpProtocolException`,
`ServiceNotConnectedException`, `SocketException` and `TimeoutException` as transient;
anything else is `Unknown` and propagates unclassified rather than being guessed. The
permanent-promotion `catch` block in `DeliverAsync` now keys off this classification.
Regression tests `Send_Smtp5xxCommandException_ClassifiedPermanent`,
`Send_Smtp4xxCommandException_ClassifiedTransientAndBuffered`,
`Send_SmtpProtocolException_ClassifiedTransient`, and
`Send_NonSmtpExceptionWith5xxLookalikeText_NotPromotedToPermanent`.
### NotificationService-004 — `DeliverAsync` constructs two SMTP clients and leaks the used one
@@ -124,7 +140,7 @@ _Unresolved._
|--|--|
| Severity | High |
| Category | Performance & resource management |
| Status | Open |
| Status | Resolved |
| Location | `src/ScadaLink.NotificationService/NotificationDeliveryService.cs:118-119` |
**Description**
@@ -143,7 +159,12 @@ Create exactly one client and dispose the one that is actually used:
**Resolution**
_Unresolved._
Resolved 2026-05-16 (commit `<pending>`). `DeliverAsync` now invokes `_smtpClientFactory()`
exactly once and disposes the client actually used via `using var disposable = smtp as
IDisposable;`. The previous code created two `MailKitSmtpClientWrapper` instances per send
and disposed the unused one while leaking the connected one. Regression test
`Send_CreatesExactlyOneSmtpClient_AndDisposesIt` verifies the factory is invoked once and
the resulting client is disposed.
### NotificationService-005 — Non-TLS path uses `SecureSocketOptions.Auto`, contradicting the requested mode

View File

@@ -1,4 +1,7 @@
using System.Net.Sockets;
using System.Text.Json;
using MailKit;
using MailKit.Net.Smtp;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Entities.Notifications;
using ScadaLink.Commons.Interfaces.Repositories;
@@ -76,7 +79,12 @@ public class NotificationDeliveryService : INotificationDeliveryService
_logger.LogError(ex, "Permanent SMTP failure sending to list {List}", listName);
return new NotificationResult(false, $"Permanent SMTP error: {ex.Message}");
}
catch (Exception ex) when (IsTransientSmtpError(ex))
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
// NS-002: a caller-requested cancellation propagates; it is not buffered.
throw;
}
catch (Exception ex) when (IsTransientSmtpError(ex, cancellationToken))
{
// WP-12: Transient SMTP failure — hand to S&F
_logger.LogWarning(ex, "Transient SMTP failure sending to list {List}, buffering for retry", listName);
@@ -172,8 +180,9 @@ public class NotificationDeliveryService : INotificationDeliveryService
string body,
CancellationToken cancellationToken)
{
using var client = _smtpClientFactory() as IDisposable;
// NS-004: create exactly one client and dispose the one actually used.
var smtp = _smtpClientFactory();
using var disposable = smtp as IDisposable;
try
{
@@ -195,32 +204,80 @@ public class NotificationDeliveryService : INotificationDeliveryService
await smtp.DisconnectAsync(cancellationToken);
}
catch (Exception ex) when (ex is not SmtpPermanentException && !IsTransientSmtpError(ex))
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
// Classify unrecognized SMTP exceptions
if (ex.Message.Contains("5.", StringComparison.Ordinal) ||
ex.Message.Contains("550", StringComparison.Ordinal) ||
ex.Message.Contains("553", StringComparison.Ordinal) ||
ex.Message.Contains("554", StringComparison.Ordinal))
{
throw new SmtpPermanentException(ex.Message, ex);
}
// Default: treat as transient
// NS-002: A deliberately cancelled token must propagate as a cancellation,
// not be misclassified as a transient SMTP failure and buffered for retry.
throw;
}
catch (Exception ex) when (ClassifySmtpError(ex, cancellationToken) == SmtpErrorClass.Permanent
&& ex is not SmtpPermanentException)
{
// NS-003: Permanent SMTP failure (5xx) — surface a typed permanent exception.
throw new SmtpPermanentException(ex.Message, ex);
}
// Transient and SmtpPermanentException both propagate unchanged: SendAsync's
// catch filters (SmtpPermanentException / IsTransientSmtpError) handle them.
}
private static bool IsTransientSmtpError(Exception ex)
private enum SmtpErrorClass
{
return ex is TimeoutException
or OperationCanceledException
or System.Net.Sockets.SocketException
or IOException
|| ex.Message.Contains("4.", StringComparison.Ordinal)
|| ex.Message.Contains("421", StringComparison.Ordinal)
|| ex.Message.Contains("450", StringComparison.Ordinal)
|| ex.Message.Contains("451", StringComparison.Ordinal);
/// <summary>Cancellation or an unrecognised exception — caller decides.</summary>
Unknown,
/// <summary>Retryable failure (4xx, connection/socket/protocol error, timeout).</summary>
Transient,
/// <summary>Non-retryable failure (5xx) — must be returned to the script.</summary>
Permanent,
}
/// <summary>
/// NS-002/NS-003: Classifies an SMTP failure using MailKit's typed exceptions and
/// the numeric <see cref="SmtpStatusCode"/> rather than locale-dependent substring
/// matching on the exception message. A cancellation requested by the caller is
/// never treated as a transient SMTP error.
/// </summary>
private static SmtpErrorClass ClassifySmtpError(Exception ex, CancellationToken cancellationToken)
{
// A deliberate cancellation is not an SMTP error at all.
if (ex is OperationCanceledException && cancellationToken.IsCancellationRequested)
{
return SmtpErrorClass.Unknown;
}
// MailKit reports SMTP command failures with the real status code; the
// SmtpStatusCode enum's underlying value is the numeric SMTP reply code.
if (ex is SmtpCommandException command)
{
var code = (int)command.StatusCode;
if (code >= 400 && code < 500)
{
return SmtpErrorClass.Transient;
}
if (code >= 500 && code < 600)
{
return SmtpErrorClass.Permanent;
}
return SmtpErrorClass.Unknown;
}
// Protocol errors, a dropped/unavailable service, socket failures and
// timeouts are all retryable — the message has not been rejected.
if (ex is SmtpProtocolException
or ServiceNotConnectedException
or SocketException
or TimeoutException)
{
return SmtpErrorClass.Transient;
}
return SmtpErrorClass.Unknown;
}
private static bool IsTransientSmtpError(Exception ex, CancellationToken cancellationToken)
{
return ClassifySmtpError(ex, cancellationToken) == SmtpErrorClass.Transient;
}
}

View File

@@ -1,3 +1,5 @@
using MailKit;
using MailKit.Net.Smtp;
using Microsoft.Extensions.Logging.Abstractions;
using NSubstitute;
using NSubstitute.ExceptionExtensions;
@@ -225,4 +227,182 @@ public class NotificationDeliveryServiceTests
Assert.False(delivered); // permanent — the S&F engine parks the message
}
// ── NotificationService-002: cancellation must not be misclassified as transient ──
/// <summary>
/// Like <see cref="SetupHappyPath"/> but matches any <see cref="CancellationToken"/>,
/// so tests that pass an already-cancelled token still resolve the list/recipients.
/// </summary>
private void SetupHappyPathAnyToken()
{
var list = new NotificationList("ops-team") { Id = 1 };
var recipients = new List<NotificationRecipient>
{
new("Alice", "alice@example.com") { Id = 1, NotificationListId = 1 }
};
var smtpConfig = new SmtpConfiguration("smtp.example.com", "basic", "noreply@example.com")
{
Id = 1, Port = 587, Credentials = "user:pass", TlsMode = "starttls"
};
_repository.GetListByNameAsync("ops-team", Arg.Any<CancellationToken>()).Returns(list);
_repository.GetRecipientsByListIdAsync(1, Arg.Any<CancellationToken>()).Returns(recipients);
_repository.GetAllSmtpConfigurationsAsync(Arg.Any<CancellationToken>())
.Returns(new List<SmtpConfiguration> { smtpConfig });
}
[Fact]
public async Task Send_CancellationRequested_PropagatesAndDoesNotBuffer()
{
SetupHappyPathAnyToken();
using var cts = new CancellationTokenSource();
cts.Cancel();
_smtpClient.SendAsync(Arg.Any<string>(), Arg.Any<IEnumerable<string>>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<CancellationToken>())
.Throws(new OperationCanceledException(cts.Token));
var sfService = await CreateSfServiceAsync();
var service = CreateService(sf: sfService);
await Assert.ThrowsAnyAsync<OperationCanceledException>(
() => service.SendAsync("ops-team", "Alert", "Body", cancellationToken: cts.Token));
// The cancellation propagated instead of being buffered for retry.
var depth = await sfService.GetBufferDepthAsync();
depth.TryGetValue(ScadaLink.Commons.Types.Enums.StoreAndForwardCategory.Notification, out var count);
Assert.Equal(0, count);
}
[Fact]
public async Task Send_TaskCanceledException_WithCancellation_Propagates()
{
SetupHappyPathAnyToken();
using var cts = new CancellationTokenSource();
cts.Cancel();
_smtpClient.SendAsync(Arg.Any<string>(), Arg.Any<IEnumerable<string>>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<CancellationToken>())
.Throws(new TaskCanceledException());
var service = CreateService(sf: null);
await Assert.ThrowsAnyAsync<OperationCanceledException>(
() => service.SendAsync("ops-team", "Alert", "Body", cancellationToken: cts.Token));
}
// ── NotificationService-003: classify on MailKit typed exceptions / status codes ──
[Fact]
public async Task Send_Smtp5xxCommandException_ClassifiedPermanent()
{
SetupHappyPath();
// 550 MailboxUnavailable — a real permanent rejection.
_smtpClient.SendAsync(Arg.Any<string>(), Arg.Any<IEnumerable<string>>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<CancellationToken>())
.Throws(new SmtpCommandException(
SmtpErrorCode.RecipientNotAccepted, SmtpStatusCode.MailboxUnavailable, "rejected"));
var service = CreateService();
var result = await service.SendAsync("ops-team", "Alert", "Body");
Assert.False(result.Success);
Assert.Contains("Permanent SMTP error", result.ErrorMessage);
}
[Fact]
public async Task Send_Smtp4xxCommandException_ClassifiedTransientAndBuffered()
{
SetupHappyPath();
// 450 MailboxBusy — a real transient failure.
_smtpClient.SendAsync(Arg.Any<string>(), Arg.Any<IEnumerable<string>>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<CancellationToken>())
.Throws(new SmtpCommandException(
SmtpErrorCode.MessageNotAccepted, SmtpStatusCode.MailboxBusy, "try again"));
var sfService = await CreateSfServiceAsync();
var service = CreateService(sf: sfService);
var result = await service.SendAsync("ops-team", "Alert", "Body");
Assert.True(result.Success);
Assert.True(result.WasBuffered);
}
[Fact]
public async Task Send_NonSmtpExceptionWith5xxLookalikeText_NotPromotedToPermanent()
{
// NS-003: the old classifier promoted ANY exception whose message contained
// "5." / "550" / etc. to a permanent SMTP error — so an unrelated failure
// referencing a host like "smtp5.example.com" was silently swallowed as a
// clean permanent NotificationResult. Classification now uses MailKit's
// typed exceptions only, so a non-SMTP exception is no longer misclassified.
SetupHappyPath();
_smtpClient.SendAsync(Arg.Any<string>(), Arg.Any<IEnumerable<string>>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<CancellationToken>())
.Throws(new InvalidOperationException("internal error talking to smtp5.example.com"));
var service = CreateService();
// The exception is not classified at all (not a typed SMTP failure); it
// surfaces rather than being mistaken for a permanent 5xx rejection.
await Assert.ThrowsAsync<InvalidOperationException>(
() => service.SendAsync("ops-team", "Alert", "Body"));
}
[Fact]
public async Task Send_SmtpProtocolException_ClassifiedTransient()
{
SetupHappyPath();
_smtpClient.SendAsync(Arg.Any<string>(), Arg.Any<IEnumerable<string>>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<CancellationToken>())
.Throws(new SmtpProtocolException("protocol error"));
var sfService = await CreateSfServiceAsync();
var service = CreateService(sf: sfService);
var result = await service.SendAsync("ops-team", "Alert", "Body");
Assert.True(result.Success);
Assert.True(result.WasBuffered);
}
// ── NotificationService-004: DeliverAsync must create exactly one client and dispose it ──
private sealed class TrackingSmtpClient : ISmtpClientWrapper, IDisposable
{
public bool Disposed { get; private set; }
public Task ConnectAsync(string host, int port, bool useTls, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
public Task DisconnectAsync(CancellationToken cancellationToken = default)
=> Task.CompletedTask;
public void Dispose() => Disposed = true;
}
[Fact]
public async Task Send_CreatesExactlyOneSmtpClient_AndDisposesIt()
{
SetupHappyPath();
var created = new List<TrackingSmtpClient>();
var service = new NotificationDeliveryService(
_repository,
() =>
{
var c = new TrackingSmtpClient();
created.Add(c);
return c;
},
NullLogger<NotificationDeliveryService>.Instance);
var result = await service.SendAsync("ops-team", "Alert", "Body");
Assert.True(result.Success);
Assert.Single(created); // NS-004: factory invoked once, not twice
Assert.True(created[0].Disposed); // the client actually used is disposed
}
private static async Task<StoreAndForwardService> CreateSfServiceAsync()
{
var dbName = $"file:sf_test_{Guid.NewGuid():N}?mode=memory&cache=shared";
var storage = new StoreAndForwardStorage(
$"Data Source={dbName}", NullLogger<StoreAndForwardStorage>.Instance);
await storage.InitializeAsync();
return new StoreAndForwardService(
storage, new StoreAndForwardOptions(), NullLogger<StoreAndForwardService>.Instance);
}
}

View File

@@ -10,6 +10,7 @@
<ItemGroup>
<PackageReference Include="coverlet.collector" />
<PackageReference Include="MailKit" />
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="NSubstitute" />
<PackageReference Include="xunit" />