fix(notifications): close OAuth2 SMTP + dispatcher resilience gaps (5 findings)
NS-021/NO-001: thread FromAddress into XOAUTH2 so M365 stops rejecting sends with 535 5.7.3. Added an optional, backward-compatible oauth2UserName parameter on ISmtpClientWrapper.AuthenticateAsync; both NotificationService and NotificationOutbox now pass config.FromAddress. NO-002: clamp non-positive SmtpConfiguration.MaxRetries/RetryDelay to the 10-attempt / 1-min fallback, logging a Warning, so a misconfigured row no longer parks transient failures on the first attempt or burn-loops the dispatcher. NO-003: route a lifecycle-scoped CancellationToken from the NotificationOutboxActor through the dispatch sweep into the adapter so in-flight SMTP sends abort on PostStop instead of blocking CoordinatedShutdown for the full SMTP timeout per row. NO-004: await the central audit writer inside the existing try/catch instead of fire-and-forget so the audit task can't outlive the per-sweep DI scope and writer faults reach the operator log instead of being silently dropped. Two AuditLog integration tests seeded RetryDelay = TimeSpan.Zero to force immediate re-claim on the second tick; they now use 1 ms so they keep the same intent without tripping the NO-002 clamp.
This commit is contained in:
@@ -365,11 +365,13 @@ public class AuditWriteFailureSafetyTests : TestKit, IClassFixture<MsSqlMigratio
|
||||
private async Task SeedSmtpConfigAsync()
|
||||
{
|
||||
await using var ctx = CreateContext();
|
||||
// NO-002: dispatcher clamps non-positive RetryDelay to the 1-minute fallback;
|
||||
// use 1 ms so a transient outcome's NextAttemptAt is still effectively due.
|
||||
ctx.SmtpConfigurations.Add(new SmtpConfiguration(
|
||||
"smtp.example.com", "Basic", "noreply@example.com")
|
||||
{
|
||||
MaxRetries = 5,
|
||||
RetryDelay = TimeSpan.Zero,
|
||||
RetryDelay = TimeSpan.FromMilliseconds(1),
|
||||
});
|
||||
await ctx.SaveChangesAsync();
|
||||
}
|
||||
|
||||
@@ -130,9 +130,12 @@ public class NotifyDispatcherAuditTrailTests : TestKit, IClassFixture<MsSqlMigra
|
||||
/// <summary>
|
||||
/// Inserts a single SMTP configuration row so the dispatcher's
|
||||
/// <c>ResolveRetryPolicyAsync</c> sees a real (maxRetries, retryDelay)
|
||||
/// pair rather than the conservative fallback. RetryDelay of 0 means a
|
||||
/// transient outcome's <c>NextAttemptAt</c> is immediately due — useful so
|
||||
/// the SECOND DispatchTick re-claims the row without waiting.
|
||||
/// pair rather than the conservative fallback. A tiny positive RetryDelay
|
||||
/// means a transient outcome's <c>NextAttemptAt</c> is immediately due —
|
||||
/// useful so the SECOND DispatchTick re-claims the row without waiting.
|
||||
/// NO-002: the dispatcher now clamps a non-positive RetryDelay to the
|
||||
/// 1-minute fallback to avoid burn-looping on transient failures, so this
|
||||
/// must be a strictly positive value (1 ms is fine for tests).
|
||||
/// </summary>
|
||||
private async Task SeedSmtpConfigAsync(int maxRetries = 5)
|
||||
{
|
||||
@@ -141,7 +144,7 @@ public class NotifyDispatcherAuditTrailTests : TestKit, IClassFixture<MsSqlMigra
|
||||
"smtp.example.com", "Basic", "noreply@example.com")
|
||||
{
|
||||
MaxRetries = maxRetries,
|
||||
RetryDelay = TimeSpan.Zero,
|
||||
RetryDelay = TimeSpan.FromMilliseconds(1),
|
||||
});
|
||||
await ctx.SaveChangesAsync();
|
||||
}
|
||||
|
||||
@@ -270,6 +270,131 @@ public class NotificationOutboxActorDispatchTests : TestKit
|
||||
Arg.Any<DateTimeOffset>(), Arg.Any<int>(), Arg.Any<CancellationToken>()));
|
||||
}
|
||||
|
||||
[Fact]
public void TransientFailure_WithZeroMaxRetries_RetriesUsingFallback_DoesNotParkImmediately()
{
    // NO-002 regression guard. A configured MaxRetries of 0 used to make the park
    // check (1 >= 0) succeed on the very first transient failure, so the row was
    // parked without a single retry. ResolveRetryPolicyAsync now replaces a
    // non-positive MaxRetries with FallbackMaxRetries (10), so the first transient
    // outcome must leave the row in Retrying instead.
    SetupSmtpRetryPolicy(maxRetries: 0, retryDelay: TimeSpan.FromMinutes(1));
    var dueRow = MakeNotification(retryCount: 0);
    _outboxRepository
        .GetDueAsync(Arg.Any<DateTimeOffset>(), Arg.Any<int>(), Arg.Any<CancellationToken>())
        .Returns(new[] { dueRow });
    var failingAdapter = new StubAdapter(() => DeliveryOutcome.Transient("smtp timeout"));
    var outbox = CreateActor([failingAdapter]);

    outbox.Tell(InternalMessages.DispatchTick.Instance);

    // Exactly one update is expected: retry count bumped to 1, a next attempt
    // scheduled, and the adapter's error text recorded on the row.
    AwaitAssert(() =>
    {
        _outboxRepository.Received(1).UpdateAsync(
            Arg.Is<Notification>(n =>
                n.Status == NotificationStatus.Retrying &&
                n.RetryCount == 1 &&
                n.NextAttemptAt != null &&
                n.LastError == "smtp timeout"),
            Arg.Any<CancellationToken>());
    });
}
|
||||
|
||||
[Fact]
public void TransientFailure_WithNegativeMaxRetries_RetriesUsingFallback_DoesNotParkImmediately()
{
    // NO-002 regression guard. A negative MaxRetries (-1 here) reaches
    // ResolveRetryPolicyAsync unchanged and triggered the same park-on-first-failure
    // bug as zero. It must be clamped to FallbackMaxRetries, so the first transient
    // outcome leaves the row in Retrying.
    SetupSmtpRetryPolicy(maxRetries: -1, retryDelay: TimeSpan.FromMinutes(1));
    var dueRow = MakeNotification(retryCount: 0);
    _outboxRepository
        .GetDueAsync(Arg.Any<DateTimeOffset>(), Arg.Any<int>(), Arg.Any<CancellationToken>())
        .Returns(new[] { dueRow });
    var failingAdapter = new StubAdapter(() => DeliveryOutcome.Transient("smtp timeout"));
    var outbox = CreateActor([failingAdapter]);

    outbox.Tell(InternalMessages.DispatchTick.Instance);

    // Exactly one update is expected: retry count bumped to 1, a next attempt
    // scheduled, and the adapter's error text recorded on the row.
    AwaitAssert(() =>
    {
        _outboxRepository.Received(1).UpdateAsync(
            Arg.Is<Notification>(n =>
                n.Status == NotificationStatus.Retrying &&
                n.RetryCount == 1 &&
                n.NextAttemptAt != null &&
                n.LastError == "smtp timeout"),
            Arg.Any<CancellationToken>());
    });
}
|
||||
|
||||
[Fact]
public void TransientFailure_WithNonPositiveRetryDelay_UsesFallbackDelay_NotZero()
{
    // NO-002 regression guard. With a non-positive RetryDelay the computed
    // NextAttemptAt would equal "now", so the dispatcher would burn-loop on the row.
    // The delay must be clamped to FallbackRetryDelay (1 min) so the schedule
    // actually moves forward. Only the zero case is exercised here.
    SetupSmtpRetryPolicy(maxRetries: 5, retryDelay: TimeSpan.Zero);
    var before = DateTimeOffset.UtcNow;
    var dueRow = MakeNotification(retryCount: 0);
    _outboxRepository
        .GetDueAsync(Arg.Any<DateTimeOffset>(), Arg.Any<int>(), Arg.Any<CancellationToken>())
        .Returns(new[] { dueRow });
    var failingAdapter = new StubAdapter(() => DeliveryOutcome.Transient("smtp timeout"));
    var outbox = CreateActor([failingAdapter]);

    outbox.Tell(InternalMessages.DispatchTick.Instance);

    // The next attempt must land more than 30 s after the pre-dispatch timestamp:
    // comfortably past a zero delay, with slack below the 1-minute fallback.
    AwaitAssert(() =>
    {
        _outboxRepository.Received(1).UpdateAsync(
            Arg.Is<Notification>(n =>
                n.Status == NotificationStatus.Retrying &&
                n.NextAttemptAt != null &&
                n.NextAttemptAt > before + TimeSpan.FromSeconds(30)),
            Arg.Any<CancellationToken>());
    });
}
|
||||
|
||||
[Fact]
public void PostStop_CancelsInFlightDelivery_LeavesRowNonTerminal()
{
    // NO-003 regression guard. The dispatcher used to drop the CancellationToken
    // before calling the channel adapter, so a coordinated shutdown waited out the
    // full SMTP connect/auth/send timeout for every in-flight notification. The
    // actor now hands the adapter a lifecycle-scoped token; cancelling it in
    // PostStop must abort the in-flight delay (standing in for an SMTP send) and
    // the row must NOT be written to a terminal state, so the next active node
    // can pick it back up.
    //
    // NOTE(review): AwaitAssert returns on the first passing poll, so the negative
    // UpdateAsync check below (and with it the elapsed-time assertion) likely passes
    // immediately whether or not the token was ever cancelled. Confirm, and consider
    // asserting on cancellation observed by the adapter itself.
    SetupSmtpRetryPolicy(maxRetries: 5, retryDelay: TimeSpan.FromMinutes(1));
    var inFlightRow = MakeNotification();
    _outboxRepository
        .GetDueAsync(Arg.Any<DateTimeOffset>(), Arg.Any<int>(), Arg.Any<CancellationToken>())
        .Returns(new[] { inFlightRow });

    // A 30-second adapter delay simulates a slow SMTP send. The actor is stopped
    // long before that delay would elapse on its own.
    var simulatedSendTime = TimeSpan.FromSeconds(30);
    var slowAdapter = new StubAdapter(
        () => DeliveryOutcome.Success("ops@example.com"),
        delay: simulatedSendTime);
    var outbox = CreateActor([slowAdapter]);

    outbox.Tell(InternalMessages.DispatchTick.Instance);
    // Do not stop the actor until the adapter call is actually in flight.
    AwaitAssert(() => Assert.Equal(1, slowAdapter.CallCount));

    var promptnessBudget = TimeSpan.FromSeconds(5);
    var stopRequestedAt = DateTimeOffset.UtcNow;
    Sys.Stop(outbox);

    // The sweep is expected to observe cancellation well inside the 30 s delay,
    // without issuing any UpdateAsync, so the row stays untouched for re-claim.
    AwaitAssert(
        () =>
        {
            _outboxRepository.DidNotReceive().UpdateAsync(
                Arg.Any<Notification>(), Arg.Any<CancellationToken>());
        },
        duration: promptnessBudget);

    var elapsedSinceStop = DateTimeOffset.UtcNow - stopRequestedAt;
    Assert.True(elapsedSinceStop < promptnessBudget,
        "PostStop did not cancel the in-flight delivery promptly.");
}
|
||||
|
||||
[Fact]
|
||||
public void OverlappingTicks_WhileDispatchInFlight_DoNotClaimConcurrently()
|
||||
{
|
||||
|
||||
@@ -1,3 +1,6 @@
|
||||
using System.Text;
|
||||
using MailKit.Security;
|
||||
|
||||
namespace ScadaLink.NotificationService.Tests;
|
||||
|
||||
/// <summary>
|
||||
@@ -5,6 +8,8 @@ namespace ScadaLink.NotificationService.Tests;
|
||||
/// silently skip authentication for a misconfigured SMTP config — a missing
|
||||
/// credential, an unrecognised auth type, or an unparseable Basic credential
|
||||
/// must be a hard, surfaced error rather than an unauthenticated send.
|
||||
/// NS-021: the OAuth2 (XOAUTH2) branch must carry a non-empty user identity
|
||||
/// (the SMTP From address) — an empty user is rejected by M365 with `535 5.7.3`.
|
||||
/// </summary>
|
||||
public class MailKitSmtpClientWrapperTests
|
||||
{
|
||||
@@ -42,4 +47,35 @@ public class MailKitSmtpClientWrapperTests
|
||||
await Assert.ThrowsAsync<SmtpPermanentException>(
|
||||
() => wrapper.AuthenticateAsync("basic", "nocolon"));
|
||||
}
|
||||
|
||||
[Fact]
public async Task Authenticate_OAuth2WithoutUserName_Throws()
{
    // NS-021 regression guard. An OAuth2 access token supplied without a user
    // identity (the FromAddress) used to end up as
    // `new SaslMechanismOAuth2("", credentials)`, which M365 rejects with
    // `535 5.7.3`. The wrapper now fails fast so the caller gets a clear
    // configuration error instead of a confusing server-side reject.
    var wrapper = new MailKitSmtpClientWrapper();

    // Both a null and an empty user name count as "missing"; checked in that order.
    foreach (var missingUserName in new string?[] { null, "" })
    {
        await Assert.ThrowsAsync<SmtpPermanentException>(
            () => wrapper.AuthenticateAsync("oauth2", "access-token", oauth2UserName: missingUserName));
    }
}
|
||||
|
||||
[Fact]
public void XOAuth2InitialResponse_CarriesUserAndBearer()
{
    // NS-021 regression guard, independent of the wrapper. It checks that the
    // initial response produced by MailKit's SaslMechanismOAuth2 contains both
    // `user=<userName>` and `auth=Bearer <token>`, i.e. that passing FromAddress
    // as the user name is enough to populate the XOAUTH2 `user=` field. A change
    // to MailKit's framing would make this test fail.
    var mechanism = new SaslMechanismOAuth2("noreply@example.com", "tok-xyz");

    // Challenge returns the response base64-encoded; decode it back to text.
    var encodedResponse = mechanism.Challenge(string.Empty);
    var responseBytes = Convert.FromBase64String(encodedResponse);
    var payload = Encoding.UTF8.GetString(responseBytes);

    Assert.Contains("user=noreply@example.com", payload);
    Assert.Contains("auth=Bearer tok-xyz", payload);
}
|
||||
}
|
||||
|
||||
@@ -115,7 +115,8 @@ public class NotificationDeliveryServiceTests
|
||||
|
||||
await _smtpClient.Received().ConnectAsync(
|
||||
"smtp.example.com", 587, SmtpTlsMode.StartTls, Arg.Any<int>(), Arg.Any<CancellationToken>());
|
||||
await _smtpClient.Received().AuthenticateAsync("basic", "user:pass", Arg.Any<CancellationToken>());
|
||||
await _smtpClient.Received().AuthenticateAsync(
|
||||
"basic", "user:pass", Arg.Any<string?>(), Arg.Any<CancellationToken>());
|
||||
await _smtpClient.Received().SendAsync(
|
||||
"noreply@example.com",
|
||||
Arg.Is<IEnumerable<string>>(bcc => bcc.Count() == 2),
|
||||
@@ -370,7 +371,7 @@ public class NotificationDeliveryServiceTests
|
||||
public bool Disposed { get; private set; }
|
||||
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
|
||||
public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
@@ -435,7 +436,7 @@ public class NotificationDeliveryServiceTests
|
||||
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
|
||||
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
|
||||
public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
|
||||
=> _failOnAuthenticate != null ? Task.FromException(_failOnAuthenticate()) : Task.CompletedTask;
|
||||
|
||||
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
|
||||
@@ -496,7 +497,7 @@ public class NotificationDeliveryServiceTests
|
||||
ConnectionTimeoutSeconds = connectionTimeoutSeconds;
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
|
||||
public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
@@ -643,7 +644,7 @@ public class NotificationDeliveryServiceTests
|
||||
public BlockingSmtpClient(Func<Task> onSend) => _onSend = onSend;
|
||||
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
|
||||
public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
|
||||
=> _onSend();
|
||||
@@ -721,17 +722,19 @@ public class NotificationDeliveryServiceTests
|
||||
|
||||
// ── NotificationService-012: OAuth2 delivery path coverage ──
|
||||
|
||||
/// <summary>An SMTP wrapper that records the auth type and credentials it received.</summary>
|
||||
/// <summary>An SMTP wrapper that records the auth type, credentials, and OAuth2 user identity it received.</summary>
|
||||
private sealed class RecordingAuthClient : ISmtpClientWrapper
|
||||
{
|
||||
public string? AuthType { get; private set; }
|
||||
public string? Credentials { get; private set; }
|
||||
public string? OAuth2UserName { get; private set; }
|
||||
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
|
||||
=> Task.CompletedTask;
|
||||
public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
|
||||
public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
|
||||
{
|
||||
AuthType = authType;
|
||||
Credentials = credentials;
|
||||
OAuth2UserName = oauth2UserName;
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
public Task SendAsync(string from, IEnumerable<string> bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
|
||||
@@ -790,6 +793,9 @@ public class NotificationDeliveryServiceTests
|
||||
Assert.True(result.Success);
|
||||
Assert.Equal("oauth2", recording.AuthType);
|
||||
Assert.Equal("oauth2-access-token-xyz", recording.Credentials);
|
||||
// NS-021: OAuth2 SASL must carry the FromAddress as the user identity so
|
||||
// the M365 XOAUTH2 handshake's `user=` field matches the token's mailbox.
|
||||
Assert.Equal("noreply@example.com", recording.OAuth2UserName);
|
||||
}
|
||||
|
||||
// ── NotificationService-015: unclassified exceptions must not escape SendAsync ──
|
||||
|
||||
Reference in New Issue
Block a user