diff --git a/code-reviews/NotificationOutbox/findings.md b/code-reviews/NotificationOutbox/findings.md
index 5fb6c48d..75e2951a 100644
--- a/code-reviews/NotificationOutbox/findings.md
+++ b/code-reviews/NotificationOutbox/findings.md
@@ -62,7 +62,7 @@ from `Component-NotificationOutbox.md`. No Critical findings; two High, six Medi
|--|--|
| Severity | High |
| Category | Correctness & logic bugs |
-| Status | Open |
+| Status | Resolved |
| Location | `src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs:185-191` (calls `smtp.AuthenticateAsync("oauth2", token)`); root cause in `src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs:76-79` |
**Description**
@@ -105,7 +105,7 @@ _Unresolved._
|--|--|
| Severity | High |
| Category | Correctness & logic bugs |
-| Status | Open |
+| Status | Resolved |
| Location | `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs:348-360` |
**Description**
@@ -145,7 +145,7 @@ _Unresolved._
|--|--|
| Severity | Medium |
| Category | Error handling & resilience |
-| Status | Open |
+| Status | Resolved |
| Location | `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs:334`, `src/ScadaLink.NotificationOutbox/Delivery/INotificationDeliveryAdapter.cs:22` |
**Description**
@@ -185,7 +185,7 @@ _Unresolved._
|--|--|
| Severity | Medium |
| Category | Akka.NET conventions |
-| Status | Open |
+| Status | Resolved |
| Location | `src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs:425-435`, `463-485` |
**Description**
diff --git a/code-reviews/NotificationService/findings.md b/code-reviews/NotificationService/findings.md
index 407e7e30..3611ed93 100644
--- a/code-reviews/NotificationService/findings.md
+++ b/code-reviews/NotificationService/findings.md
@@ -702,7 +702,7 @@ Mark the NS-001 resolution note in this file as **superseded by NS-019** with a
|--|--|
| Severity | High |
| Category | Correctness & logic bugs |
-| Status | Open |
+| Status | Resolved |
| Location | `src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs:76-79` |
**Description**
diff --git a/code-reviews/README.md b/code-reviews/README.md
index 60db5aa5..b8a3989c 100644
--- a/code-reviews/README.md
+++ b/code-reviews/README.md
@@ -40,10 +40,10 @@ module file and counted in **Total**.
| Severity | Open findings |
|----------|---------------|
| Critical | 0 |
-| High | 16 |
-| Medium | 58 |
+| High | 13 |
+| Medium | 56 |
| Low | 90 |
-| **Total** | **164** |
+| **Total** | **159** |
## Module Status
@@ -63,8 +63,8 @@ module file and counted in **Total**.
| [Host](Host/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/5 | 7 | 22 |
| [InboundAPI](InboundAPI/findings.md) | 2026-05-28 | `1eb6e97` | 0/1/3/4 | 8 | 25 |
| [ManagementService](ManagementService/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/2 | 4 | 23 |
-| [NotificationOutbox](NotificationOutbox/findings.md) | 2026-05-28 | `1eb6e97` | 0/2/5/3 | 10 | 10 |
-| [NotificationService](NotificationService/findings.md) | 2026-05-28 | `1eb6e97` | 0/2/2/3 | 7 | 25 |
+| [NotificationOutbox](NotificationOutbox/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/3/3 | 6 | 10 |
+| [NotificationService](NotificationService/findings.md) | 2026-05-28 | `1eb6e97` | 0/1/2/3 | 6 | 25 |
| [Security](Security/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/0/2 | 2 | 21 |
| [SiteCallAudit](SiteCallAudit/findings.md) | 2026-05-28 | `1eb6e97` | 0/0/2/4 | 6 | 6 |
| [SiteEventLogging](SiteEventLogging/findings.md) | 2026-05-28 | `1eb6e97` | 0/1/2/6 | 9 | 23 |
@@ -84,7 +84,7 @@ description, location, recommendation — lives in the module's `findings.md`.
_None open._
-### High (16)
+### High (13)
| ID | Module | Title |
|----|--------|-------|
@@ -94,10 +94,7 @@ _None open._
| DeploymentManager-018 | [DeploymentManager](DeploymentManager/findings.md) | Reconciliation force-sets `Enabled`, overwriting an intentional `Disabled` after central failover |
| ExternalSystemGateway-018 | [ExternalSystemGateway](ExternalSystemGateway/findings.md) | `DeliverBufferedAsync` lets `JsonException` propagate, turning a corrupt buffered row into a permanent retry-forever poison message |
| InboundAPI-022 | [InboundAPI](InboundAPI/findings.md) | `IActiveNodeGate` has no production registration in Host — standby-node gating is silently disabled in production |
-| NotificationOutbox-001 | [NotificationOutbox](NotificationOutbox/findings.md) | `EmailNotificationDeliveryAdapter` inherits the OAuth2 empty-user SASL bug (NS-021) on the M365 send path |
-| NotificationOutbox-002 | [NotificationOutbox](NotificationOutbox/findings.md) | Dispatcher parks on first transient failure when `SmtpConfiguration.MaxRetries == 0` |
| NotificationService-019 | [NotificationService](NotificationService/findings.md) | `NotificationDeliveryService` and `INotificationDeliveryService` are orphaned by the central-only redesign |
-| NotificationService-021 | [NotificationService](NotificationService/findings.md) | OAuth2 SASL constructed with empty user identifier; M365 SMTP will reject the auth handshake |
| SiteEventLogging-016 | [SiteEventLogging](SiteEventLogging/findings.md) | `From`/`To` filters compare non-normalised ISO 8601 strings against UTC-stored timestamps |
| StoreAndForward-018 | [StoreAndForward](StoreAndForward/findings.md) | Notification corrupt-payload parks the buffered message, contradicting the "notifications do not park" design invariant |
| TemplateEngine-017 | [TemplateEngine](TemplateEngine/findings.md) | Revision hash and diff both ignore `Description` and `Connections`, defeating staleness detection for real deployment changes |
@@ -105,7 +102,7 @@ _None open._
| Transport-002 | [Transport](Transport/findings.md) | ExternalSystem Overwrite never syncs methods |
| Transport-003 | [Transport](Transport/findings.md) | Unlock lockout is enforced only client-side; server session is never marked Locked |
-### Medium (58)
+### Medium (56)
| ID | Module | Title |
|----|--------|-------|
@@ -140,8 +137,6 @@ _None open._
| InboundAPI-025 | [InboundAPI](InboundAPI/findings.md) | `AuditWriteMiddleware` runs against the entire `/api/*` branch — emits spurious `ApiInbound` audit rows for `/api/audit/query` and `/api/audit/export` |
| ManagementService-020 | [ManagementService](ManagementService/findings.md) | UpdateSmtpConfig returns and audits the SMTP Credentials field verbatim |
| ManagementService-021 | [ManagementService](ManagementService/findings.md) | Transport bundle handlers have zero test coverage |
-| NotificationOutbox-003 | [NotificationOutbox](NotificationOutbox/findings.md) | Dispatcher does not propagate a `CancellationToken` into delivery; in-flight SMTP sends cannot be cancelled on shutdown |
-| NotificationOutbox-004 | [NotificationOutbox](NotificationOutbox/findings.md) | `EmitAttemptAudit`/`EmitTerminalAudit` fire-and-forget pattern can outlive the per-sweep DI scope |
| NotificationOutbox-005 | [NotificationOutbox](NotificationOutbox/findings.md) | Ingest persistence inherits the CD-015 check-then-act race; under contention the second writer throws and the site retries |
| NotificationOutbox-007 | [NotificationOutbox](NotificationOutbox/findings.md) | `NotificationOutboxOptions.DispatchBatchSize`, `DeliveredKpiWindow`, and `PurgeInterval` are not in the design document |
| NotificationOutbox-010 | [NotificationOutbox](NotificationOutbox/findings.md) | Comment claims `PipeTo` is not used "because the writer never throws"; the surrounding try/catch is dead-letter for the documented failure mode |
diff --git a/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs b/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs
index 7b359a99..ef351b76 100644
--- a/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs
+++ b/src/ScadaLink.NotificationOutbox/Delivery/EmailNotificationDeliveryAdapter.cs
@@ -188,7 +188,13 @@ public sealed class EmailNotificationDeliveryAdapter : INotificationDeliveryAdap
credentials = await _tokenService.GetTokenAsync(credentials, cancellationToken);
}
- await smtp.AuthenticateAsync(config.AuthType, credentials, cancellationToken);
+ // NO-001/NS-021: OAuth2 XOAUTH2 requires the user identity (FromAddress)
+ // to be sent alongside the access token; an empty user is rejected by M365.
+ await smtp.AuthenticateAsync(
+ config.AuthType,
+ credentials,
+ oauth2UserName: config.FromAddress,
+ cancellationToken: cancellationToken);
await smtp.SendAsync(config.FromAddress, bccAddresses, subject, body, cancellationToken);
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
diff --git a/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs b/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs
index 0ffbcb87..be2429c5 100644
--- a/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs
+++ b/src/ScadaLink.NotificationOutbox/NotificationOutboxActor.cs
@@ -49,6 +49,14 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
///
private bool _dispatching;
+ ///
+ /// NO-003: lifecycle-scoped cancellation source, cancelled in so
+ /// any in-flight dispatch sweep — including a long-running SMTP send via the channel
+ /// adapter — observes shutdown promptly instead of blocking CoordinatedShutdown
+ /// for the full SMTP connect/auth/send timeout per in-progress notification.
+ ///
+ private CancellationTokenSource? _shutdownCts;
+
/// Akka timer scheduler, assigned by the actor system via .
public ITimerScheduler Timers { get; set; } = null!;
@@ -91,12 +99,35 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
protected override void PreStart()
{
base.PreStart();
+ // NO-003: shutdown token is alive for the lifetime of the actor; cancelled in PostStop
+ // so dispatcher sweeps and the SMTP send beneath them observe coordinated shutdown.
+ _shutdownCts = new CancellationTokenSource();
Timers.StartPeriodicTimer(
DispatchTimerKey, InternalMessages.DispatchTick.Instance, _options.DispatchInterval);
Timers.StartPeriodicTimer(
PurgeTimerKey, InternalMessages.PurgeTick.Instance, _options.PurgeInterval);
}
+ ///
+ protected override void PostStop()
+ {
+ // NO-003: cancel the shutdown token first so the in-flight sweep's adapter call
+ // observes cancellation, then dispose the source. Order matters — disposing first
+ // would race with an in-flight sweep registering with the token.
+ try
+ {
+ _shutdownCts?.Cancel();
+ }
+ catch (ObjectDisposedException)
+ {
+ // Already disposed under a restarted-actor race; nothing to do.
+ }
+
+ _shutdownCts?.Dispose();
+ _shutdownCts = null;
+ base.PostStop();
+ }
+
///
/// Maps an inbound onto a ,
/// persists it idempotently, and pipes the outcome back to so the
@@ -167,11 +198,15 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
_dispatching = true;
var now = DateTimeOffset.UtcNow;
+ // NO-003: hand the lifecycle token to the sweep so cancellation reaches the
+ // adapter. A null token (very early start / post-stop race) is replaced with
+ // None — no behaviour change vs. the pre-NO-003 dispatcher.
+ var cancellationToken = _shutdownCts?.Token ?? CancellationToken.None;
// RunDispatchPass swallows its own errors, but the failure projection is kept as a
// belt-and-braces guard so even a faulted task still lowers the in-flight guard —
// otherwise the dispatcher would wedge permanently.
- RunDispatchPass(now).PipeTo(
+ RunDispatchPass(now, cancellationToken).PipeTo(
Self,
success: () => InternalMessages.DispatchComplete.Instance,
failure: ex =>
@@ -194,7 +229,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
/// directly, so a long-lived adapter reference on
/// this singleton actor would be a captive dependency over a disposed DbContext.
///
- private async Task RunDispatchPass(DateTimeOffset now)
+ private async Task RunDispatchPass(DateTimeOffset now, CancellationToken cancellationToken)
{
try
{
@@ -206,7 +241,13 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
IReadOnlyList due;
try
{
- due = await outboxRepository.GetDueAsync(now, _options.DispatchBatchSize);
+ due = await outboxRepository.GetDueAsync(now, _options.DispatchBatchSize, cancellationToken);
+ }
+ catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
+ {
+ // NO-003: shutdown cancelled the claim; row stays Pending and the next active
+ // node picks it up. Not a failure.
+ return;
}
catch (Exception ex)
{
@@ -223,9 +264,23 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
foreach (var notification in due)
{
+ // NO-003: between deliveries, observe shutdown so we don't kick off a fresh
+ // SMTP send when the actor is already tearing down.
+ if (cancellationToken.IsCancellationRequested)
+ {
+ return;
+ }
+
try
{
- await DeliverOneAsync(notification, now, maxRetries, retryDelay, outboxRepository, adapters);
+ await DeliverOneAsync(
+ notification, now, maxRetries, retryDelay, outboxRepository, adapters, cancellationToken);
+ }
+ catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
+ {
+ // NO-003: in-flight delivery interrupted by shutdown. Row remains in its
+ // pre-attempt state; next active sweep retries.
+ return;
}
catch (Exception ex)
{
@@ -248,14 +303,48 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
/// configuration exists, falls back to a conservative default — delivery itself will
/// permanently fail in that case, so the policy only acts as a guard.
///
+ ///
+ /// NO-002: a non-positive (zero or negative)
+ /// would otherwise satisfy RetryCount >= maxRetries on the very first transient
+ /// failure and park the row without a single retry — silently halving the outbox's
+ /// delivery guarantees. The same applies to a non-positive RetryDelay, which
+ /// would burn-loop the dispatcher. Both values are clamped to the fallback constants
+ /// with a Warning so an operator can spot the misconfiguration in logs.
+ ///
private async Task<(int MaxRetries, TimeSpan RetryDelay)> ResolveRetryPolicyAsync(
INotificationRepository notificationRepository)
{
var configurations = await notificationRepository.GetAllSmtpConfigurationsAsync();
var configuration = configurations.Count > 0 ? configurations[0] : null;
- return configuration is null
- ? (FallbackMaxRetries, FallbackRetryDelay)
- : (configuration.MaxRetries, configuration.RetryDelay);
+ if (configuration is null)
+ {
+ return (FallbackMaxRetries, FallbackRetryDelay);
+ }
+
+ var maxRetries = configuration.MaxRetries;
+ var retryDelay = configuration.RetryDelay;
+
+ if (maxRetries <= 0)
+ {
+ _logger.LogWarning(
+ "SmtpConfiguration.MaxRetries={ConfiguredMaxRetries} is non-positive; " +
+ "clamping to FallbackMaxRetries={FallbackMaxRetries} so transient failures " +
+ "actually retry before parking.",
+ maxRetries, FallbackMaxRetries);
+ maxRetries = FallbackMaxRetries;
+ }
+
+ if (retryDelay <= TimeSpan.Zero)
+ {
+ _logger.LogWarning(
+ "SmtpConfiguration.RetryDelay={ConfiguredRetryDelay} is non-positive; " +
+ "clamping to FallbackRetryDelay={FallbackRetryDelay} so the dispatcher does " +
+ "not burn-loop on transient failures.",
+ retryDelay, FallbackRetryDelay);
+ retryDelay = FallbackRetryDelay;
+ }
+
+ return (maxRetries, retryDelay);
}
///
@@ -307,7 +396,8 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
int maxRetries,
TimeSpan retryDelay,
INotificationOutboxRepository outboxRepository,
- IReadOnlyDictionary adapters)
+ IReadOnlyDictionary adapters,
+ CancellationToken cancellationToken)
{
if (!adapters.TryGetValue(notification.Type, out var adapter))
{
@@ -318,20 +408,22 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
notification.Status = NotificationStatus.Parked;
notification.LastError = missingAdapterError;
notification.LastAttemptAt = now;
- await outboxRepository.UpdateAsync(notification);
- EmitAttemptAudit(
+ await outboxRepository.UpdateAsync(notification, cancellationToken);
+ await EmitAttemptAuditAsync(
notification,
now,
durationMs: 0,
errorMessage: missingAdapterError);
- EmitTerminalAudit(notification, now, errorMessage: missingAdapterError);
+ await EmitTerminalAuditAsync(notification, now, errorMessage: missingAdapterError);
return;
}
// Measure the attempt duration around the adapter call so the
// Attempted row carries it for KPI use.
+ // NO-003: pass the lifecycle token so a coordinated shutdown promptly cancels the
+ // in-flight SMTP send instead of waiting for the SMTP connect/auth/send timeout.
var attemptStart = DateTimeOffset.UtcNow;
- var outcome = await adapter.DeliverAsync(notification);
+ var outcome = await adapter.DeliverAsync(notification, cancellationToken);
var durationMs = (int)Math.Min(
int.MaxValue, Math.Max(0, (DateTimeOffset.UtcNow - attemptStart).TotalMilliseconds));
@@ -367,13 +459,13 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
break;
}
- await outboxRepository.UpdateAsync(notification);
+ await outboxRepository.UpdateAsync(notification, cancellationToken);
// Emit the per-attempt Attempted row exactly once regardless of the
// outcome (B2). The error message comes from the outcome, not from
// notification.LastError, so a success row is null and a transient
// row carries the SMTP failure reason verbatim.
- EmitAttemptAudit(
+ await EmitAttemptAuditAsync(
notification,
now,
durationMs: durationMs,
@@ -386,7 +478,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
// reason so downstream consumers can link Attempted+Parked rows.
if (IsTerminal(notification.Status))
{
- EmitTerminalAudit(
+ await EmitTerminalAuditAsync(
notification,
now,
errorMessage: outcome.Result == DeliveryResult.Success ? null : outcome.Error);
@@ -412,9 +504,15 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
/// /
/// audit row carrying the terminal status (Delivered, Parked, or
/// Discarded) of . Wrapped in try/catch
- /// for the same defensive reason as .
+ /// for the same defensive reason as .
///
- private void EmitTerminalAudit(
+ ///
+ /// NO-004: is awaited inside the
+ /// try/catch so the catch is actually reachable for writer faults and so the
+ /// audit task does not outlive the per-sweep DI scope. The audit-write-never-
+ /// affects-delivery invariant is preserved by the surrounding catch.
+ ///
+ private async Task EmitTerminalAuditAsync(
Notification notification,
DateTimeOffset now,
string? errorMessage)
@@ -423,7 +521,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
{
var terminalStatus = MapNotificationStatusToAuditStatus(notification.Status);
var evt = BuildNotifyDeliverEvent(notification, now, terminalStatus, errorMessage);
- _ = _auditWriter.WriteAsync(evt);
+ await _auditWriter.WriteAsync(evt);
}
catch (Exception ex)
{
@@ -457,10 +555,17 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
/// /
/// audit row with . Wrapped in
/// try/catch so an audit-write failure never propagates back into the
- /// dispatcher loop — the already
- /// swallows, this is defensive (alog.md §13).
+ /// dispatcher loop (alog.md §13).
///
- private void EmitAttemptAudit(
+ ///
+ /// NO-004: previously the writer task was discarded (_ = WriteAsync(...)),
+ /// which made the surrounding catch unreachable for any fault originating in the
+ /// awaited body of WriteAsync and let the audit task outlive the dispatcher's
+ /// per-sweep DI scope. The task is now awaited inside the try/catch: the
+ /// audit-write-never-affects-delivery invariant is preserved by the catch, and
+ /// writer faults reach the operator log instead of being silently lost.
+ ///
+ private async Task EmitAttemptAuditAsync(
Notification notification,
DateTimeOffset now,
int durationMs,
@@ -470,10 +575,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
{
var evt = BuildNotifyDeliverEvent(notification, now, AuditStatus.Attempted, errorMessage)
with { DurationMs = durationMs };
- // Fire-and-forget — we do NOT await: the dispatcher loop must not
- // be blocked by audit IO, and the writer swallows its own faults.
- // PipeTo is not used because the writer never throws.
- _ = _auditWriter.WriteAsync(evt);
+ await _auditWriter.WriteAsync(evt);
}
catch (Exception ex)
{
@@ -845,7 +947,7 @@ public class NotificationOutboxActor : ReceiveActor, IWithTimers
// Emit a Discarded NotifyDeliver row to match the dispatcher's
// Delivered/Parked emissions; the row carries no error message because
// the discard is an operator-driven cancellation, not a delivery error.
- EmitTerminalAudit(notification, DateTimeOffset.UtcNow, errorMessage: null);
+ await EmitTerminalAuditAsync(notification, DateTimeOffset.UtcNow, errorMessage: null);
return new DiscardNotificationResponse(request.CorrelationId, Success: true, ErrorMessage: null);
}
diff --git a/src/ScadaLink.NotificationService/ISmtpClientWrapper.cs b/src/ScadaLink.NotificationService/ISmtpClientWrapper.cs
index acf0d3c0..14cff028 100644
--- a/src/ScadaLink.NotificationService/ISmtpClientWrapper.cs
+++ b/src/ScadaLink.NotificationService/ISmtpClientWrapper.cs
@@ -29,8 +29,19 @@ public interface ISmtpClientWrapper
/// Authenticates to the SMTP server using the specified auth type and credentials.
/// Authentication mechanism (e.g. PLAIN, XOAUTH2).
/// Credential string appropriate for the auth type, or null.
+ ///
+ /// NS-021: mailbox identity the OAuth2 access token was issued for (typically
+ /// the SMTP FromAddress). Used as the user= field of the XOAUTH2
+ /// SASL initial response — M365 rejects an empty/mismatched user with
+ /// 535 5.7.3. Ignored for non-OAuth2 auth types; default null for
+ /// callers that do not authenticate with OAuth2.
+ ///
/// Cancellation token.
- Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default);
+ Task AuthenticateAsync(
+ string authType,
+ string? credentials,
+ string? oauth2UserName = null,
+ CancellationToken cancellationToken = default);
/// Sends an email message with the specified recipients via BCC.
/// Sender address.
/// Recipients delivered as BCC.
diff --git a/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs b/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs
index eaf1e3c3..74aa782b 100644
--- a/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs
+++ b/src/ScadaLink.NotificationService/MailKitSmtpClientWrapper.cs
@@ -45,7 +45,11 @@ public class MailKitSmtpClientWrapper : ISmtpClientWrapper, IDisposable
}
///
- public async Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
+ public async Task AuthenticateAsync(
+ string authType,
+ string? credentials,
+ string? oauth2UserName = null,
+ CancellationToken cancellationToken = default)
{
// NS-016: missing/unparseable credentials and an unrecognised auth type used
// to make this method silently return and the connection then sent mail
@@ -74,8 +78,21 @@ public class MailKitSmtpClientWrapper : ISmtpClientWrapper, IDisposable
break;
case "oauth2":
- // OAuth2 token is passed directly as credentials (pre-fetched by token service)
- var oauth2 = new SaslMechanismOAuth2("", credentials);
+ // NS-021: the XOAUTH2 SASL initial response embeds a `user=`
+ // field that M365 (and most OAuth2-enabled SMTP relays) require to
+ // match the mailbox identity the token was issued for. An empty user
+ // gets rejected with `535 5.7.3`. The token (credentials) is
+ // pre-fetched by OAuth2TokenService; the user identity is the SMTP
+ // From address, threaded through `oauth2UserName`.
+ if (string.IsNullOrEmpty(oauth2UserName))
+ {
+ throw new SmtpPermanentException(
+ "OAuth2 SMTP auth requires a non-empty user identity " +
+ "(mailbox the access token was issued for); " +
+ "the caller did not pass an oauth2UserName.");
+ }
+
+ var oauth2 = new SaslMechanismOAuth2(oauth2UserName, credentials);
await _client.AuthenticateAsync(oauth2, cancellationToken);
break;
diff --git a/src/ScadaLink.NotificationService/NotificationDeliveryService.cs b/src/ScadaLink.NotificationService/NotificationDeliveryService.cs
index 3d177f0a..f11f881f 100644
--- a/src/ScadaLink.NotificationService/NotificationDeliveryService.cs
+++ b/src/ScadaLink.NotificationService/NotificationDeliveryService.cs
@@ -397,7 +397,13 @@ public class NotificationDeliveryService : INotificationDeliveryService, IDispos
credentials = token;
}
- await smtp.AuthenticateAsync(config.AuthType, credentials, cancellationToken);
+ // NS-021: OAuth2 XOAUTH2 requires the user identity (FromAddress) to be
+ // sent alongside the access token; an empty user is rejected by M365.
+ await smtp.AuthenticateAsync(
+ config.AuthType,
+ credentials,
+ oauth2UserName: config.FromAddress,
+ cancellationToken: cancellationToken);
var bccAddresses = recipients.Select(r => r.EmailAddress).ToList();
await smtp.SendAsync(config.FromAddress, bccAddresses, subject, body, cancellationToken);
diff --git a/tests/ScadaLink.AuditLog.Tests/Integration/AuditWriteFailureSafetyTests.cs b/tests/ScadaLink.AuditLog.Tests/Integration/AuditWriteFailureSafetyTests.cs
index 9399fb78..7f53300d 100644
--- a/tests/ScadaLink.AuditLog.Tests/Integration/AuditWriteFailureSafetyTests.cs
+++ b/tests/ScadaLink.AuditLog.Tests/Integration/AuditWriteFailureSafetyTests.cs
@@ -365,11 +365,13 @@ public class AuditWriteFailureSafetyTests : TestKit, IClassFixture
/// Inserts a single SMTP configuration row so the dispatcher's
/// ResolveRetryPolicyAsync sees a real (maxRetries, retryDelay)
- /// pair rather than the conservative fallback. RetryDelay of 0 means a
- /// transient outcome's NextAttemptAt is immediately due — useful so
- /// the SECOND DispatchTick re-claims the row without waiting.
+ /// pair rather than the conservative fallback. A tiny positive RetryDelay
+ /// means a transient outcome's NextAttemptAt is immediately due —
+ /// useful so the SECOND DispatchTick re-claims the row without waiting.
+ /// NO-002: the dispatcher now clamps a non-positive RetryDelay to the
+ /// 1-minute fallback to avoid burn-looping on transient failures, so this
+ /// must be a strictly positive value (1 ms is fine for tests).
///
private async Task SeedSmtpConfigAsync(int maxRetries = 5)
{
@@ -141,7 +144,7 @@ public class NotifyDispatcherAuditTrailTests : TestKit, IClassFixture(), Arg.Any(), Arg.Any()));
}
+ [Fact]
+ public void TransientFailure_WithZeroMaxRetries_RetriesUsingFallback_DoesNotParkImmediately()
+ {
+ // NO-002: SmtpConfiguration.MaxRetries=0 used to satisfy 1 >= 0 on the very first
+ // transient failure and park the row without a single retry. ResolveRetryPolicyAsync
+ // now clamps non-positive MaxRetries to the FallbackMaxRetries (10) so transient
+ // failures actually retry before parking.
+ SetupSmtpRetryPolicy(maxRetries: 0, retryDelay: TimeSpan.FromMinutes(1));
+ var notification = MakeNotification(retryCount: 0);
+ _outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any())
+ .Returns(new[] { notification });
+ var adapter = new StubAdapter(() => DeliveryOutcome.Transient("smtp timeout"));
+ var actor = CreateActor([adapter]);
+
+ actor.Tell(InternalMessages.DispatchTick.Instance);
+
+ AwaitAssert(() =>
+ {
+ _outboxRepository.Received(1).UpdateAsync(
+ Arg.Is(n =>
+ n.Status == NotificationStatus.Retrying &&
+ n.RetryCount == 1 &&
+ n.NextAttemptAt != null &&
+ n.LastError == "smtp timeout"),
+ Arg.Any());
+ });
+ }
+
+ [Fact]
+ public void TransientFailure_WithNegativeMaxRetries_RetriesUsingFallback_DoesNotParkImmediately()
+ {
+ // NO-002: a negative MaxRetries reaches ResolveRetryPolicyAsync just as -1 — same
+ // park-immediately bug. Clamp to FallbackMaxRetries.
+ SetupSmtpRetryPolicy(maxRetries: -1, retryDelay: TimeSpan.FromMinutes(1));
+ var notification = MakeNotification(retryCount: 0);
+ _outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any())
+ .Returns(new[] { notification });
+ var adapter = new StubAdapter(() => DeliveryOutcome.Transient("smtp timeout"));
+ var actor = CreateActor([adapter]);
+
+ actor.Tell(InternalMessages.DispatchTick.Instance);
+
+ AwaitAssert(() =>
+ {
+ _outboxRepository.Received(1).UpdateAsync(
+ Arg.Is(n =>
+ n.Status == NotificationStatus.Retrying &&
+ n.RetryCount == 1 &&
+ n.NextAttemptAt != null &&
+ n.LastError == "smtp timeout"),
+ Arg.Any());
+ });
+ }
+
+ [Fact]
+ public void TransientFailure_WithNonPositiveRetryDelay_UsesFallbackDelay_NotZero()
+ {
+ // NO-002: a non-positive RetryDelay would burn-loop the dispatcher because
+ // NextAttemptAt would equal now. Clamp to FallbackRetryDelay (1 min) so the
+ // schedule actually advances.
+ SetupSmtpRetryPolicy(maxRetries: 5, retryDelay: TimeSpan.Zero);
+ var before = DateTimeOffset.UtcNow;
+ var notification = MakeNotification(retryCount: 0);
+ _outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any())
+ .Returns(new[] { notification });
+ var adapter = new StubAdapter(() => DeliveryOutcome.Transient("smtp timeout"));
+ var actor = CreateActor([adapter]);
+
+ actor.Tell(InternalMessages.DispatchTick.Instance);
+
+ AwaitAssert(() =>
+ {
+ _outboxRepository.Received(1).UpdateAsync(
+ Arg.Is(n =>
+ n.Status == NotificationStatus.Retrying &&
+ n.NextAttemptAt != null &&
+ n.NextAttemptAt > before + TimeSpan.FromSeconds(30)),
+ Arg.Any());
+ });
+ }
+
+ [Fact]
+ public void PostStop_CancelsInFlightDelivery_LeavesRowNonTerminal()
+ {
+ // NO-003: the dispatcher used to drop the CancellationToken on its way into
+ // the channel adapter, so a coordinated shutdown had to wait the full SMTP
+ // connect/auth/send timeout per in-flight notification before the sweep
+ // finished. The actor now passes a lifecycle-scoped token; cancelling it on
+ // PostStop must abort the in-flight Task.Delay (standing in for an SMTP
+ // send) and the row must NOT be updated to a terminal state — the next
+ // active node picks it back up.
+ SetupSmtpRetryPolicy(maxRetries: 5, retryDelay: TimeSpan.FromMinutes(1));
+ var notification = MakeNotification();
+ _outboxRepository.GetDueAsync(Arg.Any(), Arg.Any(), Arg.Any())
+ .Returns(new[] { notification });
+ // Long delay simulates a slow SMTP send; the test triggers PostStop before
+ // the delay would naturally elapse, so the only way the delay completes is
+ // if the token wired through.
+ var adapter = new StubAdapter(
+ () => DeliveryOutcome.Success("ops@example.com"),
+ delay: TimeSpan.FromSeconds(30));
+ var actor = CreateActor([adapter]);
+
+ actor.Tell(InternalMessages.DispatchTick.Instance);
+ // Wait until the adapter is actually in flight before stopping.
+ AwaitAssert(() => Assert.Equal(1, adapter.CallCount));
+
+ var start = DateTimeOffset.UtcNow;
+ Sys.Stop(actor);
+
+ // The sweep should observe cancellation promptly (well under the 30s delay).
+ AwaitAssert(
+ () =>
+ {
+ // No UpdateAsync was issued — the row is untouched and will be re-claimed
+ // by the next active node.
+ _outboxRepository.DidNotReceive().UpdateAsync(
+ Arg.Any(), Arg.Any());
+ },
+ duration: TimeSpan.FromSeconds(5));
+
+ Assert.True(DateTimeOffset.UtcNow - start < TimeSpan.FromSeconds(5),
+ "PostStop did not cancel the in-flight delivery promptly.");
+ }
+
[Fact]
public void OverlappingTicks_WhileDispatchInFlight_DoNotClaimConcurrently()
{
diff --git a/tests/ScadaLink.NotificationService.Tests/MailKitSmtpClientWrapperTests.cs b/tests/ScadaLink.NotificationService.Tests/MailKitSmtpClientWrapperTests.cs
index 0ee92084..df6e3a1a 100644
--- a/tests/ScadaLink.NotificationService.Tests/MailKitSmtpClientWrapperTests.cs
+++ b/tests/ScadaLink.NotificationService.Tests/MailKitSmtpClientWrapperTests.cs
@@ -1,3 +1,6 @@
+using System.Text;
+using MailKit.Security;
+
namespace ScadaLink.NotificationService.Tests;
///
@@ -5,6 +8,8 @@ namespace ScadaLink.NotificationService.Tests;
/// silently skip authentication for a misconfigured SMTP config — a missing
/// credential, an unrecognised auth type, or an unparseable Basic credential
/// must be a hard, surfaced error rather than an unauthenticated send.
+/// NS-021: the OAuth2 (XOAUTH2) branch must carry a non-empty user identity
+/// (the SMTP From address) — an empty user is rejected by M365 with `535 5.7.3`.
///
public class MailKitSmtpClientWrapperTests
{
@@ -42,4 +47,35 @@ public class MailKitSmtpClientWrapperTests
await Assert.ThrowsAsync(
() => wrapper.AuthenticateAsync("basic", "nocolon"));
}
+
+ [Fact]
+ public async Task Authenticate_OAuth2WithoutUserName_Throws()
+ {
+ // NS-021: passing an OAuth2 access token but no user identity (FromAddress)
+ // used to construct `new SaslMechanismOAuth2("", credentials)`, which M365
+ // rejects with `535 5.7.3`. The wrapper now refuses upfront so the caller
+ // sees a clean configuration error rather than a confusing server reject.
+ var wrapper = new MailKitSmtpClientWrapper();
+
+ await Assert.ThrowsAsync(
+ () => wrapper.AuthenticateAsync("oauth2", "access-token", oauth2UserName: null));
+ await Assert.ThrowsAsync(
+ () => wrapper.AuthenticateAsync("oauth2", "access-token", oauth2UserName: ""));
+ }
+
+ [Fact]
+ public void XOAuth2InitialResponse_CarriesUserAndBearer()
+ {
+ // NS-021 regression guard: independent of the wrapper, prove that MailKit's
+ // SaslMechanismOAuth2 puts `user=` into the initial-response bytes
+ // — i.e. wiring the wrapper to pass `FromAddress` is sufficient to fix the
+ // M365 handshake. If MailKit ever changes the framing this test will catch it.
+ var sasl = new SaslMechanismOAuth2("noreply@example.com", "tok-xyz");
+
+ var initial = sasl.Challenge(string.Empty);
+ var asString = Encoding.UTF8.GetString(Convert.FromBase64String(initial));
+
+ Assert.Contains("user=noreply@example.com", asString);
+ Assert.Contains("auth=Bearer tok-xyz", asString);
+ }
}
diff --git a/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs b/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs
index 7312089c..8a743629 100644
--- a/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs
+++ b/tests/ScadaLink.NotificationService.Tests/NotificationDeliveryServiceTests.cs
@@ -115,7 +115,8 @@ public class NotificationDeliveryServiceTests
await _smtpClient.Received().ConnectAsync(
"smtp.example.com", 587, SmtpTlsMode.StartTls, Arg.Any(), Arg.Any());
- await _smtpClient.Received().AuthenticateAsync("basic", "user:pass", Arg.Any());
+ await _smtpClient.Received().AuthenticateAsync(
+ "basic", "user:pass", Arg.Any(), Arg.Any());
await _smtpClient.Received().SendAsync(
"noreply@example.com",
Arg.Is>(bcc => bcc.Count() == 2),
@@ -370,7 +371,7 @@ public class NotificationDeliveryServiceTests
public bool Disposed { get; private set; }
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
- public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
+ public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
@@ -435,7 +436,7 @@ public class NotificationDeliveryServiceTests
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
- public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
+ public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
=> _failOnAuthenticate != null ? Task.FromException(_failOnAuthenticate()) : Task.CompletedTask;
public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
@@ -496,7 +497,7 @@ public class NotificationDeliveryServiceTests
ConnectionTimeoutSeconds = connectionTimeoutSeconds;
return Task.CompletedTask;
}
- public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
+ public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
@@ -643,7 +644,7 @@ public class NotificationDeliveryServiceTests
public BlockingSmtpClient(Func onSend) => _onSend = onSend;
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
- public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
+ public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
=> _onSend();
@@ -721,17 +722,19 @@ public class NotificationDeliveryServiceTests
// ── NotificationService-012: OAuth2 delivery path coverage ──
- /// An SMTP wrapper that records the auth type and credentials it received.
+ /// An SMTP wrapper that records the auth type, credentials, and OAuth2 user identity it received.
private sealed class RecordingAuthClient : ISmtpClientWrapper
{
public string? AuthType { get; private set; }
public string? Credentials { get; private set; }
+ public string? OAuth2UserName { get; private set; }
public Task ConnectAsync(string host, int port, SmtpTlsMode tlsMode, int connectionTimeoutSeconds, CancellationToken cancellationToken = default)
=> Task.CompletedTask;
- public Task AuthenticateAsync(string authType, string? credentials, CancellationToken cancellationToken = default)
+ public Task AuthenticateAsync(string authType, string? credentials, string? oauth2UserName = null, CancellationToken cancellationToken = default)
{
AuthType = authType;
Credentials = credentials;
+ OAuth2UserName = oauth2UserName;
return Task.CompletedTask;
}
public Task SendAsync(string from, IEnumerable bccRecipients, string subject, string body, CancellationToken cancellationToken = default)
@@ -790,6 +793,9 @@ public class NotificationDeliveryServiceTests
Assert.True(result.Success);
Assert.Equal("oauth2", recording.AuthType);
Assert.Equal("oauth2-access-token-xyz", recording.Credentials);
+ // NS-021: OAuth2 SASL must carry the FromAddress as the user identity so
+ // the M365 XOAUTH2 handshake's `user=` field matches the token's mailbox.
+ Assert.Equal("noreply@example.com", recording.OAuth2UserName);
}
// ── NotificationService-015: unclassified exceptions must not escape SendAsync ──