fix(notification-service): resolve NotificationService-014..018 — classify OAuth2 failures, fail on bad auth config, wire NotificationOptions fallback, disposable concurrency limiter
This commit is contained in:
@@ -3,6 +3,7 @@ using System.Text.Json;
|
||||
using MailKit;
|
||||
using MailKit.Net.Smtp;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Options;
|
||||
using MimeKit;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
@@ -18,26 +19,31 @@ namespace ScadaLink.NotificationService;
|
||||
/// Transient: connection refused, timeout, SMTP 4xx → hand to S&F.
|
||||
/// Permanent: SMTP 5xx → returned to script.
|
||||
/// </summary>
|
||||
public class NotificationDeliveryService : INotificationDeliveryService
|
||||
public class NotificationDeliveryService : INotificationDeliveryService, IDisposable
|
||||
{
|
||||
private readonly INotificationRepository _repository;
|
||||
private readonly Func<ISmtpClientWrapper> _smtpClientFactory;
|
||||
private readonly OAuth2TokenService? _tokenService;
|
||||
private readonly StoreAndForwardService? _storeAndForward;
|
||||
private readonly ILogger<NotificationDeliveryService> _logger;
|
||||
private readonly NotificationOptions _options;
|
||||
|
||||
/// <summary>
/// Creates the delivery service and captures its collaborators.
/// </summary>
/// <param name="repository">Repository used to look up notification data (e.g. lists by name).</param>
/// <param name="smtpClientFactory">Factory invoked to obtain an <see cref="ISmtpClientWrapper"/>.</param>
/// <param name="logger">Logger for delivery diagnostics.</param>
/// <param name="tokenService">Optional OAuth2 token service; may be <c>null</c>.</param>
/// <param name="storeAndForward">Optional store-and-forward service; may be <c>null</c>.</param>
/// <param name="options">
/// Optional fallback settings. When <c>null</c> (or omitted) a default-constructed
/// <see cref="NotificationOptions"/> is used, so existing callers that do not pass
/// options keep working.
/// </param>
public NotificationDeliveryService(
    INotificationRepository repository,
    Func<ISmtpClientWrapper> smtpClientFactory,
    ILogger<NotificationDeliveryService> logger,
    OAuth2TokenService? tokenService = null,
    StoreAndForwardService? storeAndForward = null,
    IOptions<NotificationOptions>? options = null)
{
    _repository = repository;
    _smtpClientFactory = smtpClientFactory;
    _logger = logger;
    _tokenService = tokenService;
    _storeAndForward = storeAndForward;

    // NS-017: NotificationOptions supplies the documented fallback values used
    // when a deployed SmtpConfiguration row leaves a field unset (non-positive).
    _options = options?.Value ?? new NotificationOptions();
}
|
||||
|
||||
/// <summary>
|
||||
@@ -50,6 +56,8 @@ public class NotificationDeliveryService : INotificationDeliveryService
|
||||
string? originInstanceName = null,
|
||||
CancellationToken cancellationToken = default)
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
|
||||
var list = await _repository.GetListByNameAsync(listName, cancellationToken);
|
||||
if (list == null)
|
||||
{
|
||||
@@ -146,6 +154,24 @@ public class NotificationDeliveryService : INotificationDeliveryService
|
||||
|
||||
return new NotificationResult(true, null, WasBuffered: true);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// NS-015: a failure that ClassifySmtpError does not recognise (Unknown) —
|
||||
// most importantly an OAuth2 token-fetch failure (HttpRequestException
|
||||
// from EnsureSuccessStatusCode, or InvalidOperationException from a
|
||||
// malformed credential triple) — used to fall through all the catch
|
||||
// clauses above and escape SendAsync as a raw exception to the calling
|
||||
// script, which the INotificationDeliveryService contract never
|
||||
// advertises. Convert any otherwise-unhandled exception into a clean,
|
||||
// credential-scrubbed permanent NotificationResult: returning control to
|
||||
// the script is the safe default. (A caller-requested cancellation is
|
||||
// already re-thrown by the filter above and never reaches here.)
|
||||
var detail = CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials);
|
||||
_logger.LogError(
|
||||
"Unclassified failure sending to list {List} ({ExceptionType}): {Detail}",
|
||||
listName, ex.GetType().Name, detail);
|
||||
return new NotificationResult(false, $"Notification delivery failed: {detail}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -224,36 +250,103 @@ public class NotificationDeliveryService : INotificationDeliveryService
|
||||
payload.ListName, CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials));
|
||||
return false;
|
||||
}
|
||||
// Transient SMTP errors propagate out of DeliverAsync — the S&F engine retries.
|
||||
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
// A handler shutdown cancellation is neither a delivery success nor a
|
||||
// permanent failure — let it propagate so the engine does not park.
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex) when (IsTransientSmtpError(ex, cancellationToken))
|
||||
{
|
||||
// A typed transient SMTP error: re-throw so the S&F engine retries.
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// NS-014: an exception ClassifySmtpError does not recognise (Unknown) —
|
||||
// chiefly an OAuth2 token-fetch failure — used to escape this handler.
|
||||
// The S&F engine treats ANY thrown exception as transient, so a
|
||||
// permanently-broken config (bad client secret, malformed credential
|
||||
// triple) was retried on every sweep until MaxRetries, burning token
|
||||
// endpoint calls. Decide deliberately rather than letting it leak:
|
||||
// - an HttpRequestException with a 5xx token-endpoint status is a
|
||||
// transient outage → re-throw so the engine retries;
|
||||
// - everything else (a 4xx/401 token rejection, a malformed credential
|
||||
// InvalidOperationException, any other unclassified fault) is not
|
||||
// fixable by retrying → return false so the message is parked.
|
||||
if (ex is HttpRequestException { StatusCode: { } status } && (int)status is >= 500 and < 600)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"Buffered notification to list '{List}' hit a transient OAuth2 token-endpoint error ({Status}); will retry.",
|
||||
payload.ListName, (int)status);
|
||||
throw;
|
||||
}
|
||||
|
||||
_logger.LogError(
|
||||
"Buffered notification to list '{List}' failed with a non-retryable error ({ExceptionType}: {Detail}); parking.",
|
||||
payload.ListName, ex.GetType().Name,
|
||||
CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Payload of a buffered notification: the target list name, the subject and the message text.
/// NOTE(review): presumably the shape persisted for store-and-forward redelivery — confirm against the enqueue/deliver code.
/// </summary>
private sealed record BufferedNotification(string ListName, string Subject, string Message);
|
||||
|
||||
/// <summary>
/// NS-007: throttles concurrent SMTP deliveries to the configured
/// <c>MaxConcurrentConnections</c>. One SMTP config is deployed per site, so the
/// limit is a stable per-site invariant; it is captured lazily on first use.
/// NS-018: a <see cref="Lazy{T}"/> replaces the hand-rolled double-checked
/// init — its publication is correctly synchronised (no lock-free read of a
/// non-volatile field) and it is disposed in <see cref="Dispose"/>.
/// </summary>
private Lazy<SemaphoreSlim>? _concurrencyLimiter;

/// <summary>
/// Serialises creation of the limiter (in <see cref="GetConcurrencyLimiter"/>)
/// against its disposal (in <see cref="Dispose"/>).
/// </summary>
private readonly object _limiterLock = new();

/// <summary>Set to <c>true</c> by <see cref="Dispose"/>; once set, no limiter is created or handed out.</summary>
private bool _disposed;

/// <summary>
/// Returns the concurrency limiter for this instance, creating it on the first call.
/// </summary>
/// <param name="config">
/// SMTP configuration whose <c>MaxConcurrentConnections</c> sizes the limiter. Only the
/// first call's value is used; later calls return the already-created limiter.
/// </param>
/// <returns>The semaphore that bounds concurrent SMTP deliveries.</returns>
/// <exception cref="ObjectDisposedException">The service has already been disposed.</exception>
private SemaphoreSlim GetConcurrencyLimiter(SmtpConfiguration config)
{
    // NS-018: the limiter is sized once; capture the size now so the Lazy
    // factory does not close over a value that could change between calls.
    // NS-007/NS-017: a non-positive size would make SemaphoreSlim throw, so fall
    // back to the NotificationOptions value, then the design-doc default of 5.
    var configured = config.MaxConcurrentConnections > 0
        ? config.MaxConcurrentConnections
        : _options.MaxConcurrentConnections > 0
            ? _options.MaxConcurrentConnections
            : 5;

    lock (_limiterLock)
    {
        // Checked under the same lock Dispose takes, so a limiter is never
        // created (and leaked) after disposal has completed.
        ObjectDisposedException.ThrowIf(_disposed, this);

        _concurrencyLimiter ??= new Lazy<SemaphoreSlim>(
            () => new SemaphoreSlim(configured, configured));
        return _concurrencyLimiter.Value;
    }
}
|
||||
|
||||
/// <summary>
/// NS-018: releases the concurrency limiter if it was ever created. The service
/// is registered as a scoped DI service, so without this each scope would leak
/// the <see cref="SemaphoreSlim"/> it created. Safe to call more than once;
/// only the first call does any work.
/// </summary>
public void Dispose()
{
    lock (_limiterLock)
    {
        var firstCall = !_disposed;
        if (!firstCall)
        {
            return;
        }

        _disposed = true;

        // Only touch .Value when the semaphore already exists, so disposing
        // never forces its creation.
        var lazyLimiter = _concurrencyLimiter;
        if (lazyLimiter?.IsValueCreated == true)
        {
            lazyLimiter.Value.Dispose();
        }
    }

    GC.SuppressFinalize(this);
}
|
||||
|
||||
/// <summary>
|
||||
/// NS-008: Validates the sender and recipient email addresses, returning a
|
||||
/// human-readable error string if any is malformed, or null if all parse.
|
||||
@@ -300,8 +393,13 @@ public class NotificationDeliveryService : INotificationDeliveryService
|
||||
try
|
||||
{
|
||||
// NS-005/NS-007: explicit TLS mode and the configured connection timeout.
|
||||
// NS-017: when the deployed SmtpConfiguration row leaves the timeout
|
||||
// unset (non-positive), fall back to the NotificationOptions value.
|
||||
var timeoutSeconds = config.ConnectionTimeoutSeconds > 0
|
||||
? config.ConnectionTimeoutSeconds
|
||||
: _options.ConnectionTimeoutSeconds;
|
||||
await smtp.ConnectAsync(
|
||||
config.Host, config.Port, tlsMode, config.ConnectionTimeoutSeconds, cancellationToken);
|
||||
config.Host, config.Port, tlsMode, timeoutSeconds, cancellationToken);
|
||||
|
||||
// Resolve credentials (OAuth2 token fetched/cached by the token service).
|
||||
var credentials = config.Credentials;
|
||||
|
||||
Reference in New Issue
Block a user