// File: scadalink-design/src/ScadaLink.NotificationService/NotificationDeliveryService.cs
// (C#, 429 lines, 19 KiB)

using System.Text.Json;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Entities.Notifications;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Interfaces.Services;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.StoreAndForward;
namespace ScadaLink.NotificationService;
/// <summary>
/// WP-11: Notification delivery via SMTP.
/// WP-12: Error classification and S&amp;F integration.
/// Transient: connection refused, timeout, SMTP 4xx → hand to S&amp;F.
/// Permanent: SMTP 5xx → returned to script.
/// </summary>
public class NotificationDeliveryService : INotificationDeliveryService, IDisposable
{
// Collaborators supplied through the constructor. The token service and the
// store-and-forward service are optional and may be null.
private readonly INotificationRepository _repository;
private readonly Func<ISmtpClientWrapper> _smtpClientFactory;
private readonly OAuth2TokenService? _tokenService;
private readonly StoreAndForwardService? _storeAndForward;
private readonly ILogger<NotificationDeliveryService> _logger;
private readonly NotificationOptions _options;

/// <summary>
/// Creates the delivery service.
/// </summary>
/// <param name="repository">Source of notification lists, recipients and SMTP configurations.</param>
/// <param name="smtpClientFactory">Factory invoked once per delivery attempt to obtain an SMTP client.</param>
/// <param name="logger">Logger for delivery diagnostics.</param>
/// <param name="tokenService">Optional OAuth2 token service; when null no token exchange is performed.</param>
/// <param name="storeAndForward">Optional store-and-forward service; when null transient failures are not buffered.</param>
/// <param name="options">Optional fallback options; a default <see cref="NotificationOptions"/> is used when absent.</param>
public NotificationDeliveryService(
    INotificationRepository repository,
    Func<ISmtpClientWrapper> smtpClientFactory,
    ILogger<NotificationDeliveryService> logger,
    OAuth2TokenService? tokenService = null,
    StoreAndForwardService? storeAndForward = null,
    IOptions<NotificationOptions>? options = null)
{
    _logger = logger;
    _repository = repository;
    _smtpClientFactory = smtpClientFactory;
    _storeAndForward = storeAndForward;
    _tokenService = tokenService;

    // NS-017: NotificationOptions carries the documented fallback values that
    // apply when a deployed SmtpConfiguration row leaves a field unset
    // (non-positive). With no options supplied, use a default instance.
    var suppliedOptions = options?.Value;
    _options = suppliedOptions is null ? new NotificationOptions() : suppliedOptions;
}
/// <summary>
/// Sends a notification to a named list. BCC delivery, plain text.
/// </summary>
/// <param name="listName">Name of the notification list to resolve recipients from.</param>
/// <param name="subject">Subject line of the message.</param>
/// <param name="message">Plain-text body of the message.</param>
/// <param name="originInstanceName">Optional origin instance name forwarded to store-and-forward when the message is buffered.</param>
/// <param name="cancellationToken">Token observed by the repository lookups and the SMTP delivery.</param>
/// <returns>
/// A successful result when the message was delivered, or when it was buffered
/// for retry after a transient failure (<c>WasBuffered</c> is then true). A
/// failed result with a credential-scrubbed reason otherwise: unknown list, no
/// recipients, no SMTP configuration, invalid TLS mode or addresses, a
/// permanent SMTP error, an unclassified error, or a failure to buffer.
/// </returns>
/// <exception cref="ObjectDisposedException">The service has already been disposed.</exception>
/// <exception cref="OperationCanceledException">The caller cancelled <paramref name="cancellationToken"/> during delivery.</exception>
public async Task<NotificationResult> SendAsync(
    string listName,
    string subject,
    string message,
    string? originInstanceName = null,
    CancellationToken cancellationToken = default)
{
    ObjectDisposedException.ThrowIf(_disposed, this);

    var list = await _repository.GetListByNameAsync(listName, cancellationToken);
    if (list == null)
    {
        return new NotificationResult(false, $"Notification list '{listName}' not found");
    }

    var recipients = await _repository.GetRecipientsByListIdAsync(list.Id, cancellationToken);
    if (recipients.Count == 0)
    {
        return new NotificationResult(false, $"Notification list '{listName}' has no recipients");
    }

    var smtpConfigs = await _repository.GetAllSmtpConfigurationsAsync(cancellationToken);
    var smtpConfig = smtpConfigs.FirstOrDefault();
    if (smtpConfig == null)
    {
        return new NotificationResult(false, "No SMTP configuration available");
    }

    // NS-005: validate the configured TLS mode up front — an unknown value is a
    // configuration error and must surface as a clean result, not a silent
    // fallback to opportunistic TLS negotiation.
    try
    {
        SmtpTlsModeParser.Parse(smtpConfig.TlsMode);
    }
    catch (ArgumentException ex)
    {
        _logger.LogError("Invalid SMTP TLS mode for list {List}: {Reason}", listName, ex.Message);
        return new NotificationResult(false, ex.Message);
    }

    // NS-008: validate every email address before attempting delivery. A single
    // malformed address previously caused MailboxAddress.Parse to throw a
    // ParseException that escaped SendAsync unhandled; it must instead produce a
    // clean NotificationResult the calling script can handle.
    var addressError = EmailAddressValidator.ValidateAddresses(smtpConfig.FromAddress, recipients);
    if (addressError != null)
    {
        _logger.LogWarning("Notification to list {List} has invalid addresses: {Reason}", listName, addressError);
        return new NotificationResult(false, addressError);
    }

    try
    {
        await DeliverAsync(smtpConfig, recipients, subject, message, cancellationToken);
        return new NotificationResult(true, null);
    }
    catch (SmtpPermanentException ex)
    {
        // WP-12: Permanent SMTP failure — returned to script.
        // NS-009: scrub credential fragments out of the server-supplied message
        // before logging or returning it.
        var detail = CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials);
        _logger.LogError(
            "Permanent SMTP failure sending to list {List}: {Detail}", listName, detail);
        return new NotificationResult(false, $"Permanent SMTP error: {detail}");
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // NS-002: a caller-requested cancellation propagates; it is not buffered.
        throw;
    }
    catch (Exception ex) when (SmtpErrorClassifier.IsTransient(ex, cancellationToken))
    {
        // WP-12: Transient SMTP failure — hand to S&F.
        // NS-009: scrub credential fragments before logging.
        _logger.LogWarning(
            "Transient SMTP failure sending to list {List} ({ExceptionType}): {Detail}; buffering for retry",
            listName, ex.GetType().Name, CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials));

        if (_storeAndForward == null)
        {
            return new NotificationResult(false, "Transient SMTP error and store-and-forward not available");
        }

        // An exception thrown from inside a catch block is NOT handled by the
        // sibling catch (Exception) clause below, so a failure while buffering
        // would escape SendAsync as a raw exception. Guard the buffering step
        // explicitly so the script still receives a clean NotificationResult.
        try
        {
            var payload = JsonSerializer.Serialize(new
            {
                ListName = listName,
                Subject = subject,
                Message = message
            });

            // attemptImmediateDelivery: false — DeliverAsync was already attempted
            // above; letting EnqueueAsync re-invoke the handler would send twice.
            await _storeAndForward.EnqueueAsync(
                StoreAndForwardCategory.Notification,
                listName,
                payload,
                originInstanceName,
                smtpConfig.MaxRetries > 0 ? smtpConfig.MaxRetries : null,
                smtpConfig.RetryDelay > TimeSpan.Zero ? smtpConfig.RetryDelay : null,
                attemptImmediateDelivery: false);
        }
        catch (Exception enqueueEx)
        {
            var enqueueDetail = CredentialRedactor.Scrub(enqueueEx.Message, smtpConfig.Credentials);
            _logger.LogError(
                "Failed to buffer notification to list {List} for retry ({ExceptionType}): {Detail}",
                listName, enqueueEx.GetType().Name, enqueueDetail);
            return new NotificationResult(
                false, $"Transient SMTP error and buffering for retry failed: {enqueueDetail}");
        }

        return new NotificationResult(true, null, WasBuffered: true);
    }
    catch (Exception ex)
    {
        // NS-015: a failure that SmtpErrorClassifier does not recognise (Unknown) —
        // most importantly an OAuth2 token-fetch failure (HttpRequestException
        // from EnsureSuccessStatusCode, or InvalidOperationException from a
        // malformed credential triple) — used to fall through all the catch
        // clauses above and escape SendAsync as a raw exception to the calling
        // script, which the INotificationDeliveryService contract never
        // advertises. Convert any otherwise-unhandled exception into a clean,
        // credential-scrubbed permanent NotificationResult: returning control to
        // the script is the safe default. (A caller-requested cancellation is
        // already re-thrown by the filter above and never reaches here.)
        var detail = CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials);
        _logger.LogError(
            "Unclassified failure sending to list {List} ({ExceptionType}): {Detail}",
            listName, ex.GetType().Name, detail);
        return new NotificationResult(false, $"Notification delivery failed: {detail}");
    }
}
/// <summary>
/// WP-11/12: Delivers a buffered notification during a store-and-forward retry
/// sweep — re-resolves the list, recipients and SMTP config and re-attempts
/// delivery. Returns true on success, false on permanent failure (the message
/// is parked); throws on a transient failure so the engine retries.
/// </summary>
/// <param name="message">The buffered message; its <c>PayloadJson</c> is deserialized into list name, subject and body.</param>
/// <param name="cancellationToken">Token observed by the repository lookups and the SMTP delivery.</param>
/// <returns>
/// <c>true</c> when the notification was delivered; <c>false</c> when it cannot
/// be delivered by retrying (unreadable or malformed payload, missing list,
/// no recipients, no SMTP configuration, invalid TLS mode or addresses,
/// permanent SMTP failure, or a non-retryable unclassified error).
/// </returns>
/// <exception cref="OperationCanceledException">The token was cancelled during delivery.</exception>
public async Task<bool> DeliverBufferedAsync(
    StoreAndForwardMessage message, CancellationToken cancellationToken = default)
{
    // A payload that is not valid JSON makes Deserialize throw rather than
    // return null. Retrying cannot repair a malformed payload, and a thrown
    // exception would be retried by the engine on every sweep, so park it
    // here just like the null/empty case below.
    BufferedNotification? payload;
    try
    {
        payload = JsonSerializer.Deserialize<BufferedNotification>(message.PayloadJson);
    }
    catch (Exception ex) when (ex is JsonException or ArgumentNullException)
    {
        _logger.LogError(
            "Buffered notification message {Id} has a malformed payload ({Reason}); parking.",
            message.Id, ex.Message);
        return false;
    }

    if (payload == null || string.IsNullOrEmpty(payload.ListName))
    {
        _logger.LogError("Buffered notification message {Id} has an unreadable payload; parking.", message.Id);
        return false;
    }

    var list = await _repository.GetListByNameAsync(payload.ListName, cancellationToken);
    if (list == null)
    {
        _logger.LogError(
            "Buffered notification to list '{List}' cannot be delivered — the list no longer exists; parking.",
            payload.ListName);
        return false;
    }

    var recipients = await _repository.GetRecipientsByListIdAsync(list.Id, cancellationToken);
    if (recipients.Count == 0)
    {
        _logger.LogError("Buffered notification to list '{List}' has no recipients; parking.", payload.ListName);
        return false;
    }

    var smtpConfig = (await _repository.GetAllSmtpConfigurationsAsync(cancellationToken)).FirstOrDefault();
    if (smtpConfig == null)
    {
        _logger.LogError("Buffered notification cannot be delivered — no SMTP configuration available; parking.");
        return false;
    }

    // NS-005: an unknown TLS mode is a configuration error that retrying cannot
    // fix — park the buffered message rather than throwing on every sweep.
    try
    {
        SmtpTlsModeParser.Parse(smtpConfig.TlsMode);
    }
    catch (ArgumentException ex)
    {
        _logger.LogError(
            "Buffered notification to list '{List}' cannot be delivered — {Reason}; parking.",
            payload.ListName, ex.Message);
        return false;
    }

    // NS-008: a malformed address cannot be fixed by retrying — park it.
    var addressError = EmailAddressValidator.ValidateAddresses(smtpConfig.FromAddress, recipients);
    if (addressError != null)
    {
        _logger.LogError(
            "Buffered notification to list '{List}' has invalid addresses ({Reason}); parking.",
            payload.ListName, addressError);
        return false;
    }

    try
    {
        await DeliverAsync(smtpConfig, recipients, payload.Subject, payload.Message, cancellationToken);
        return true;
    }
    catch (SmtpPermanentException ex)
    {
        // NS-009: scrub credential fragments out of the message before logging.
        _logger.LogError(
            "Buffered notification to list '{List}' failed permanently ({Detail}); parking.",
            payload.ListName, CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials));
        return false;
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // A handler shutdown cancellation is neither a delivery success nor a
        // permanent failure — let it propagate so the engine does not park.
        throw;
    }
    catch (Exception ex) when (SmtpErrorClassifier.IsTransient(ex, cancellationToken))
    {
        // A typed transient SMTP error: re-throw so the S&F engine retries.
        throw;
    }
    catch (Exception ex)
    {
        // NS-014: an exception SmtpErrorClassifier does not recognise (Unknown) —
        // chiefly an OAuth2 token-fetch failure — used to escape this handler.
        // The S&F engine treats ANY thrown exception as transient, so a
        // permanently-broken config (bad client secret, malformed credential
        // triple) was retried on every sweep until MaxRetries, burning token
        // endpoint calls. Decide deliberately rather than letting it leak:
        //   - an HttpRequestException with a 5xx token-endpoint status is a
        //     transient outage → re-throw so the engine retries;
        //   - everything else (a 4xx/401 token rejection, a malformed credential
        //     InvalidOperationException, any other unclassified fault) is not
        //     fixable by retrying → return false so the message is parked.
        if (ex is HttpRequestException { StatusCode: { } status } && (int)status is >= 500 and < 600)
        {
            _logger.LogWarning(
                "Buffered notification to list '{List}' hit a transient OAuth2 token-endpoint error ({Status}); will retry.",
                payload.ListName, (int)status);
            throw;
        }

        _logger.LogError(
            "Buffered notification to list '{List}' failed with a non-retryable error ({ExceptionType}: {Detail}); parking.",
            payload.ListName, ex.GetType().Name,
            CredentialRedactor.Scrub(ex.Message, smtpConfig.Credentials));
        return false;
    }
}
/// <summary>
/// Shape of the JSON payload read back by <c>DeliverBufferedAsync</c>. The
/// member names match the anonymous object that <c>SendAsync</c> serializes
/// when it buffers a notification.
/// </summary>
private sealed record BufferedNotification(
    string ListName,
    string Subject,
    string Message);
/// <summary>
/// NS-007: semaphore that caps the number of simultaneous SMTP deliveries at
/// the configured <c>MaxConcurrentConnections</c>. It is created on first use
/// and its size is fixed from then on.
/// NS-018: held in a <see cref="Lazy{T}"/> and disposed in <see cref="Dispose"/>.
/// </summary>
private Lazy<SemaphoreSlim>? _concurrencyLimiter;

// Guards creation and disposal of the limiter and the _disposed flag.
private readonly object _limiterLock = new();
private bool _disposed;

/// <summary>
/// Returns the shared concurrency limiter, creating it on the first call.
/// </summary>
/// <param name="config">SMTP configuration whose <c>MaxConcurrentConnections</c> sizes the limiter on first use.</param>
/// <returns>The semaphore bounding concurrent SMTP deliveries.</returns>
/// <exception cref="ObjectDisposedException">The service has already been disposed.</exception>
private SemaphoreSlim GetConcurrencyLimiter(SmtpConfiguration config)
{
    // NS-018: resolve the size before entering the lock so the Lazy factory
    // captures a fixed value.
    int slots;
    if (config.MaxConcurrentConnections > 0)
    {
        slots = config.MaxConcurrentConnections;
    }
    else if (_options.MaxConcurrentConnections > 0)
    {
        // NS-017: the deployed row leaves the limit unset — use the options value.
        slots = _options.MaxConcurrentConnections;
    }
    else
    {
        // NS-017: neither source supplies a positive value — design-doc default.
        slots = 5;
    }

    lock (_limiterLock)
    {
        ObjectDisposedException.ThrowIf(_disposed, this);

        if (_concurrencyLimiter is null)
        {
            _concurrencyLimiter = new Lazy<SemaphoreSlim>(
                () => new SemaphoreSlim(slots, slots));
        }

        return _concurrencyLimiter.Value;
    }
}
/// <summary>
/// NS-018: releases the concurrency limiter if it was ever created. The
/// service is a scoped DI service, so without this each scope leaked the
/// <see cref="SemaphoreSlim"/> handle. Calling it more than once is a no-op.
/// </summary>
public void Dispose()
{
    lock (_limiterLock)
    {
        if (_disposed)
        {
            // Already disposed: nothing left to release.
            return;
        }

        _disposed = true;

        // Only touch .Value when the semaphore was actually materialised, so
        // disposal never creates it just to dispose it.
        var lazyLimiter = _concurrencyLimiter;
        if (lazyLimiter != null && lazyLimiter.IsValueCreated)
        {
            lazyLimiter.Value.Dispose();
        }
    }

    GC.SuppressFinalize(this);
}
/// <summary>
/// Delivers an email via SMTP. Throws on failure (transient errors and
/// <see cref="SmtpPermanentException"/> propagate; the caller classifies them).
/// </summary>
/// <param name="config">SMTP configuration supplying host, port, TLS mode, auth type, credentials and sender address.</param>
/// <param name="recipients">Recipients whose email addresses are passed to the client as the BCC list.</param>
/// <param name="subject">Subject line of the message.</param>
/// <param name="body">Body of the message.</param>
/// <param name="cancellationToken">Token observed while waiting for a connection slot and during the SMTP calls.</param>
/// <exception cref="SmtpPermanentException">The failure was classified as permanent.</exception>
/// <exception cref="OperationCanceledException">The token was cancelled.</exception>
/// <exception cref="ObjectDisposedException">The service has already been disposed.</exception>
internal async Task DeliverAsync(
    SmtpConfiguration config,
    IReadOnlyList<NotificationRecipient> recipients,
    string subject,
    string body,
    CancellationToken cancellationToken)
{
    var tlsMode = SmtpTlsModeParser.Parse(config.TlsMode);

    // NS-007: bound the number of concurrent SMTP connections per site.
    var limiter = GetConcurrencyLimiter(config);
    await limiter.WaitAsync(cancellationToken);

    // The slot is owned from here on. Everything that can throw — including
    // the client factory — runs inside this try so the slot is always
    // returned; a factory failure outside the try would leak the slot.
    try
    {
        // NS-004: create exactly one client and dispose the one actually used.
        var smtp = _smtpClientFactory();
        using var disposable = smtp as IDisposable;

        try
        {
            // NS-005/NS-007: explicit TLS mode and the configured connection timeout.
            // NS-017: when the deployed SmtpConfiguration row leaves the timeout
            // unset (non-positive), fall back to the NotificationOptions value.
            var timeoutSeconds = config.ConnectionTimeoutSeconds > 0
                ? config.ConnectionTimeoutSeconds
                : _options.ConnectionTimeoutSeconds;

            await smtp.ConnectAsync(
                config.Host, config.Port, tlsMode, timeoutSeconds, cancellationToken);

            // Resolve credentials (OAuth2 token fetched/cached by the token service).
            var credentials = config.Credentials;
            if (config.AuthType.Equals("oauth2", StringComparison.OrdinalIgnoreCase) && _tokenService != null && credentials != null)
            {
                var token = await _tokenService.GetTokenAsync(credentials, cancellationToken);
                credentials = token;
            }

            await smtp.AuthenticateAsync(config.AuthType, credentials, cancellationToken);

            var bccAddresses = recipients.Select(r => r.EmailAddress).ToList();
            await smtp.SendAsync(config.FromAddress, bccAddresses, subject, body, cancellationToken);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // NS-002: A deliberately cancelled token must propagate as a cancellation,
            // not be misclassified as a transient SMTP failure and buffered for retry.
            throw;
        }
        catch (Exception ex) when (SmtpErrorClassifier.Classify(ex, cancellationToken) == SmtpErrorClass.Permanent
                                   && ex is not SmtpPermanentException)
        {
            // NS-003: Permanent SMTP failure (5xx) — surface a typed permanent exception.
            throw new SmtpPermanentException(ex.Message, ex);
        }
        // Transient and SmtpPermanentException both propagate unchanged: SendAsync's
        // catch filters (SmtpPermanentException / SmtpErrorClassifier.IsTransient) handle them.
        finally
        {
            // NS-010: always tear the connection down, regardless of outcome. The
            // SMTP QUIT used to run only on the success path inside the try block,
            // so a failed Connect/Authenticate/Send left an open, authenticated
            // connection until finalization reclaimed the socket — exhausting the
            // server's connection slots under sustained transient failures.
            // Disconnect is best-effort: a disconnect failure (e.g. the connection
            // is already dead) must not mask the original delivery exception.
            try
            {
                await smtp.DisconnectAsync(cancellationToken);
            }
            catch (Exception disconnectEx)
            {
                _logger.LogDebug(
                    "Ignoring SMTP disconnect failure during cleanup: {Reason}", disconnectEx.Message);
            }
        }
    }
    finally
    {
        // NS-007: always release the concurrency slot, even on failure. This
        // runs after the client has been disconnected and disposed, so the
        // slot is only handed on once the connection is actually gone.
        limiter.Release();
    }
}
}