fix(concurrency): close 8 race / thread-safety findings across CD, DCL, SR
CD-015: rewrite NotificationOutboxRepository.InsertIfNotExistsAsync as raw-SQL IF NOT EXISTS … INSERT with a catch for SqlException 2601/2627, ending the at-least-once livelock on the site→central notification handoff. DCL-018/019/020/021/022: add _subscribesInFlight guard so concurrent same-tag subscribes don't orphan an adapter handle; delete the latent dead _subscriptionHandles dictionary; stop double-counting _totalSubscribed when an unresolved tag is promoted via another instance; release adapter handles on mid-flight unsubscribe; gate the tag-resolution retry timer with IsTimerActive so subscribe bursts don't reset it into starvation. SR-020: add _terminatingActorsByName shadow so a third deploy arriving during a pending redeploy doesn't crash on InvalidActorNameException — displaced senders get a Failed/superseded response and the latest command wins on Terminated. SR-024: split OperationTrackingStore reads from writes (fresh SqliteConnection per GetStatusAsync) so long writes don't block status queries; rewrite Dispose to drop the sync-over-async bridge that could deadlock on a non-reentrant SynchronizationContext; use Interlocked.Exchange to make the dispose-once flag race-safe across both paths.
This commit is contained in:
@@ -1,4 +1,7 @@
|
||||
using Microsoft.Data.SqlClient;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using ScadaLink.Commons.Entities.Notifications;
|
||||
using ScadaLink.Commons.Interfaces.Repositories;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
@@ -12,7 +15,20 @@ namespace ScadaLink.ConfigurationDatabase.Repositories;
|
||||
/// </summary>
|
||||
public class NotificationOutboxRepository : INotificationOutboxRepository
|
||||
{
|
||||
// SQL Server duplicate-key error numbers, matching the AuditLogRepository
|
||||
// and SiteCallAuditRepository race-fixes. 2601 is a unique-index violation;
|
||||
// 2627 is a primary-key/unique-constraint violation. The IF NOT EXISTS …
|
||||
// INSERT pattern has a check-then-act race window — two sessions can both
|
||||
// pass the EXISTS check and then both attempt the INSERT — and the loser
|
||||
// surfaces as one of these. The site→central handoff is documented
|
||||
// at-least-once with insert-if-not-exists, so the collision IS the expected
|
||||
// contention mode; idempotency demands we swallow them rather than let the
|
||||
// site retry the same NotificationId forever.
|
||||
private const int SqlErrorUniqueIndexViolation = 2601;
|
||||
private const int SqlErrorPrimaryKeyViolation = 2627;
|
||||
|
||||
private readonly ScadaLinkDbContext _context;
|
||||
private readonly ILogger<NotificationOutboxRepository> _logger;
|
||||
|
||||
// Statuses that represent a finished notification lifecycle. Non-terminal is the complement.
|
||||
private static readonly NotificationStatus[] TerminalStatuses =
|
||||
@@ -24,24 +40,67 @@ public class NotificationOutboxRepository : INotificationOutboxRepository
|
||||
|
||||
/// <summary>Initializes a new instance of <see cref="NotificationOutboxRepository"/> with the given EF Core context.</summary>
|
||||
/// <param name="context">The EF Core database context.</param>
|
||||
public NotificationOutboxRepository(ScadaLinkDbContext context)
|
||||
/// <param name="logger">Optional logger; when <c>null</c>, a no-op <see cref="NullLogger{T}"/> instance is used.</param>
|
||||
public NotificationOutboxRepository(ScadaLinkDbContext context, ILogger<NotificationOutboxRepository>? logger = null)
|
||||
{
|
||||
_context = context ?? throw new ArgumentNullException(nameof(context));
|
||||
_logger = logger ?? NullLogger<NotificationOutboxRepository>.Instance;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public async Task<bool> InsertIfNotExistsAsync(Notification n, CancellationToken cancellationToken = default)
|
||||
{
|
||||
var exists = await _context.Notifications
|
||||
.AnyAsync(x => x.NotificationId == n.NotificationId, cancellationToken);
|
||||
if (exists)
|
||||
if (n is null)
|
||||
{
|
||||
return false;
|
||||
throw new ArgumentNullException(nameof(n));
|
||||
}
|
||||
|
||||
await _context.Notifications.AddAsync(n, cancellationToken);
|
||||
await _context.SaveChangesAsync(cancellationToken);
|
||||
return true;
|
||||
// Enum columns are stored as varchar(32) (HasConversion<string>()); convert
|
||||
// in C# rather than relying on parameter type inference (SqlClient would
|
||||
// otherwise bind enums as int by default and break the column conversion).
|
||||
var type = n.Type.ToString();
|
||||
var status = n.Status.ToString();
|
||||
|
||||
// FormattableString interpolation parameterises every value (no concatenation),
|
||||
// so this is safe against injection even for the string columns.
|
||||
try
|
||||
{
|
||||
var rowsAffected = await _context.Database.ExecuteSqlInterpolatedAsync(
|
||||
$@"IF NOT EXISTS (SELECT 1 FROM dbo.Notifications WHERE NotificationId = {n.NotificationId})
|
||||
INSERT INTO dbo.Notifications
|
||||
(NotificationId, Type, ListName, Subject, Body, TypeData, Status, RetryCount, LastError,
|
||||
ResolvedTargets, SourceSiteId, SourceNode, SourceInstanceId, SourceScript,
|
||||
OriginExecutionId, OriginParentExecutionId,
|
||||
SiteEnqueuedAt, CreatedAt, LastAttemptAt, NextAttemptAt, DeliveredAt)
|
||||
VALUES
|
||||
({n.NotificationId}, {type}, {n.ListName}, {n.Subject}, {n.Body}, {n.TypeData}, {status}, {n.RetryCount}, {n.LastError},
|
||||
{n.ResolvedTargets}, {n.SourceSiteId}, {n.SourceNode}, {n.SourceInstanceId}, {n.SourceScript},
|
||||
{n.OriginExecutionId}, {n.OriginParentExecutionId},
|
||||
{n.SiteEnqueuedAt}, {n.CreatedAt}, {n.LastAttemptAt}, {n.NextAttemptAt}, {n.DeliveredAt});",
|
||||
cancellationToken);
|
||||
|
||||
// rowsAffected == 1 -> we inserted; any other value -> a prior row was already there
|
||||
// (IF NOT EXISTS skipped the INSERT; ADO.NET then reports -1 rather than 0, since no DML ran).
|
||||
return rowsAffected == 1;
|
||||
}
|
||||
catch (SqlException ex) when (
|
||||
ex.Number == SqlErrorUniqueIndexViolation
|
||||
|| ex.Number == SqlErrorPrimaryKeyViolation)
|
||||
{
|
||||
// Two concurrent sessions both passed IF NOT EXISTS and both
|
||||
// attempted the INSERT — the loser raises 2601/2627 against the
|
||||
// NotificationId primary key. First-write-wins idempotency is the
|
||||
// documented contract (the site→central handoff is at-least-once,
|
||||
// and the actor discards the return value), so the race outcome is
|
||||
// semantically a no-op. Returning false here matches the
|
||||
// "row already existed" branch of the success path.
|
||||
_logger.LogDebug(
|
||||
ex,
|
||||
"InsertIfNotExistsAsync swallowed duplicate-key violation (error {SqlErrorNumber}) for NotificationId {NotificationId}; treating as no-op.",
|
||||
ex.Number,
|
||||
n.NotificationId);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
|
||||
Reference in New Issue
Block a user