fix(transport): robust failure-audit when rollback throws + doc clarifications
Address one Blocker and three Important findings from code review of
2c34f12 (BundleImporter.ApplyAsync):
- BLOCKER: wrap RollbackAsync in nested try/catch so a rollback fault
does not swallow the BundleImportFailed audit row. Dispose the
failed transaction before the audit-write so the new SaveChangesAsync
uses a fresh implicit transaction instead of enlisting in the broken
one. Surface the rollback exception's message on the failure row
alongside the original cause, and swallow audit-write faults per the
design's best-effort-audit invariant. Add regression integration
test using a SQLite transaction interceptor that throws on rollback.
- Document re-entrancy assumption on IAuditCorrelationContext: scoped
lifetime, single circuit, concurrent imports within a shared scope
must serialize externally.
- Document repository audit responsibility on BundleImporter: repos
are thin EF wrappers; ApplyAsync writes audit rows explicitly. If
repos ever start emitting audit rows, the explicit calls here must
be removed to avoid double-logging.
- Document BundleSessionStore thread-safety: ConcurrentDictionary
primitives are safe under concurrent callers; BundleSession itself
is not thread-safe.
This commit is contained in:
@@ -4,6 +4,17 @@ namespace ScadaLink.Commons.Interfaces.Transport;
|
||||
/// Scoped service the bundle importer sets to thread a BundleImportId through to
|
||||
/// the audit log entries emitted by the audited repository methods invoked during
|
||||
/// ApplyAsync. AuditService reads this and stamps every AuditLogEntry it writes.
|
||||
/// <para>
|
||||
/// Re-entrancy / thread-safety: mutating <see cref="BundleImportId"/> is NOT
|
||||
/// thread-safe. The service is registered scoped, and the assumed usage is a
|
||||
/// single Blazor Server circuit (or single API request) at a time — within that
|
||||
/// scope <see cref="BundleImporter.ApplyAsync"/> is the sole writer, and the
|
||||
/// audit service is the sole reader, in a strictly sequential await chain.
|
||||
/// Callers that perform concurrent imports within a shared scope (e.g. two
|
||||
/// <c>ApplyAsync</c> calls awaited via <c>Task.WhenAll</c> on the same circuit)
|
||||
/// MUST serialize access externally — there is no internal lock and the last
|
||||
/// writer wins, which would cross-contaminate audit rows between imports.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public interface IAuditCorrelationContext
|
||||
{
|
||||
|
||||
@@ -25,6 +25,19 @@ namespace ScadaLink.Transport.Import;
|
||||
/// resolutions through the audited repositories. Only LoadAsync is
|
||||
/// implemented in this slice — the other two are wired into DI now so
|
||||
/// follow-up tasks can fill them in without churning the constructor.
|
||||
/// <para>
|
||||
/// Audit-row responsibility: repository mutation methods in
|
||||
/// <c>ScadaLink.ConfigurationDatabase.Repositories</c> are thin EF wrappers
|
||||
/// and do NOT emit audit rows. <see cref="ApplyAsync"/> therefore writes
|
||||
/// each per-entity audit row explicitly via <see cref="IAuditService.LogAsync"/>;
|
||||
/// the scoped <see cref="IAuditCorrelationContext.BundleImportId"/> is
|
||||
/// automatically stamped on each row by the audit service.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// If repository methods are ever changed to emit audit rows themselves,
|
||||
/// the explicit <c>LogAsync</c> calls in this class must be removed to
|
||||
/// avoid double-logging.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
public sealed class BundleImporter : IBundleImporter
|
||||
{
|
||||
@@ -562,11 +575,41 @@ public sealed class BundleImporter : IBundleImporter
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
await tx.RollbackAsync(ct).ConfigureAwait(false);
|
||||
// Rollback can itself throw (connection drop mid-rollback, provider
|
||||
// bug, etc). If it does, we must STILL write the BundleImportFailed
|
||||
// audit row — otherwise a rollback-failure path silently swallows
|
||||
// the import's audit trail. Capture the rollback exception (if any)
|
||||
// and surface it on the failure row alongside the original cause.
|
||||
Exception? rollbackFailure = null;
|
||||
try
|
||||
{
|
||||
await tx.RollbackAsync(ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception rbEx)
|
||||
{
|
||||
rollbackFailure = rbEx;
|
||||
}
|
||||
|
||||
// If rollback threw the IDbContextTransaction is in an indeterminate
|
||||
// state and still associated with the DbContext — a subsequent
|
||||
// SaveChangesAsync would attempt to enlist in (or commit to) that
|
||||
// broken transaction, and the failure-row would itself be rolled
|
||||
// back when the transaction is finally disposed. Dispose it now so
|
||||
// the audit-row write below uses a fresh implicit transaction. On
|
||||
// the happy rollback path Dispose is a benign no-op (the using
|
||||
// would call it on scope exit anyway).
|
||||
if (rollbackFailure is not null)
|
||||
{
|
||||
try { await tx.DisposeAsync().ConfigureAwait(false); }
|
||||
catch { /* dispose-after-throw must not mask the original cause */ }
|
||||
}
|
||||
|
||||
// Clear the change tracker before writing the failure row — on the
|
||||
// in-memory provider the rollback is a no-op and the staged adds
|
||||
// would otherwise persist when the next SaveChangesAsync runs.
|
||||
// would otherwise persist when the next SaveChangesAsync runs. This
|
||||
// also matters when rollback threw: the change tracker is in an
|
||||
// ambiguous state and we don't want the failure-write to sweep up
|
||||
// any of the staged apply mutations.
|
||||
_dbContext.ChangeTracker.Clear();
|
||||
|
||||
// Clear correlation FIRST so the failure row doesn't carry the now-
|
||||
@@ -574,20 +617,32 @@ public sealed class BundleImporter : IBundleImporter
|
||||
// exists at top level (no correlation) so audit consumers can see
|
||||
// imports that aborted before any rows landed.
|
||||
_correlationContext.BundleImportId = null;
|
||||
await _auditService.LogAsync(
|
||||
user: user,
|
||||
action: "BundleImportFailed",
|
||||
entityType: "Bundle",
|
||||
entityId: bundleImportId.ToString(),
|
||||
entityName: session.Manifest.SourceEnvironment,
|
||||
afterState: new
|
||||
{
|
||||
BundleImportId = bundleImportId,
|
||||
Reason = ex.Message,
|
||||
ExceptionType = ex.GetType().FullName,
|
||||
},
|
||||
cancellationToken: ct).ConfigureAwait(false);
|
||||
await _dbContext.SaveChangesAsync(ct).ConfigureAwait(false);
|
||||
try
|
||||
{
|
||||
await _auditService.LogAsync(
|
||||
user: user,
|
||||
action: "BundleImportFailed",
|
||||
entityType: "Bundle",
|
||||
entityId: bundleImportId.ToString(),
|
||||
entityName: session.Manifest.SourceEnvironment,
|
||||
afterState: new
|
||||
{
|
||||
BundleImportId = bundleImportId,
|
||||
Reason = ex.Message,
|
||||
ExceptionType = ex.GetType().FullName,
|
||||
RollbackException = rollbackFailure?.Message,
|
||||
},
|
||||
cancellationToken: ct).ConfigureAwait(false);
|
||||
await _dbContext.SaveChangesAsync(ct).ConfigureAwait(false);
|
||||
}
|
||||
catch
|
||||
{
|
||||
// Audit-write is best-effort per design §10 ("Audit-write failure
|
||||
// NEVER aborts the user-facing action — audit is best-effort, the
|
||||
// action's own success/failure path is authoritative"). Swallow
|
||||
// any failure here so the original exception below propagates
|
||||
// unchanged rather than being masked by an audit-layer fault.
|
||||
}
|
||||
throw;
|
||||
}
|
||||
finally
|
||||
|
||||
@@ -11,6 +11,17 @@ namespace ScadaLink.Transport.Import;
|
||||
/// at read time (<see cref="Get"/>) and on-demand via <see cref="EvictExpired"/>;
|
||||
/// there is no background timer.
|
||||
/// <para>
|
||||
/// Thread-safety: backed by <see cref="ConcurrentDictionary{TKey,TValue}"/> of
|
||||
/// <see cref="Guid"/> to <see cref="BundleSession"/>. All store operations
|
||||
/// (<see cref="Get"/> / <see cref="Open"/> / <see cref="Remove"/> /
|
||||
/// <see cref="EvictExpired"/>) use the concurrent dictionary's safe primitives
|
||||
/// (<c>TryGetValue</c>, indexer assignment, <c>TryRemove</c>) and are safe
|
||||
/// under concurrent callers. The <see cref="BundleSession"/> instance itself
|
||||
/// is NOT thread-safe — callers that share a session reference (e.g. two
|
||||
/// importers mutating <c>FailedUnlockAttempts</c> on the same session) MUST
|
||||
/// serialize their mutations on that shared reference.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// TTL is supplied by the importer via <see cref="BundleSession.ExpiresAt"/>;
|
||||
/// this store does not impose its own. The injected <see cref="TimeProvider"/>
|
||||
/// is used purely to determine <c>now</c> when checking <c>ExpiresAt</c>, which
|
||||
|
||||
Reference in New Issue
Block a user