fix(db): classify transient vs permanent SQL errors in Database.CachedWrite (#7)
CachedWrite buffered ALL write failures and retried forever, never returning a synchronous failure to the script — permanent SQL errors (constraint/syntax/permission) were treated as transient. Mirror the External-System API path: attempt the write immediately, return Failed synchronously on permanent SQL errors (no buffering), and buffer only transient errors; the store-and-forward (S&F) retry path now parks permanent failures instead of retrying them forever. Adds SqlErrorClassifier and PermanentDatabaseException.
This commit is contained in:
@@ -1326,9 +1326,20 @@ public class ScriptRuntimeContext
|
||||
name, trackedId, target, occurredAtUtc, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
|
||||
// M2.3 (#7): the gateway now attempts the write immediately and
|
||||
// classifies the outcome (mirroring ExternalSystem.CachedCall). The
|
||||
// result is retained because the immediate paths (WasBuffered=false —
|
||||
// immediate success OR a synchronous permanent failure) bypass the
|
||||
// S&F retry loop entirely, so no retry-loop telemetry ever fires.
|
||||
// This helper must emit the Attempted + CachedResolve terminal rows
|
||||
// itself, otherwise Tracking.Status(id) would stay Submitted forever
|
||||
// and the audit log would be missing the terminal lifecycle. The
|
||||
// WasBuffered=true path is unaffected — the S&F retry loop owns the
|
||||
// Attempted + Resolve emissions there.
|
||||
ExternalCallResult? result;
|
||||
try
|
||||
{
|
||||
await _gateway.CachedWriteAsync(
|
||||
result = await _gateway.CachedWriteAsync(
|
||||
name, sql, parameters, _instanceName, cancellationToken, trackedId,
|
||||
// Audit Log #23 (ExecutionId Task 4): thread the script
|
||||
// execution's ExecutionId + SourceScript so a buffered
|
||||
@@ -1350,9 +1361,148 @@ public class ScriptRuntimeContext
|
||||
throw;
|
||||
}
|
||||
|
||||
// M2.3 (#7): immediate-completion lifecycle — emit the missing
|
||||
// Attempted + CachedResolve rows when the underlying write resolved
|
||||
// without engaging the store-and-forward retry loop (immediate
|
||||
// success or a synchronous permanent failure).
|
||||
if (result is { WasBuffered: false })
|
||||
{
|
||||
await EmitImmediateDbTerminalTelemetryAsync(
|
||||
name, target, trackedId, result, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
return trackedId;
|
||||
}
|
||||
|
||||
/// <summary>
/// M2.3 (#7): emits the lifecycle rows for a <c>Database.CachedWrite</c>
/// whose outcome was decided on the immediate attempt, i.e. without the
/// store-and-forward (S&amp;F) retry loop. Two packets are forwarded in
/// order: an <c>Attempted</c> row, then a terminal <c>CachedResolve</c> row
/// whose status is <c>Delivered</c> when <paramref name="result"/> reports
/// success and <c>Failed</c> otherwise. This is the database counterpart of
/// <see cref="EmitImmediateTerminalTelemetryAsync"/>. Emission is
/// best-effort: a forwarder exception is logged as a warning and swallowed
/// (alog.md §7), and nothing is emitted when no forwarder is configured.
/// </summary>
/// <param name="connectionName">Connection name reported in warning log messages.</param>
/// <param name="target">Target value stamped on both telemetry packets.</param>
/// <param name="trackedId">Tracked operation id the packets are correlated to.</param>
/// <param name="result">Outcome of the immediate write attempt.</param>
/// <param name="cancellationToken">Token passed through to the forwarder.</param>
private async Task EmitImmediateDbTerminalTelemetryAsync(
    string connectionName,
    string target,
    TrackedOperationId trackedId,
    ExternalCallResult result,
    CancellationToken cancellationToken)
{
    // No forwarder wired up: there is nowhere to send telemetry.
    if (_cachedForwarder == null)
    {
        return;
    }

    // One timestamp is shared by both rows of this immediate lifecycle.
    var stampUtc = DateTime.UtcNow;

    // Same mapping as the API path: success -> Delivered, anything else ->
    // Failed. Transient failures are buffered (WasBuffered=true) and never
    // get here, so a failed immediate attempt is always a permanent failure.
    var (terminalAuditStatus, terminalOperationalStatus) = result.Success
        ? (AuditStatus.Delivered, "Delivered")
        : (AuditStatus.Failed, "Failed");

    // --- Attempted row -----------------------------------------------------
    var attemptedPacket = TryBuildDbTerminalTelemetry(
        connectionName,
        target,
        trackedId,
        stampUtc,
        AuditKind.DbWriteCached,
        AuditStatus.Attempted,
        "Attempted",
        result,
        isTerminal: false);

    if (attemptedPacket is { } attemptedRow)
    {
        try
        {
            await _cachedForwarder
                .ForwardAsync(attemptedRow, cancellationToken)
                .ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "Immediate-Attempted telemetry forward failed for Database.CachedWrite {Connection} (TrackedOperationId {Id})",
                connectionName,
                trackedId);
        }
    }

    // --- CachedResolve row -------------------------------------------------
    var resolvePacket = TryBuildDbTerminalTelemetry(
        connectionName,
        target,
        trackedId,
        stampUtc,
        AuditKind.CachedResolve,
        terminalAuditStatus,
        terminalOperationalStatus,
        result,
        isTerminal: true);

    if (resolvePacket is { } resolveRow)
    {
        try
        {
            await _cachedForwarder
                .ForwardAsync(resolveRow, cancellationToken)
                .ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(
                ex,
                "Immediate-CachedResolve telemetry forward failed for Database.CachedWrite {Connection} (TrackedOperationId {Id})",
                connectionName,
                trackedId);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Assembles a single immediate-completion <c>DbOutbound</c> telemetry
/// packet (audit event plus operational row). If anything thrown during
/// construction is caught, a warning is logged and <c>null</c> is returned,
/// so a build failure only skips the emission instead of aborting the
/// script.
/// </summary>
/// <param name="connectionName">Connection name reported in the warning log.</param>
/// <param name="target">Target value written to the audit event and the operational row.</param>
/// <param name="trackedId">Tracked operation id; its value is used as the audit correlation id.</param>
/// <param name="occurredAtUtc">Timestamp used for the audit event and the operational created/updated/terminal fields.</param>
/// <param name="kind">Audit kind of the row being built.</param>
/// <param name="auditStatus">Status recorded on the audit event.</param>
/// <param name="operationalStatus">Status string recorded on the operational row.</param>
/// <param name="result">Write outcome; its error message is carried only when it is not a success.</param>
/// <param name="isTerminal">When <c>true</c>, the operational row's terminal timestamp is set to <paramref name="occurredAtUtc"/>; otherwise it is left <c>null</c>.</param>
/// <returns>The built packet, or <c>null</c> when construction threw.</returns>
private CachedCallTelemetry? TryBuildDbTerminalTelemetry(
    string connectionName,
    string target,
    TrackedOperationId trackedId,
    DateTime occurredAtUtc,
    AuditKind kind,
    AuditStatus auditStatus,
    string operationalStatus,
    ExternalCallResult result,
    bool isTerminal)
{
    try
    {
        // The error text is only meaningful for a failed outcome.
        var failureText = result.Success ? null : result.ErrorMessage;

        // An empty site id is reported as "no site" on the audit event.
        var auditSiteId = string.IsNullOrEmpty(_siteId) ? null : _siteId;

        DateTime? terminalAtUtc = isTerminal ? occurredAtUtc : null;

        var auditEvent = ScadaBridgeAuditEventFactory.Create(
            channel: AuditChannel.DbOutbound,
            kind: kind,
            status: auditStatus,
            occurredAtUtc: DateTime.SpecifyKind(occurredAtUtc, DateTimeKind.Utc),
            target: target,
            correlationId: trackedId.Value,
            executionId: _executionId,
            parentExecutionId: _parentExecutionId,
            sourceSiteId: auditSiteId,
            sourceInstanceId: _instanceName,
            sourceScript: _sourceScript,
            errorMessage: failureText);

        var operationalRow = new SiteCallOperational(
            TrackedOperationId: trackedId,
            Channel: "DbOutbound",
            Target: target,
            SourceSite: _siteId,
            SourceNode: _sourceNode,
            Status: operationalStatus,
            RetryCount: 0,
            LastError: failureText,
            HttpStatus: null,
            CreatedAtUtc: occurredAtUtc,
            UpdatedAtUtc: occurredAtUtc,
            TerminalAtUtc: terminalAtUtc);

        return new CachedCallTelemetry(
            Audit: auditEvent,
            Operational: operationalRow);
    }
    catch (Exception buildEx)
    {
        _logger.LogWarning(
            buildEx,
            "Failed to build immediate-{Kind} telemetry for Database.CachedWrite {Connection} (TrackedOperationId {Id}) — skipping emission",
            kind,
            connectionName,
            trackedId);

        return null;
    }
}
|
||||
|
||||
private async Task EmitCachedDbSubmitTelemetryAsync(
|
||||
string connectionName,
|
||||
TrackedOperationId trackedId,
|
||||
|
||||
Reference in New Issue
Block a user