diff --git a/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/DatabaseGateway.cs b/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/DatabaseGateway.cs
index 0096fc57..058cd67c 100644
--- a/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/DatabaseGateway.cs
+++ b/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/DatabaseGateway.cs
@@ -281,15 +281,21 @@ public class DatabaseGateway : IDatabaseGateway
///
/// M2.3 (#7): executes a parameterised SQL write against the given connection
- /// string and classifies any into
- /// /
- /// via . This is the single SQL-execution seam
- /// shared by the immediate attempt and the
+ /// string and classifies the outcome into
+ /// TransientDatabaseException or its permanent counterpart,
+ /// mirroring the ordered catches of
+ /// ExternalSystemClient.InvokeHttpAsync on the API path:
+ /// caller-requested cancellation propagates unchanged; a
+ /// SqlException is classified by error number via SqlErrorClassifier.Throw; a
+ /// non-SqlException transport/connection outage is classified
+ /// transient via SqlErrorClassifier.IsTransient(Exception);
+ /// genuinely-unexpected exceptions propagate. This is the single classification
+ /// seam shared by the immediate attempt and the
/// retry path. Marked internal virtual
- /// so tests can substitute success / transient / permanent outcomes without a
- /// real SQL Server (and without fabricating a , which
- /// has no public constructor). Mirrors the role of
- /// on the API path.
+ /// so tests can substitute already-classified outcomes; the raw I/O lives in
+ /// the inner RunSqlAsync seam so tests can also drive raw outage
+ /// exceptions through this classification (without fabricating a
+ /// SqlException, which has no public constructor).
///
/// The human-readable connection name, used only for the classified error message (never the connection string — that would leak credentials into logs / script-visible errors).
/// The ADO.NET connection string to write through.
@@ -297,7 +303,8 @@ public class DatabaseGateway : IDatabaseGateway
/// Materialised CLR parameter values (may be empty).
/// Cancellation token for the write.
/// A task that completes when the write succeeds.
- /// Thrown for a transient SQL error number.
+ /// Rethrown unchanged when the caller's cancellationToken requested cancellation.
+ /// Thrown for a transient SQL error number or a non-Sql transport/connection outage.
/// Thrown for a permanent (or unknown) SQL error number.
internal virtual async Task ExecuteWriteAsync(
string connectionName,
@@ -306,20 +313,28 @@ public class DatabaseGateway : IDatabaseGateway
IReadOnlyDictionary parameters,
CancellationToken cancellationToken)
{
+ // M2.3 (#7) code-review fix: the catch ordering MIRRORS
+ // ExternalSystemClient.InvokeHttpAsync exactly so the SQL path classifies
+ // a live outage the same way the HTTP path does:
+ // 1. caller-requested cancellation propagates UNCHANGED (never a "DB error");
+ // 2. a SqlException is classified by error number (transient/permanent);
+ // 3. a NON-SqlException transport/connection failure (InvalidOperationException
+ // "connection not open", IOException, SocketException, TimeoutException,
+ // a non-Sql DbException, …) is TRANSIENT — buffered + retried, because a
+ // retry can succeed once the server is reachable. The pre-fix code only
+ // caught SqlException, so these escaped unclassified and crashed the
+ // Script Execution Actor instead of buffering;
+ // 4. genuinely-unexpected exceptions (e.g. an authoring ArgumentException)
+ // propagate — same as the HTTP path lets unexpected exceptions escape.
try
{
- await using var connection = new SqlConnection(connectionString);
- await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
- using var command = connection.CreateCommand();
- command.CommandText = sql;
- foreach (var (key, value) in parameters)
- {
- var parameter = command.CreateParameter();
- parameter.ParameterName = key.StartsWith('@') ? key : "@" + key;
- parameter.Value = value ?? DBNull.Value;
- command.Parameters.Add(parameter);
- }
- await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+ await RunSqlAsync(connectionString, sql, parameters, cancellationToken).ConfigureAwait(false);
+ }
+ catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
+ {
+ // Step 1 of the ordering above: the caller asked to abandon the work — propagate
+ // the cancellation unchanged; it must never be reclassified as a transient DB error.
+ throw;
}
catch (SqlException ex)
{
@@ -328,6 +343,50 @@ public class DatabaseGateway : IDatabaseGateway
// is the connection NAME, never the connection string.
throw SqlErrorClassifier.Throw(connectionName, ex);
}
+ catch (Exception ex) when (SqlErrorClassifier.IsTransient(ex))
+ {
+ // Step 3 of the ordering above: a live outage that did not surface as a SqlException — treat as
+ // transient so the caller buffers + retries. The message uses the
+ // connection NAME, never the connection string (credential safety).
+ throw new TransientDatabaseException(
+ $"Transient database error on {connectionName}: {ex.Message}",
+ errorNumber: null,
+ ex);
+ }
+ }
+
+ ///
+ /// M2.3 (#7): the raw ADO.NET write — opens the connection, builds the
+ /// command, and executes it. Marked internal virtual so tests can throw
+ /// RAW outage-shaped exceptions (e.g. InvalidOperationException,
+ /// SocketException) through the PRODUCTION
+ /// classification in ExecuteWriteAsync. This is the SQL parallel
+ /// of client.SendAsync inside ExternalSystemClient.InvokeHttpAsync:
+ /// the actual I/O, wrapped by the ordered classification catches in the caller.
+ ///
+ /// The ADO.NET connection string to write through.
+ /// The SQL statement to execute.
+ /// Materialised CLR parameter values (may be empty).
+ /// Cancellation token for the write.
+ /// A task that completes when the write succeeds.
+ internal virtual async Task RunSqlAsync(
+ string connectionString,
+ string sql,
+ IReadOnlyDictionary parameters,
+ CancellationToken cancellationToken)
+ {
+ await using var connection = new SqlConnection(connectionString); // using-declaration: disposed asynchronously when the method exits, on success or failure
+ await connection.OpenAsync(cancellationToken).ConfigureAwait(false);
+ using var command = connection.CreateCommand();
+ command.CommandText = sql;
+ foreach (var (key, value) in parameters)
+ {
+ var parameter = command.CreateParameter();
+ parameter.ParameterName = key.StartsWith('@') ? key : "@" + key; // accept keys with or without the '@' prefix
+ parameter.Value = value ?? DBNull.Value; // a null CLR value is bound as DBNull.Value
+ command.Parameters.Add(parameter);
+ }
+ await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); // the returned row count is discarded; no exception is caught here, so failures reach the caller raw
}
// ExternalSystemGateway-020: a JSON number that does not fit in Int64 must
diff --git a/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/SqlErrorClassifier.cs b/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/SqlErrorClassifier.cs
index f1e7a89f..bdba8298 100644
--- a/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/SqlErrorClassifier.cs
+++ b/src/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway/SqlErrorClassifier.cs
@@ -1,3 +1,6 @@
+using System.Data.Common;
+using System.IO;
+using System.Net.Sockets;
using Microsoft.Data.SqlClient;
namespace ZB.MOM.WW.ScadaBridge.ExternalSystemGateway;
@@ -84,6 +87,60 @@ public static class SqlErrorClassifier
return IsTransient(exception.Number);
}
+ ///
+ /// Determines whether an arbitrary Exception represents a
+ /// transient database failure — the SQL-path parallel of
+ /// ErrorClassifier.IsTransient on the HTTP path.
+ ///
+ ///
+ ///
+ /// A live DB outage does not always surface as a SqlException:
+ /// once the underlying connection / socket is torn down, the driver raises
+ /// transport-level exceptions instead. These are retryable — a retry
+ /// can succeed once the server is reachable again — so they are classified
+ /// transient (buffered to store-and-forward) rather than escaping unclassified
+ /// to crash the calling Script Execution Actor. The transient set:
+ ///
+ ///
+ /// InvalidOperationException — connection-state error (e.g. "the connection is not open" / pooled connection broken).
+ /// IOException — transport read/write failure mid-session.
+ /// SocketException — TCP-level failure (connection refused/reset/timed out).
+ /// TimeoutException — command / connection timeout surfaced as a CLR TimeoutException.
+ /// TaskCanceledException — driver-level cancellation/timeout NOT tied to a caller token (the caller-token case is handled before classification — see the gateway's ordered catches).
+ /// Any DbException that is NOT a SqlException — a provider/driver transport error (a real SqlException is classified by error number via the overloads above, never here).
+ ///
+ ///
+ /// Everything else is NOT transient and must propagate, exactly as the
+ /// HTTP path lets genuinely-unexpected exceptions escape past its
+ /// catch (Exception ex) when (ErrorClassifier.IsTransient(ex)) filter.
+ /// Authoring bugs (ArgumentException and similar programming errors)
+ /// are loud, fixable failures — silently buffering and retrying them
+ /// forever would hide the bug.
+ ///
+ ///
+ /// The exception to classify.
+ /// true for a transport/connection/timeout/driver exception; otherwise false.
+ public static bool IsTransient(Exception exception)
+ {
+ ArgumentNullException.ThrowIfNull(exception); // a null argument throws ArgumentNullException rather than returning false
+
+ // A real SqlException is classified by its error number (the overloads
+ // above), never by type — fall back to the number-based policy so an
+ // unknown SqlException stays permanent (fail-fast) rather than being
+ // swept up as transient by the DbException catch-all below.
+ if (exception is SqlException sql)
+ {
+ return IsTransient(sql);
+ }
+
+ return exception is InvalidOperationException // type patterns test the top-level exception only (InnerException is not walked); subclasses such as ObjectDisposedException match too
+ or IOException
+ or SocketException
+ or TimeoutException
+ or TaskCanceledException // a plain OperationCanceledException is NOT matched by this pattern
+ or DbException; // any non-SqlException DbException (SqlException handled above)
+ }
+
///
/// Classifies a and rethrows it as the matching
/// strongly-typed failure: for a
diff --git a/tests/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs b/tests/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs
index 46d18014..c77da622 100644
--- a/tests/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs
+++ b/tests/ZB.MOM.WW.ScadaBridge.ExternalSystemGateway.Tests/DatabaseGatewayTests.cs
@@ -347,6 +347,132 @@ public class DatabaseGatewayTests
Assert.False(delivered); // permanent — the S&F engine parks the message
}
+ // ── M2.3 (#7) code-review fix: ExecuteWriteAsync must classify NON-SqlException
+ // DB outages as transient (buffer+retry) and propagate cancellation —
+ // mirroring the HTTP path's ordered catches in InvokeHttpAsync. The pre-fix
+ // code only caught SqlException, so a live outage surfacing as
+ // InvalidOperationException / SocketException / IOException / TimeoutException
+ // escaped unclassified and crashed the Script Execution Actor instead of
+ // buffering. These tests drive the RAW execution seam (RunSqlAsync) so the
+ // PRODUCTION classification in ExecuteWriteAsync runs end-to-end. ──
+
+ public static IEnumerable