using System.Data.Common;
using System.IO;
using System.Net.Sockets;
using Microsoft.Data.SqlClient;

namespace ZB.MOM.WW.ScadaBridge.ExternalSystemGateway;

/// <summary>
/// M2.3 (#7): classifies a SQL Server failure as transient (a brief wait /
/// retry may succeed — buffer to store-and-forward) or permanent (the identical
/// statement cannot succeed — return to the script / park the buffered message).
/// </summary>
/// <remarks>
/// <para>
/// This is the database-side parallel of <c>ErrorClassifier</c> (the
/// HTTP path). The two are kept separate because the inputs differ: HTTP keys
/// off status codes / exception types, SQL keys off
/// <see cref="SqlException.Number"/>.
/// </para>
/// <para>
/// <b>Transient set.</b> Only connection-loss, timeout, deadlock, and Azure SQL
/// throttle/availability error numbers are transient — failures whose cause is
/// external to the statement and may clear on its own:
/// </para>
/// <list type="bullet">
/// <item><description><c>-2</c> — query / command timeout expired.</description></item>
/// <item><description><c>-1</c> — a connection-level error (general SqlClient connection failure).</description></item>
/// <item><description><c>2</c> — SQL Server / network instance not found or not accessible.</description></item>
/// <item><description><c>53</c> — network path to the server was not found.</description></item>
/// <item><description><c>64</c> — connection terminated mid-session (transport error).</description></item>
/// <item><description><c>233</c> — no process on the other end of the named pipe.</description></item>
/// <item><description><c>1205</c> — the session was chosen as a deadlock victim.</description></item>
/// <item><description><c>10053</c> — transport-level abort (software caused connection abort).</description></item>
/// <item><description><c>10054</c> — connection reset by peer.</description></item>
/// <item><description><c>10060</c> — connection attempt timed out.</description></item>
/// <item><description><c>40197</c> — Azure SQL service error processing the request; retry.</description></item>
/// <item><description><c>40501</c> — Azure SQL service is busy.</description></item>
/// <item><description><c>40613</c> — Azure SQL database is currently unavailable.</description></item>
/// <item><description><c>49918</c> / <c>49919</c> / <c>49920</c> — Azure SQL throttling (too many requests / operations).</description></item>
/// </list>
/// <para>
/// <b>Everything else is permanent.</b> Constraint violations (547, 2627, 2601),
/// syntax errors (102, 156, 207, 208), and permission errors (229, 230, 262) are
/// the obvious permanent cases, but the policy is broader: any error number not
/// in the transient set — including unknown / undocumented / ambiguous numbers —
/// is treated as permanent. Fail-fast is the safer default: silently
/// retrying an unrecognised error forever (the pre-M2.3 behaviour) hides
/// authoring bugs and can replay duplicate side effects. A genuinely transient
/// number we have not enumerated will, at worst, surface to the script as a
/// permanent failure — a loud, fixable outcome — rather than spin in an
/// unbounded retry loop.
/// </para>
/// </remarks>
public static class SqlErrorClassifier
{
    /// <summary>
    /// The complete set of SQL Server error numbers treated as transient. See the
    /// type-level remarks on <see cref="SqlErrorClassifier"/> for the per-number
    /// rationale. Anything outside this set is permanent.
    /// </summary>
    private static readonly HashSet<int> TransientErrorNumbers = new()
    {
        -2,
        -1,
        2,
        53,
        64,
        233,
        1205,
        10053,
        10054,
        10060,
        40197,
        40501,
        40613,
        49918,
        49919,
        49920,
    };

    /// <summary>
    /// Determines whether a SQL Server error number represents a transient
    /// failure. Unknown / undocumented numbers default to permanent
    /// (<see langword="false"/>).
    /// </summary>
    /// <param name="errorNumber">The SQL Server error number (e.g. <see cref="SqlException.Number"/>).</param>
    /// <returns>
    /// <see langword="true"/> if the number is in the transient set; otherwise
    /// <see langword="false"/>.
    /// </returns>
    public static bool IsTransient(int errorNumber) =>
        TransientErrorNumbers.Contains(errorNumber);

    /// <summary>
    /// Determines whether a <see cref="SqlException"/> represents a transient
    /// failure by classifying its top-level <see cref="SqlException.Number"/>.
    /// </summary>
    /// <param name="exception">The SQL exception to classify.</param>
    /// <returns>
    /// <see langword="true"/> if the exception's error number is transient;
    /// otherwise <see langword="false"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="exception"/> is <see langword="null"/>.
    /// </exception>
    public static bool IsTransient(SqlException exception)
    {
        ArgumentNullException.ThrowIfNull(exception);
        return IsTransient(exception.Number);
    }

    /// <summary>
    /// Determines whether an arbitrary <see cref="Exception"/> represents a
    /// transient database failure — the SQL-path parallel of
    /// <c>ErrorClassifier.IsTransient</c> on the HTTP path.
    /// </summary>
    /// <remarks>
    /// <para>
    /// A live DB outage does not always surface as a <see cref="SqlException"/>:
    /// once the underlying connection / socket is torn down, the driver raises
    /// transport-level exceptions instead. These are retryable — a retry
    /// can succeed once the server is reachable again — so they are classified
    /// transient (buffered to store-and-forward) rather than escaping unclassified
    /// to crash the calling Script Execution Actor. The transient set:
    /// </para>
    /// <list type="bullet">
    /// <item><description><see cref="InvalidOperationException"/> — connection-state error (e.g. "the connection is not open" / pooled connection broken).</description></item>
    /// <item><description><see cref="IOException"/> — transport read/write failure mid-session.</description></item>
    /// <item><description><see cref="SocketException"/> — TCP-level failure (connection refused/reset/timed out).</description></item>
    /// <item><description><see cref="TimeoutException"/> — command / connection timeout surfaced as a CLR <see cref="TimeoutException"/>.</description></item>
    /// <item><description><see cref="TaskCanceledException"/> — driver-level cancellation/timeout NOT tied to a caller token (the caller-token case is handled before classification — see the gateway's ordered catches).</description></item>
    /// <item><description>Any <see cref="DbException"/> that is NOT a <see cref="SqlException"/> — a provider/driver transport error (a real <see cref="SqlException"/> is classified by error number via the overloads above, never here).</description></item>
    /// </list>
    /// <para>
    /// <b>Everything else is NOT transient</b> and must propagate, exactly as the
    /// HTTP path lets genuinely-unexpected exceptions escape past its
    /// <c>catch (Exception ex) when (ErrorClassifier.IsTransient(ex))</c> filter.
    /// Authoring bugs (for example an <see cref="ArgumentException"/> or a
    /// <see cref="NullReferenceException"/>) are loud, fixable failures — silently
    /// buffering and retrying them forever would hide the bug.
    /// </para>
    /// </remarks>
    /// <param name="exception">The exception to classify.</param>
    /// <returns>
    /// <see langword="true"/> for a transport/connection/timeout/driver exception;
    /// otherwise <see langword="false"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="exception"/> is <see langword="null"/>.
    /// </exception>
    public static bool IsTransient(Exception exception)
    {
        ArgumentNullException.ThrowIfNull(exception);

        return exception switch
        {
            // A real SqlException is classified by its error number, never by
            // type. This arm must stay first: SqlException derives from
            // DbException, and an unknown SqlException has to remain permanent
            // (fail-fast) rather than be swept up by the DbException arm below.
            SqlException sql => IsTransient(sql),

            // Transport / connection / timeout / driver failures. DbException
            // here means any non-SqlException DbException (SqlException was
            // handled above).
            // NOTE(review): ObjectDisposedException derives from
            // InvalidOperationException, so it is classified transient here —
            // confirm that is intended.
            InvalidOperationException
                or IOException
                or SocketException
                or TimeoutException
                or TaskCanceledException
                or DbException => true,

            _ => false,
        };
    }

    /// <summary>
    /// Classifies a <see cref="SqlException"/> and rethrows it as the matching
    /// strongly-typed failure: <see cref="TransientDatabaseException"/> for a
    /// transient error number, <see cref="PermanentDatabaseException"/> otherwise.
    /// Mirrors the classify-then-throw pattern of the HTTP path — the callers
    /// then branch on the typed exception rather than on the raw
    /// <see cref="SqlException"/>.
    /// </summary>
    /// <param name="context">A short human-readable description of the failing operation (e.g. the connection name).</param>
    /// <param name="exception">The SQL exception to classify and wrap.</param>
    /// <returns>This method never returns normally — it always throws.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="exception"/> is <see langword="null"/>.
    /// </exception>
    /// <exception cref="TransientDatabaseException">Thrown when the error number is transient.</exception>
    /// <exception cref="PermanentDatabaseException">Thrown when the error number is permanent (the default).</exception>
    public static Exception Throw(string context, SqlException exception)
    {
        ArgumentNullException.ThrowIfNull(exception);

        if (IsTransient(exception))
        {
            throw new TransientDatabaseException(
                $"Transient SQL error {exception.Number} on {context}: {exception.Message}",
                exception.Number,
                exception);
        }

        throw new PermanentDatabaseException(
            $"Permanent SQL error {exception.Number} on {context}: {exception.Message}",
            exception.Number,
            exception);
    }
}

/// <summary>
/// Signals a transient database failure suitable for store-and-forward retry —
/// the SQL-path parallel of the HTTP path's transient-failure exception.
/// </summary>
public class TransientDatabaseException : Exception
{
    /// <summary>Gets the SQL Server error number that caused the failure, if known.</summary>
    public int? SqlErrorNumber { get; }

    /// <summary>Initializes a new <see cref="TransientDatabaseException"/>.</summary>
    /// <param name="message">The error message.</param>
    /// <param name="errorNumber">The SQL Server error number, if available.</param>
    /// <param name="innerException">Optional inner exception (typically the original <see cref="SqlException"/>).</param>
    public TransientDatabaseException(string message, int? errorNumber = null, Exception? innerException = null)
        : base(message, innerException)
    {
        SqlErrorNumber = errorNumber;
    }
}

/// <summary>
/// Signals a permanent database failure that must not be retried — the SQL-path
/// parallel of the HTTP path's permanent-failure exception. Returned
/// synchronously to the calling script on the immediate attempt and parks the
/// message on the store-and-forward retry path.
/// </summary>
public class PermanentDatabaseException : Exception
{
    /// <summary>Gets the SQL Server error number that caused the failure, if known.</summary>
    public int? SqlErrorNumber { get; }

    /// <summary>Initializes a new <see cref="PermanentDatabaseException"/>.</summary>
    /// <param name="message">The error message.</param>
    /// <param name="errorNumber">The SQL Server error number, if available.</param>
    /// <param name="innerException">Optional inner exception (typically the original <see cref="SqlException"/>).</param>
    public PermanentDatabaseException(string message, int? errorNumber = null, Exception? innerException = null)
        : base(message, innerException)
    {
        SqlErrorNumber = errorNumber;
    }
}