fix(configuration): resolve Medium code-review findings (Configuration-002, -003, -006, -009)
Configuration-002: sp_PublishGeneration is transaction-nesting aware (BEGIN TRANSACTION vs SAVE TRANSACTION on @@TRANCOUNT) so a caller's outer transaction survives a publish failure; sp_ValidateDraft wrapped in TRY/CATCH. Configuration-003: ValidatePathLength uses the cluster's actual Enterprise/Site lengths when available, falling back to the conservative approximation. Configuration-006: ResilientConfigReader treats a command-timeout TaskCanceledException as a fault (not caller cancellation) and falls back. Configuration-009: removed the checked-in plaintext sa connection string; CreateDbContext now requires OTOPCUA_CONFIG_CONNECTION. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,19 +5,27 @@ namespace ZB.MOM.WW.OtOpcUa.Configuration;
|
||||
|
||||
/// <summary>
|
||||
/// Used by <c>dotnet ef</c> at design time (migrations, scaffolding). Reads the connection string
|
||||
/// from the <c>OTOPCUA_CONFIG_CONNECTION</c> environment variable, falling back to the local dev
|
||||
/// container on <c>localhost:1433</c>.
|
||||
/// from the <c>OTOPCUA_CONFIG_CONNECTION</c> environment variable.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Set the variable before running migration commands, e.g.:
|
||||
/// <code>
|
||||
/// $env:OTOPCUA_CONFIG_CONNECTION = "Server=10.100.0.35,14330;Database=OtOpcUaConfig;Trusted_Connection=True;TrustServerCertificate=True;"
|
||||
/// dotnet ef migrations add MyMigration --project src/Core/ZB.MOM.WW.OtOpcUa.Configuration
|
||||
/// </code>
|
||||
/// No credential is embedded in source. Do not add a plaintext password as a fallback.
|
||||
/// </remarks>
|
||||
public sealed class DesignTimeDbContextFactory : IDesignTimeDbContextFactory<OtOpcUaConfigDbContext>
|
||||
{
|
||||
// Host-port 14330 avoids collision with the native MSSQL14 instance on 1433 (Galaxy "ZB" DB).
|
||||
private const string DefaultConnectionString =
|
||||
"Server=localhost,14330;Database=OtOpcUaConfig;User Id=sa;Password=OtOpcUaDev_2026!;TrustServerCertificate=True;Encrypt=False;";
|
||||
|
||||
public OtOpcUaConfigDbContext CreateDbContext(string[] args)
|
||||
{
|
||||
var connection = Environment.GetEnvironmentVariable("OTOPCUA_CONFIG_CONNECTION")
|
||||
?? DefaultConnectionString;
|
||||
var connection = Environment.GetEnvironmentVariable("OTOPCUA_CONFIG_CONNECTION");
|
||||
|
||||
if (string.IsNullOrWhiteSpace(connection))
|
||||
throw new InvalidOperationException(
|
||||
"OTOPCUA_CONFIG_CONNECTION is not set. " +
|
||||
"Export the variable before running 'dotnet ef' commands. Example: " +
|
||||
"Server=10.100.0.35,14330;Database=OtOpcUaConfig;Trusted_Connection=True;TrustServerCertificate=True;");
|
||||
|
||||
var options = new DbContextOptionsBuilder<OtOpcUaConfigDbContext>()
|
||||
.UseSqlServer(connection, sql => sql.MigrationsAssembly(typeof(OtOpcUaConfigDbContext).Assembly.FullName))
|
||||
|
||||
@@ -48,7 +48,13 @@ public sealed class ResilientConfigReader
|
||||
UseJitter = true,
|
||||
Delay = TimeSpan.FromMilliseconds(100),
|
||||
MaxDelay = TimeSpan.FromSeconds(1),
|
||||
ShouldHandle = new PredicateBuilder().Handle<Exception>(ex => ex is not OperationCanceledException),
|
||||
// Handle ALL exceptions including OperationCanceledException. A SQL command-level
|
||||
// timeout surfaces as TaskCanceledException (derives from OperationCanceledException)
|
||||
// when the caller's token is NOT cancelled, and must be retried just like any other
|
||||
// transient error. Polly itself checks the cancellation token between retries and
|
||||
// stops with OperationCanceledException on genuine caller cancellation regardless of
|
||||
// this predicate.
|
||||
ShouldHandle = new PredicateBuilder().Handle<Exception>(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -76,7 +82,11 @@ public sealed class ResilientConfigReader
|
||||
_staleFlag.MarkFresh();
|
||||
return result;
|
||||
}
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
// Catch all exceptions that are NOT genuine caller cancellations. A SQL command-level
|
||||
// timeout surfaces as TaskCanceledException (derives from OperationCanceledException)
|
||||
// but the caller's token is NOT cancelled — we must fall back to the sealed cache for
|
||||
// that case, not propagate. Only rethrow if the caller actually requested cancellation.
|
||||
catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
_logger.LogWarning(ex, "Central-DB read failed after retries; falling back to sealed cache for cluster {ClusterId}", clusterId);
|
||||
// GenerationCacheUnavailableException surfaces intentionally — fails the caller's
|
||||
|
||||
@@ -270,7 +270,22 @@ BEGIN
|
||||
SET NOCOUNT ON;
|
||||
SET XACT_ABORT ON;
|
||||
|
||||
BEGIN TRANSACTION;
|
||||
-- Transaction-nesting awareness: if a caller (e.g. sp_RollbackToGeneration) already
|
||||
-- holds a transaction, we use SAVE TRANSACTION so our failure path rolls back only to
|
||||
-- the savepoint instead of issuing a bare ROLLBACK that wipes the caller's transaction
|
||||
-- (which sets @@TRANCOUNT = 0 and causes error 3902 on the caller's subsequent COMMIT).
|
||||
DECLARE @OwnsTxn bit = 0;
|
||||
DECLARE @SaveName nvarchar(32) = N'sp_PublishGeneration';
|
||||
|
||||
IF @@TRANCOUNT = 0
|
||||
BEGIN
|
||||
BEGIN TRANSACTION;
|
||||
SET @OwnsTxn = 1;
|
||||
END
|
||||
ELSE
|
||||
BEGIN
|
||||
SAVE TRANSACTION sp_PublishGeneration;
|
||||
END
|
||||
|
||||
DECLARE @Lock nvarchar(255) = N'OtOpcUa_Publish_' + @ClusterId;
|
||||
DECLARE @LockResult int;
|
||||
@@ -278,20 +293,22 @@ BEGIN
|
||||
IF @LockResult < 0
|
||||
BEGIN
|
||||
RAISERROR('PublishConflict: another publish is in progress for cluster %s', 16, 1, @ClusterId);
|
||||
ROLLBACK;
|
||||
IF @OwnsTxn = 1 ROLLBACK;
|
||||
ELSE ROLLBACK TRANSACTION sp_PublishGeneration;
|
||||
RETURN;
|
||||
END
|
||||
|
||||
-- sp_ValidateDraft signals every rejection with RAISERROR(..., 16, 1) — a severity-16 error is
|
||||
-- NOT batch-aborting and SET XACT_ABORT ON does not abort the transaction for it, so without a
|
||||
-- TRY/CATCH control would return here and the draft would publish despite failed validation.
|
||||
-- Catch the validation error, roll back the publish transaction, and re-raise so the caller sees
|
||||
-- the real validation failure.
|
||||
-- Catch the validation error, roll back the publish transaction (only to our savepoint when a
|
||||
-- caller owns the outer transaction), and re-raise so the caller sees the real validation failure.
|
||||
BEGIN TRY
|
||||
EXEC dbo.sp_ValidateDraft @DraftGenerationId = @DraftGenerationId;
|
||||
END TRY
|
||||
BEGIN CATCH
|
||||
IF @@TRANCOUNT > 0 ROLLBACK;
|
||||
IF @OwnsTxn = 1 ROLLBACK;
|
||||
ELSE ROLLBACK TRANSACTION sp_PublishGeneration;
|
||||
THROW;
|
||||
END CATCH
|
||||
|
||||
@@ -324,15 +341,16 @@ BEGIN
|
||||
|
||||
IF @@ROWCOUNT = 0
|
||||
BEGIN
|
||||
RAISERROR('Draft %I64d for cluster %s not found (was it already published?)', 16, 1, @DraftGenerationId, @ClusterId);
|
||||
ROLLBACK;
|
||||
RAISERROR('Draft %I64d for cluster %s not in Draft status (was it already published?)', 16, 1, @DraftGenerationId, @ClusterId);
|
||||
IF @OwnsTxn = 1 ROLLBACK;
|
||||
ELSE ROLLBACK TRANSACTION sp_PublishGeneration;
|
||||
RETURN;
|
||||
END
|
||||
|
||||
INSERT dbo.ConfigAuditLog (Principal, EventType, ClusterId, GenerationId)
|
||||
VALUES (SUSER_SNAME(), 'Published', @ClusterId, @DraftGenerationId);
|
||||
|
||||
COMMIT;
|
||||
IF @OwnsTxn = 1 COMMIT;
|
||||
END
|
||||
";
|
||||
|
||||
|
||||
@@ -11,6 +11,18 @@ public sealed class DraftSnapshot
|
||||
public required long GenerationId { get; init; }
|
||||
public required string ClusterId { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Cluster's Enterprise segment (UNS level 1). When set, <see cref="DraftValidator"/> uses
|
||||
/// the actual length for path-length checks instead of a conservative 32-char upper bound.
|
||||
/// </summary>
|
||||
public string? Enterprise { get; init; }
|
||||
|
||||
/// <summary>
|
||||
/// Cluster's Site segment (UNS level 2). When set, <see cref="DraftValidator"/> uses the
|
||||
/// actual length for path-length checks instead of a conservative 32-char upper bound.
|
||||
/// </summary>
|
||||
public string? Site { get; init; }
|
||||
|
||||
public IReadOnlyList<Namespace> Namespaces { get; init; } = [];
|
||||
public IReadOnlyList<DriverInstance> DriverInstances { get; init; } = [];
|
||||
public IReadOnlyList<Device> Devices { get; init; } = [];
|
||||
|
||||
@@ -59,8 +59,13 @@ public static class DraftValidator
|
||||
/// <summary>Cluster.Enterprise + Site + area + line + equipment + 4 slashes ≤ 200 chars.</summary>
|
||||
private static void ValidatePathLength(DraftSnapshot draft, List<ValidationError> errors)
|
||||
{
|
||||
// The cluster row isn't in the snapshot — we assume caller pre-validated Enterprise+Site
|
||||
// length and bound them as constants <= 64 chars each. Here we validate the dynamic portion.
|
||||
// Use actual Enterprise/Site lengths when the snapshot carries them (populated by
|
||||
// DraftValidationService from the ServerCluster row). Fall back to a conservative
|
||||
// 32-char upper bound per segment when not supplied — over-penalises short values
|
||||
// but never under-penalises long ones, which is acceptable for the fallback case.
|
||||
var enterpriseLen = draft.Enterprise?.Length ?? 32;
|
||||
var siteLen = draft.Site?.Length ?? 32;
|
||||
|
||||
var areaById = draft.UnsAreas.ToDictionary(a => a.UnsAreaId);
|
||||
var lineById = draft.UnsLines.ToDictionary(l => l.UnsLineId);
|
||||
|
||||
@@ -69,8 +74,7 @@ public static class DraftValidator
|
||||
if (!lineById.TryGetValue(eq.UnsLineId!, out var line)) continue;
|
||||
if (!areaById.TryGetValue(line.UnsAreaId, out var area)) continue;
|
||||
|
||||
// rough upper bound: Enterprise+Site at most 32+32; add dynamic segments + 4 slashes
|
||||
var len = 32 + 32 + area.Name.Length + line.Name.Length + eq.Name.Length + 4;
|
||||
var len = enterpriseLen + siteLen + area.Name.Length + line.Name.Length + eq.Name.Length + 4;
|
||||
if (len > MaxPathLength)
|
||||
errors.Add(new("PathTooLong",
|
||||
$"Equipment path exceeds {MaxPathLength} chars (approx {len})",
|
||||
|
||||
@@ -18,10 +18,16 @@ public sealed class DraftValidationService(OtOpcUaConfigDbContext db)
|
||||
.FirstOrDefaultAsync(g => g.GenerationId == draftId, ct)
|
||||
?? throw new InvalidOperationException($"Draft {draftId} not found");
|
||||
|
||||
// Load the cluster row so path-length validation uses actual Enterprise/Site lengths.
|
||||
var cluster = await db.ServerClusters.AsNoTracking()
|
||||
.FirstOrDefaultAsync(c => c.ClusterId == draft.ClusterId, ct);
|
||||
|
||||
var snapshot = new DraftSnapshot
|
||||
{
|
||||
GenerationId = draft.GenerationId,
|
||||
ClusterId = draft.ClusterId,
|
||||
Enterprise = cluster?.Enterprise,
|
||||
Site = cluster?.Site,
|
||||
Namespaces = await db.Namespaces.AsNoTracking().Where(n => n.GenerationId == draftId).ToListAsync(ct),
|
||||
DriverInstances = await db.DriverInstances.AsNoTracking().Where(d => d.GenerationId == draftId).ToListAsync(ct),
|
||||
Devices = await db.Devices.AsNoTracking().Where(d => d.GenerationId == draftId).ToListAsync(ct),
|
||||
|
||||
Reference in New Issue
Block a user