fix(configuration): resolve Medium code-review findings (Configuration-002, -003, -006, -009)

Configuration-002: sp_PublishGeneration is transaction-nesting aware
(BEGIN TRANSACTION vs SAVE TRANSACTION on @@TRANCOUNT) so a caller's outer
transaction survives a publish failure; sp_ValidateDraft wrapped in TRY/CATCH.
Configuration-003: ValidatePathLength uses the cluster's actual Enterprise/Site
lengths when available, falling back to the conservative approximation.
Configuration-006: ResilientConfigReader treats a command-timeout
TaskCanceledException as a fault (not caller cancellation) and falls back.
Configuration-009: removed the checked-in plaintext sa connection string;
CreateDbContext now requires OTOPCUA_CONFIG_CONNECTION.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-22 08:13:27 -04:00
parent 7e54e1e4a0
commit c126fc7a7d
9 changed files with 274 additions and 31 deletions

View File

@@ -232,4 +232,90 @@ public sealed class DraftValidatorTests
Enabled = enabled,
CreatedBy = "t",
};
// ------------------------------------------------------------------------------------
// ValidatePathLength — Enterprise/Site length precision (Configuration-003)
// ------------------------------------------------------------------------------------
[Fact]
public void PathLength_uses_actual_Enterprise_Site_when_provided()
{
    // Scenario: the snapshot carries real (short) Enterprise/Site values, so the
    // validator is expected to measure the path with them instead of the 32+32
    // worst-case approximation.
    //
    //   approximation : 32 + 32 + 32 + 32 + 90 + 4 = 222  (> 200, would be PathTooLong)
    //   actual values :  2 +  1 + 32 + 32 + 90 + 4 = 161  (<= 200, no PathTooLong)
    //
    // The 90-char equipment name is over the 32-char UNS segment limit, so an
    // UnsSegmentInvalid error may also be reported. That is a separate check; this
    // test only looks at PathTooLong.
    const string areaId = "area-a";
    const string lineId = "line-b";
    var equipmentUuid = Guid.NewGuid();

    var area = new UnsArea { UnsAreaId = areaId, ClusterId = "c", Name = new string('a', 32) };
    var line = new UnsLine { UnsLineId = lineId, UnsAreaId = areaId, Name = new string('b', 32) };
    var equipment = new Equipment
    {
        EquipmentUuid = equipmentUuid,
        EquipmentId = DraftValidator.DeriveEquipmentId(equipmentUuid),
        Name = new string('x', 90), // 90 chars: long enough to trip the approximation
        DriverInstanceId = "d",
        UnsLineId = lineId,
        MachineCode = "m",
    };

    var draft = new DraftSnapshot
    {
        GenerationId = 1,
        ClusterId = "c",
        Enterprise = "zb", // actual length: 2
        Site = "s",        // actual length: 1
        UnsAreas = [area],
        UnsLines = [line],
        Equipment = [equipment],
    };

    var validationErrors = DraftValidator.Validate(draft);

    validationErrors.ShouldNotContain(e => e.Code == "PathTooLong",
        "actual Enterprise='zb' + Site='s' keeps total path at 161 chars — under the 200-char limit");
}
[Fact]
public void PathLength_conservative_fallback_when_Enterprise_Site_absent()
{
    // Scenario: Enterprise and Site are not set on the snapshot, so the validator is
    // expected to assume the conservative 32 + 32 for those two segments.
    //
    //   area 32 + line 32 + equipment 29 = 93
    //   32 + 32 + 93 + 4 = 161  (<= 200)
    //
    // A path that fits even under the worst-case assumption must not be reported as
    // PathTooLong: the fallback must not over-penalise paths that would also be valid
    // with real Enterprise/Site values.
    //
    // NOTE(review): this asserts only the "no false positive" side. It would also pass
    // if the fallback were not applied at all; consider a companion case that expects
    // PathTooLong when the 32+32 assumption pushes the total over 200.
    const string areaId = "area-x";
    const string lineId = "line-y";
    var equipmentUuid = Guid.NewGuid();

    var area = new UnsArea { UnsAreaId = areaId, ClusterId = "c", Name = new string('a', 32) };
    var line = new UnsLine { UnsLineId = lineId, UnsAreaId = areaId, Name = new string('b', 32) };
    var equipment = new Equipment
    {
        EquipmentUuid = equipmentUuid,
        EquipmentId = DraftValidator.DeriveEquipmentId(equipmentUuid),
        Name = new string('c', 29),
        DriverInstanceId = "d",
        UnsLineId = lineId,
        MachineCode = "m",
    };

    // Enterprise and Site are intentionally left unset to take the fallback path.
    var draft = new DraftSnapshot
    {
        GenerationId = 1,
        ClusterId = "c",
        UnsAreas = [area],
        UnsLines = [line],
        Equipment = [equipment],
    };

    var validationErrors = DraftValidator.Validate(draft);

    validationErrors.ShouldNotContain(e => e.Code == "PathTooLong",
        "conservative 32+32+32+32+29+4 = 161 chars is still under the 200-char limit");
}
}

View File

@@ -1,4 +1,5 @@
using Microsoft.Extensions.Logging.Abstractions;
using Polly.Timeout;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Configuration.LocalCache;
@@ -119,6 +120,104 @@ public sealed class ResilientConfigReaderTests : IDisposable
attempts.ShouldBeLessThanOrEqualTo(1);
}
// ------------------------------------------------------------------------------------
// Configuration-006 — command-timeout TaskCanceledException and TimeoutRejectedException
// must fall back to the sealed cache, not propagate as caller cancellation.
// ------------------------------------------------------------------------------------
[Fact]
public async Task CommandTimeout_TaskCanceledException_FallsBackToCache()
{
    // Scenario: the read delegate itself throws TaskCanceledException while the
    // caller's token is NOT cancelled. This is how a SQL command-level timeout shows
    // up. The reader must treat it as a transient fault and serve the sealed cache
    // rather than propagating it as if the caller had cancelled.
    const string cachedJson = "{\"from\":\"cache\"}";

    var sealedCache = new GenerationSealedCache(_root);
    var seededSnapshot = new GenerationSnapshot
    {
        ClusterId = "cluster-b",
        GenerationId = 7,
        CachedAt = DateTime.UtcNow,
        PayloadJson = cachedJson,
    };
    await sealedCache.SealAsync(seededSnapshot);

    var staleFlag = new StaleConfigFlag();
    var reader = new ResilientConfigReader(
        sealedCache,
        staleFlag,
        NullLogger<ResilientConfigReader>.Instance,
        timeout: TimeSpan.FromSeconds(10),
        retryCount: 0);

    // The delegate fails immediately with a TaskCanceledException that is not tied to
    // any caller token; CancellationToken.None guarantees the caller never cancelled.
    var payload = await reader.ReadAsync(
        "cluster-b",
        _ => throw new TaskCanceledException("SQL command timeout (no caller token)"),
        snap => snap.PayloadJson,
        CancellationToken.None);

    payload.ShouldBe(cachedJson,
        "command-timeout TaskCanceledException must fall back to sealed cache");
    staleFlag.IsStale.ShouldBeTrue("cache fallback marks the stale flag");
}
[Fact]
public async Task PollyTimeout_TimeoutRejectedException_FallsBackToCache()
{
    // Scenario: the reader's own timeout (10 ms here) fires while the delegate is
    // still waiting. Polly's timeout strategy reports that as TimeoutRejectedException,
    // which must trigger the sealed-cache fallback like any other transient error.
    const string cachedJson = "{\"from\":\"polly-timeout-cache\"}";

    var sealedCache = new GenerationSealedCache(_root);
    var seededSnapshot = new GenerationSnapshot
    {
        ClusterId = "cluster-c",
        GenerationId = 8,
        CachedAt = DateTime.UtcNow,
        PayloadJson = cachedJson,
    };
    await sealedCache.SealAsync(seededSnapshot);

    var staleFlag = new StaleConfigFlag();

    // Very short timeout so the delayed delegate below is guaranteed to exceed it.
    var reader = new ResilientConfigReader(
        sealedCache,
        staleFlag,
        NullLogger<ResilientConfigReader>.Instance,
        timeout: TimeSpan.FromMilliseconds(10),
        retryCount: 0);

    var payload = await reader.ReadAsync(
        "cluster-c",
        async ct =>
        {
            // 5 s is far beyond the 10 ms timeout; the token lets the delay be cut short.
            await Task.Delay(TimeSpan.FromSeconds(5), ct);
            return "never";
        },
        snap => snap.PayloadJson,
        CancellationToken.None);

    payload.ShouldBe(cachedJson,
        "Polly TimeoutRejectedException must fall back to sealed cache");
    staleFlag.IsStale.ShouldBeTrue("cache fallback marks the stale flag");
}
[Fact]
public async Task CallerCancellation_Propagates_NotFallback()
{
    // Scenario: the caller's own token is already cancelled. That is an explicit
    // request to stop, so the reader must surface OperationCanceledException and must
    // NOT serve the sealed cache, even though a snapshot is available.
    var sealedCache = new GenerationSealedCache(_root);
    var seededSnapshot = new GenerationSnapshot
    {
        ClusterId = "cluster-d",
        GenerationId = 9,
        CachedAt = DateTime.UtcNow,
        PayloadJson = "{\"should\":\"not be returned\"}",
    };
    await sealedCache.SealAsync(seededSnapshot);

    var staleFlag = new StaleConfigFlag();
    var reader = new ResilientConfigReader(
        sealedCache,
        staleFlag,
        NullLogger<ResilientConfigReader>.Instance,
        timeout: TimeSpan.FromSeconds(10),
        retryCount: 0);

    using var callerCts = new CancellationTokenSource();
    callerCts.Cancel(); // cancelled before the read even starts

    Func<Task> readWithCancelledToken = async () =>
    {
        await reader.ReadAsync<string>(
            "cluster-d",
            ct =>
            {
                ct.ThrowIfCancellationRequested();
                return ValueTask.FromResult("ok");
            },
            _ => "cache-should-not-be-used",
            callerCts.Token);
    };

    await Should.ThrowAsync<OperationCanceledException>(readWithCancelledToken);

    staleFlag.IsStale.ShouldBeFalse("no cache snapshot served on genuine cancellation");
}
}
[Trait("Category", "Unit")]