fix(dcl): robust static-tag seeding — bounded-retry+log initial seed (#1) and re-seed on reconnect (#3)

STATIC tags (no further OnDataChange after advise) depend entirely on the
seed read. Pre-fix HandleSubscribe seeded only on Success && Value != null,
silently dropping a seed that raced the just-created advise (VT_EMPTY) — so a
static tag stayed Uncertain forever while the source read Good. ReSubscribeAll
did no seeding at all, so a static tag could not self-heal across reconnect.

- New SeedTagsAsync helper: per-tag ReadAsync (not a bulk read — some gateways
  time out on large batches) with round-based bounded retry
  (SeedReadMaxAttempts/SeedReadRetryDelay). Any tag that never yields a value
  is now logged by name as a Warning (previously there was no log trace at all).
- HandleSubscribe seed loop delegates to SeedTagsAsync.
- ReSubscribeAll re-seeds re-advised tags after reconnect via the
  generation-guarded TagValueReceived path (fan-out keys off
  _subscriptionsByInstance, preserved across reconnect).

Diagnosed live on wonder-app-vd03 2026-06-17 (see scadabridge-dcl-static-tag-false-bad).
Mechanism #2 (single transient-bad push) left as a follow-up.
This commit is contained in:
Joseph Doherty
2026-06-18 09:23:23 -04:00
parent 3782ebdadb
commit 72aec3b4d4
3 changed files with 229 additions and 9 deletions
@@ -37,7 +37,11 @@ public class DataConnectionActorTests : TestKit
{
ReconnectInterval = TimeSpan.FromMilliseconds(100),
TagResolutionRetryInterval = TimeSpan.FromMilliseconds(200),
WriteTimeout = TimeSpan.FromSeconds(5)
WriteTimeout = TimeSpan.FromSeconds(5),
// Default to a single seed-read attempt so existing tests behave exactly as
// before the DCL-027 retry was added; the retry-specific tests opt in.
SeedReadMaxAttempts = 1,
SeedReadRetryDelay = TimeSpan.FromMilliseconds(10)
};
}
@@ -583,6 +587,124 @@ public class DataConnectionActorTests : TestKit
Assert.Equal("Left54321", update.Value);
}
// ── DataConnectionLayer-027: robust seeding for STATIC tags ──
[Fact]
public async Task DCL027_SeedRead_EmptyThenGood_SeedsTagWithinRetryBudget()
{
    // Scenario: on a cold connection the seed read for a STATIC tag races the advise
    // that was just issued and comes back failed/empty (VT_EMPTY). Before the fix the
    // seed loop only accepted Success && Value != null and dropped the empty read
    // without retrying, so a static tag (which never raises another OnDataChange)
    // stayed Uncertain forever. With the retry in place, the second read inside the
    // seed budget delivers the value.
    const string connectionName = "dcl027-retry-seed";
    const string staticTag = "Right_002.H2_SVC";
    const string seededValue = "HY12333";

    // Opt in to one retry with a short delay between attempts.
    _options.SeedReadRetryDelay = TimeSpan.FromMilliseconds(20);
    _options.SeedReadMaxAttempts = 2;

    // Read sequence: the first attempt fails, the second yields a Good value.
    var failedRead = new ReadResult(false, null, "no value yet");
    var goodRead = new ReadResult(
        true,
        new TagValue(seededValue, QualityCode.Good, DateTimeOffset.UtcNow),
        null);

    _mockAdapter.Status.Returns(ConnectionHealth.Connected);
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(_ => Task.FromResult("sub-static"));
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(failedRead, goodRead);

    var actor = CreateConnectionActor(connectionName);
    await Task.Delay(300); // reach Connected state

    var subscribe = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, [staticTag], DateTimeOffset.UtcNow);
    actor.Tell(subscribe);

    // Skip any other traffic (e.g. the subscribe ack) until the tag's update shows up.
    var update = FishForMessage<TagValueUpdate>(
        m => m.TagPath == staticTag, TimeSpan.FromSeconds(5));

    Assert.Equal(QualityCode.Good, update.Quality);
    Assert.Equal(seededValue, update.Value);
}
[Fact]
public async Task DCL027_SeedRead_NeverReturnsValue_LogsWarningAndStillAcks()
{
    // Scenario: the seed read never produces a usable value within the retry budget,
    // so the tag is left to heal from a later change notification. Before the fix this
    // was completely silent (the old code path was "best-effort, no log"), leaving an
    // operator with no trace naming the stuck tag. The fix must log a Warning that
    // names the tag while the subscribe itself still acks successfully — seeding is
    // best-effort and must never fail the subscribe.
    const string connectionName = "dcl027-warn-seed";
    const string stuckTag = "Right_002.CoilHeight";

    _options.SeedReadRetryDelay = TimeSpan.FromMilliseconds(20);
    _options.SeedReadMaxAttempts = 2;

    _mockAdapter.Status.Returns(ConnectionHealth.Connected);
    _mockAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    _mockAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns(_ => Task.FromResult("sub-static"));

    // Every seed read fails, so no attempt ever returns a value for the tag.
    _mockAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(false, null, "never"));

    var actor = CreateConnectionActor(connectionName);
    await Task.Delay(300); // reach Connected state

    var subscribe = new SubscribeTagsRequest(
        "c1", "inst1", connectionName, [stuckTag], DateTimeOffset.UtcNow);

    // Exactly one Warning mentioning the tag name is expected while subscribing.
    EventFilter.Warning(contains: stuckTag).ExpectOne(() => actor.Tell(subscribe));

    var ack = ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(5));
    Assert.True(ack.Success);
}
[Fact]
public async Task DCL027_Reconnect_ReSeedsStaticTag()
{
    // Mechanism #3: after a reconnect ReSubscribeAll re-advises the tags, but before
    // the fix it performed no seed read. A STATIC tag (no further OnDataChange) could
    // therefore not self-heal across a reconnect/failover even though the source read
    // Good. With the fix, the reconnect re-seeds the re-advised tags and the
    // post-reconnect value is delivered to the subscriber.
    const string connectionName = "dcl027-reseed";
    const string staticTag = "static/tag";
    const double primaryValue = 42.0;
    const double backupValue = 99.0;

    Dictionary<string, string> primaryConfig = new() { ["Endpoint"] = "opc.tcp://primary:4840" };
    Dictionary<string, string> backupConfig = new() { ["Endpoint"] = "opc.tcp://backup:4840" };

    // Primary endpoint: the first connect succeeds, every later attempt fails so the
    // actor has to fail over.
    var primaryAdapter = Substitute.For<IDataConnection>();
    var primaryConnectCount = 0;
    primaryAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(_ =>
        {
            if (Interlocked.Increment(ref primaryConnectCount) == 1)
            {
                return Task.CompletedTask;
            }

            return Task.FromException(new Exception("Primary down"));
        });
    primaryAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns("sub-primary-001");
    primaryAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(true, new TagValue(primaryValue, QualityCode.Good, DateTimeOffset.UtcNow), null));

    // Backup endpoint: always connects; its read returns the static tag's current
    // value after the reconnect.
    var backupAdapter = Substitute.For<IDataConnection>();
    backupAdapter.ConnectAsync(Arg.Any<IDictionary<string, string>>(), Arg.Any<CancellationToken>())
        .Returns(Task.CompletedTask);
    backupAdapter.SubscribeAsync(Arg.Any<string>(), Arg.Any<SubscriptionCallback>(), Arg.Any<CancellationToken>())
        .Returns("sub-backup-001");
    backupAdapter.ReadAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
        .Returns(new ReadResult(true, new TagValue(backupValue, QualityCode.Good, DateTimeOffset.UtcNow), null));

    // The factory hands out the backup adapter when asked for the backup endpoint.
    _mockFactory.Create("OpcUa", Arg.Is<IDictionary<string, string>>(cfg => cfg["Endpoint"] == "opc.tcp://backup:4840"))
        .Returns(backupAdapter);

    var actor = CreateFailoverActor(primaryAdapter, connectionName, primaryConfig, backupConfig, failoverRetryCount: 1);
    AwaitCondition(() => primaryConnectCount >= 1, TimeSpan.FromSeconds(2));
    await Task.Delay(200);

    var subscribe = new SubscribeTagsRequest("c1", "inst1", connectionName, [staticTag], DateTimeOffset.UtcNow);
    actor.Tell(subscribe);
    ExpectMsg<SubscribeTagsResponse>(TimeSpan.FromSeconds(3));

    // Failover to backup → ReSubscribeAll → re-seed must deliver the backup value.
    RaiseDisconnected(primaryAdapter);

    var reseeded = FishForMessage<TagValueUpdate>(
        m => m.TagPath == staticTag && m.Quality == QualityCode.Good && Equals(m.Value, backupValue),
        TimeSpan.FromSeconds(10));

    Assert.Equal(backupValue, reseeded.Value);
}
[Fact]
public async Task DCL004_ConnectionLevelSubscribeFailure_TriggersReconnect_NotTagRetry()
{