fix(siteruntime): harden WaitAsync — no spurious match on quality republish, guard throwing predicate, Ask-timeout returns false

This commit is contained in:
Joseph Doherty
2026-06-17 08:44:03 -04:00
parent 75ffa09b8f
commit 04e97f4a87
5 changed files with 390 additions and 32 deletions
@@ -5,6 +5,7 @@ using Microsoft.Extensions.Logging.Abstractions;
using ZB.MOM.WW.ScadaBridge.Commons.Interfaces.Protocol;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.DataConnection;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Instance;
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Persistence;
@@ -293,10 +294,12 @@ public class InstanceActorWaitForAttributeTests : TestKit, IDisposable
/// <summary>
/// Spec §4.1: a null TargetValueEncoded + null Predicate means "wait for any
/// change" — the next value update on that attribute matches.
/// change" (test <c>_ => true</c>). When the attribute ALREADY holds a value at
/// registration, the fast-path matches IMMEDIATELY — there is no need to wait for
/// a subsequent update. (A separate test covers the absent-at-registration case.)
/// </summary>
[Fact]
public void WaitForAttribute_AnyChange_MatchesOnNextUpdate()
public void WaitForAttribute_AnyChange_MatchesImmediatelyWhenAttributePresent()
{
const string tag = "ns=3;s=Speed";
var config = new FlattenedConfiguration
@@ -338,4 +341,227 @@ public class InstanceActorWaitForAttributeTests : TestKit, IDisposable
Assert.True(response.Matched);
Assert.False(response.TimedOut);
}
/// <summary>
/// Spec §4.1, companion to the immediate-match case. An "any change" waiter
/// (null target value, null predicate) is registered against an attribute name
/// that is not part of the instance configuration. The test expects no reply at
/// registration time, and expects the first <c>AttributeValueChanged</c> for that
/// attribute to produce a matched reply carrying the new value.
/// </summary>
[Fact]
public void WaitForAttribute_AnyChange_AttributeAbsent_MatchesOnLaterSet()
{
    const string instanceName = "Pump1";
    const string absentAttribute = "Ghost";
    const string correlationId = "wfa-absent";

    // Only "Known" is configured; "Ghost" is deliberately left out so the waiter
    // has no current value to match against when it is registered.
    var flattened = new FlattenedConfiguration
    {
        InstanceUniqueName = instanceName,
        Attributes =
        [
            new ResolvedAttribute { CanonicalName = "Known", Value = "x", DataType = "String" }
        ]
    };
    var instance = CreateInstanceActor(instanceName, flattened);

    // Register the any-change waiter: no encoded target and no predicate.
    var waitRequest = new WaitForAttributeRequest(
        correlationId, instanceName, absentAttribute,
        null, null, TimeSpan.FromSeconds(30), DateTimeOffset.UtcNow);
    instance.Tell(waitRequest);

    // Nothing may come back while the attribute still has no value.
    ExpectNoMsg(TimeSpan.FromMilliseconds(300));

    // The first value for "Ghost" arrives; the pending waiter should now resolve.
    var firstValue = new AttributeValueChanged(
        instanceName, absentAttribute, absentAttribute, "appeared", "Good", DateTimeOffset.UtcNow);
    instance.Tell(firstValue);

    var reply = ExpectMsg<WaitForAttributeResponse>(TimeSpan.FromSeconds(5));
    Assert.True(reply.Matched);
    Assert.False(reply.TimedOut);
    Assert.Equal(correlationId, reply.CorrelationId);
    Assert.Equal("appeared", reply.Value);
}
// ── 7. CRITICAL 1: no spurious match on a quality-only republish ─────────
/// <summary>
/// CRITICAL 1 regression: the List-coerce-failure Bad-quality path is expected to
/// republish the OLD value (quality flipped to Bad) without a value change, and
/// registered waiters must NOT be re-evaluated on that non-change republish. They
/// must not spuriously fire, and they must STILL resolve on the next genuine value
/// change.
///
/// <para>
/// Two waiters are registered on the same attribute: a pending predicate waiter
/// (list length at least 3, while the current value has length 2) and an
/// "any-change" waiter, which matches the present value immediately and is drained.
/// The Bad-quality republish is then driven with a scalar that cannot coerce to the
/// list type.
/// </para>
///
/// <para>
/// Because the republished value is the same length-2 list, the predicate would
/// return false whether or not it was re-evaluated, so "no reply" alone cannot
/// detect a re-evaluation regression. The predicate therefore counts its own
/// invocations, and the test asserts that the count is unchanged across the
/// republish. A subsequent REAL change to a length-3 list must then resolve the
/// still-pending waiter.
/// </para>
/// </summary>
[Fact]
public void WaitForAttribute_BadQualityRepublish_NoValueChange_DoesNotMatch()
{
    const string tag = "ns=3;s=Items";
    var config = new FlattenedConfiguration
    {
        InstanceUniqueName = "Pump1",
        Attributes =
        [
            new ResolvedAttribute
            {
                // Static default {1,2}: a real list value is present from
                // construction so the Bad-quality republish has an OLD value to
                // republish. The predicate waiter below needs a DIFFERENT shape
                // (length 3+), so it is genuinely pending when the republish fires.
                CanonicalName = "Items", Value = "[1,2]", DataType = "List",
                ElementDataType = "Int32",
                DataSourceReference = tag, BoundDataConnectionName = "PLC"
            }
        ]
    };
    var dcl = CreateTestProbe();
    var actor = ActorOf(Props.Create(() => new InstanceActor(
        "Pump1",
        JsonSerializer.Serialize(config),
        _storage,
        _compilationService,
        _sharedScriptLibrary,
        null,
        _options,
        NullLogger<InstanceActor>.Instance,
        dcl.Ref)));
    dcl.ExpectMsg<SubscribeTagsRequest>(TimeSpan.FromSeconds(5));

    // A predicate waiter that matches a list of length >= 3. The current value is
    // {1,2} (length 2), so it does not match at registration and stays pending.
    // The predicate runs on the actor's thread, so the invocation counter is
    // updated and read with Interlocked/Volatile.
    var predicateCalls = 0;
    Func<object?, bool> lenAtLeast3 = v =>
    {
        System.Threading.Interlocked.Increment(ref predicateCalls);
        return v is System.Collections.IList list && list.Count >= 3;
    };
    actor.Tell(new WaitForAttributeRequest(
        "wfa-len3", "Pump1", "Items",
        null, lenAtLeast3, TimeSpan.FromSeconds(30), DateTimeOffset.UtcNow));

    // Also register an "any-change" waiter while the attribute is present — it
    // matches the current {1,2} immediately. Drain that correct match; it is the
    // documented immediate-match behaviour, not the bug under test.
    actor.Tell(new WaitForAttributeRequest(
        "wfa-any", "Pump1", "Items",
        null, null, TimeSpan.FromSeconds(30), DateTimeOffset.UtcNow));
    var immediate = ExpectMsg<WaitForAttributeResponse>(TimeSpan.FromSeconds(5));
    Assert.Equal("wfa-any", immediate.CorrelationId);
    Assert.True(immediate.Matched);

    // Both requests came from the same sender, so the predicate waiter was handled
    // before the any-change reply was produced. Whatever evaluations registration
    // performed are therefore already included in this baseline.
    var callsBeforeRepublish = System.Threading.Volatile.Read(ref predicateCalls);

    // Drive the List-coerce-FAILURE Bad-quality republish: a scalar int is not
    // expected to coerce to List<Int32>, so the actor should flag quality Bad and
    // republish the OLD value {1,2} without re-evaluating waiters.
    actor.Tell(new TagValueUpdate("PLC", tag, 999, QualityCode.Good, DateTimeOffset.UtcNow));

    // The pending length>=3 waiter must NOT fire on this non-change republish...
    ExpectNoMsg(TimeSpan.FromMilliseconds(500));

    // ...and its predicate must not even have been invoked. This is the assertion
    // that actually fails if the republish path starts re-evaluating waiters,
    // because the predicate's false result on {1,2} would otherwise hide it.
    Assert.Equal(callsBeforeRepublish, System.Threading.Volatile.Read(ref predicateCalls));

    // A REAL change to a length-3 list resolves the still-pending waiter.
    actor.Tell(new TagValueUpdate("PLC", tag, new[] { 7, 8, 9 }, QualityCode.Good, DateTimeOffset.UtcNow));
    var realChange = ExpectMsg<WaitForAttributeResponse>(TimeSpan.FromSeconds(5));
    Assert.Equal("wfa-len3", realChange.CorrelationId);
    Assert.True(realChange.Matched);
    Assert.False(realChange.TimedOut);
}
// ── 8. CRITICAL 2: throwing predicate is isolated ────────────────────────
/// <summary>
/// CRITICAL 2 regression: a waiter whose predicate throws must not take its
/// siblings, or the actor, down with it. Two waiters share one attribute: one
/// with a predicate that throws for the value "ready", one a plain value-equality
/// wait for "ready". After a single update to "ready" the test expects:
/// (a) a non-matched, non-timed-out reply with an error message for the throwing
/// waiter, (b) a matched reply for the normal waiter, (c) the actor still
/// answering a follow-up <c>GetAttributeRequest</c>, and (d) no further replies
/// on a later value change.
/// </summary>
[Fact]
public void WaitForAttribute_ThrowingPredicate_IsIsolated_SiblingStillMatches()
{
    const string tag = "ns=3;s=State";
    const string instanceName = "Pump1";
    const string attributeName = "State";
    var replyTimeout = TimeSpan.FromSeconds(5);
    var waitTimeout = TimeSpan.FromSeconds(30);

    var flattened = new FlattenedConfiguration
    {
        InstanceUniqueName = instanceName,
        Attributes =
        [
            new ResolvedAttribute
            {
                CanonicalName = attributeName, Value = "init", DataType = "String",
                DataSourceReference = tag, BoundDataConnectionName = "PLC"
            }
        ]
    };
    var configJson = JsonSerializer.Serialize(flattened);

    var dataConnection = CreateTestProbe();
    var instance = ActorOf(Props.Create(() => new InstanceActor(
        instanceName,
        configJson,
        _storage,
        _compilationService,
        _sharedScriptLibrary,
        null,
        _options,
        NullLogger<InstanceActor>.Instance,
        dataConnection.Ref)));
    dataConnection.ExpectMsg<SubscribeTagsRequest>(replyTimeout);

    // Waiter A: returns false for every value except "ready", for which it
    // throws. The initial value is "init", so registration itself does not hit
    // the throw; only the later update to "ready" does.
    Func<object?, bool> boom = candidate =>
    {
        if (candidate?.ToString() == "ready")
        {
            throw new InvalidOperationException("kaboom");
        }

        return false;
    };
    instance.Tell(new WaitForAttributeRequest(
        "wfa-throw", instanceName, attributeName,
        null, boom, waitTimeout, DateTimeOffset.UtcNow));

    // Waiter B: ordinary value-equality wait for the encoded value "ready".
    var encodedReady = ZB.MOM.WW.ScadaBridge.Commons.Types.AttributeValueCodec.Encode("ready");
    instance.Tell(new WaitForAttributeRequest(
        "wfa-normal", instanceName, attributeName,
        encodedReady, null, waitTimeout, DateTimeOffset.UtcNow));

    // Neither waiter may have replied yet.
    ExpectNoMsg(TimeSpan.FromMilliseconds(200));

    // A single update to "ready" is relevant to BOTH waiters.
    instance.Tell(new TagValueUpdate("PLC", tag, "ready", QualityCode.Good, DateTimeOffset.UtcNow));

    // Two replies are expected; their arrival order is not assumed, so index
    // them by correlation id before asserting.
    var firstReply = ExpectMsg<WaitForAttributeResponse>(replyTimeout);
    var secondReply = ExpectMsg<WaitForAttributeResponse>(replyTimeout);
    var repliesById = new[] { firstReply, secondReply }.ToDictionary(reply => reply.CorrelationId);

    // The throwing waiter: not matched, not timed out, error message present.
    var failed = repliesById["wfa-throw"];
    Assert.False(failed.Matched);
    Assert.False(failed.TimedOut);
    Assert.NotNull(failed.ErrorMessage);
    Assert.Contains("Wait predicate threw", failed.ErrorMessage);

    // The well-behaved sibling: matched on the new value.
    var succeeded = repliesById["wfa-normal"];
    Assert.True(succeeded.Matched);
    Assert.False(succeeded.TimedOut);
    Assert.Equal("ready", succeeded.Value);

    // The actor is still alive and serving requests after the predicate threw.
    instance.Tell(new GetAttributeRequest("get-after", instanceName, attributeName, DateTimeOffset.UtcNow));
    var current = ExpectMsg<GetAttributeResponse>(replyTimeout);
    Assert.Equal("ready", current.Value);

    // Both waiters are finished, so another change must produce no reply.
    // In particular, the throwing waiter must not be evaluated again.
    instance.Tell(new TagValueUpdate("PLC", tag, "again", QualityCode.Good, DateTimeOffset.UtcNow));
    ExpectNoMsg(TimeSpan.FromMilliseconds(500));
}
}