Files
lmxopcua/tests/Core/ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms.Tests/ScriptedAlarmEngineTests.cs
Joseph Doherty 99354bfaf2 fix(core-scripted-alarms): resolve Low code-review findings (Core.ScriptedAlarms-003,006,008,010,011; -009 documented)
- Core.ScriptedAlarms-003: emit OnEvent OUTSIDE _evalGate by collecting
  pending emissions during the gate-held section and flushing them after
  release; eliminates re-entrancy deadlock the docs already promised.
- Core.ScriptedAlarms-006: track every fire-and-forget Reevaluate /
  ShelvingCheck task in _inFlight; Dispose drains the set so the engine
  no longer races store writes against teardown.
- Core.ScriptedAlarms-008: store comments as ImmutableList<AlarmComment>
  so AppendComment is O(log n) instead of O(n).
- Core.ScriptedAlarms-010: document the deliberate input-quality
  asymmetry (Uncertain drives the predicate, renders {?} in the message)
  in docs/ScriptedAlarms.md and on MessageTemplate.Resolve remarks.
- Core.ScriptedAlarms-011: propagate the no-op reason through
  TransitionResult.NoOp(state, reason) and log it from
  ScriptedAlarmEngine.ApplyAsync.
- Core.ScriptedAlarms-009 (Won't Fix per recommendation): documented the
  per-evaluation dictionary allocation in docs/v2/Galaxy.Performance.md
  with a mitigation path if a future soak surfaces pressure.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 07:23:31 -04:00

929 lines
41 KiB
C#

using Serilog;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Scripting;
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
namespace ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms.Tests;
/// <summary>
/// End-to-end engine tests: load, predicate evaluation, change-triggered
/// re-evaluation, state persistence, startup recovery, error isolation.
/// </summary>
[Trait("Category", "Unit")]
public sealed class ScriptedAlarmEngineTests
{
/// <summary>
/// Creates an engine over <paramref name="up"/> backed by a fresh
/// <see cref="InMemoryAlarmStateStore"/>. The store is handed back through
/// <paramref name="store"/> so a test can inspect what was persisted.
/// </summary>
/// <param name="up">Fake upstream the engine reads tags from.</param>
/// <param name="store">Receives the in-memory store given to the engine.</param>
/// <returns>A new engine; the caller is responsible for disposing it.</returns>
private static ScriptedAlarmEngine Build(FakeUpstream up, out IAlarmStateStore store)
{
    // Logger built from a configuration with no sinks added.
    var serilog = new LoggerConfiguration().CreateLogger();
    var scriptLoggers = new ScriptLoggerFactory(serilog);

    store = new InMemoryAlarmStateStore();
    return new ScriptedAlarmEngine(up, store, scriptLoggers, serilog);
}
/// <summary>
/// Builds an <see cref="AlarmKind.AlarmCondition"/> definition on the fixed
/// equipment path "Plant/Line1/Reactor", using <paramref name="id"/> as both
/// the alarm id and the alarm name.
/// </summary>
/// <param name="id">Alarm id, also used as the alarm name.</param>
/// <param name="predicate">Predicate script source.</param>
/// <param name="msg">Message template; defaults to "condition".</param>
/// <param name="sev">Severity; defaults to <see cref="AlarmSeverity.High"/>.</param>
private static ScriptedAlarmDefinition Alarm(string id, string predicate,
    string msg = "condition", AlarmSeverity sev = AlarmSeverity.High)
{
    return new ScriptedAlarmDefinition(
        AlarmId: id,
        EquipmentPath: "Plant/Line1/Reactor",
        AlarmName: id,
        Kind: AlarmKind.AlarmCondition,
        Severity: sev,
        MessageTemplate: msg,
        PredicateScriptSource: predicate);
}
[Fact]
public async Task Load_compiles_and_subscribes_to_referenced_upstreams()
{
    // Arrange: one upstream tag and one alarm whose predicate reads it.
    const string predicate = """return (int)ctx.GetTag("Temp").Value > 100;""";
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);

    // Act
    await engine.LoadAsync([Alarm("a1", predicate)], TestContext.Current.CancellationToken);

    // Assert: the alarm is registered and exactly one upstream subscription exists.
    engine.LoadedAlarmIds.ShouldContain("a1");
    upstream.ActiveSubscriptionCount.ShouldBe(1);
}
[Fact]
public async Task Compile_failures_aggregated_into_one_error()
{
    // Arrange: "bad1" and "bad2" reference identifiers that are never declared;
    // "good" is a trivially valid predicate sitting between them.
    List<ScriptedAlarmDefinition> definitions =
    [
        Alarm("bad1", "return unknownIdentifier;"),
        Alarm("good", "return true;"),
        Alarm("bad2", "var x = alsoUnknown; return x;"),
    ];
    var upstream = new FakeUpstream();
    using var engine = Build(upstream, out _);

    // Act
    var failure = await Should.ThrowAsync<InvalidOperationException>(async () =>
        await engine.LoadAsync(definitions, TestContext.Current.CancellationToken));

    // Assert: a single exception reports both failing alarms.
    failure.Message.ShouldContain("2 alarm(s) did not compile");
}
[Fact]
public async Task Upstream_change_re_evaluates_predicate_and_emits_Activated()
{
    // Arrange: Temp starts below the threshold, so the alarm is not active after load.
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // The handler is invoked by the engine while this test thread polls the
    // collection, so it must be thread-safe; List<T> is not.
    var events = new System.Collections.Concurrent.ConcurrentQueue<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) => events.Enqueue(e);

    // Act: push a value above the threshold and wait for the first emission.
    up.Push("Temp", 150);
    await WaitForAsync(() => !events.IsEmpty);

    // Assert
    var first = events.First();
    first.AlarmId.ShouldBe("HighTemp");
    first.Emission.ShouldBe(EmissionKind.Activated);
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);
}
[Fact]
public async Task Clearing_upstream_emits_Cleared_event()
{
    // Arrange: Temp starts above the threshold.
    var up = new FakeUpstream();
    up.Set("Temp", 150);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // Startup sees 150 → active.
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);

    // Thread-safe capture: the handler runs on the engine's dispatch path while
    // the polling predicate below enumerates the collection from the test thread.
    var events = new System.Collections.Concurrent.ConcurrentQueue<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) => events.Enqueue(e);

    // Act: drop below the threshold.
    up.Push("Temp", 50);
    await WaitForAsync(() => events.Any(e => e.Emission == EmissionKind.Cleared));

    // Assert
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Inactive);
}
[Fact]
public async Task Message_template_resolves_tag_values_at_emission()
{
    // Arrange: both tags referenced by the template and the predicate have values.
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    up.Set("Limit", 100);
    using var eng = Build(up, out _);
    await eng.LoadAsync([
        new ScriptedAlarmDefinition(
            "HighTemp", "Plant/Line1", "HighTemp",
            AlarmKind.LimitAlarm, AlarmSeverity.High,
            "Temp {Temp}C exceeded limit {Limit}C",
            """return (int)ctx.GetTag("Temp").Value > (int)ctx.GetTag("Limit").Value;"""),
    ], TestContext.Current.CancellationToken);

    // Thread-safe capture: events are added by the engine while the test thread polls.
    var events = new System.Collections.Concurrent.ConcurrentQueue<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) => events.Enqueue(e);

    // Act: exceed the limit.
    up.Push("Temp", 150);
    await WaitForAsync(() => !events.IsEmpty);

    // Assert: placeholders are replaced with the tag values current at emission.
    events.First().Message.ShouldBe("Temp 150C exceeded limit 100C");
}
[Fact]
public async Task Ack_records_user_and_persists_to_store()
{
    // Arrange: Temp is above the threshold when the alarm is loaded.
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    using var engine = Build(upstream, out var stateStore);
    await engine.LoadAsync(
        [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // Act
    await engine.AcknowledgeAsync("HighTemp", "alice", "checking", TestContext.Current.CancellationToken);

    // Assert against what the store holds, not the engine's in-memory view.
    var saved = await stateStore.LoadAsync("HighTemp", TestContext.Current.CancellationToken);
    saved.ShouldNotBeNull();
    saved!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
    saved.LastAckUser.ShouldBe("alice");
    saved.LastAckComment.ShouldBe("checking");

    var hasAckAuditEntry = saved.Comments.Any(c => c.Kind == "Acknowledge" && c.User == "alice");
    hasAckAuditEntry.ShouldBeTrue();
}
[Fact]
public async Task Startup_recovery_preserves_ack_but_rederives_active_from_predicate()
{
    const string predicate = """return (int)ctx.GetTag("Temp").Value > 100;""";
    var up = new FakeUpstream();

    // First run — alarm goes active + operator acks. The store created by Build
    // is not reused below (the restart store is seeded explicitly), so it is discarded.
    using (var eng1 = Build(up, out _))
    {
        up.Set("Temp", 150);
        await eng1.LoadAsync([Alarm("HighTemp", predicate)], TestContext.Current.CancellationToken);
        eng1.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);
        await eng1.AcknowledgeAsync("HighTemp", "alice", null, TestContext.Current.CancellationToken);
        eng1.GetState("HighTemp")!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
    }

    // Simulate restart — temp is back to 50 (below threshold).
    up.Set("Temp", 50);
    var logger = new LoggerConfiguration().CreateLogger();
    var store2 = new InMemoryAlarmStateStore();

    // Seed store2 with the acked state from before restart. One timestamp is
    // used for every field so the seeded record is internally consistent.
    var stamp = DateTime.UtcNow;
    await store2.SaveAsync(new AlarmConditionState(
        "HighTemp",
        AlarmEnabledState.Enabled,
        AlarmActiveState.Active, // was active pre-restart
        AlarmAckedState.Acknowledged, // ack persisted
        AlarmConfirmedState.Unconfirmed,
        ShelvingState.Unshelved,
        stamp,
        stamp, null,
        stamp, "alice", null,
        null, null, null,
        [new AlarmComment(stamp, "alice", "Acknowledge", "")]),
        TestContext.Current.CancellationToken);

    using var eng2 = new ScriptedAlarmEngine(up, store2, new ScriptLoggerFactory(logger), logger);
    await eng2.LoadAsync([Alarm("HighTemp", predicate)], TestContext.Current.CancellationToken);

    // Active is re-derived from the current tag value; the ack survives.
    var s = eng2.GetState("HighTemp")!;
    s.Active.ShouldBe(AlarmActiveState.Inactive, "Active recomputed from current tag value");
    s.Acked.ShouldBe(AlarmAckedState.Acknowledged, "Ack persisted across restart");
    s.LastAckUser.ShouldBe("alice");
}
[Fact]
public async Task Shelved_active_transitions_state_but_suppresses_emission()
{
    // Arrange: load an inactive alarm and one-shot shelve it.
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    await eng.OneShotShelveAsync("HighTemp", "alice", TestContext.Current.CancellationToken);

    // Thread-safe capture: the handler may run on an engine thread.
    var events = new System.Collections.Concurrent.ConcurrentQueue<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) => events.Enqueue(e);

    // Act: cross the threshold while shelved.
    up.Push("Temp", 150);

    // Wait for the state transition itself rather than relying on a fixed sleep,
    // then allow a grace period in which a (wrongly) unsuppressed emission would
    // have been delivered. The delay honours the test-run cancellation token.
    await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);
    await Task.Delay(200, TestContext.Current.CancellationToken);

    // Assert
    events.Any(e => e.Emission == EmissionKind.Activated).ShouldBeFalse(
        "OneShot shelve suppresses activation emission");
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
        "state still advances so startup recovery is consistent");
}
[Fact]
public async Task Predicate_runtime_exception_does_not_transition_state()
{
    // Arrange: one predicate that always throws, one that is true for Temp = 150.
    const string throwingPredicate = """throw new InvalidOperationException("boom");""";
    const string healthyPredicate = """return (int)ctx.GetTag("Temp").Value > 100;""";
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    using var engine = Build(upstream, out _);

    // Act
    await engine.LoadAsync(
        [
            Alarm("BadScript", throwingPredicate),
            Alarm("GoodScript", healthyPredicate),
        ],
        TestContext.Current.CancellationToken);

    // Assert: the throwing alarm stays inactive and does not affect its sibling.
    engine.GetState("BadScript")!.Active.ShouldBe(AlarmActiveState.Inactive);
    engine.GetState("GoodScript")!.Active.ShouldBe(AlarmActiveState.Active);
}
[Fact]
public async Task Disable_prevents_activation_until_re_enabled()
{
    // Arrange: load an inactive alarm, then disable it.
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    await eng.DisableAsync("HighTemp", "alice", TestContext.Current.CancellationToken);

    // Act 1: cross the threshold while disabled. This is a negative check, so a
    // bounded delay is unavoidable; it honours the test-run cancellation token.
    up.Push("Temp", 150);
    await Task.Delay(100, TestContext.Current.CancellationToken);
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Inactive,
        "disabled alarm ignores predicate");

    // Act 2: re-enable and push another above-threshold value.
    await eng.EnableAsync("HighTemp", "alice", TestContext.Current.CancellationToken);
    up.Push("Temp", 160);
    await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);

    // Explicit assertion so the outcome does not depend on how the polling
    // helper reports a timeout.
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
        "re-enabled alarm evaluates the predicate again");
}
[Fact]
public async Task AddComment_appends_to_audit_without_state_change()
{
    // Arrange: an alarm whose predicate is always false.
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out var stateStore);
    await engine.LoadAsync([Alarm("A", """return false;""")], TestContext.Current.CancellationToken);

    // Act
    await engine.AddCommentAsync("A", "alice", "peeking at this", TestContext.Current.CancellationToken);

    // Assert: exactly one audit entry, attributed to alice, of kind "AddComment".
    var persisted = await stateStore.LoadAsync("A", TestContext.Current.CancellationToken);
    persisted.ShouldNotBeNull();
    persisted!.Comments.Count.ShouldBe(1);
    persisted.Comments[0].User.ShouldBe("alice");
    persisted.Comments[0].Kind.ShouldBe("AddComment");
}
[Fact]
public async Task Predicate_scripts_cannot_SetVirtualTag()
{
    // The script compiles fine but throws at runtime when SetVirtualTag is called.
    // The engine swallows the exception + leaves state unchanged.
    const string writingPredicate = """
        ctx.SetVirtualTag("NotAllowed", 1);
        return true;
        """;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 100);
    using var engine = Build(upstream, out _);

    var definition = new ScriptedAlarmDefinition(
        "Bad", "Plant/Line1", "Bad",
        AlarmKind.AlarmCondition, AlarmSeverity.High, "bad",
        writingPredicate);
    await engine.LoadAsync([definition], TestContext.Current.CancellationToken);

    // The predicate threw, so the alarm never became active.
    engine.GetState("Bad")!.Active.ShouldBe(AlarmActiveState.Inactive);
}
[Fact]
public async Task Dispose_releases_upstream_subscriptions()
{
    // Arrange: a loaded alarm holds one upstream subscription.
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    upstream.ActiveSubscriptionCount.ShouldBe(1);

    // Act: dispose explicitly (no using declaration) so the count can be checked afterwards.
    engine.Dispose();

    // Assert
    upstream.ActiveSubscriptionCount.ShouldBe(0);
}
[Fact]
public async Task Concurrent_reads_during_mutation_do_not_throw(/* Core.ScriptedAlarms-001 */)
{
    // Regression for Core.ScriptedAlarms-001: GetState / GetAllStates /
    // LoadedAlarmIds touch _alarms from arbitrary threads with no lock while
    // ApplyAsync / ReevaluateAsync reassign dictionary entries under _evalGate.
    // With a plain Dictionary this race throws InvalidOperationException or
    // returns torn state; with a ConcurrentDictionary the reads are safe.
    var up = new FakeUpstream();
    const int alarmCount = 40;
    var defs = new List<ScriptedAlarmDefinition>();
    for (var i = 0; i < alarmCount; i++)
    {
        up.Set($"Temp{i}", 50);
        defs.Add(Alarm($"A{i}", $$"""return (int)ctx.GetTag("Temp{{i}}").Value > 100;"""));
    }
    using var eng = Build(up, out _);
    await eng.LoadAsync(defs, TestContext.Current.CancellationToken);

    // Run for two seconds, but stop early if the test run itself is cancelled.
    using var cts = CancellationTokenSource.CreateLinkedTokenSource(TestContext.Current.CancellationToken);
    cts.CancelAfter(TimeSpan.FromSeconds(2));
    Exception? readerFailure = null;

    // Writer: hammer the engine with state mutations that reassign _alarms entries.
    var writer = Task.Run(async () =>
    {
        var round = 0;
        while (!cts.IsCancellationRequested)
        {
            var id = $"A{round % alarmCount}";
            await eng.AddCommentAsync(id, "tester", $"r{round}", CancellationToken.None);
            up.Push($"Temp{round % alarmCount}", round % 2 == 0 ? 150 : 50);
            round++;
        }
    });

    // Readers: continuously touch the three unguarded read paths.
    var readers = Enumerable.Range(0, 4).Select(_ => Task.Run(() =>
    {
        try
        {
            while (!cts.IsCancellationRequested)
            {
                _ = eng.LoadedAlarmIds.Count;
                _ = eng.GetAllStates().Count;
                for (var i = 0; i < alarmCount; i++)
                    _ = eng.GetState($"A{i}");
            }
        }
        catch (Exception ex)
        {
            // Keep the FIRST failure; later readers must not overwrite it.
            Interlocked.CompareExchange<Exception?>(ref readerFailure, ex, null);
        }
    })).ToArray();

    await Task.WhenAll([writer, .. readers]);
    readerFailure.ShouldBeNull(
        "concurrent reads of _alarms while it is being mutated must not throw");
}
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-012: coverage gaps
// -------------------------------------------------------------------------
// (1) Timed-shelve auto-expiry driven by the engine's shelving timer via an
// injectable clock — the clock and scriptTimeout constructor parameters
// exist for exactly this.
[Fact]
public async Task TimedShelve_auto_expires_when_engine_shelving_check_runs(/* -012 (1) */)
{
    // Use a controllable clock; start it at T0 so we can advance it manually.
    var now = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
    Func<DateTime> clock = () => now;
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    var logger = new LoggerConfiguration().CreateLogger();
    var store = new InMemoryAlarmStateStore();
    using var eng = new ScriptedAlarmEngine(up, store, new ScriptLoggerFactory(logger), logger, clock);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // Shelve for 10 minutes from now.
    var unshelveAt = now.AddMinutes(10);
    await eng.TimedShelveAsync("HighTemp", "alice", unshelveAt, TestContext.Current.CancellationToken);
    eng.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Timed);

    // Thread-safe capture: the shelving check completes in the background, so
    // the handler can run while the test thread polls this collection.
    var events = new System.Collections.Concurrent.ConcurrentQueue<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) => events.Enqueue(e);

    // Advance clock past the unshelve time and invoke RunShelvingCheck directly
    // (the 5-second real timer would be non-deterministic in tests).
    now = now.AddMinutes(11);
    eng.RunShelvingCheckForTest();
    await WaitForAsync(() => eng.GetState("HighTemp")!.Shelving.Kind == ShelvingKind.Unshelved);
    eng.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved, "timed shelve expired");

    // Events are emitted after the evaluation gate is released, i.e. after the
    // new state is already visible — so wait for the emission instead of
    // asserting the instant the state flips.
    await WaitForAsync(() => events.Any(e => e.Emission == EmissionKind.Unshelved));
    events.Any(e => e.Emission == EmissionKind.Unshelved).ShouldBeTrue("Unshelved event emitted");
}
// (2a) ConfirmAsync exercised end-to-end through the engine.
[Fact]
public async Task ConfirmAsync_records_user_and_emits_Confirmed(/* -012 (2) */)
{
    // Arrange: walk the alarm through activate → ack → clear so that only the
    // confirm step remains.
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    using var engine = Build(upstream, out var stateStore);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    await engine.AcknowledgeAsync("A", "alice", null, TestContext.Current.CancellationToken);
    upstream.Push("Temp", 50);
    await WaitForAsync(() => engine.GetState("A")!.Active == AlarmActiveState.Inactive);

    // Subscribe only now so earlier emissions are not captured.
    var captured = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => captured.Add(evt);

    // Act
    await engine.ConfirmAsync("A", "bob", "resolved", TestContext.Current.CancellationToken);

    // Assert: in-memory state, emitted event, and persisted state all agree.
    engine.GetState("A")!.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
    engine.GetState("A")!.LastConfirmUser.ShouldBe("bob");
    var sawConfirmed = captured.Any(evt => evt.Emission == EmissionKind.Confirmed);
    sawConfirmed.ShouldBeTrue();

    var saved = await stateStore.LoadAsync("A", TestContext.Current.CancellationToken);
    saved!.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
}
// (2b) TimedShelveAsync followed by a manual UnshelveAsync, end-to-end through the engine.
[Fact]
public async Task TimedShelveAsync_and_UnshelveAsync_round_trip(/* -012 (2) */)
{
    // Arrange: fixed clock; this test never advances it.
    var now = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
    Func<DateTime> clock = () => now;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    var serilog = new LoggerConfiguration().CreateLogger();
    using var engine = new ScriptedAlarmEngine(
        upstream,
        new InMemoryAlarmStateStore(),
        new ScriptLoggerFactory(serilog),
        serilog,
        clock);
    await engine.LoadAsync([Alarm("A", "return false;")], TestContext.Current.CancellationToken);

    // Act 1: shelve for thirty minutes.
    var expiry = now.AddMinutes(30);
    await engine.TimedShelveAsync("A", "alice", expiry, TestContext.Current.CancellationToken);
    engine.GetState("A")!.Shelving.Kind.ShouldBe(ShelvingKind.Timed);
    engine.GetState("A")!.Shelving.UnshelveAtUtc.ShouldBe(expiry);

    // Act 2: the operator unshelves manually before the expiry is reached.
    var captured = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => captured.Add(evt);
    await engine.UnshelveAsync("A", "bob", TestContext.Current.CancellationToken);

    // Assert
    engine.GetState("A")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved);
    var sawUnshelved = captured.Any(evt => evt.Emission == EmissionKind.Unshelved);
    sawUnshelved.ShouldBeTrue();
}
// (2c) EnableAsync exercised end-to-end through the engine.
[Fact]
public async Task EnableAsync_re_enables_after_disable(/* -012 (2) */)
{
    // Arrange: load, then disable.
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    await engine.DisableAsync("A", "alice", TestContext.Current.CancellationToken);
    engine.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Disabled);

    // Subscribe after the disable so only the enable emission is captured.
    var captured = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => captured.Add(evt);

    // Act
    await engine.EnableAsync("A", "bob", TestContext.Current.CancellationToken);

    // Assert
    engine.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Enabled);
    var sawEnabled = captured.Any(evt => evt.Emission == EmissionKind.Enabled);
    sawEnabled.ShouldBeTrue();
}
// (3) Isolation of a throwing OnEvent subscriber — a bad subscriber must not
// crash the engine or block later alarm state transitions.
[Fact]
public async Task OnEvent_subscriber_exception_does_not_crash_engine(/* -012 (3) */)
{
    // Arrange
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // The only subscriber throws on every event.
    engine.OnEvent += (_, _) => throw new InvalidOperationException("bad subscriber");

    // Act 1: cross the threshold. The state must advance even though the
    // subscriber throws when the activation is emitted.
    upstream.Push("Temp", 150);
    await WaitForAsync(() => engine.GetState("A")!.Active == AlarmActiveState.Active);
    engine.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active,
        "engine state advances even when the OnEvent subscriber threw");

    // Act 2: a second transition proves the engine is still operational.
    upstream.Push("Temp", 50);
    await WaitForAsync(() => engine.GetState("A")!.Active == AlarmActiveState.Inactive);
    engine.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
        "engine keeps processing after subscriber exception");
}
// (4) IAlarmStateStore.SaveAsync failure — the in-memory state must keep its
// prior value (persist-before-update, finding -007).
[Fact]
public async Task Store_save_failure_leaves_in_memory_state_unchanged(/* -012 (4) */)
{
    // Arrange: an engine over a store whose saves can be switched to fail.
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    var serilog = new LoggerConfiguration().CreateLogger();
    var flakyStore = new FailOnSaveAlarmStateStore();
    using var engine = new ScriptedAlarmEngine(
        upstream, flakyStore, new ScriptLoggerFactory(serilog), serilog);

    // Saves succeed while the alarm is loaded and its startup state is written.
    flakyStore.FailSave = false;
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    engine.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active);

    // From here on every save throws.
    flakyStore.FailSave = true;

    // Act: the acknowledge cannot be persisted, so it must surface the failure...
    await Should.ThrowAsync<InvalidOperationException>(
        () => engine.AcknowledgeAsync("A", "alice", null, TestContext.Current.CancellationToken));

    // ...and the in-memory ack state must not have moved.
    engine.GetState("A")!.Acked.ShouldBe(AlarmAckedState.Unacknowledged,
        "in-memory state must stay at prior value when store save fails");
}
// (5) Re-entrant LoadAsync — the old timer must not keep firing after a second
// call (regression for finding -002: _shelvingTimer?.Dispose() fix).
[Fact]
public async Task Second_LoadAsync_does_not_leak_old_timer(/* -012 (5) */)
{
    // The timer itself is not observable from here, so this test checks the
    // visible consequences of a clean reload instead: the alarm set is replaced,
    // upstream subscriptions are not duplicated, and the engine keeps evaluating.
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    var subsAfterFirst = up.ActiveSubscriptionCount;

    // Second load with a different alarm definition set.
    await eng.LoadAsync([Alarm("B", """return (int)ctx.GetTag("Temp").Value > 200;""")],
        TestContext.Current.CancellationToken);

    // After reload, only "B" should be present; "A" subscriptions released.
    eng.LoadedAlarmIds.ShouldContain("B");
    eng.LoadedAlarmIds.ShouldNotContain("A");

    // Subscriptions should match the new set only (one path "Temp" → 1 sub).
    up.ActiveSubscriptionCount.ShouldBe(subsAfterFirst, "subscription count same after reload on same path");

    // Engine is still functional. The explicit assertion keeps the outcome
    // independent of how the polling helper reports a timeout.
    up.Push("Temp", 250);
    await WaitForAsync(() => eng.GetState("B")!.Active == AlarmActiveState.Active);
    eng.GetState("B")!.Active.ShouldBe(AlarmActiveState.Active,
        "engine keeps evaluating after a reload");
}
// (6) Cold-start AreInputsReady guard — covers a missing value, a Bad status,
// and an Uncertain status input.
[Fact]
public async Task AreInputsReady_blocks_evaluation_for_null_and_bad_inputs(/* -012 (6) */)
{
    const uint badStatus = 0x80000000u;       // Bad severity bit (bit 31) set
    const uint uncertainStatus = 0x40000000u; // Uncertain severity: non-zero, bit 31 clear

    // Phase 1: "Temp" is never set before the load (ReadTag returns
    // BadNodeIdUnknown), so the predicate is not evaluated and the alarm
    // stays inactive.
    var upstream = new FakeUpstream();
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    engine.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
        "predicate not evaluated when input has Bad status");

    // Phase 2: an above-threshold value delivered with an explicit Bad status.
    upstream.Set("Temp", 150, statusCode: badStatus);
    upstream.Push("Temp", 150, statusCode: badStatus);
    await Task.Delay(100, TestContext.Current.CancellationToken);
    engine.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
        "predicate not evaluated when input has explicit Bad status code");

    // Phase 3: the same value with Uncertain quality is treated as ready, so
    // the alarm activates.
    upstream.Set("Temp", 150, statusCode: uncertainStatus);
    upstream.Push("Temp", 150, statusCode: uncertainStatus);
    await WaitForAsync(() => engine.GetState("A")!.Active == AlarmActiveState.Active);
    engine.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active,
        "Uncertain-quality inputs are treated as ready — predicate evaluates");
}
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-003: OnEvent emission must not block under _evalGate.
// (1) A slow subscriber must not block the gate for other alarms.
// (2) A subscriber that re-enters the engine (e.g. AcknowledgeAsync) must
// not deadlock against _evalGate. Both regressions are covered here.
// -------------------------------------------------------------------------
[Fact]
public async Task OnEvent_subscriber_can_call_back_into_engine_without_deadlock(/* -003 */)
{
    // Re-entrancy regression. When OnEvent emission was inside _evalGate, a
    // subscriber that called an engine method (e.g. AcknowledgeAsync) hung
    // forever because the non-reentrant SemaphoreSlim refused to re-grant
    // the gate the dispatch path was still holding. After the fix, emission
    // happens AFTER Release() so the subscriber's call acquires the gate
    // cleanly and the operator-driven action completes.
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // Subscriber re-enters the engine via Task.Run so the OnEvent
    // dispatch thread is not blocked while waiting. Either way, with
    // the fix in place AcknowledgeAsync must acquire _evalGate (the
    // dispatch path released it before invoking the subscriber) and
    // complete in well under the timeout.
    // RunContinuationsAsynchronously keeps the test's continuation from being
    // run inline on the thread that completes the source.
    var ackDone = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
    eng.OnEvent += (_, e) =>
    {
        if (e.Emission != EmissionKind.Activated) return;
        _ = Task.Run(async () =>
        {
            try
            {
                await eng.AcknowledgeAsync(e.AlarmId, "sub", null, CancellationToken.None);
                ackDone.TrySetResult();
            }
            catch (Exception ex) { ackDone.TrySetException(ex); }
        });
    };

    up.Push("Temp", 150);

    // The timeout honours the test-run cancellation token.
    var timeout = Task.Delay(TimeSpan.FromSeconds(3), TestContext.Current.CancellationToken);
    var winner = await Task.WhenAny(ackDone.Task, timeout);
    winner.ShouldBe(ackDone.Task,
        "subscriber re-entering the engine must not deadlock against _evalGate");
    await ackDone.Task; // surface any inner exception
    eng.GetState("HighTemp")!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
}
[Fact]
public void OnEvent_emission_happens_outside_evalGate(/* -003 */)
{
    // White-box check of the gate-release ordering: the Acknowledged event must
    // be raised after _evalGate has been released. This is verified by trying
    // to take the gate from inside the subscriber; SemaphoreSlim.Wait(0)
    // succeeds only when the count is above zero (gate free).
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    var engine = Build(upstream, out _);
    try
    {
        engine.LoadAsync(
                [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
                TestContext.Current.CancellationToken)
            .GetAwaiter().GetResult();

        // Drive the alarm to Active so there is something to acknowledge.
        upstream.Push("Temp", 150);

        // Synchronous polling (this test is not async): up to 80 checks, 25 ms apart.
        var polls = 0;
        while (polls < 80 && engine.GetState("HighTemp")!.Active != AlarmActiveState.Active)
        {
            Thread.Sleep(25);
            polls++;
        }
        engine.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);

        // Reach the private gate through reflection so the subscriber can probe it.
        const System.Reflection.BindingFlags privateInstance =
            System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance;
        var gateField = typeof(ScriptedAlarmEngine).GetField("_evalGate", privateInstance);
        gateField.ShouldNotBeNull();
        var gate = (SemaphoreSlim)gateField.GetValue(engine)!;

        var sawFreeGate = false;
        engine.OnEvent += (_, evt) =>
        {
            // Non-blocking try-take; only attempted for the Acknowledged emission.
            // When it succeeds the gate is handed straight back.
            if (evt.Emission == EmissionKind.Acknowledged && gate.Wait(0))
            {
                sawFreeGate = true;
                gate.Release();
            }
        };

        engine.AcknowledgeAsync("HighTemp", "alice", null, CancellationToken.None)
            .GetAwaiter().GetResult();

        sawFreeGate.ShouldBeTrue(
            "_evalGate must be released before OnEvent fires so subscribers " +
            "can call back into the engine without deadlocking");
    }
    finally
    {
        engine.Dispose();
    }
}
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-006: Dispose must drain in-flight background tasks
// launched by OnUpstreamChange / RunShelvingCheck. Otherwise a re-evaluation
// or shelving check started just before Dispose can keep running and write
// to a (possibly disposed) store after the engine has returned.
// -------------------------------------------------------------------------
[Fact]
public async Task Dispose_drains_in_flight_reevaluation_tasks(/* -006 */)
{
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    var logger = new LoggerConfiguration().CreateLogger();
    var slowStore = new BlockingSaveAlarmStateStore();
    var eng = new ScriptedAlarmEngine(up, slowStore, new ScriptLoggerFactory(logger), logger);
    await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // Block the NEXT save (the one triggered by the push below).
    // RunContinuationsAsynchronously keeps the blocked save's continuation from
    // running inline on the test thread when the gate is opened.
    var saveGate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
    slowStore.BlockNextSave = saveGate;
    try
    {
        // Trigger a re-evaluation that will go inside _evalGate and call SaveAsync.
        up.Push("Temp", 150);

        // Wait until the store's SaveAsync is actually blocked.
        await WaitForAsync(() => slowStore.SaveInProgress, timeoutMs: 1000);

        // Dispose must wait for the in-flight reevaluation to complete rather
        // than returning while a background task still runs.
        var disposeTask = Task.Run(() => eng.Dispose());

        // Verify Dispose does NOT complete immediately — it should block waiting
        // for the in-flight task. Without the -006 fix Dispose returns straight
        // away and the background reevaluation can outlive the engine.
        var prematureFinish = await Task.WhenAny(
            disposeTask, Task.Delay(200, TestContext.Current.CancellationToken));
        prematureFinish.ShouldNotBe(disposeTask,
            "Dispose must block until in-flight background tasks complete");

        // Let the save complete and verify Dispose then returns.
        saveGate.SetResult();
        await disposeTask.WaitAsync(TimeSpan.FromSeconds(3), TestContext.Current.CancellationToken);
        slowStore.SaveInProgress.ShouldBeFalse("background task drained before Dispose returned");
    }
    finally
    {
        // If anything above failed before the gate was opened, open it now so
        // the blocked save (and any Dispose waiting on it) is not left hanging.
        saveGate.TrySetResult();
    }
}
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-010: predicate evaluation and message-template
// resolution apply different quality bars on purpose. Predicate evaluation
// accepts Uncertain (the predicate can still inspect the value); message
// resolution renders Uncertain as "{?}" so the operator sees the doubt
// explicitly. The two policies are documented in docs/ScriptedAlarms.md.
// -------------------------------------------------------------------------
[Fact]
public async Task Uncertain_quality_drives_predicate_but_renders_question_mark_in_message(/* -010 */)
{
    var up = new FakeUpstream();
    // Seed with Uncertain quality (severity bit 30 set, bit 31 clear).
    up.Set("Temp", 150, statusCode: 0x40000000u);
    using var eng = Build(up, out _);
    await eng.LoadAsync([
        new ScriptedAlarmDefinition(
            "HighTemp", "Plant/Line1", "HighTemp",
            AlarmKind.LimitAlarm, AlarmSeverity.High,
            "Temp {Temp} exceeded limit",
            """return (int)ctx.GetTag("Temp").Value > 100;"""),
    ], TestContext.Current.CancellationToken);

    // Predicate evaluated (Uncertain treated as ready) → alarm Active.
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
        "AreInputsReady accepts Uncertain so the predicate runs");

    // But the resolved emission message must show "{?}" for the Uncertain
    // tag — only Good substitutes into the operator-facing message.
    // Thread-safe capture: the engine adds events while this thread enumerates
    // and clears the collection.
    var events = new System.Collections.Concurrent.ConcurrentQueue<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) => events.Enqueue(e);

    // The alarm is already active, so an Activated emission needs a
    // clear → re-activate cycle. (Comments cannot be used to force an emission
    // here because they do not run the message template.)
    up.Push("Temp", 200, statusCode: 0x40000000u); // still Uncertain
    up.Push("Temp", 50, statusCode: 0u); // Good, predicate becomes false
    await WaitForAsync(() => events.Any(e => e.Emission == EmissionKind.Cleared));
    events.Clear();
    up.Push("Temp", 200, statusCode: 0x40000000u); // Uncertain, predicate true
    await WaitForAsync(() => events.Any(e => e.Emission == EmissionKind.Activated));

    // The Activated message must show {?} for the Uncertain input.
    events.Single(e => e.Emission == EmissionKind.Activated).Message
        .ShouldBe("Temp {?} exceeded limit",
            "MessageTemplate.Resolve renders non-Good StatusCode as {?} " +
            "even though predicate evaluation accepted the Uncertain value");
}
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-008: switch Comments to ImmutableList for O(log n)
// append. The persisted runtime type must be ImmutableList<AlarmComment>
// (which still satisfies IReadOnlyList<AlarmComment> for existing
// consumers).
// -------------------------------------------------------------------------
/// <summary>
/// After one comment is added, the state's <c>Comments</c> collection must be
/// exactly <c>ImmutableList&lt;AlarmComment&gt;</c>.
/// </summary>
[Fact]
public async Task Comments_collection_uses_ImmutableList_for_efficient_append(/* -008 */)
{
    var ct = TestContext.Current.CancellationToken;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);

    using var engine = Build(upstream, out _);
    await engine.LoadAsync([Alarm("A", "return false;")], ct);

    // One comment is enough to push the state through the comment-append path.
    await engine.AddCommentAsync("A", "alice", "note", ct);

    engine.GetState("A")!.Comments
        .ShouldBeOfType<System.Collections.Immutable.ImmutableList<AlarmComment>>(
            "Comments should be an ImmutableList so append is O(log n), not O(n)");
}
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-011: TransitionResult.NoOp's reason parameter must be
// propagated, not silently discarded. The class-level remarks promise a
// diagnostic log line for no-op disabled-alarm evaluations.
// -------------------------------------------------------------------------
/// <summary>
/// The reason string handed to <c>TransitionResult.NoOp</c> must come back
/// unchanged through <c>NoOpReason</c>.
/// </summary>
[Fact]
public void TransitionResult_NoOp_propagates_reason(/* -011 */)
{
    const string reason = "disabled — predicate result ignored";
    var state = AlarmConditionState.Fresh("a-1", DateTime.UtcNow);

    var result = TransitionResult.NoOp(state, reason);

    result.NoOpReason.ShouldBe(
        reason,
        "NoOp reason must be preserved on the TransitionResult so callers can log it");
}
/// <summary>
/// A result built with <c>TransitionResult.None</c> must report a null
/// <c>NoOpReason</c>.
/// </summary>
[Fact]
public void TransitionResult_None_carries_no_reason(/* -011 */)
{
    var state = AlarmConditionState.Fresh("a-1", DateTime.UtcNow);

    TransitionResult.None(state).NoOpReason
        .ShouldBeNull("None() factory has no reason — only NoOp() carries one");
}
/// <summary>
/// Polls <paramref name="cond"/> roughly every 25 ms until it returns
/// <c>true</c> or the timeout elapses.
/// </summary>
/// <param name="cond">Condition to poll; evaluated on the calling context.</param>
/// <param name="timeoutMs">Maximum time to keep polling, in milliseconds.</param>
/// <exception cref="TimeoutException">
/// The condition was still false on the check made after the timeout elapsed.
/// </exception>
/// <remarks>
/// The condition is always evaluated at least once (even for a zero or
/// negative timeout) and once more after the final delay, so a condition that
/// turns true during the last sleep is not misreported as a timeout.
/// </remarks>
private static async Task WaitForAsync(Func<bool> cond, int timeoutMs = 2000)
{
    // Stopwatch is monotonic, so wall-clock adjustments cannot stretch or
    // shorten the wait.
    var elapsed = System.Diagnostics.Stopwatch.StartNew();
    while (true)
    {
        if (cond()) return;
        // Check the deadline only after evaluating the condition, which
        // guarantees a final evaluation once the deadline has been crossed.
        if (elapsed.ElapsedMilliseconds >= timeoutMs) break;
        await Task.Delay(25);
    }
    throw new TimeoutException("Condition did not become true in time");
}
// -------------------------------------------------------------------------
// Test helpers
// -------------------------------------------------------------------------
/// <summary>
/// A store that can be instructed to throw on every SaveAsync call.
/// Used to exercise the persist-before-update invariant (finding -007).
/// </summary>
private sealed class FailOnSaveAlarmStateStore : IAlarmStateStore
{
    // Every call that is not a simulated failure is forwarded to this store.
    private readonly InMemoryAlarmStateStore _backing = new();

    /// <summary>While true, <see cref="SaveAsync"/> throws instead of saving.</summary>
    public bool FailSave { get; set; }

    /// <summary>Forwards to the backing in-memory store.</summary>
    public Task<AlarmConditionState?> LoadAsync(string alarmId, CancellationToken ct)
    {
        return _backing.LoadAsync(alarmId, ct);
    }

    /// <summary>Forwards to the backing in-memory store.</summary>
    public Task<IReadOnlyList<AlarmConditionState>> LoadAllAsync(CancellationToken ct)
    {
        return _backing.LoadAllAsync(ct);
    }

    /// <summary>
    /// Throws <see cref="InvalidOperationException"/> when <see cref="FailSave"/>
    /// is set; otherwise forwards to the backing store. The method is not
    /// <c>async</c>, so the exception escapes from the call itself rather than
    /// through the returned task.
    /// </summary>
    public Task SaveAsync(AlarmConditionState state, CancellationToken ct)
        => FailSave
            ? throw new InvalidOperationException("Simulated store failure")
            : _backing.SaveAsync(state, ct);

    /// <summary>Forwards to the backing in-memory store.</summary>
    public Task RemoveAsync(string alarmId, CancellationToken ct)
    {
        return _backing.RemoveAsync(alarmId, ct);
    }
}
/// <summary>
/// A store whose SaveAsync can be made to block until the test signals it.
/// Used to verify Dispose drains in-flight background tasks (finding -006).
/// </summary>
private sealed class BlockingSaveAlarmStateStore : IAlarmStateStore
{
    // Loads, removes and the eventual save are all forwarded to this store.
    private readonly InMemoryAlarmStateStore _backing = new();

    /// <summary>
    /// When non-null, the next <see cref="SaveAsync"/> call takes this source,
    /// resets the property to null, and waits on the source's task before saving.
    /// </summary>
    public TaskCompletionSource? BlockNextSave { get; set; }

    /// <summary>True only while a save is parked waiting on its blocking task.</summary>
    public bool SaveInProgress { get; private set; }

    /// <summary>Forwards to the backing in-memory store.</summary>
    public Task<AlarmConditionState?> LoadAsync(string alarmId, CancellationToken ct)
    {
        return _backing.LoadAsync(alarmId, ct);
    }

    /// <summary>Forwards to the backing in-memory store.</summary>
    public Task<IReadOnlyList<AlarmConditionState>> LoadAllAsync(CancellationToken ct)
    {
        return _backing.LoadAllAsync(ct);
    }

    /// <summary>
    /// Saves through the backing store, first waiting on
    /// <see cref="BlockNextSave"/> (if one was armed) until it completes or
    /// <paramref name="ct"/> is cancelled.
    /// </summary>
    public async Task SaveAsync(AlarmConditionState state, CancellationToken ct)
    {
        if (BlockNextSave is { } release)
        {
            // Disarm first so only this save consumes the armed source.
            BlockNextSave = null;
            SaveInProgress = true;
            try
            {
                await release.Task.WaitAsync(ct).ConfigureAwait(false);
            }
            finally
            {
                // Cleared on cancellation or fault as well as on normal release.
                SaveInProgress = false;
            }
        }

        await _backing.SaveAsync(state, ct).ConfigureAwait(false);
    }

    /// <summary>Forwards to the backing in-memory store.</summary>
    public Task RemoveAsync(string alarmId, CancellationToken ct)
    {
        return _backing.RemoveAsync(alarmId, ct);
    }
}
}