- Core.ScriptedAlarms-003: emit OnEvent OUTSIDE _evalGate by collecting
pending emissions during the gate-held section and flushing them after
release; eliminates the re-entrancy deadlock, so the engine now delivers
the deadlock-free re-entrancy the docs already promised.
- Core.ScriptedAlarms-006: track every fire-and-forget Reevaluate /
ShelvingCheck task in _inFlight; Dispose drains the set so the engine
no longer races store writes against teardown.
- Core.ScriptedAlarms-008: store comments as ImmutableList<AlarmComment>
so AppendComment is O(log n) instead of O(n).
- Core.ScriptedAlarms-010: document the deliberate input-quality
asymmetry (Uncertain drives the predicate, renders {?} in the message)
in docs/ScriptedAlarms.md and on MessageTemplate.Resolve remarks.
- Core.ScriptedAlarms-011: propagate the no-op reason through
TransitionResult.NoOp(state, reason) and log it from
ScriptedAlarmEngine.ApplyAsync.
- Core.ScriptedAlarms-009 (Won't Fix per recommendation): documented the
per-evaluation dictionary allocation in docs/v2/Galaxy.Performance.md
with a mitigation path if a future soak surfaces pressure.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
929 lines
41 KiB
C#
929 lines
41 KiB
C#
using Serilog;
|
|
using Shouldly;
|
|
using Xunit;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
|
using ZB.MOM.WW.OtOpcUa.Core.Scripting;
|
|
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms.Tests;
|
|
|
|
/// <summary>
|
|
/// End-to-end engine tests: load, predicate evaluation, change-triggered
|
|
/// re-evaluation, state persistence, startup recovery, error isolation.
|
|
/// </summary>
|
|
[Trait("Category", "Unit")]
|
|
public sealed class ScriptedAlarmEngineTests
|
|
{
|
|
/// <summary>
/// Creates an engine over <paramref name="up"/> backed by a fresh
/// <see cref="InMemoryAlarmStateStore"/> and a Serilog logger built from an
/// unconfigured <see cref="LoggerConfiguration"/>.
/// </summary>
/// <param name="up">Upstream tag source handed to the engine.</param>
/// <param name="store">Receives the in-memory store the engine was constructed with.</param>
/// <returns>The newly constructed engine; callers in this class dispose it.</returns>
private static ScriptedAlarmEngine Build(FakeUpstream up, out IAlarmStateStore store)
{
    var rootLogger = new LoggerConfiguration().CreateLogger();
    var scriptLoggers = new ScriptLoggerFactory(rootLogger);
    store = new InMemoryAlarmStateStore();
    return new ScriptedAlarmEngine(up, store, scriptLoggers, rootLogger);
}
|
|
|
|
/// <summary>
/// Shorthand for an <see cref="AlarmKind.AlarmCondition"/> definition on equipment
/// path "Plant/Line1/Reactor" whose alarm name is the same as its id.
/// </summary>
/// <param name="id">Used for both the alarm id and the alarm name.</param>
/// <param name="predicate">Predicate script source.</param>
/// <param name="msg">Message template; defaults to "condition".</param>
/// <param name="sev">Severity; defaults to <see cref="AlarmSeverity.High"/>.</param>
private static ScriptedAlarmDefinition Alarm(string id, string predicate,
    string msg = "condition", AlarmSeverity sev = AlarmSeverity.High)
{
    return new ScriptedAlarmDefinition(
        AlarmId: id,
        EquipmentPath: "Plant/Line1/Reactor",
        AlarmName: id,
        Kind: AlarmKind.AlarmCondition,
        Severity: sev,
        MessageTemplate: msg,
        PredicateScriptSource: predicate);
}
|
|
|
|
/// <summary>
/// Loading one alarm whose predicate reads "Temp" is expected to register the alarm id
/// and leave exactly one active upstream subscription.
/// </summary>
[Fact]
public async Task Load_compiles_and_subscribes_to_referenced_upstreams()
{
    const string predicate = """return (int)ctx.GetTag("Temp").Value > 100;""";
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);

    await engine.LoadAsync([Alarm("a1", predicate)], TestContext.Current.CancellationToken);

    engine.LoadedAlarmIds.ShouldContain("a1");
    upstream.ActiveSubscriptionCount.ShouldBe(1);
}
|
|
|
|
/// <summary>
/// Loading a batch of three alarms, two of which reference undeclared identifiers, is
/// expected to throw a single <see cref="InvalidOperationException"/> whose message
/// reports both failures.
/// </summary>
[Fact]
public async Task Compile_failures_aggregated_into_one_error()
{
    var upstream = new FakeUpstream();
    using var engine = Build(upstream, out _);

    Func<Task> loadMixedBatch = async () =>
        await engine.LoadAsync([
            Alarm("bad1", "return unknownIdentifier;"),
            Alarm("good", "return true;"),
            Alarm("bad2", "var x = alsoUnknown; return x;"),
        ], TestContext.Current.CancellationToken);

    var failure = await Should.ThrowAsync<InvalidOperationException>(loadMixedBatch);

    failure.Message.ShouldContain("2 alarm(s) did not compile");
}
|
|
|
|
/// <summary>
/// Pushing "Temp" above the predicate threshold is expected to raise an
/// <see cref="EmissionKind.Activated"/> event for the alarm and move it to Active.
/// </summary>
[Fact]
public async Task Upstream_change_re_evaluates_predicate_and_emits_Activated()
{
    const string alarmId = "HighTemp";
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm(alarmId, """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    var received = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => received.Add(evt);

    upstream.Push("Temp", 150);
    await WaitForAsync(() => received.Count > 0);

    var first = received[0];
    first.AlarmId.ShouldBe(alarmId);
    first.Emission.ShouldBe(EmissionKind.Activated);
    engine.GetState(alarmId)!.Active.ShouldBe(AlarmActiveState.Active);
}
|
|
|
|
/// <summary>
/// An alarm that is Active right after load is expected to emit
/// <see cref="EmissionKind.Cleared"/> and become Inactive once "Temp" is pushed back
/// below the predicate threshold.
/// </summary>
[Fact]
public async Task Clearing_upstream_emits_Cleared_event()
{
    const string alarmId = "HighTemp";
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm(alarmId, """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    // Temp was seeded at 150 before the load, so the alarm must already be active here.
    engine.GetState(alarmId)!.Active.ShouldBe(AlarmActiveState.Active);

    var received = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => received.Add(evt);

    upstream.Push("Temp", 50);
    await WaitForAsync(() => received.Any(evt => evt.Emission == EmissionKind.Cleared));

    engine.GetState(alarmId)!.Active.ShouldBe(AlarmActiveState.Inactive);
}
|
|
|
|
/// <summary>
/// The message carried by the first emitted event is expected to have the {Temp} and
/// {Limit} placeholders replaced with the tag values current at emission time.
/// </summary>
[Fact]
public async Task Message_template_resolves_tag_values_at_emission()
{
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    upstream.Set("Limit", 100);
    using var engine = Build(upstream, out _);

    var definition = new ScriptedAlarmDefinition(
        "HighTemp", "Plant/Line1", "HighTemp",
        AlarmKind.LimitAlarm, AlarmSeverity.High,
        "Temp {Temp}C exceeded limit {Limit}C",
        """return (int)ctx.GetTag("Temp").Value > (int)ctx.GetTag("Limit").Value;""");
    await engine.LoadAsync([definition], TestContext.Current.CancellationToken);

    var received = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => received.Add(evt);

    upstream.Push("Temp", 150);
    await WaitForAsync(() => received.Any());

    received[0].Message.ShouldBe("Temp 150C exceeded limit 100C");
}
|
|
|
|
/// <summary>
/// Acknowledging an active alarm is expected to persist the Acknowledged state, the
/// acknowledging user, the comment text, and an "Acknowledge" audit entry to the store.
/// </summary>
[Fact]
public async Task Ack_records_user_and_persists_to_store()
{
    var ct = TestContext.Current.CancellationToken;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    using var engine = Build(upstream, out var store);
    await engine.LoadAsync(
        [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        ct);

    await engine.AcknowledgeAsync("HighTemp", "alice", "checking", ct);

    // Read back through the store rather than the engine to prove the ack was persisted.
    var saved = await store.LoadAsync("HighTemp", ct);
    saved.ShouldNotBeNull();
    saved!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
    saved.LastAckUser.ShouldBe("alice");
    saved.LastAckComment.ShouldBe("checking");

    var hasAckAuditEntry = saved.Comments.Any(c => c.Kind == "Acknowledge" && c.User == "alice");
    hasAckAuditEntry.ShouldBeTrue();
}
|
|
|
|
/// <summary>
/// Simulated restart: a second engine is loaded from a store seeded with an
/// Active + Acknowledged "HighTemp" record while "Temp" sits below the threshold.
/// The test expects Active to be recomputed from the predicate while the
/// acknowledgement and its user carry over.
/// </summary>
[Fact]
public async Task Startup_recovery_preserves_ack_but_rederives_active_from_predicate()
{
    var ct = TestContext.Current.CancellationToken;
    const string predicate = """return (int)ctx.GetTag("Temp").Value > 100;""";
    var up = new FakeUpstream();

    // First run — alarm goes active + operator acks. The store Build creates is
    // discarded on purpose: the restart below seeds its own store explicitly.
    using (var eng1 = Build(up, out _))
    {
        up.Set("Temp", 150);
        await eng1.LoadAsync([Alarm("HighTemp", predicate)], ct);
        eng1.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);

        await eng1.AcknowledgeAsync("HighTemp", "alice", null, ct);
        eng1.GetState("HighTemp")!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
    }

    // Simulate restart — temp is back to 50 (below threshold).
    up.Set("Temp", 50);
    var logger = new LoggerConfiguration().CreateLogger();
    var store2 = new InMemoryAlarmStateStore();

    // Seed store2 with the acked state from before the restart.
    await store2.SaveAsync(new AlarmConditionState(
        "HighTemp",
        AlarmEnabledState.Enabled,
        AlarmActiveState.Active,          // was active pre-restart
        AlarmAckedState.Acknowledged,     // ack persisted
        AlarmConfirmedState.Unconfirmed,
        ShelvingState.Unshelved,
        DateTime.UtcNow,
        DateTime.UtcNow, null,
        DateTime.UtcNow, "alice", null,
        null, null, null,
        [new AlarmComment(DateTime.UtcNow, "alice", "Acknowledge", "")]),
        ct);

    using var eng2 = new ScriptedAlarmEngine(up, store2, new ScriptLoggerFactory(logger), logger);
    await eng2.LoadAsync([Alarm("HighTemp", predicate)], ct);

    var s = eng2.GetState("HighTemp")!;
    s.Active.ShouldBe(AlarmActiveState.Inactive, "Active recomputed from current tag value");
    s.Acked.ShouldBe(AlarmAckedState.Acknowledged, "Ack persisted across restart");
    s.LastAckUser.ShouldBe("alice");
}
|
|
|
|
/// <summary>
/// With a one-shot shelve in place, pushing "Temp" above the threshold is expected to
/// move the alarm to Active without emitting <see cref="EmissionKind.Activated"/>.
/// </summary>
[Fact]
public async Task Shelved_active_transitions_state_but_suppresses_emission()
{
    var ct = TestContext.Current.CancellationToken;
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")], ct);

    await eng.OneShotShelveAsync("HighTemp", "alice", ct);

    var events = new List<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) => events.Add(e);

    up.Push("Temp", 150);

    // Wait for the transition itself instead of relying on a fixed sleep, then keep a
    // short grace period so a wrongly emitted Activated event has time to arrive
    // before the negative assertion runs.
    await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);
    await Task.Delay(200, ct);

    events.Any(e => e.Emission == EmissionKind.Activated).ShouldBeFalse(
        "OneShot shelve suppresses activation emission");
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
        "state still advances so startup recovery is consistent");
}
|
|
|
|
/// <summary>
/// A predicate that throws is expected to leave its own alarm Inactive without
/// preventing a sibling alarm loaded in the same batch from going Active.
/// </summary>
[Fact]
public async Task Predicate_runtime_exception_does_not_transition_state()
{
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    using var engine = Build(upstream, out _);

    var throwing = Alarm("BadScript", """throw new InvalidOperationException("boom");""");
    var healthy = Alarm("GoodScript", """return (int)ctx.GetTag("Temp").Value > 100;""");
    await engine.LoadAsync([throwing, healthy], TestContext.Current.CancellationToken);

    // The throwing script must neither activate nor take the healthy alarm down with it.
    engine.GetState("BadScript")!.Active.ShouldBe(AlarmActiveState.Inactive);
    engine.GetState("GoodScript")!.Active.ShouldBe(AlarmActiveState.Active);
}
|
|
|
|
/// <summary>
/// A disabled alarm is expected to stay Inactive when its input crosses the threshold,
/// and to activate on the next qualifying push after it is re-enabled.
/// </summary>
[Fact]
public async Task Disable_prevents_activation_until_re_enabled()
{
    var ct = TestContext.Current.CancellationToken;
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")], ct);

    await eng.DisableAsync("HighTemp", "alice", ct);
    up.Push("Temp", 150);

    // Negative assertion: there is no event to wait on, so give the engine a bounded
    // window to (wrongly) react. The delay honours the test's cancellation token.
    await Task.Delay(100, ct);
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Inactive,
        "disabled alarm ignores predicate");

    await eng.EnableAsync("HighTemp", "alice", ct);
    up.Push("Temp", 160);
    await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);

    // Assert explicitly so the outcome does not depend on how WaitForAsync reports a timeout.
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
        "re-enabled alarm follows the predicate again");
}
|
|
|
|
/// <summary>
/// Adding a comment to an alarm is expected to persist exactly one audit entry of kind
/// "AddComment" attributed to the commenting user.
/// </summary>
[Fact]
public async Task AddComment_appends_to_audit_without_state_change()
{
    var ct = TestContext.Current.CancellationToken;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out var store);
    await engine.LoadAsync([Alarm("A", """return false;""")], ct);

    await engine.AddCommentAsync("A", "alice", "peeking at this", ct);

    var saved = await store.LoadAsync("A", ct);
    saved.ShouldNotBeNull();
    saved!.Comments.Count.ShouldBe(1);

    var onlyComment = saved.Comments[0];
    onlyComment.User.ShouldBe("alice");
    onlyComment.Kind.ShouldBe("AddComment");
}
|
|
|
|
/// <summary>
/// A predicate that calls ctx.SetVirtualTag before returning true is expected to load
/// successfully yet leave its alarm Inactive.
/// </summary>
[Fact]
public async Task Predicate_scripts_cannot_SetVirtualTag()
{
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 100);
    using var engine = Build(upstream, out _);

    // The load below is expected to succeed; the alarm never activates even though the
    // script's last statement is "return true", i.e. the SetVirtualTag call must have
    // aborted the evaluation.
    var definition = new ScriptedAlarmDefinition(
        "Bad", "Plant/Line1", "Bad",
        AlarmKind.AlarmCondition, AlarmSeverity.High, "bad",
        """
        ctx.SetVirtualTag("NotAllowed", 1);
        return true;
        """);
    await engine.LoadAsync([definition], TestContext.Current.CancellationToken);

    engine.GetState("Bad")!.Active.ShouldBe(AlarmActiveState.Inactive);
}
|
|
|
|
/// <summary>
/// Disposing a loaded engine is expected to drop the upstream's active subscription
/// count from one to zero.
/// </summary>
[Fact]
public async Task Dispose_releases_upstream_subscriptions()
{
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);

    // No using declaration: the explicit Dispose call is the act under test.
    var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);
    upstream.ActiveSubscriptionCount.ShouldBe(1);

    engine.Dispose();

    upstream.ActiveSubscriptionCount.ShouldBe(0);
}
|
|
|
|
/// <summary>
/// Regression for Core.ScriptedAlarms-001: for roughly two seconds one writer task
/// mutates alarm state (comments plus upstream pushes) while four reader tasks call
/// LoadedAlarmIds, GetAllStates and GetState. No reader may observe an exception.
/// </summary>
[Fact]
public async Task Concurrent_reads_during_mutation_do_not_throw(/* Core.ScriptedAlarms-001 */)
{
    const int alarmCount = 40;
    const int readerCount = 4;

    var upstream = new FakeUpstream();
    var definitions = new List<ScriptedAlarmDefinition>();
    foreach (var i in Enumerable.Range(0, alarmCount))
    {
        upstream.Set($"Temp{i}", 50);
        definitions.Add(Alarm($"A{i}", $$"""return (int)ctx.GetTag("Temp{{i}}").Value > 100;"""));
    }

    using var engine = Build(upstream, out _);
    await engine.LoadAsync(definitions, TestContext.Current.CancellationToken);

    // The token source doubles as the stop signal for both the writer and the readers.
    using var stopSignal = new CancellationTokenSource(TimeSpan.FromSeconds(2));
    Exception? readerFailure = null;

    // Writer: cycles through the alarms, adding a comment and flipping the input value.
    var writer = Task.Run(async () =>
    {
        for (var round = 0; !stopSignal.IsCancellationRequested; round++)
        {
            var slot = round % alarmCount;
            await engine.AddCommentAsync($"A{slot}", "tester", $"r{round}", CancellationToken.None);
            upstream.Push($"Temp{slot}", round % 2 == 0 ? 150 : 50);
        }
    });

    // Readers: spin over the three read paths; any exception is captured, not rethrown.
    void ReadUntilStopped()
    {
        try
        {
            while (!stopSignal.IsCancellationRequested)
            {
                _ = engine.LoadedAlarmIds.Count;
                _ = engine.GetAllStates().Count;
                for (var i = 0; i < alarmCount; i++)
                {
                    _ = engine.GetState($"A{i}");
                }
            }
        }
        catch (Exception ex)
        {
            readerFailure = ex;
        }
    }

    var readers = new Task[readerCount];
    for (var r = 0; r < readerCount; r++)
    {
        readers[r] = Task.Run(ReadUntilStopped);
    }

    await Task.WhenAll([writer, .. readers]);

    readerFailure.ShouldBeNull(
        "concurrent reads of _alarms while it is being mutated must not throw");
}
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Core.ScriptedAlarms-012: coverage gaps
|
|
// -------------------------------------------------------------------------
|
|
|
|
// (1) Timed-shelve auto-expiry driven by the engine's shelving timer via an
|
|
// injectable clock — the clock and scriptTimeout constructor parameters
|
|
// exist for exactly this.
|
|
/// <summary>
/// With an injected clock, a ten-minute timed shelve is expected to revert to Unshelved
/// and emit <see cref="EmissionKind.Unshelved"/> once the clock is moved eleven minutes
/// forward and the shelving check is triggered through the test hook.
/// </summary>
[Fact]
public async Task TimedShelve_auto_expires_when_engine_shelving_check_runs(/* -012 (1) */)
{
    var ct = TestContext.Current.CancellationToken;

    // Hand-cranked clock: the delegate returns whatever `current` holds when invoked.
    var current = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
    Func<DateTime> clock = () => current;

    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    var logger = new LoggerConfiguration().CreateLogger();
    var store = new InMemoryAlarmStateStore();
    using var engine = new ScriptedAlarmEngine(
        upstream, store, new ScriptLoggerFactory(logger), logger, clock);

    await engine.LoadAsync(
        [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        ct);

    var expiry = current.AddMinutes(10);
    await engine.TimedShelveAsync("HighTemp", "alice", expiry, ct);
    engine.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Timed);

    var received = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => received.Add(evt);

    // Jump past the expiry and run the check by hand instead of waiting for a timer.
    current = current.AddMinutes(11);
    engine.RunShelvingCheckForTest();

    await WaitForAsync(() => engine.GetState("HighTemp")!.Shelving.Kind == ShelvingKind.Unshelved);

    engine.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved, "timed shelve expired");
    received.Any(evt => evt.Emission == EmissionKind.Unshelved).ShouldBeTrue("Unshelved event emitted");
}
|
|
|
|
// (2a) ConfirmAsync end-to-end through the engine.
|
|
/// <summary>
/// After an alarm has been acknowledged and cleared, confirming it is expected to set
/// the Confirmed state and confirming user, emit <see cref="EmissionKind.Confirmed"/>,
/// and persist the Confirmed state to the store.
/// </summary>
[Fact]
public async Task ConfirmAsync_records_user_and_emits_Confirmed(/* -012 (2) */)
{
    var ct = TestContext.Current.CancellationToken;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    using var engine = Build(upstream, out var store);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        ct);

    // Sequence under test: active at load → acknowledge → clear → confirm.
    await engine.AcknowledgeAsync("A", "alice", null, ct);
    upstream.Push("Temp", 50);
    await WaitForAsync(() => engine.GetState("A")!.Active == AlarmActiveState.Inactive);

    var received = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => received.Add(evt);
    await engine.ConfirmAsync("A", "bob", "resolved", ct);

    var afterConfirm = engine.GetState("A")!;
    afterConfirm.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
    afterConfirm.LastConfirmUser.ShouldBe("bob");
    received.Any(evt => evt.Emission == EmissionKind.Confirmed).ShouldBeTrue();

    var saved = await store.LoadAsync("A", ct);
    saved!.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
}
|
|
|
|
// (2b) TimedShelveAsync / UnshelveAsync end-to-end through the engine.
|
|
/// <summary>
/// A timed shelve is expected to record its kind and unshelve time; a manual unshelve
/// before that time is expected to restore Unshelved and emit
/// <see cref="EmissionKind.Unshelved"/>.
/// </summary>
[Fact]
public async Task TimedShelveAsync_and_UnshelveAsync_round_trip(/* -012 (2) */)
{
    var ct = TestContext.Current.CancellationToken;
    var current = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
    Func<DateTime> clock = () => current;

    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    var logger = new LoggerConfiguration().CreateLogger();
    using var engine = new ScriptedAlarmEngine(
        upstream,
        new InMemoryAlarmStateStore(),
        new ScriptLoggerFactory(logger),
        logger,
        clock);
    await engine.LoadAsync([Alarm("A", "return false;")], ct);

    var expiry = current.AddMinutes(30);
    await engine.TimedShelveAsync("A", "alice", expiry, ct);

    var shelved = engine.GetState("A")!.Shelving;
    shelved.Kind.ShouldBe(ShelvingKind.Timed);
    engine.GetState("A")!.Shelving.UnshelveAtUtc.ShouldBe(expiry);

    // The clock is never advanced, so this unshelve happens before the shelve expires.
    var received = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => received.Add(evt);
    await engine.UnshelveAsync("A", "bob", ct);

    engine.GetState("A")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved);
    received.Any(evt => evt.Emission == EmissionKind.Unshelved).ShouldBeTrue();
}
|
|
|
|
// (2c) EnableAsync end-to-end through the engine.
|
|
/// <summary>
/// Disabling and then enabling an alarm is expected to flip its Enabled state both ways
/// and emit <see cref="EmissionKind.Enabled"/> on the enable.
/// </summary>
[Fact]
public async Task EnableAsync_re_enables_after_disable(/* -012 (2) */)
{
    var ct = TestContext.Current.CancellationToken;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        ct);

    await engine.DisableAsync("A", "alice", ct);
    engine.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Disabled);

    // Subscribe only now so the captured events belong to the enable call alone.
    var received = new List<ScriptedAlarmEvent>();
    engine.OnEvent += (_, evt) => received.Add(evt);
    await engine.EnableAsync("A", "bob", ct);

    engine.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Enabled);
    received.Any(evt => evt.Emission == EmissionKind.Enabled).ShouldBeTrue();
}
|
|
|
|
// (3) OnEvent subscriber-throws isolation — a bad subscriber must not crash
|
|
// the engine or prevent subsequent alarm state transitions. The engine logs
|
|
// the exception and continues operating; any later alarm changes still work.
|
|
/// <summary>
/// With a single OnEvent subscriber that always throws, the alarm is still expected to
/// go Active on a qualifying push and back to Inactive on a subsequent clearing push.
/// </summary>
[Fact]
public async Task OnEvent_subscriber_exception_does_not_crash_engine(/* -012 (3) */)
{
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        TestContext.Current.CancellationToken);

    AlarmActiveState CurrentActive() => engine.GetState("A")!.Active;

    // The only subscriber blows up on every event it receives.
    engine.OnEvent += (_, _) => throw new InvalidOperationException("bad subscriber");

    // First transition: state must advance despite the throwing subscriber.
    upstream.Push("Temp", 150);
    await WaitForAsync(() => CurrentActive() == AlarmActiveState.Active);
    CurrentActive().ShouldBe(AlarmActiveState.Active,
        "engine state advances even when the OnEvent subscriber threw");

    // Second transition: proves the engine kept processing afterwards.
    upstream.Push("Temp", 50);
    await WaitForAsync(() => CurrentActive() == AlarmActiveState.Inactive);
    CurrentActive().ShouldBe(AlarmActiveState.Inactive,
        "engine keeps processing after subscriber exception");
}
|
|
|
|
// (4) IAlarmStateStore.SaveAsync failure — in-memory state must remain at the
|
|
// prior value after finding -007 fix (persist-before-update).
|
|
/// <summary>
/// When the store starts failing saves after a successful load, an acknowledge attempt
/// is expected to throw <see cref="InvalidOperationException"/> and leave the in-memory
/// Acked state at Unacknowledged.
/// </summary>
[Fact]
public async Task Store_save_failure_leaves_in_memory_state_unchanged(/* -012 (4) */)
{
    var ct = TestContext.Current.CancellationToken;
    var upstream = new FakeUpstream();
    upstream.Set("Temp", 150);
    var logger = new LoggerConfiguration().CreateLogger();
    var flakyStore = new FailOnSaveAlarmStateStore();
    using var engine = new ScriptedAlarmEngine(
        upstream, flakyStore, new ScriptLoggerFactory(logger), logger);

    // Saves are allowed while loading so the startup state can be established.
    flakyStore.FailSave = false;
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        ct);
    engine.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active);

    // From here on every save is made to fail.
    flakyStore.FailSave = true;

    Func<Task> acknowledge = () => engine.AcknowledgeAsync("A", "alice", null, ct);
    await Should.ThrowAsync<InvalidOperationException>(acknowledge);

    engine.GetState("A")!.Acked.ShouldBe(AlarmAckedState.Unacknowledged,
        "in-memory state must stay at prior value when store save fails");
}
|
|
|
|
// (5) Re-entrant LoadAsync — the old timer must not keep firing after a second
|
|
// call (regression for finding -002: _shelvingTimer?.Dispose() fix).
|
|
/// <summary>
/// Calling LoadAsync a second time with a different definition set is expected to
/// replace the loaded alarms, keep the upstream subscription count unchanged for the
/// shared "Temp" path, and leave the engine able to evaluate the new alarm.
/// </summary>
[Fact]
public async Task Second_LoadAsync_does_not_leak_old_timer(/* -012 (5) */)
{
    // A leaked shelving timer is not observed directly here. The test checks the
    // observable effects of a reload instead: the alarm set is replaced, subscriptions
    // are not duplicated, and the engine still evaluates after the second load.
    var ct = TestContext.Current.CancellationToken;
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);

    await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")], ct);
    var subsAfterFirst = up.ActiveSubscriptionCount;

    // Second load with a different alarm definition set.
    await eng.LoadAsync([Alarm("B", """return (int)ctx.GetTag("Temp").Value > 200;""")], ct);

    // After reload, only "B" should be present; "A" subscriptions released.
    eng.LoadedAlarmIds.ShouldContain("B");
    eng.LoadedAlarmIds.ShouldNotContain("A");
    // Subscriptions should match the new set only (one path "Temp" → 1 sub).
    up.ActiveSubscriptionCount.ShouldBe(subsAfterFirst, "subscription count same after reload on same path");

    // Engine is still functional.
    up.Push("Temp", 250);
    await WaitForAsync(() => eng.GetState("B")!.Active == AlarmActiveState.Active);

    // Assert explicitly so the outcome does not depend on how WaitForAsync reports a timeout.
    eng.GetState("B")!.Active.ShouldBe(AlarmActiveState.Active,
        "reloaded engine must still evaluate the new alarm");
}
|
|
|
|
// (6) Cold-start AreInputsReady guard — null value, Bad status, and Uncertain
|
|
// status inputs are all handled correctly.
|
|
/// <summary>
/// Walks one alarm through three input conditions: tag never seeded, tag pushed with
/// status 0x80000000, and tag pushed with status 0x40000000. The alarm is expected to
/// stay Inactive for the first two and go Active for the third.
/// </summary>
[Fact]
public async Task AreInputsReady_blocks_evaluation_for_null_and_bad_inputs(/* -012 (6) */)
{
    const uint badStatus = 0x80000000u;       // bit 31 set
    const uint uncertainStatus = 0x40000000u; // bit 30 set, bit 31 clear

    var ct = TestContext.Current.CancellationToken;

    // "Temp" is deliberately never seeded before the load.
    var upstream = new FakeUpstream();
    using var engine = Build(upstream, out _);
    await engine.LoadAsync(
        [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
        ct);

    AlarmActiveState CurrentActive() => engine.GetState("A")!.Active;

    // Phase 1: no value at all.
    CurrentActive().ShouldBe(AlarmActiveState.Inactive,
        "predicate not evaluated when input has Bad status");

    // Phase 2: a value above the threshold, but flagged with the bad status code.
    upstream.Set("Temp", 150, statusCode: badStatus);
    upstream.Push("Temp", 150, statusCode: badStatus);
    await Task.Delay(100, ct);
    CurrentActive().ShouldBe(AlarmActiveState.Inactive,
        "predicate not evaluated when input has explicit Bad status code");

    // Phase 3: same value with the uncertain status code; the predicate should now run.
    upstream.Set("Temp", 150, statusCode: uncertainStatus);
    upstream.Push("Temp", 150, statusCode: uncertainStatus);
    await WaitForAsync(() => CurrentActive() == AlarmActiveState.Active);
    CurrentActive().ShouldBe(AlarmActiveState.Active,
        "Uncertain-quality inputs are treated as ready — predicate evaluates");
}
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Core.ScriptedAlarms-003: OnEvent emission must not block under _evalGate.
|
|
// (1) A slow subscriber must not block the gate for other alarms.
|
|
// (2) A subscriber that re-enters the engine (e.g. AcknowledgeAsync) must
|
|
// not deadlock against _evalGate. Both regressions are covered here.
|
|
// -------------------------------------------------------------------------
|
|
/// <summary>
/// Re-entrancy regression for Core.ScriptedAlarms-003: an OnEvent subscriber reacts to
/// <see cref="EmissionKind.Activated"/> by calling AcknowledgeAsync on the same engine.
/// The acknowledge is expected to complete within three seconds and leave the alarm
/// Acknowledged.
/// </summary>
[Fact]
public async Task OnEvent_subscriber_can_call_back_into_engine_without_deadlock(/* -003 */)
{
    var ct = TestContext.Current.CancellationToken;
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")], ct);

    // RunContinuationsAsynchronously keeps the test's continuation off the thread that
    // completes the source (the subscriber's background task).
    var ackDone = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);

    // The subscriber re-enters the engine from a Task.Run so the thread dispatching
    // OnEvent is never blocked waiting on the acknowledge.
    eng.OnEvent += (_, e) =>
    {
        if (e.Emission != EmissionKind.Activated)
        {
            return;
        }

        _ = Task.Run(async () =>
        {
            try
            {
                await eng.AcknowledgeAsync(e.AlarmId, "sub", null, CancellationToken.None);
                ackDone.TrySetResult();
            }
            catch (Exception ex)
            {
                ackDone.TrySetException(ex);
            }
        });
    };

    up.Push("Temp", 150);

    // Bounded wait: a deadlock shows up as the delay winning the race.
    var winner = await Task.WhenAny(ackDone.Task, Task.Delay(TimeSpan.FromSeconds(3), ct));
    winner.ShouldBe(ackDone.Task,
        "subscriber re-entering the engine must not deadlock against _evalGate");
    await ackDone.Task; // surface any inner exception
    eng.GetState("HighTemp")!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
}
|
|
|
|
/// <summary>
/// White-box check for Core.ScriptedAlarms-003: while the Acknowledged event is being
/// delivered, the engine's private <c>_evalGate</c> semaphore (read via reflection) must
/// be acquirable with a non-blocking Wait(0), i.e. the gate is not held during emission.
/// </summary>
[Fact]
public async Task OnEvent_emission_happens_outside_evalGate(/* -003 */)
{
    var ct = TestContext.Current.CancellationToken;
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    using var eng = Build(up, out _);
    await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")], ct);

    // Drive to Active so Acknowledge has something to ack.
    up.Push("Temp", 150);
    await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active, timeoutMs: 2000);
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);

    // Use reflection to peek at _evalGate so the subscriber can probe it.
    var gateField = typeof(ScriptedAlarmEngine).GetField(
        "_evalGate", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
    gateField.ShouldNotBeNull();
    var gate = (SemaphoreSlim)gateField.GetValue(eng)!;

    var gateFreeInsideEmission = false;
    eng.OnEvent += (_, e) =>
    {
        if (e.Emission != EmissionKind.Acknowledged)
        {
            return;
        }

        // SemaphoreSlim.Wait(0) is a non-blocking try-take: it succeeds only when the
        // gate is currently free. Release straight away so the engine is unaffected.
        if (gate.Wait(0))
        {
            gateFreeInsideEmission = true;
            gate.Release();
        }
    };

    await eng.AcknowledgeAsync("HighTemp", "alice", null, CancellationToken.None);

    gateFreeInsideEmission.ShouldBeTrue(
        "_evalGate must be released before OnEvent fires so subscribers " +
        "can call back into the engine without deadlocking");
}
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Core.ScriptedAlarms-006: Dispose must drain in-flight background tasks
|
|
// launched by OnUpstreamChange / RunShelvingCheck. Otherwise a re-evaluation
|
|
// or shelving check started just before Dispose can keep running and write
|
|
// to a (possibly disposed) store after the engine has returned.
|
|
// -------------------------------------------------------------------------
|
|
/// <summary>
/// Regression for Core.ScriptedAlarms-006: while a store save triggered by an upstream
/// push is blocked, Dispose is expected not to return within 200 ms; once the save is
/// released, Dispose is expected to finish within three seconds with no save in progress.
/// </summary>
[Fact]
public async Task Dispose_drains_in_flight_reevaluation_tasks(/* -006 */)
{
    var ct = TestContext.Current.CancellationToken;
    var up = new FakeUpstream();
    up.Set("Temp", 50);
    var logger = new LoggerConfiguration().CreateLogger();
    var slowStore = new BlockingSaveAlarmStateStore();
    var eng = new ScriptedAlarmEngine(up, slowStore, new ScriptLoggerFactory(logger), logger);

    // RunContinuationsAsynchronously: releasing the gate must not run the store's
    // continuation inline on the test thread.
    var saveGate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
    Task? disposeTask = null;
    try
    {
        await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")], ct);

        // Block the NEXT save (the one triggered by the push below).
        slowStore.BlockNextSave = saveGate;

        // Trigger a re-evaluation whose save will block on saveGate.
        up.Push("Temp", 150);

        // Wait until the store's SaveAsync is actually blocked.
        await WaitForAsync(() => slowStore.SaveInProgress, timeoutMs: 1000);

        // Dispose runs on another thread because it is expected to block.
        disposeTask = Task.Run(() => eng.Dispose());

        // Dispose must still be pending while the save is blocked.
        var prematureFinish = await Task.WhenAny(disposeTask, Task.Delay(200, ct));
        prematureFinish.ShouldNotBe(disposeTask,
            "Dispose must block until in-flight background tasks complete");

        // Let the save complete and verify Dispose then returns.
        saveGate.SetResult();
        await disposeTask.WaitAsync(TimeSpan.FromSeconds(3), ct);
        slowStore.SaveInProgress.ShouldBeFalse("background task drained before Dispose returned");
    }
    finally
    {
        // On a failure path, never leave the save blocked or the engine undisposed.
        saveGate.TrySetResult();
        if (disposeTask is null)
        {
            eng.Dispose();
        }
    }
}
|
|
|
|
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-010: predicate evaluation and message-template
// resolution apply different quality bars on purpose. Predicate evaluation
// accepts Uncertain (the predicate can still inspect the value); message
// resolution renders Uncertain as "{?}" so the operator sees the doubt
// explicitly. The two policies are documented in docs/ScriptedAlarms.md.
// -------------------------------------------------------------------------
|
|
/// <summary>
/// Seeds the input tag with an Uncertain status code, checks that the alarm still activates,
/// then drives a clear → re-activate cycle and asserts the Activated message renders the
/// Uncertain input as <c>{?}</c>.
/// </summary>
[Fact]
public async Task Uncertain_quality_drives_predicate_but_renders_question_mark_in_message(/* -010 */)
{
    // Uncertain quality: severity bit 30 set, bit 31 clear.
    const uint Uncertain = 0x40000000u;
    const uint Good = 0u;
    var ct = TestContext.Current.CancellationToken;

    var up = new FakeUpstream();
    up.Set("Temp", 150, statusCode: Uncertain);
    using var eng = Build(up, out _);
    await eng.LoadAsync([
        new ScriptedAlarmDefinition(
            "HighTemp", "Plant/Line1", "HighTemp",
            AlarmKind.LimitAlarm, AlarmSeverity.High,
            "Temp {Temp} exceeded limit",
            """return (int)ctx.GetTag("Temp").Value > 100;"""),
    ], ct);

    // Predicate evaluated (Uncertain treated as ready) → alarm Active.
    eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
        "AreInputsReady accepts Uncertain so the predicate runs");

    // The handler appends while this thread polls and clears the same list (the test has
    // to wait for emissions after each push), so every access goes through one lock;
    // an unsynchronised List<T> can throw "Collection was modified" mid-enumeration.
    var events = new List<ScriptedAlarmEvent>();
    eng.OnEvent += (_, e) =>
    {
        lock (events)
        {
            events.Add(e);
        }
    };

    bool Saw(Func<ScriptedAlarmEvent, bool> match)
    {
        lock (events)
        {
            return events.Any(match);
        }
    }

    // Still Uncertain, and the alarm is already active, so this push alone does not give
    // us an Activated emission to inspect. Comments emit too, but they don't run the
    // message template — so the test clears the alarm and then re-activates it instead.
    up.Push("Temp", 200, statusCode: Uncertain);

    up.Push("Temp", 50, statusCode: Good); // Good, predicate becomes false
    await WaitForAsync(() => Saw(e => e.Emission == EmissionKind.Cleared));
    lock (events)
    {
        events.Clear();
    }

    up.Push("Temp", 200, statusCode: Uncertain); // Uncertain, predicate true
    await WaitForAsync(() => Saw(e => e.Emission == EmissionKind.Activated));

    // Assert on a snapshot so a late emission cannot mutate the list mid-query.
    List<ScriptedAlarmEvent> snapshot;
    lock (events)
    {
        snapshot = events.ToList();
    }

    // The Activated message must show {?} for the Uncertain input.
    snapshot.Single(e => e.Emission == EmissionKind.Activated).Message
        .ShouldBe("Temp {?} exceeded limit",
            "MessageTemplate.Resolve renders non-Good StatusCode as {?} " +
            "even though predicate evaluation accepted the Uncertain value");
}
|
|
|
|
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-008: switch Comments to ImmutableList for O(log n)
// append. The persisted runtime type must be ImmutableList<AlarmComment>
// (which still satisfies IReadOnlyList<AlarmComment> for existing
// consumers).
// -------------------------------------------------------------------------
|
|
/// <summary>
/// After a single comment is added, the alarm state's <c>Comments</c> collection must have
/// the concrete runtime type <c>ImmutableList&lt;AlarmComment&gt;</c>.
/// </summary>
[Fact]
public async Task Comments_collection_uses_ImmutableList_for_efficient_append(/* -008 */)
{
    var ct = TestContext.Current.CancellationToken;

    var upstream = new FakeUpstream();
    upstream.Set("Temp", 50);
    using var engine = Build(upstream, out _);
    await engine.LoadAsync([Alarm("A", "return false;")], ct);

    // One append is enough to send the state through the comment path.
    await engine.AddCommentAsync("A", "alice", "note", ct);

    var comments = engine.GetState("A")!.Comments;
    comments.ShouldBeOfType<System.Collections.Immutable.ImmutableList<AlarmComment>>(
        "Comments should be an ImmutableList so append is O(log n), not O(n)");
}
|
|
|
|
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-011: TransitionResult.NoOp's reason parameter must be
// propagated, not silently discarded. The class-level remarks promise a
// diagnostic log line for no-op disabled-alarm evaluations.
// -------------------------------------------------------------------------
|
|
/// <summary>
/// The reason string handed to <c>TransitionResult.NoOp</c> must come back unchanged from
/// <c>NoOpReason</c>.
/// </summary>
[Fact]
public void TransitionResult_NoOp_propagates_reason(/* -011 */)
{
    const string reason = "disabled — predicate result ignored";
    var state = AlarmConditionState.Fresh("a-1", DateTime.UtcNow);

    var result = TransitionResult.NoOp(state, reason);

    result.NoOpReason.ShouldBe(
        reason,
        "NoOp reason must be preserved on the TransitionResult so callers can log it");
}
|
|
|
|
/// <summary>
/// A result built with <c>TransitionResult.None</c> must expose a null <c>NoOpReason</c>.
/// </summary>
[Fact]
public void TransitionResult_None_carries_no_reason(/* -011 */)
{
    var state = AlarmConditionState.Fresh("a-1", DateTime.UtcNow);

    var result = TransitionResult.None(state);

    result.NoOpReason.ShouldBeNull("None() factory has no reason — only NoOp() carries one");
}
|
|
|
|
/// <summary>
/// Polls <paramref name="cond"/> roughly every 25 ms until it returns <c>true</c> or
/// <paramref name="timeoutMs"/> milliseconds have elapsed.
/// </summary>
/// <param name="cond">Condition to poll. It is always evaluated at least once.</param>
/// <param name="timeoutMs">Maximum time to keep polling, in milliseconds.</param>
/// <exception cref="TimeoutException">
/// The condition was still false on the check made after the timeout elapsed.
/// </exception>
private static async Task WaitForAsync(Func<bool> cond, int timeoutMs = 2000)
{
    // Stopwatch is monotonic, so a wall-clock adjustment cannot shorten or stretch the wait.
    var elapsed = System.Diagnostics.Stopwatch.StartNew();

    while (true)
    {
        // Check the condition BEFORE the deadline: this guarantees one evaluation even for
        // a zero timeout, and one last evaluation after the final delay, so a condition
        // that turns true during the last sleep is not reported as a timeout.
        if (cond())
        {
            return;
        }

        if (elapsed.ElapsedMilliseconds >= timeoutMs)
        {
            break;
        }

        await Task.Delay(25);
    }

    throw new TimeoutException("Condition did not become true in time");
}
|
|
|
|
// -------------------------------------------------------------------------
// Test helpers
// -------------------------------------------------------------------------
|
|
|
|
/// <summary>
/// Test double that forwards every call to an in-memory store, except that
/// <see cref="SaveAsync"/> throws for as long as <see cref="FailSave"/> is set.
/// Supports the persist-before-update tests (finding -007).
/// </summary>
private sealed class FailOnSaveAlarmStateStore : IAlarmStateStore
{
    private readonly InMemoryAlarmStateStore _inner = new();

    /// <summary>When <c>true</c>, <see cref="SaveAsync"/> throws instead of persisting.</summary>
    public bool FailSave { get; set; }

    public Task<AlarmConditionState?> LoadAsync(string alarmId, CancellationToken ct)
    {
        return _inner.LoadAsync(alarmId, ct);
    }

    public Task<IReadOnlyList<AlarmConditionState>> LoadAllAsync(CancellationToken ct)
    {
        return _inner.LoadAllAsync(ct);
    }

    // The failure is thrown synchronously from the call itself, not wrapped in the returned task.
    public Task SaveAsync(AlarmConditionState state, CancellationToken ct)
        => FailSave
            ? throw new InvalidOperationException("Simulated store failure")
            : _inner.SaveAsync(state, ct);

    public Task RemoveAsync(string alarmId, CancellationToken ct)
    {
        return _inner.RemoveAsync(alarmId, ct);
    }
}
|
|
|
|
/// <summary>
/// A store whose SaveAsync can be made to block until the test signals it.
/// Used to verify Dispose drains in-flight background tasks (finding -006).
/// All other operations are forwarded to an in-memory store.
/// </summary>
/// <remarks>
/// The test thread arms <see cref="BlockNextSave"/> and polls <see cref="SaveInProgress"/>
/// while a different thread runs <see cref="SaveAsync"/>, so both members use
/// volatile/interlocked access to make each write visible to the other thread.
/// </remarks>
private sealed class BlockingSaveAlarmStateStore : IAlarmStateStore
{
    private readonly InMemoryAlarmStateStore _inner = new();
    private TaskCompletionSource? _blockNextSave;
    private volatile bool _saveInProgress;

    /// <summary>
    /// When non-null, the next <see cref="SaveAsync"/> call claims this gate and waits on it
    /// before persisting. The gate is consumed by that call; later saves are not blocked.
    /// </summary>
    public TaskCompletionSource? BlockNextSave
    {
        get => Volatile.Read(ref _blockNextSave);
        set => Volatile.Write(ref _blockNextSave, value);
    }

    /// <summary><c>true</c> while a save is parked waiting on the claimed gate.</summary>
    public bool SaveInProgress => _saveInProgress;

    public Task<AlarmConditionState?> LoadAsync(string alarmId, CancellationToken ct)
        => _inner.LoadAsync(alarmId, ct);

    public Task<IReadOnlyList<AlarmConditionState>> LoadAllAsync(CancellationToken ct)
        => _inner.LoadAllAsync(ct);

    public async Task SaveAsync(AlarmConditionState state, CancellationToken ct)
    {
        // Claim the gate atomically so that, even if two saves overlap, exactly one of
        // them is parked on it ("block the NEXT save", not "block every racing save").
        var gate = Interlocked.Exchange(ref _blockNextSave, null);
        if (gate is not null)
        {
            _saveInProgress = true;
            try
            {
                await gate.Task.WaitAsync(ct).ConfigureAwait(false);
            }
            finally
            {
                // Cleared on cancellation too, so the flag never sticks at true.
                _saveInProgress = false;
            }
        }

        await _inner.SaveAsync(state, ct).ConfigureAwait(false);
    }

    public Task RemoveAsync(string alarmId, CancellationToken ct)
        => _inner.RemoveAsync(alarmId, ct);
}
|
|
}
|