using Serilog;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Scripting;
using ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;

namespace ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms.Tests;

/// <summary>
/// End-to-end engine tests: load, predicate evaluation, change-triggered
/// re-evaluation, state persistence, startup recovery, error isolation.
/// </summary>
[Trait("Category", "Unit")]
public sealed class ScriptedAlarmEngineTests
{
    /// <summary>
    /// Creates an engine over <paramref name="up"/> backed by a fresh
    /// <see cref="InMemoryAlarmStateStore"/> (returned through <paramref name="store"/>)
    /// and a default-configured Serilog logger.
    /// </summary>
    private static ScriptedAlarmEngine Build(FakeUpstream up, out IAlarmStateStore store)
    {
        store = new InMemoryAlarmStateStore();
        var logger = new LoggerConfiguration().CreateLogger();
        return new ScriptedAlarmEngine(up, store, new ScriptLoggerFactory(logger), logger);
    }

    /// <summary>
    /// Builds an <see cref="AlarmKind.AlarmCondition"/> definition under
    /// "Plant/Line1/Reactor" whose alarm name equals <paramref name="id"/>.
    /// </summary>
    private static ScriptedAlarmDefinition Alarm(string id, string predicate,
        string msg = "condition", AlarmSeverity sev = AlarmSeverity.High) =>
        new(AlarmId: id,
            EquipmentPath: "Plant/Line1/Reactor",
            AlarmName: id,
            Kind: AlarmKind.AlarmCondition,
            Severity: sev,
            MessageTemplate: msg,
            PredicateScriptSource: predicate);

    [Fact]
    public async Task Load_compiles_and_subscribes_to_referenced_upstreams()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("a1", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        eng.LoadedAlarmIds.ShouldContain("a1");
        up.ActiveSubscriptionCount.ShouldBe(1);
    }

    [Fact]
    public async Task Compile_failures_aggregated_into_one_error()
    {
        var up = new FakeUpstream();
        using var eng = Build(up, out _);

        // Two of the three scripts reference unknown identifiers; the load must
        // report both in a single aggregated failure.
        var ex = await Should.ThrowAsync(async () => await eng.LoadAsync([
            Alarm("bad1", "return unknownIdentifier;"),
            Alarm("good", "return true;"),
            Alarm("bad2", "var x = alsoUnknown; return x;"),
        ], TestContext.Current.CancellationToken));

        ex.Message.ShouldContain("2 alarm(s) did not compile");
    }

    [Fact]
    public async Task Upstream_change_re_evaluates_predicate_and_emits_Activated()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        var events = new List();
        eng.OnEvent += (_, e) => events.Add(e);

        up.Push("Temp", 150);
        await WaitForAsync(() => events.Count > 0);

        events[0].AlarmId.ShouldBe("HighTemp");
        events[0].Emission.ShouldBe(EmissionKind.Activated);
        eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);
    }

    [Fact]
    public async Task Clearing_upstream_emits_Cleared_event()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 150);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // Startup sees 150 → active.
        eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);

        var events = new List();
        eng.OnEvent += (_, e) => events.Add(e);

        up.Push("Temp", 50);
        await WaitForAsync(() => events.Any(e => e.Emission == EmissionKind.Cleared));

        eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Inactive);
    }

    [Fact]
    public async Task Message_template_resolves_tag_values_at_emission()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        up.Set("Limit", 100);
        using var eng = Build(up, out _);
        await eng.LoadAsync([
            new ScriptedAlarmDefinition(
                "HighTemp", "Plant/Line1", "HighTemp",
                AlarmKind.LimitAlarm, AlarmSeverity.High,
                "Temp {Temp}C exceeded limit {Limit}C",
                """return (int)ctx.GetTag("Temp").Value > (int)ctx.GetTag("Limit").Value;"""),
        ], TestContext.Current.CancellationToken);

        var events = new List();
        eng.OnEvent += (_, e) => events.Add(e);

        up.Push("Temp", 150);
        await WaitForAsync(() => events.Any());

        events[0].Message.ShouldBe("Temp 150C exceeded limit 100C");
    }

    [Fact]
    public async Task Ack_records_user_and_persists_to_store()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 150);
        using var eng = Build(up, out var store);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        await eng.AcknowledgeAsync("HighTemp", "alice", "checking", TestContext.Current.CancellationToken);

        var persisted = await store.LoadAsync("HighTemp", TestContext.Current.CancellationToken);
        persisted.ShouldNotBeNull();
        persisted!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
        persisted.LastAckUser.ShouldBe("alice");
        persisted.LastAckComment.ShouldBe("checking");
        persisted.Comments.Any(c => c.Kind == "Acknowledge" && c.User == "alice").ShouldBeTrue();
    }

    [Fact]
    public async Task Startup_recovery_preserves_ack_but_rederives_active_from_predicate()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50); // predicate will go false on second load

        // First run — alarm goes active + operator acks. The first engine's store
        // is not reused below (the restart store is seeded by hand), so it is
        // discarded here.
        using (var eng1 = Build(up, out _))
        {
            up.Set("Temp", 150);
            await eng1.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
                TestContext.Current.CancellationToken);
            eng1.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);

            await eng1.AcknowledgeAsync("HighTemp", "alice", null, TestContext.Current.CancellationToken);
            eng1.GetState("HighTemp")!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
        }

        // Simulate restart — temp is back to 50 (below threshold).
        up.Set("Temp", 50);
        var logger = new LoggerConfiguration().CreateLogger();
        var store2 = new InMemoryAlarmStateStore();

        // seed store2 with the acked state from before restart
        await store2.SaveAsync(new AlarmConditionState(
            "HighTemp",
            AlarmEnabledState.Enabled,
            AlarmActiveState.Active, // was active pre-restart
            AlarmAckedState.Acknowledged, // ack persisted
            AlarmConfirmedState.Unconfirmed,
            ShelvingState.Unshelved,
            DateTime.UtcNow,
            DateTime.UtcNow, null,
            DateTime.UtcNow, "alice", null,
            null, null, null,
            [new AlarmComment(DateTime.UtcNow, "alice", "Acknowledge", "")]),
            TestContext.Current.CancellationToken);

        using var eng2 = new ScriptedAlarmEngine(up, store2, new ScriptLoggerFactory(logger), logger);
        await eng2.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        var s = eng2.GetState("HighTemp")!;
        s.Active.ShouldBe(AlarmActiveState.Inactive, "Active recomputed from current tag value");
        s.Acked.ShouldBe(AlarmAckedState.Acknowledged, "Ack persisted across restart");
        s.LastAckUser.ShouldBe("alice");
    }

    [Fact]
    public async Task Shelved_active_transitions_state_but_suppresses_emission()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        await eng.OneShotShelveAsync("HighTemp", "alice", TestContext.Current.CancellationToken);

        var events = new List();
        eng.OnEvent += (_, e) => events.Add(e);

        up.Push("Temp", 150);

        // Wait for the state transition itself instead of hoping a fixed sleep is
        // long enough, then allow a grace period in which a (wrong) Activated
        // emission would have been dispatched.
        await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);
        await Task.Delay(200, TestContext.Current.CancellationToken);

        events.Any(e => e.Emission == EmissionKind.Activated).ShouldBeFalse(
            "OneShot shelve suppresses activation emission");
        eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
            "state still advances so startup recovery is consistent");
    }

    [Fact]
    public async Task Predicate_runtime_exception_does_not_transition_state()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 150);
        using var eng = Build(up, out _);
        await eng.LoadAsync([
            Alarm("BadScript", """throw new InvalidOperationException("boom");"""),
            Alarm("GoodScript", """return (int)ctx.GetTag("Temp").Value > 100;"""),
        ], TestContext.Current.CancellationToken);

        // Bad script doesn't activate + doesn't disable other alarms.
        eng.GetState("BadScript")!.Active.ShouldBe(AlarmActiveState.Inactive);
        eng.GetState("GoodScript")!.Active.ShouldBe(AlarmActiveState.Active);
    }

    [Fact]
    public async Task Disable_prevents_activation_until_re_enabled()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        await eng.DisableAsync("HighTemp", "alice", TestContext.Current.CancellationToken);

        up.Push("Temp", 150);

        // Negative check: give the engine time to (incorrectly) react before asserting.
        await Task.Delay(100, TestContext.Current.CancellationToken);
        eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Inactive,
            "disabled alarm ignores predicate");

        await eng.EnableAsync("HighTemp", "alice", TestContext.Current.CancellationToken);
        up.Push("Temp", 160);
        await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);
    }

    [Fact]
    public async Task AddComment_appends_to_audit_without_state_change()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out var store);
        await eng.LoadAsync([Alarm("A", """return false;""")], TestContext.Current.CancellationToken);

        await eng.AddCommentAsync("A", "alice", "peeking at this", TestContext.Current.CancellationToken);

        var s = await store.LoadAsync("A", TestContext.Current.CancellationToken);
        s.ShouldNotBeNull();
        s!.Comments.Count.ShouldBe(1);
        s.Comments[0].User.ShouldBe("alice");
        s.Comments[0].Kind.ShouldBe("AddComment");
    }

    [Fact]
    public async Task Predicate_scripts_cannot_SetVirtualTag()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 100);
        using var eng = Build(up, out _);

        // The script compiles fine but throws at runtime when SetVirtualTag is called.
        // The engine swallows the exception + leaves state unchanged.
        await eng.LoadAsync([
            new ScriptedAlarmDefinition(
                "Bad", "Plant/Line1", "Bad",
                AlarmKind.AlarmCondition, AlarmSeverity.High, "bad",
                """ ctx.SetVirtualTag("NotAllowed", 1); return true; """),
        ], TestContext.Current.CancellationToken);

        // Bad alarm's predicate threw — state unchanged.
        eng.GetState("Bad")!.Active.ShouldBe(AlarmActiveState.Inactive);
    }

    [Fact]
    public async Task Dispose_releases_upstream_subscriptions()
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);
        up.ActiveSubscriptionCount.ShouldBe(1);

        eng.Dispose();

        up.ActiveSubscriptionCount.ShouldBe(0);
    }

    [Fact]
    public async Task Concurrent_reads_during_mutation_do_not_throw(/* Core.ScriptedAlarms-001 */)
    {
        // Regression for Core.ScriptedAlarms-001: GetState / GetAllStates /
        // LoadedAlarmIds touch _alarms from arbitrary threads with no lock while
        // ApplyAsync / ReevaluateAsync reassign dictionary entries under _evalGate.
        // With a plain Dictionary this race throws InvalidOperationException or
        // returns torn state; with a ConcurrentDictionary the reads are safe.
        var up = new FakeUpstream();
        const int alarmCount = 40;
        var defs = new List();
        for (var i = 0; i < alarmCount; i++)
        {
            up.Set($"Temp{i}", 50);
            defs.Add(Alarm($"A{i}", $$"""return (int)ctx.GetTag("Temp{{i}}").Value > 100;"""));
        }

        using var eng = Build(up, out _);
        await eng.LoadAsync(defs, TestContext.Current.CancellationToken);

        // The token source bounds the whole stress run to two seconds.
        using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(2));
        Exception? readerFailure = null;

        // Writer: hammer the engine with state mutations that reassign _alarms entries.
        var writer = Task.Run(async () =>
        {
            var round = 0;
            while (!cts.IsCancellationRequested)
            {
                var id = $"A{round % alarmCount}";
                await eng.AddCommentAsync(id, "tester", $"r{round}", CancellationToken.None);
                up.Push($"Temp{round % alarmCount}", round % 2 == 0 ? 150 : 50);
                round++;
            }
        });

        // Readers: continuously touch the three unguarded read paths.
        var readers = Enumerable.Range(0, 4).Select(_reader => Task.Run(() =>
        {
            try
            {
                while (!cts.IsCancellationRequested)
                {
                    _ = eng.LoadedAlarmIds.Count;
                    _ = eng.GetAllStates().Count;
                    for (var i = 0; i < alarmCount; i++)
                        _ = eng.GetState($"A{i}");
                }
            }
            catch (Exception ex)
            {
                readerFailure = ex;
            }
        })).ToArray();

        await Task.WhenAll([writer, .. readers]);

        readerFailure.ShouldBeNull(
            "concurrent reads of _alarms while it is being mutated must not throw");
    }

    // -------------------------------------------------------------------------
    // Core.ScriptedAlarms-012: coverage gaps
    // -------------------------------------------------------------------------

    // (1) Timed-shelve auto-expiry driven by the engine's shelving timer via an
    //     injectable clock — the clock and scriptTimeout constructor parameters
    //     exist for exactly this.
    [Fact]
    public async Task TimedShelve_auto_expires_when_engine_shelving_check_runs(/* -012 (1) */)
    {
        // Use a controllable clock; start it at T0 so we can advance it manually.
        var now = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
        Func clock = () => now;

        var up = new FakeUpstream();
        up.Set("Temp", 50);
        var logger = new LoggerConfiguration().CreateLogger();
        var store = new InMemoryAlarmStateStore();
        using var eng = new ScriptedAlarmEngine(up, store, new ScriptLoggerFactory(logger), logger, clock);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // Shelve for 10 minutes from now.
        var unshelveAt = now.AddMinutes(10);
        await eng.TimedShelveAsync("HighTemp", "alice", unshelveAt, TestContext.Current.CancellationToken);
        eng.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Timed);

        var events = new List();
        eng.OnEvent += (_, e) => events.Add(e);

        // Advance clock past the unshelve time and invoke RunShelvingCheck directly
        // (the 5-second real timer would be non-deterministic in tests).
        now = now.AddMinutes(11);
        eng.RunShelvingCheckForTest();
        await WaitForAsync(() => eng.GetState("HighTemp")!.Shelving.Kind == ShelvingKind.Unshelved);

        eng.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved, "timed shelve expired");
        events.Any(e => e.Emission == EmissionKind.Unshelved).ShouldBeTrue("Unshelved event emitted");
    }

    // (2a) ConfirmAsync end-to-end through the engine.
    [Fact]
    public async Task ConfirmAsync_records_user_and_emits_Confirmed(/* -012 (2) */)
    {
        var up = new FakeUpstream();
        up.Set("Temp", 150);
        using var eng = Build(up, out var store);
        await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // activate → ack → clear → confirm
        await eng.AcknowledgeAsync("A", "alice", null, TestContext.Current.CancellationToken);
        up.Push("Temp", 50);
        await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Inactive);

        var events = new List();
        eng.OnEvent += (_, e) => events.Add(e);

        await eng.ConfirmAsync("A", "bob", "resolved", TestContext.Current.CancellationToken);

        eng.GetState("A")!.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
        eng.GetState("A")!.LastConfirmUser.ShouldBe("bob");
        events.Any(e => e.Emission == EmissionKind.Confirmed).ShouldBeTrue();

        var persisted = await store.LoadAsync("A", TestContext.Current.CancellationToken);
        persisted!.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
    }

    // (2b) TimedShelveAsync / UnshelveAsync end-to-end through the engine.
[Fact]
    public async Task TimedShelveAsync_and_UnshelveAsync_round_trip(/* -012 (2) */)
    {
        // Frozen clock: the engine reads the time through this delegate.
        var frozenNow = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
        Func fixedClock = () => frozenNow;
        var token = TestContext.Current.CancellationToken;

        var upstream = new FakeUpstream();
        upstream.Set("Temp", 50);
        var log = new LoggerConfiguration().CreateLogger();
        using var engine = new ScriptedAlarmEngine(
            upstream,
            new InMemoryAlarmStateStore(),
            new ScriptLoggerFactory(log),
            log,
            fixedClock);
        await engine.LoadAsync([Alarm("A", "return false;")], token);

        // Shelve with an expiry half an hour ahead of the frozen clock.
        var expiry = frozenNow.AddMinutes(30);
        await engine.TimedShelveAsync("A", "alice", expiry, token);

        var shelvedState = engine.GetState("A")!.Shelving;
        shelvedState.Kind.ShouldBe(ShelvingKind.Timed);
        shelvedState.UnshelveAtUtc.ShouldBe(expiry);

        // The operator lifts the shelve by hand well before the expiry.
        var emitted = new List();
        engine.OnEvent += (_, evt) => emitted.Add(evt);

        await engine.UnshelveAsync("A", "bob", token);

        engine.GetState("A")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved);
        emitted.Any(evt => evt.Emission == EmissionKind.Unshelved).ShouldBeTrue();
    }

    // (2c) EnableAsync end-to-end through the engine.
    [Fact]
    public async Task EnableAsync_re_enables_after_disable(/* -012 (2) */)
    {
        var token = TestContext.Current.CancellationToken;

        var upstream = new FakeUpstream();
        upstream.Set("Temp", 50);
        using var engine = Build(upstream, out _);
        await engine.LoadAsync(
            [Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            token);

        // Disable first so that the enable call has a transition to perform.
        await engine.DisableAsync("A", "alice", token);
        engine.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Disabled);

        // Subscribe only now: the Disabled emission is not of interest here.
        var emitted = new List();
        engine.OnEvent += (_, evt) => emitted.Add(evt);

        await engine.EnableAsync("A", "bob", token);

        engine.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Enabled);
        emitted.Any(evt => evt.Emission == EmissionKind.Enabled).ShouldBeTrue();
    }

    // (3) OnEvent subscriber-throws isolation — a bad subscriber must not crash
    // the engine or prevent subsequent alarm state transitions.
// The engine logs the exception and continues operating; any later alarm changes still work.
    [Fact]
    public async Task OnEvent_subscriber_exception_does_not_crash_engine(/* -012 (3) */)
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // Single subscriber that always throws.
        eng.OnEvent += (_, _) => throw new InvalidOperationException("bad subscriber");

        // The engine must not throw or get stuck when the subscriber throws.
        // Up.Push triggers ReevaluateAsync → EvaluatePredicateToStateAsync → EmitEvent.
        up.Push("Temp", 150);

        // Wait for the engine to process the push (it will try+catch the subscriber
        // throw and continue). State must advance even though the subscriber blew up.
        await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Active);
        eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active,
            "engine state advances even when the OnEvent subscriber threw");

        // Verify the engine is still operational: a second state change must work.
        up.Push("Temp", 50);
        await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Inactive);
        eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
            "engine keeps processing after subscriber exception");
    }

    // (4) IAlarmStateStore.SaveAsync failure — in-memory state must remain at the
    //     prior value after finding -007 fix (persist-before-update).
    [Fact]
    public async Task Store_save_failure_leaves_in_memory_state_unchanged(/* -012 (4) */)
    {
        var up = new FakeUpstream();
        up.Set("Temp", 150);
        var logger = new LoggerConfiguration().CreateLogger();
        var failingStore = new FailOnSaveAlarmStateStore();
        using var eng = new ScriptedAlarmEngine(up, failingStore, new ScriptLoggerFactory(logger), logger);

        // LoadAsync seeds + persists startup state — make it succeed for now.
        failingStore.FailSave = false;
        await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // Startup evaluation activated the alarm.
        eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active);

        // Now make every save throw.
        failingStore.FailSave = true;

        // Try to acknowledge — the save will fail, so the in-memory ack state must
        // remain Unacknowledged (persist-before-update: -007 fix).
        await Should.ThrowAsync(
            () => eng.AcknowledgeAsync("A", "alice", null, TestContext.Current.CancellationToken));

        eng.GetState("A")!.Acked.ShouldBe(AlarmAckedState.Unacknowledged,
            "in-memory state must stay at prior value when store save fails");
    }

    // (5) Re-entrant LoadAsync — the old timer must not keep firing after a second
    //     call (regression for finding -002: _shelvingTimer?.Dispose() fix).
    [Fact]
    public async Task Second_LoadAsync_does_not_leak_old_timer(/* -012 (5) */)
    {
        // The timer itself is not observed here. What this test asserts is the
        // visible outcome of a reload: the loaded alarm set is replaced, the
        // upstream subscription count is unchanged, and the engine still reacts
        // to pushes afterwards.
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);
        var subsAfterFirst = up.ActiveSubscriptionCount;

        // Second load with a different alarm definition set.
        await eng.LoadAsync([Alarm("B", """return (int)ctx.GetTag("Temp").Value > 200;""")],
            TestContext.Current.CancellationToken);

        // After reload, only "B" should be present; "A" subscriptions released.
        eng.LoadedAlarmIds.ShouldContain("B");
        eng.LoadedAlarmIds.ShouldNotContain("A");

        // Subscriptions should match the new set only (one path "Temp" → 1 sub).
        up.ActiveSubscriptionCount.ShouldBe(subsAfterFirst,
            "subscription count same after reload on same path");

        // Engine is still functional.
        up.Push("Temp", 250);
        await WaitForAsync(() => eng.GetState("B")!.Active == AlarmActiveState.Active);
    }

    // (6) Cold-start AreInputsReady guard — null value, Bad status, and Uncertain
    //     status inputs are all handled correctly.
    [Fact]
    public async Task AreInputsReady_blocks_evaluation_for_null_and_bad_inputs(/* -012 (6) */)
    {
        var up = new FakeUpstream();
        // "Temp" is missing entirely (ReadTag returns BadNodeIdUnknown).
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // No tag value → bad status → AreInputsReady returns false → stays Inactive.
        eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
            "predicate not evaluated when input has Bad status");

        // Now provide a bad-quality value explicitly.
        up.Set("Temp", 150, statusCode: 0x80000000u); // Bad severity bit
        up.Push("Temp", 150, statusCode: 0x80000000u);
        // Negative check: fixed wait so a wrong activation would have had time to land.
        await Task.Delay(100, TestContext.Current.CancellationToken);
        eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
            "predicate not evaluated when input has explicit Bad status code");

        // Uncertain quality (non-zero but bit 31 clear) — should be treated as ready.
        // The alarm should activate when the value is above 100 with Uncertain quality.
        up.Set("Temp", 150, statusCode: 0x40000000u); // Uncertain severity
        up.Push("Temp", 150, statusCode: 0x40000000u);
        await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Active);
        eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active,
            "Uncertain-quality inputs are treated as ready — predicate evaluates");
    }

    // -------------------------------------------------------------------------
    // Core.ScriptedAlarms-003: OnEvent emission must not block under _evalGate.
    // (1) A slow subscriber must not block the gate for other alarms.
    // (2) A subscriber that re-enters the engine (e.g. AcknowledgeAsync) must
    //     not deadlock against _evalGate. Both regressions are covered here.
// -------------------------------------------------------------------------
    [Fact]
    public async Task OnEvent_subscriber_can_call_back_into_engine_without_deadlock(/* -003 */)
    {
        // Re-entrancy regression. When OnEvent emission was inside _evalGate, a
        // subscriber that called an engine method (e.g. AcknowledgeAsync) hung
        // forever because the non-reentrant SemaphoreSlim refused to re-grant
        // the gate the dispatch path was still holding. After the fix, emission
        // happens AFTER Release() so the subscriber's call acquires the gate
        // cleanly and the operator-driven action completes.
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // Subscriber re-enters the engine via Task.Run so the OnEvent
        // dispatch thread is not blocked while waiting. Either way, with
        // the fix in place AcknowledgeAsync must acquire _evalGate (the
        // dispatch path released it before invoking the subscriber) and
        // complete in well under the timeout.
        // RunContinuationsAsynchronously keeps the test's continuation off the
        // thread that completes the source.
        var ackDone = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
        eng.OnEvent += (_, e) =>
        {
            if (e.Emission != EmissionKind.Activated) return;
            _ = Task.Run(async () =>
            {
                try
                {
                    await eng.AcknowledgeAsync(e.AlarmId, "sub", null, CancellationToken.None);
                    ackDone.TrySetResult();
                }
                catch (Exception ex)
                {
                    ackDone.TrySetException(ex);
                }
            });
        };

        up.Push("Temp", 150);

        var winner = await Task.WhenAny(
            ackDone.Task,
            Task.Delay(TimeSpan.FromSeconds(3), TestContext.Current.CancellationToken));
        winner.ShouldBe(ackDone.Task,
            "subscriber re-entering the engine must not deadlock against _evalGate");
        await ackDone.Task; // surface any inner exception

        eng.GetState("HighTemp")!.Acked.ShouldBe(AlarmAckedState.Acknowledged);
    }

    [Fact]
    public async Task OnEvent_emission_happens_outside_evalGate(/* -003 */)
    {
        // Direct white-box check on the gate-release ordering: AcknowledgeAsync
        // emits the Acknowledged event AFTER releasing the gate. We assert that
        // by observing the gate is acquirable from inside the subscriber.
        // SemaphoreSlim.Wait(0) returns true only if the count > 0 (gate free).
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // Drive to Active so Acknowledge has something to ack. Polls with the
        // shared WaitForAsync helper instead of blocking a thread.
        up.Push("Temp", 150);
        await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);
        eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active);

        // Use reflection to peek at _evalGate so the subscriber can probe it.
        var gateField = typeof(ScriptedAlarmEngine).GetField(
            "_evalGate",
            System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);
        gateField.ShouldNotBeNull();
        var gate = (SemaphoreSlim)gateField.GetValue(eng)!;

        var gateFreeInsideEmission = false;
        eng.OnEvent += (_, e) =>
        {
            if (e.Emission != EmissionKind.Acknowledged) return;

            // SemaphoreSlim.Wait(0) — non-blocking try-take. If the gate is
            // free we acquire it (count back to 0); release immediately.
            if (gate.Wait(0))
            {
                gateFreeInsideEmission = true;
                gate.Release();
            }
        };

        await eng.AcknowledgeAsync("HighTemp", "alice", null, TestContext.Current.CancellationToken);

        gateFreeInsideEmission.ShouldBeTrue(
            "_evalGate must be released before OnEvent fires so subscribers " +
            "can call back into the engine without deadlocking");
    }

    // -------------------------------------------------------------------------
    // Core.ScriptedAlarms-006: Dispose must drain in-flight background tasks
    // launched by OnUpstreamChange / RunShelvingCheck.
// Otherwise a re-evaluation
    // or shelving check started just before Dispose can keep running and write
    // to a (possibly disposed) store after the engine has returned.
    // -------------------------------------------------------------------------
    [Fact]
    public async Task Dispose_drains_in_flight_reevaluation_tasks(/* -006 */)
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        var logger = new LoggerConfiguration().CreateLogger();
        var slowStore = new BlockingSaveAlarmStateStore();
        var eng = new ScriptedAlarmEngine(up, slowStore, new ScriptLoggerFactory(logger), logger);

        // Completing this source lets the blocked save continue. Continuations
        // are forced off the completing thread so the engine's work does not
        // run inline on the test thread.
        var saveGate = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
        Task? disposeTask = null;
        try
        {
            await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
                TestContext.Current.CancellationToken);

            // Block the NEXT save (the one triggered by the push below).
            slowStore.BlockNextSave = saveGate;

            // Trigger a re-evaluation that will go inside _evalGate and call SaveAsync.
            up.Push("Temp", 150);

            // Wait until the store's SaveAsync is actually blocked.
            await WaitForAsync(() => slowStore.SaveInProgress, timeoutMs: 1000);

            // Dispose must wait for the in-flight reevaluation to complete rather
            // than returning while a background task still runs.
            disposeTask = Task.Run(() => eng.Dispose());

            // Verify Dispose does NOT complete immediately — it should block waiting
            // for the in-flight task. Without the -006 fix Dispose returns straight
            // away and the background reevaluation can outlive the engine.
            var prematureFinish = await Task.WhenAny(
                disposeTask,
                Task.Delay(200, TestContext.Current.CancellationToken));
            prematureFinish.ShouldNotBe(disposeTask,
                "Dispose must block until in-flight background tasks complete");

            // Let the save complete and verify Dispose then returns.
            saveGate.SetResult();
            await disposeTask.WaitAsync(TimeSpan.FromSeconds(3), TestContext.Current.CancellationToken);

            slowStore.SaveInProgress.ShouldBeFalse("background task drained before Dispose returned");
        }
        finally
        {
            // On a failed assertion the save would otherwise stay blocked forever,
            // pinning the background re-evaluation and any pending Dispose call.
            saveGate.TrySetResult();

            // Dispose here only when the test never reached its own Dispose call.
            if (disposeTask is null)
            {
                eng.Dispose();
            }
        }
    }

    // -------------------------------------------------------------------------
    // Core.ScriptedAlarms-010: predicate evaluation and message-template
    // resolution apply different quality bars on purpose. Predicate evaluation
    // accepts Uncertain (the predicate can still inspect the value); message
    // resolution renders Uncertain as "{?}" so the operator sees the doubt
    // explicitly. The two policies are documented in docs/ScriptedAlarms.md.
    // -------------------------------------------------------------------------
    [Fact]
    public async Task Uncertain_quality_drives_predicate_but_renders_question_mark_in_message(/* -010 */)
    {
        var up = new FakeUpstream();
        // Seed with Uncertain quality (severity bit 30 set, bit 31 clear).
        up.Set("Temp", 150, statusCode: 0x40000000u);
        using var eng = Build(up, out _);
        await eng.LoadAsync([
            new ScriptedAlarmDefinition(
                "HighTemp", "Plant/Line1", "HighTemp",
                AlarmKind.LimitAlarm, AlarmSeverity.High,
                "Temp {Temp} exceeded limit",
                """return (int)ctx.GetTag("Temp").Value > 100;"""),
        ], TestContext.Current.CancellationToken);

        // Predicate evaluated (Uncertain treated as ready) → alarm Active.
        eng.GetState("HighTemp")!.Active.ShouldBe(AlarmActiveState.Active,
            "AreInputsReady accepts Uncertain so the predicate runs");

        // But the resolved emission message must show "{?}" for the Uncertain
        // tag — only Good substitutes into the operator-facing message.
        var events = new List();
        eng.OnEvent += (_, e) => events.Add(e);
        up.Push("Temp", 200, statusCode: 0x40000000u); // still Uncertain

        // Trigger another evaluation to get an emission (already active, so
        // we need a clear → re-activate cycle). Easier: force the same path
        // through a comment which emits a CommentAdded message. But comments
        // don't run the template. Instead clear it then re-activate.
        up.Push("Temp", 50, statusCode: 0u); // Good, predicate becomes false
        await WaitForAsync(() => events.Any(e => e.Emission == EmissionKind.Cleared));
        events.Clear();

        up.Push("Temp", 200, statusCode: 0x40000000u); // Uncertain, predicate true
        await WaitForAsync(() => events.Any(e => e.Emission == EmissionKind.Activated));

        // The Activated message must show {?} for the Uncertain input.
        events.Single(e => e.Emission == EmissionKind.Activated).Message
            .ShouldBe("Temp {?} exceeded limit",
                "MessageTemplate.Resolve renders non-Good StatusCode as {?} " +
                "even though predicate evaluation accepted the Uncertain value");
    }

    // -------------------------------------------------------------------------
    // Core.ScriptedAlarms-008: switch Comments to ImmutableList for O(log n)
    // append. The persisted runtime type must be ImmutableList
    // (which still satisfies IReadOnlyList for existing
    // consumers).
    // -------------------------------------------------------------------------
    [Fact]
    public async Task Comments_collection_uses_ImmutableList_for_efficient_append(/* -008 */)
    {
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync([Alarm("A", "return false;")], TestContext.Current.CancellationToken);

        // Add a comment so AppendComment runs.
        await eng.AddCommentAsync("A", "alice", "note", TestContext.Current.CancellationToken);

        var s = eng.GetState("A")!;
        s.Comments.ShouldBeOfType>(
            "Comments should be an ImmutableList so append is O(log n), not O(n)");
    }

    // -------------------------------------------------------------------------
    // Core.ScriptedAlarms-011: TransitionResult.NoOp's reason parameter must be
    // propagated, not silently discarded. The class-level remarks promise a
    // diagnostic log line for no-op disabled-alarm evaluations.
// -------------------------------------------------------------------------
    [Fact]
    public void TransitionResult_NoOp_propagates_reason(/* -011 */)
    {
        var fresh = AlarmConditionState.Fresh("a-1", DateTime.UtcNow);

        var r = TransitionResult.NoOp(fresh, "disabled — predicate result ignored");

        r.NoOpReason.ShouldBe("disabled — predicate result ignored",
            "NoOp reason must be preserved on the TransitionResult so callers can log it");
    }

    [Fact]
    public void TransitionResult_None_carries_no_reason(/* -011 */)
    {
        var fresh = AlarmConditionState.Fresh("a-1", DateTime.UtcNow);

        var r = TransitionResult.None(fresh);

        r.NoOpReason.ShouldBeNull("None() factory has no reason — only NoOp() carries one");
    }

    /// <summary>
    /// Polls <paramref name="cond"/> every 25 ms until it returns true or
    /// <paramref name="timeoutMs"/> milliseconds have elapsed.
    /// </summary>
    /// <remarks>
    /// The condition is always evaluated at least once (even for a zero or
    /// negative timeout) and once more after the final delay, so a condition that
    /// becomes true during the last poll interval is not reported as a timeout.
    /// The deadline uses the monotonic tick counter rather than wall-clock time.
    /// </remarks>
    /// <exception cref="TimeoutException">The condition stayed false for the whole timeout.</exception>
    private static async Task WaitForAsync(Func cond, int timeoutMs = 2000)
    {
        var deadline = Environment.TickCount64 + timeoutMs;
        while (true)
        {
            if (cond()) return;

            // Check the deadline only after evaluating the condition so the last
            // look at the condition happens at or past the deadline.
            if (Environment.TickCount64 >= deadline) break;

            await Task.Delay(25);
        }

        throw new TimeoutException("Condition did not become true in time");
    }

    // -------------------------------------------------------------------------
    // Test helpers
    // -------------------------------------------------------------------------

    /// <summary>
    /// A store that can be instructed to throw on every SaveAsync call.
    /// Used to exercise the persist-before-update invariant (finding -007).
    /// All other operations delegate to an in-memory store.
    /// </summary>
    private sealed class FailOnSaveAlarmStateStore : IAlarmStateStore
    {
        private readonly InMemoryAlarmStateStore _inner = new();

        // When true, SaveAsync throws instead of persisting.
        public bool FailSave { get; set; }

        public Task LoadAsync(string alarmId, CancellationToken ct)
            => _inner.LoadAsync(alarmId, ct);

        public Task> LoadAllAsync(CancellationToken ct)
            => _inner.LoadAllAsync(ct);

        public Task SaveAsync(AlarmConditionState state, CancellationToken ct)
        {
            if (FailSave) throw new InvalidOperationException("Simulated store failure");
            return _inner.SaveAsync(state, ct);
        }

        public Task RemoveAsync(string alarmId, CancellationToken ct)
            => _inner.RemoveAsync(alarmId, ct);
    }

    ///
    /// A store whose SaveAsync can be made to block until the test signals it.
/// Used to verify Dispose drains in-flight background tasks (finding -006).
    ///
    private sealed class BlockingSaveAlarmStateStore : IAlarmStateStore
    {
        private readonly InMemoryAlarmStateStore _inner = new();

        /// <summary>
        /// When non-null, the next SaveAsync call waits on this source's task
        /// before saving. SaveAsync resets the property to null as it picks the
        /// gate up, so only that one call is held back.
        /// </summary>
        public TaskCompletionSource? BlockNextSave { get; set; }

        /// <summary>True while a SaveAsync call is waiting on the gate.</summary>
        public bool SaveInProgress { get; private set; }

        // NOTE(review): the generic type arguments on the two Load* return types
        // are reconstructed, not visible in the reviewed text; confirm that they
        // match the IAlarmStateStore declaration.
        public Task<AlarmConditionState?> LoadAsync(string alarmId, CancellationToken ct)
            => _inner.LoadAsync(alarmId, ct);

        public Task<IReadOnlyList<AlarmConditionState>> LoadAllAsync(CancellationToken ct)
            => _inner.LoadAllAsync(ct);

        /// <summary>
        /// Waits on the armed gate (if any), then forwards the save to the
        /// in-memory store. Cancelling <paramref name="ct"/> aborts the wait.
        /// </summary>
        public async Task SaveAsync(AlarmConditionState state, CancellationToken ct)
        {
            var gate = BlockNextSave;
            if (gate is not null)
            {
                BlockNextSave = null;
                SaveInProgress = true;
                try
                {
                    await gate.Task.WaitAsync(ct).ConfigureAwait(false);
                }
                finally
                {
                    // Cleared on both normal release and cancellation.
                    SaveInProgress = false;
                }
            }

            await _inner.SaveAsync(state, ct).ConfigureAwait(false);
        }

        public Task RemoveAsync(string alarmId, CancellationToken ct)
            => _inner.RemoveAsync(alarmId, ct);
    }

    // --- Core.ScriptedAlarms-009: per-alarm evaluation-scratch reuse ---

    [Fact]
    public async Task Reevaluation_reuses_the_same_read_cache_dictionary()
    {
        // Pre-009 the engine allocated a fresh read-cache dictionary
        // on every upstream-change tick; on a busy line this was a steady allocation
        // stream on the hot path. The fix: one dictionary per alarm, refilled in place
        // under _evalGate. Test asserts the dictionary instance is identical across
        // two consecutive evaluations of the same alarm.
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync(
            [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        // First evaluation runs during LoadAsync. Capture the scratch reference now.
        var scratchAfterLoad = eng.TryGetScratchReadCacheForTest("HighTemp");
        scratchAfterLoad.ShouldNotBeNull(
            "the scratch should have been allocated during LoadAsync's initial evaluation");

        // Trigger a re-evaluation by pushing an upstream change.
        up.Push("Temp", 150);
        await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);

        var scratchAfterPush = eng.TryGetScratchReadCacheForTest("HighTemp");
        ReferenceEquals(scratchAfterLoad, scratchAfterPush).ShouldBeTrue(
            "the read-cache dictionary must be the *same* instance across evaluations " +
            "(Core.ScriptedAlarms-009) — a per-call allocation would defeat the fix.");
        scratchAfterPush!["Temp"].Value.ShouldBe(150,
            "refill must update the existing dictionary in place");
    }

    [Fact]
    public async Task Reevaluation_reuses_the_same_predicate_context()
    {
        // The context wraps the read-cache by reference; refilling the dictionary
        // updates what the script sees. Reusing the context spares a per-call object
        // allocation as well as the dictionary one.
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync(
            [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        var ctxAfterLoad = eng.TryGetScratchContextForTest("HighTemp");
        ctxAfterLoad.ShouldNotBeNull();

        up.Push("Temp", 150);
        await WaitForAsync(() => eng.GetState("HighTemp")!.Active == AlarmActiveState.Active);

        var ctxAfterPush = eng.TryGetScratchContextForTest("HighTemp");
        ReferenceEquals(ctxAfterLoad, ctxAfterPush).ShouldBeTrue(
            "the AlarmPredicateContext must be reused across evaluations (Core.ScriptedAlarms-009).");
    }

    [Fact]
    public async Task LoadAsync_drops_the_prior_generations_scratch()
    {
        // A config-publish recreates AlarmStates with potentially different Inputs +
        // Loggers; reusing the prior generation's scratch would attach an outdated
        // logger to the new alarm. LoadAsync must clear _scratchByAlarmId so the
        // next evaluation lazily re-allocates against the fresh AlarmState.
        var up = new FakeUpstream();
        up.Set("Temp", 50);
        using var eng = Build(up, out _);
        await eng.LoadAsync(
            [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        var scratchAfterFirstLoad = eng.TryGetScratchReadCacheForTest("HighTemp");
        scratchAfterFirstLoad.ShouldNotBeNull();

        // Second LoadAsync — same alarm id, same predicate, but the scratch should be
        // wiped and re-allocated on the next evaluation. (LoadAsync itself triggers a
        // first evaluation, so the scratch is reborn before we look.)
        await eng.LoadAsync(
            [Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
            TestContext.Current.CancellationToken);

        var scratchAfterSecondLoad = eng.TryGetScratchReadCacheForTest("HighTemp");
        scratchAfterSecondLoad.ShouldNotBeNull();
        ReferenceEquals(scratchAfterFirstLoad, scratchAfterSecondLoad).ShouldBeFalse(
            "LoadAsync must drop the prior generation's scratch — reuse across a publish " +
            "would attach a stale Logger / Inputs to the new alarm definition.");
    }
}