fix(scripted-alarms): resolve Medium code-review finding (Core.ScriptedAlarms-012)

Add engine-level tests covering the six gaps identified in the finding:
(1) timed-shelve auto-expiry driven via injectable clock + RunShelvingCheckForTest
    hook so timer tests are deterministic;
(2) ConfirmAsync, TimedShelveAsync/UnshelveAsync round-trip, EnableAsync engine
    methods exercised end-to-end;
(3) OnEvent subscriber-throws isolation — engine state advances and stays
    operational after a subscriber throws;
(4) IAlarmStateStore.SaveAsync failure leaves in-memory state unchanged (locks in
    the persist-before-update invariant from finding-007);
(5) second LoadAsync does not leak the old timer (regression for finding-002);
(6) AreInputsReady cold-start guard correctly blocks on Bad/missing inputs and
    allows Uncertain-quality inputs through.

Expose RunShelvingCheckForTest() internal method on ScriptedAlarmEngine to
support deterministic timer tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-22 08:24:19 -04:00
parent ce86deca62
commit 69994f9cf6
2 changed files with 278 additions and 0 deletions

View File

@@ -364,6 +364,248 @@ public sealed class ScriptedAlarmEngineTests
"concurrent reads of _alarms while it is being mutated must not throw");
}
// -------------------------------------------------------------------------
// Core.ScriptedAlarms-012: coverage gaps
// -------------------------------------------------------------------------
// (1) Timed-shelve auto-expiry driven by the engine's shelving timer via an
// injectable clock — the clock and scriptTimeout constructor parameters
// exist for exactly this.
[Fact]
public async Task TimedShelve_auto_expires_when_engine_shelving_check_runs(/* -012 (1) */)
{
// Use a controllable clock; start it at T0 so we can advance it manually.
var now = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
Func<DateTime> clock = () => now;
var up = new FakeUpstream();
up.Set("Temp", 50);
var logger = new LoggerConfiguration().CreateLogger();
var store = new InMemoryAlarmStateStore();
using var eng = new ScriptedAlarmEngine(up, store, new ScriptLoggerFactory(logger), logger, clock);
await eng.LoadAsync([Alarm("HighTemp", """return (int)ctx.GetTag("Temp").Value > 100;""")],
TestContext.Current.CancellationToken);
// Shelve for 10 minutes from now.
var unshelveAt = now.AddMinutes(10);
await eng.TimedShelveAsync("HighTemp", "alice", unshelveAt, TestContext.Current.CancellationToken);
eng.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Timed);
var events = new List<ScriptedAlarmEvent>();
eng.OnEvent += (_, e) => events.Add(e);
// Advance clock past the unshelve time and invoke RunShelvingCheck directly
// (the 5-second real timer would be non-deterministic in tests).
now = now.AddMinutes(11);
eng.RunShelvingCheckForTest();
await WaitForAsync(() => eng.GetState("HighTemp")!.Shelving.Kind == ShelvingKind.Unshelved);
eng.GetState("HighTemp")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved, "timed shelve expired");
events.Any(e => e.Emission == EmissionKind.Unshelved).ShouldBeTrue("Unshelved event emitted");
}
// (2a) ConfirmAsync end-to-end through the engine.
[Fact]
public async Task ConfirmAsync_records_user_and_emits_Confirmed(/* -012 (2) */)
{
var up = new FakeUpstream();
up.Set("Temp", 150);
using var eng = Build(up, out var store);
await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
TestContext.Current.CancellationToken);
// activate → ack → clear → confirm
await eng.AcknowledgeAsync("A", "alice", null, TestContext.Current.CancellationToken);
up.Push("Temp", 50);
await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Inactive);
var events = new List<ScriptedAlarmEvent>();
eng.OnEvent += (_, e) => events.Add(e);
await eng.ConfirmAsync("A", "bob", "resolved", TestContext.Current.CancellationToken);
eng.GetState("A")!.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
eng.GetState("A")!.LastConfirmUser.ShouldBe("bob");
events.Any(e => e.Emission == EmissionKind.Confirmed).ShouldBeTrue();
var persisted = await store.LoadAsync("A", TestContext.Current.CancellationToken);
persisted!.Confirmed.ShouldBe(AlarmConfirmedState.Confirmed);
}
// (2b) TimedShelveAsync / UnshelveAsync end-to-end through the engine.
[Fact]
public async Task TimedShelveAsync_and_UnshelveAsync_round_trip(/* -012 (2) */)
{
var now = new DateTime(2026, 1, 1, 12, 0, 0, DateTimeKind.Utc);
Func<DateTime> clock = () => now;
var up = new FakeUpstream();
up.Set("Temp", 50);
var logger = new LoggerConfiguration().CreateLogger();
using var eng = new ScriptedAlarmEngine(up, new InMemoryAlarmStateStore(),
new ScriptLoggerFactory(logger), logger, clock);
await eng.LoadAsync([Alarm("A", "return false;")], TestContext.Current.CancellationToken);
var unshelveAt = now.AddMinutes(30);
await eng.TimedShelveAsync("A", "alice", unshelveAt, TestContext.Current.CancellationToken);
eng.GetState("A")!.Shelving.Kind.ShouldBe(ShelvingKind.Timed);
eng.GetState("A")!.Shelving.UnshelveAtUtc.ShouldBe(unshelveAt);
// Operator unshelves manually before the timer expires.
var events = new List<ScriptedAlarmEvent>();
eng.OnEvent += (_, e) => events.Add(e);
await eng.UnshelveAsync("A", "bob", TestContext.Current.CancellationToken);
eng.GetState("A")!.Shelving.Kind.ShouldBe(ShelvingKind.Unshelved);
events.Any(e => e.Emission == EmissionKind.Unshelved).ShouldBeTrue();
}
// (2c) EnableAsync end-to-end through the engine.
[Fact]
public async Task EnableAsync_re_enables_after_disable(/* -012 (2) */)
{
var up = new FakeUpstream();
up.Set("Temp", 50);
using var eng = Build(up, out _);
await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
TestContext.Current.CancellationToken);
await eng.DisableAsync("A", "alice", TestContext.Current.CancellationToken);
eng.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Disabled);
var events = new List<ScriptedAlarmEvent>();
eng.OnEvent += (_, e) => events.Add(e);
await eng.EnableAsync("A", "bob", TestContext.Current.CancellationToken);
eng.GetState("A")!.Enabled.ShouldBe(AlarmEnabledState.Enabled);
events.Any(e => e.Emission == EmissionKind.Enabled).ShouldBeTrue();
}
// (3) OnEvent subscriber-throws isolation — a bad subscriber must not crash
// the engine or prevent subsequent alarm state transitions. The engine logs
// the exception and continues operating; any later alarm changes still work.
[Fact]
public async Task OnEvent_subscriber_exception_does_not_crash_engine(/* -012 (3) */)
{
var up = new FakeUpstream();
up.Set("Temp", 50);
using var eng = Build(up, out _);
await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
TestContext.Current.CancellationToken);
// Single subscriber that always throws.
eng.OnEvent += (_, _) => throw new InvalidOperationException("bad subscriber");
// The engine must not throw or get stuck when the subscriber throws.
// Up.Push triggers ReevaluateAsync → EvaluatePredicateToStateAsync → EmitEvent.
up.Push("Temp", 150);
// Wait for the engine to process the push (it will try+catch the subscriber
// throw and continue). State must advance even though the subscriber blew up.
await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Active);
eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active,
"engine state advances even when the OnEvent subscriber threw");
// Verify the engine is still operational: a second state change must work.
up.Push("Temp", 50);
await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Inactive);
eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
"engine keeps processing after subscriber exception");
}
// (4) IAlarmStateStore.SaveAsync failure — in-memory state must remain at the
// prior value after finding -007 fix (persist-before-update).
[Fact]
public async Task Store_save_failure_leaves_in_memory_state_unchanged(/* -012 (4) */)
{
var up = new FakeUpstream();
up.Set("Temp", 150);
var logger = new LoggerConfiguration().CreateLogger();
var failingStore = new FailOnSaveAlarmStateStore();
using var eng = new ScriptedAlarmEngine(up, failingStore, new ScriptLoggerFactory(logger), logger);
// LoadAsync seeds + persists startup state — make it succeed for now.
failingStore.FailSave = false;
await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
TestContext.Current.CancellationToken);
// Startup evaluation activated the alarm.
eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active);
// Now make every save throw.
failingStore.FailSave = true;
// Try to acknowledge — the save will fail, so the in-memory ack state must
// remain Unacknowledged (persist-before-update: -007 fix).
await Should.ThrowAsync<InvalidOperationException>(
() => eng.AcknowledgeAsync("A", "alice", null, TestContext.Current.CancellationToken));
eng.GetState("A")!.Acked.ShouldBe(AlarmAckedState.Unacknowledged,
"in-memory state must stay at prior value when store save fails");
}
// (5) Re-entrant LoadAsync — the old timer must not keep firing after a second
// call (regression for finding -002: _shelvingTimer?.Dispose() fix).
[Fact]
public async Task Second_LoadAsync_does_not_leak_old_timer(/* -012 (5) */)
{
// Use a clock whose invocation count we can observe indirectly through
// shelving-check side effects: if the old timer leaked it would call
// RunShelvingCheck an additional time, but we assert no double-disposal
// or double-subscriptions instead.
var up = new FakeUpstream();
up.Set("Temp", 50);
using var eng = Build(up, out _);
await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
TestContext.Current.CancellationToken);
var subsAfterFirst = up.ActiveSubscriptionCount;
// Second load with a different alarm definition set.
await eng.LoadAsync([Alarm("B", """return (int)ctx.GetTag("Temp").Value > 200;""")],
TestContext.Current.CancellationToken);
// After reload, only "B" should be present; "A" subscriptions released.
eng.LoadedAlarmIds.ShouldContain("B");
eng.LoadedAlarmIds.ShouldNotContain("A");
// Subscriptions should match the new set only (one path "Temp" → 1 sub).
up.ActiveSubscriptionCount.ShouldBe(subsAfterFirst, "subscription count same after reload on same path");
// Engine is still functional.
up.Push("Temp", 250);
await WaitForAsync(() => eng.GetState("B")!.Active == AlarmActiveState.Active);
}
// (6) Cold-start AreInputsReady guard — null value, Bad status, and Uncertain
// status inputs are all handled correctly.
[Fact]
public async Task AreInputsReady_blocks_evaluation_for_null_and_bad_inputs(/* -012 (6) */)
{
var up = new FakeUpstream();
// "Temp" is missing entirely (ReadTag returns BadNodeIdUnknown).
using var eng = Build(up, out _);
await eng.LoadAsync([Alarm("A", """return (int)ctx.GetTag("Temp").Value > 100;""")],
TestContext.Current.CancellationToken);
// No tag value → bad status → AreInputsReady returns false → stays Inactive.
eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
"predicate not evaluated when input has Bad status");
// Now provide a bad-quality value explicitly.
up.Set("Temp", 150, statusCode: 0x80000000u); // Bad severity bit
up.Push("Temp", 150, statusCode: 0x80000000u);
await Task.Delay(100, TestContext.Current.CancellationToken);
eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Inactive,
"predicate not evaluated when input has explicit Bad status code");
// Uncertain quality (non-zero but bit 31 clear) — should be treated as ready.
// The alarm should activate when the value is above 100 with Uncertain quality.
up.Set("Temp", 150, statusCode: 0x40000000u); // Uncertain severity
up.Push("Temp", 150, statusCode: 0x40000000u);
await WaitForAsync(() => eng.GetState("A")!.Active == AlarmActiveState.Active);
eng.GetState("A")!.Active.ShouldBe(AlarmActiveState.Active,
"Uncertain-quality inputs are treated as ready — predicate evaluates");
}
private static async Task WaitForAsync(Func<bool> cond, int timeoutMs = 2000)
{
var deadline = DateTime.UtcNow.AddMilliseconds(timeoutMs);
@@ -374,4 +616,33 @@ public sealed class ScriptedAlarmEngineTests
}
throw new TimeoutException("Condition did not become true in time");
}
// -------------------------------------------------------------------------
// Test helpers
// -------------------------------------------------------------------------
/// <summary>
/// A store that can be instructed to throw on every SaveAsync call.
/// Used to exercise the persist-before-update invariant (finding -007).
/// </summary>
private sealed class FailOnSaveAlarmStateStore : IAlarmStateStore
{
private readonly InMemoryAlarmStateStore _inner = new();
public bool FailSave { get; set; }
public Task<AlarmConditionState?> LoadAsync(string alarmId, CancellationToken ct)
=> _inner.LoadAsync(alarmId, ct);
public Task<IReadOnlyList<AlarmConditionState>> LoadAllAsync(CancellationToken ct)
=> _inner.LoadAllAsync(ct);
public Task SaveAsync(AlarmConditionState state, CancellationToken ct)
{
if (FailSave) throw new InvalidOperationException("Simulated store failure");
return _inner.SaveAsync(state, ct);
}
public Task RemoveAsync(string alarmId, CancellationToken ct)
=> _inner.RemoveAsync(alarmId, ct);
}
}