using System.Collections.Concurrent; using Serilog; using ZB.MOM.WW.OtOpcUa.Core.Abstractions; using ZB.MOM.WW.OtOpcUa.Core.Scripting; namespace ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms; /// /// Phase 7 scripted-alarm orchestrator. Compiles every configured alarm's predicate /// against the Stream A sandbox, subscribes to the referenced upstream tags, /// re-evaluates the predicate on every input change + on a shelving-check timer, /// applies the resulting transition through , /// persists state via , and emits the resulting events /// through (which wires into the existing /// IAlarmSource fan-out). /// /// /// /// Scripted alarms are leaves in the evaluation DAG — no alarm's state drives /// another alarm's predicate. The engine maintains only an inverse index from /// upstream tag path → alarms referencing it; no topological sort needed /// (unlike the virtual-tag engine). /// /// /// Evaluation errors (script throws, timeout, coercion fail) surface as /// structured errors in the dedicated scripts-*.log sink plus a WARN companion /// in the main log. The alarm's ActiveState stays at its prior value — the /// engine does NOT invent a clear transition just because the predicate broke. /// Operators investigating a broken predicate shouldn't see a phantom /// clear-event preceding the failure. /// /// public sealed class ScriptedAlarmEngine : IDisposable { private readonly ITagUpstreamSource _upstream; private readonly IAlarmStateStore _store; private readonly ScriptLoggerFactory _loggerFactory; private readonly ILogger _engineLogger; private readonly Func _clock; private readonly TimeSpan _scriptTimeout; // ConcurrentDictionary, not a plain Dictionary: every mutation happens under // _evalGate, but four read paths (GetState, GetAllStates, LoadedAlarmIds, // RunShelvingCheck) touch _alarms from arbitrary threads (Admin UI request // threads, the shelving Timer thread-pool callback) without holding the gate. // A plain Dictionary read concurrent with a writer's entry reassignment can // throw or return torn state; ConcurrentDictionary makes entry assignment and // snapshot enumeration safe. The only write shapes are indexer-set and Clear, // both of which ConcurrentDictionary supports atomically. (Core.ScriptedAlarms-001) private readonly ConcurrentDictionary _alarms = new(StringComparer.Ordinal); /// /// Per-alarm reusable evaluation scratch. The read-cache dictionary and the /// instance are both allocated once per /// alarm (on first evaluation) and reused across every subsequent re-eval — /// the hot path no longer allocates a fresh dictionary + context per upstream /// tag change. Safe because only /// runs under , which serialises every evaluation: /// two threads can never observe the same scratch in a half-refilled state. /// Cleared in alongside . /// (Core.ScriptedAlarms-009) /// private readonly ConcurrentDictionary _scratchByAlarmId = new(StringComparer.Ordinal); /// /// Compile cache for every alarm predicate. Routes 's /// calls through the /// cache so the collectible /// each compile produces is actually disposed on the publish-replace path /// (Core.Scripting-016): the cache's /// disposes every materialised evaluator before dropping its dictionary entry, /// so a config-publish releases the prior generation's ALCs and the per-publish /// accretion the Core.Scripting-008 fix targeted is actually freed in production. /// Pre-fix the engine called ScriptEvaluator.Compile directly, which left /// the ALCs rooted until the process exited — defeating -008 on the real path. /// private readonly CompiledScriptCache _compileCache = new(); /// /// Test-only diagnostic: returns the per-alarm scratch read-cache dictionary /// if one has been allocated, else null. Used by Core.ScriptedAlarms-009 /// regression tests to assert the scratch is reused across evaluations /// (two reads return the same instance). /// /// /// Synchronization: the returned /// is the engine's live mutable read-cache. It is refilled in place by /// RefillReadCache on every predicate evaluation, under _evalGate. /// Test callers MUST NOT iterate this dictionary while the engine is /// actively evaluating (i.e. while an upstream change is mid-flight); the /// refill clears the dict before repopulating and a concurrent iterator /// would observe torn / partial state. Safe uses are: reference-identity /// comparisons (e.g. asserting the same instance is reused across calls), /// and single-key reads against an engine that has quiesced after a /// deterministic upstream push. Anything more involved should snapshot a /// copy under the gate. (Core.ScriptedAlarms-013.) /// internal IReadOnlyDictionary? TryGetScratchReadCacheForTest(string alarmId) => _scratchByAlarmId.TryGetValue(alarmId, out var s) ? s.ReadCache : null; /// /// Test-only diagnostic: returns the per-alarm /// if one has been allocated, else null. Companion to /// . /// /// /// Synchronization: the returned context wraps the same live /// read-cache as — the same /// "don't iterate during an in-flight evaluation" caveat applies. Safe /// for reference-identity assertions on a quiesced engine. /// (Core.ScriptedAlarms-013.) /// internal AlarmPredicateContext? TryGetScratchContextForTest(string alarmId) => _scratchByAlarmId.TryGetValue(alarmId, out var s) ? s.Context : null; private readonly ConcurrentDictionary _valueCache = new(StringComparer.Ordinal); private readonly Dictionary> _alarmsReferencing = new(StringComparer.Ordinal); // tag path -> alarm ids private readonly List _upstreamSubscriptions = []; private readonly SemaphoreSlim _evalGate = new(1, 1); private Timer? _shelvingTimer; private bool _loaded; private bool _disposed; // Tracks fire-and-forget background work launched by OnUpstreamChange // (ReevaluateAsync) and RunShelvingCheck (ShelvingCheckAsync). Dispose drains // these so a re-evaluation in flight when shutdown begins finishes its // SaveAsync before the engine returns control to the caller. The HashSet is // accessed under its own lock — never under _evalGate — so registration / // unregistration cannot deadlock against the gate. (Core.ScriptedAlarms-006) private readonly HashSet _inFlight = []; private readonly object _inFlightLock = new(); public ScriptedAlarmEngine( ITagUpstreamSource upstream, IAlarmStateStore store, ScriptLoggerFactory loggerFactory, ILogger engineLogger, Func? clock = null, TimeSpan? scriptTimeout = null) { _upstream = upstream ?? throw new ArgumentNullException(nameof(upstream)); _store = store ?? throw new ArgumentNullException(nameof(store)); _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory)); _engineLogger = engineLogger ?? throw new ArgumentNullException(nameof(engineLogger)); _clock = clock ?? (() => DateTime.UtcNow); _scriptTimeout = scriptTimeout ?? TimedScriptEvaluator.DefaultTimeout; } /// Raised for every emission the Part9StateMachine produces that the engine should publish. public event EventHandler? OnEvent; public IReadOnlyCollection LoadedAlarmIds => _alarms.Keys.ToArray(); /// /// Load a batch of alarm definitions. Compiles every predicate, aggregates any /// compile failures into one , subscribes /// to upstream input tags, seeds the value cache, loads persisted state from /// the store (falling back to Fresh for first-load alarms), and recomputes /// ActiveState per Phase 7 plan decision #14 (startup recovery). /// public async Task LoadAsync(IReadOnlyList definitions, CancellationToken ct) { if (_disposed) throw new ObjectDisposedException(nameof(ScriptedAlarmEngine)); if (definitions is null) throw new ArgumentNullException(nameof(definitions)); var pending = new List(0); await _evalGate.WaitAsync(ct).ConfigureAwait(false); try { UnsubscribeFromUpstream(); _alarms.Clear(); _alarmsReferencing.Clear(); // Drop the prior generation's per-alarm scratch buffers — definitions may // have changed (different Inputs, different Logger), so any reuse would be // unsafe. (Core.ScriptedAlarms-009) _scratchByAlarmId.Clear(); // Dispose every compiled-predicate ALC from the prior generation BEFORE we // recompile this one. Skipping this is what made Core.Scripting-008 a // no-op in production. (Core.Scripting-016) _compileCache.Clear(); var compileFailures = new List(); foreach (var def in definitions) { try { var extraction = DependencyExtractor.Extract(def.PredicateScriptSource); if (!extraction.IsValid) { var joined = string.Join("; ", extraction.Rejections.Select(r => r.Message)); compileFailures.Add($"{def.AlarmId}: dependency extraction rejected — {joined}"); continue; } // Route through CompiledScriptCache so the emitted assembly's // collectible ALC participates in publish-replace cleanup. // (Core.Scripting-016) var evaluator = _compileCache.GetOrCompile(def.PredicateScriptSource); var timed = new TimedScriptEvaluator(evaluator, _scriptTimeout); var logger = _loggerFactory.Create(def.AlarmId); var templateTokens = MessageTemplate.ExtractTokenPaths(def.MessageTemplate); var allInputs = new HashSet(extraction.Reads, StringComparer.Ordinal); foreach (var t in templateTokens) allInputs.Add(t); _alarms[def.AlarmId] = new AlarmState(def, timed, extraction.Reads, templateTokens, logger, AlarmConditionState.Fresh(def.AlarmId, _clock())); foreach (var path in allInputs) { if (!_alarmsReferencing.TryGetValue(path, out var set)) _alarmsReferencing[path] = set = new HashSet(StringComparer.Ordinal); set.Add(def.AlarmId); } } catch (Exception ex) { compileFailures.Add($"{def.AlarmId}: {ex.Message}"); } } if (compileFailures.Count > 0) { throw new InvalidOperationException( $"ScriptedAlarmEngine load failed. {compileFailures.Count} alarm(s) did not compile:\n " + string.Join("\n ", compileFailures)); } // Seed the value cache with current tag values before subscribing. The // ReadTag calls happen first so that the initial predicate evaluation below // (startup recovery, decision #14) uses a consistent snapshot. // Subscriptions are established AFTER _loaded = true so that any synchronous // initial-push an ITagUpstreamSource delivers from inside SubscribeTag arrives // when _alarms is fully initialised. Before _loaded = true, a synchronous push // would race the in-progress state restore and could overwrite the carefully // seeded cache with a push that has no defined ordering relative to ReadTag. // (Core.ScriptedAlarms-004) foreach (var path in _alarmsReferencing.Keys) _valueCache[path] = _upstream.ReadTag(path); // Restore persisted state, falling back to Fresh where nothing was saved, // then re-derive ActiveState from the current predicate per decision #14. // Any predicate emissions queue into `pending` and fire after the gate // is released — so a startup-recovery activation event can call back into // the engine without deadlocking. (Core.ScriptedAlarms-003) foreach (var (alarmId, state) in _alarms) { var persisted = await _store.LoadAsync(alarmId, ct).ConfigureAwait(false); var seed = persisted ?? state.Condition; var afterPredicate = await EvaluatePredicateToStateAsync(state, seed, nowUtc: _clock(), ct, pending) .ConfigureAwait(false); _alarms[alarmId] = state with { Condition = afterPredicate }; await _store.SaveAsync(afterPredicate, ct).ConfigureAwait(false); } _loaded = true; // Subscribe after _loaded = true and full state restore. If an upstream // implementation pushes its initial value synchronously from inside // SubscribeTag, OnUpstreamChange will queue a ReevaluateAsync that acquires // _evalGate — it will correctly block until LoadAsync releases the gate, then // re-evaluate against the fully-populated _alarms dict. foreach (var path in _alarmsReferencing.Keys) _upstreamSubscriptions.Add(_upstream.SubscribeTag(path, OnUpstreamChange)); _engineLogger.Information("ScriptedAlarmEngine loaded {Count} alarm(s)", _alarms.Count); // Dispose any previously-created timer before reassigning; a second LoadAsync // call without this would leave two timers firing against the same engine. // (Core.ScriptedAlarms-002) _shelvingTimer?.Dispose(); // Start the shelving-check timer — ticks every 5s, expires any timed shelves // that have passed their UnshelveAtUtc. _shelvingTimer = new Timer(_ => RunShelvingCheck(), null, TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(5)); } finally { _evalGate.Release(); } // Fire any emissions collected during startup recovery OUTSIDE the gate so // subscribers can re-enter the engine safely. (Core.ScriptedAlarms-003) foreach (var evt in pending) FireEvent(evt); } /// /// Current persisted state for . Returns null for /// unknown alarm. Mainly used for diagnostics + the Admin UI status page. /// public AlarmConditionState? GetState(string alarmId) => _alarms.TryGetValue(alarmId, out var s) ? s.Condition : null; public IReadOnlyCollection GetAllStates() => _alarms.Values.Select(a => a.Condition).ToArray(); public Task AcknowledgeAsync(string alarmId, string user, string? comment, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyAcknowledge(cur, user, comment, _clock())); public Task ConfirmAsync(string alarmId, string user, string? comment, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyConfirm(cur, user, comment, _clock())); public Task OneShotShelveAsync(string alarmId, string user, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyOneShotShelve(cur, user, _clock())); public Task TimedShelveAsync(string alarmId, string user, DateTime unshelveAtUtc, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyTimedShelve(cur, user, unshelveAtUtc, _clock())); public Task UnshelveAsync(string alarmId, string user, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyUnshelve(cur, user, _clock())); public Task EnableAsync(string alarmId, string user, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyEnable(cur, user, _clock())); public Task DisableAsync(string alarmId, string user, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyDisable(cur, user, _clock())); public Task AddCommentAsync(string alarmId, string user, string text, CancellationToken ct) => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyAddComment(cur, user, text, _clock())); private async Task ApplyAsync(string alarmId, CancellationToken ct, Func op) { EnsureLoaded(); if (!_alarms.TryGetValue(alarmId, out var state)) throw new ArgumentException($"Unknown alarm {alarmId}", nameof(alarmId)); ScriptedAlarmEvent? pending = null; await _evalGate.WaitAsync(ct).ConfigureAwait(false); try { var result = op(state.Condition); // Persist BEFORE updating in-memory so a store failure leaves both // in-memory and persisted at the prior state rather than diverging. // If SaveAsync throws the in-memory _alarms entry stays unchanged and // the exception propagates to the caller. (Core.ScriptedAlarms-007) await _store.SaveAsync(result.State, ct).ConfigureAwait(false); _alarms[alarmId] = state with { Condition = result.State }; // Build the emission event under the gate (it captures a coherent // snapshot of state + message-template values) but defer the actual // OnEvent dispatch until after Release() so a slow subscriber or a // subscriber that re-enters the engine doesn't block / deadlock. // (Core.ScriptedAlarms-003) if (result.Emission != EmissionKind.None) pending = BuildEmission(state, result.State, result.Emission); else if (result.NoOpReason is { } reason) { // The Part9StateMachine remarks promise a diagnostic log line for // disabled-alarm no-ops + idempotent ack/confirm/shelve/unshelve // calls. We surface them at debug so they're available when // investigating "why didn't my ack take effect?" without spamming // the main info log. (Core.ScriptedAlarms-011) state.Logger.Debug("Alarm {AlarmId} no-op transition: {Reason}", alarmId, reason); } } finally { _evalGate.Release(); } // OnEvent dispatch happens OUTSIDE _evalGate so subscribers can call back // into the engine (e.g. AcknowledgeAsync from inside an Activated handler) // without deadlocking against the non-reentrant SemaphoreSlim. if (pending is not null) FireEvent(pending); } /// /// Upstream-change callback. Updates the value cache + enqueues predicate /// re-evaluation for every alarm referencing the changed path. Fire-and-forget /// so driver-side dispatch isn't blocked; the background task is tracked so /// can drain it. (Core.ScriptedAlarms-006) /// internal void OnUpstreamChange(string path, DataValueSnapshot value) { _valueCache[path] = value; if (_disposed) return; // don't queue new work against a disposing engine if (_alarmsReferencing.TryGetValue(path, out var alarmIds)) { TrackBackgroundTask(ReevaluateAsync(alarmIds.ToArray(), CancellationToken.None)); } } private async Task ReevaluateAsync(IReadOnlyList alarmIds, CancellationToken ct) { var pending = new List(0); try { await _evalGate.WaitAsync(ct).ConfigureAwait(false); try { // Re-check after acquiring the gate: a Dispose() call may have // completed between our _evalGate.WaitAsync and here. Writing to a // disposing store or mutating _alarms after clear is unsafe. // (Core.ScriptedAlarms-005) if (_disposed) return; foreach (var id in alarmIds) { if (!_alarms.TryGetValue(id, out var state)) continue; var newState = await EvaluatePredicateToStateAsync( state, state.Condition, _clock(), ct, pending).ConfigureAwait(false); if (!ReferenceEquals(newState, state.Condition)) { // Persist before updating in-memory so a store failure leaves // both sides at the prior state. (Core.ScriptedAlarms-007) await _store.SaveAsync(newState, ct).ConfigureAwait(false); _alarms[id] = state with { Condition = newState }; } } } finally { _evalGate.Release(); } } catch (Exception ex) { _engineLogger.Error(ex, "ScriptedAlarmEngine reevaluate failed"); return; } // Fire emissions OUTSIDE _evalGate so subscriber callbacks can re-enter // the engine without deadlocking. (Core.ScriptedAlarms-003) foreach (var evt in pending) FireEvent(evt); } /// /// Evaluate the predicate + apply the resulting state-machine transition. /// Returns the new condition state. If the transition produces an emission, /// appends it to so the caller can fire /// them after releasing _evalGate — keeping subscriber callbacks /// outside the gate. (Core.ScriptedAlarms-003) /// private async Task EvaluatePredicateToStateAsync( AlarmState state, AlarmConditionState seed, DateTime nowUtc, CancellationToken ct, List? pendingEmissions = null) { // Look up (or lazily allocate) the per-alarm scratch and refill its read cache // in place. The dictionary + context survive across evaluations so the hot path // no longer allocates per upstream tag change. (Core.ScriptedAlarms-009) var scratch = _scratchByAlarmId.GetOrAdd( state.Definition.AlarmId, _ => new AlarmScratch(state.Inputs, state.Logger, _clock)); RefillReadCache(scratch.ReadCache, state.Inputs); // Cold-start guard — skip the predicate when any referenced upstream tag has no // cached value yet (the upstream subscription hasn't delivered its first push). // Without this, predicates that cast `(double)ctx.GetTag(path).Value` throw NRE on // every tick until the cache fills, spamming the log with identical stack traces. // Bad quality is treated the same: the input isn't available at the predicate's // expected type, so the only defensible move is to hold the prior condition state. if (!AreInputsReady(scratch.ReadCache)) return seed; var context = scratch.Context; bool predicateTrue; try { predicateTrue = await state.Evaluator.RunAsync(context, ct).ConfigureAwait(false); } catch (OperationCanceledException) { throw; } catch (ScriptTimeoutException tex) { state.Logger.Warning("Alarm predicate timed out after {Timeout} — state unchanged", tex.Timeout); return seed; } catch (Exception ex) { state.Logger.Error(ex, "Alarm predicate threw — state unchanged"); return seed; } var result = Part9StateMachine.ApplyPredicate(seed, predicateTrue, nowUtc); if (result.Emission != EmissionKind.None) { var evt = BuildEmission(state, result.State, result.Emission); if (evt is not null) { if (pendingEmissions is not null) pendingEmissions.Add(evt); else FireEvent(evt); // LoadAsync path: no caller-supplied list, fire here. } } return result.State; } /// /// Refill in place from _valueCache, falling /// back to a synchronous ITagUpstreamSource.ReadTag for paths whose /// first upstream push hasn't arrived yet. The dictionary is cleared and /// repopulated under _evalGate so no concurrent reader can observe /// a partial state. Replaces the old BuildReadCache which allocated a /// fresh dictionary every call (Core.ScriptedAlarms-009). /// private void RefillReadCache( Dictionary cache, IReadOnlySet inputs) { cache.Clear(); foreach (var p in inputs) cache[p] = _valueCache.TryGetValue(p, out var v) ? v : _upstream.ReadTag(p); } /// /// Returns true when every entry in has a non-null value /// and a Good-quality . A false here lets /// callers short-circuit script evaluation — predicates that unconditionally cast /// ctx.GetTag(path).Value to a numeric type would otherwise throw /// until the upstream subscription delivers /// its first push. /// private static bool AreInputsReady(IReadOnlyDictionary cache) { foreach (var kv in cache) { if (kv.Value is null || kv.Value.Value is null) return false; // OPC UA Part 4 StatusCode: bit 31 = severity 10 (Bad). Treat Good + Uncertain // as "ready"; Uncertain carries a value the script can still inspect + make a // qualified decision from. Only outright Bad is skipped. if ((kv.Value.StatusCode & 0x80000000u) != 0) return false; } return true; } /// /// Build (but do not fire) the for a /// transition. Returns null for kinds that should not be published /// ( and /// ). Pure construction — called under /// _evalGate so the message-template resolution uses a coherent /// value-cache snapshot. The actual dispatch is /// done by AFTER the gate is /// released. (Core.ScriptedAlarms-003) /// private ScriptedAlarmEvent? BuildEmission(AlarmState state, AlarmConditionState condition, EmissionKind kind) { // Suppressed kind means shelving ate the emission — we don't fire for subscribers // but the state record still advanced so startup recovery reflects reality. if (kind == EmissionKind.Suppressed || kind == EmissionKind.None) return null; var message = MessageTemplate.Resolve(state.Definition.MessageTemplate, TryLookup); return new ScriptedAlarmEvent( AlarmId: state.Definition.AlarmId, EquipmentPath: state.Definition.EquipmentPath, AlarmName: state.Definition.AlarmName, Kind: state.Definition.Kind, Severity: state.Definition.Severity, Message: message, Condition: condition, Emission: kind, TimestampUtc: _clock()); } /// /// Invoke the handler for a built emission. Must be /// called OUTSIDE _evalGate: a slow subscriber would otherwise /// block the gate for every other engine operation, and a subscriber /// that re-enters the engine (e.g. calls AcknowledgeAsync) would /// deadlock against the non-reentrant SemaphoreSlim. /// (Core.ScriptedAlarms-003) /// private void FireEvent(ScriptedAlarmEvent evt) { try { OnEvent?.Invoke(this, evt); } catch (Exception ex) { _engineLogger.Warning(ex, "ScriptedAlarmEngine OnEvent subscriber threw for {AlarmId}", evt.AlarmId); } } private DataValueSnapshot? TryLookup(string path) => _valueCache.TryGetValue(path, out var v) ? v : null; private void RunShelvingCheck() { if (_disposed) return; var ids = _alarms.Keys.ToArray(); TrackBackgroundTask(ShelvingCheckAsync(ids, CancellationToken.None)); } /// /// Register a fire-and-forget task so can await it. /// The task removes itself from the set on completion via a continuation. /// (Core.ScriptedAlarms-006) /// private void TrackBackgroundTask(Task task) { lock (_inFlightLock) { _inFlight.Add(task); } // Use ContinueWith with ExecuteSynchronously so the removal runs on the // completing thread — avoids scheduler delay between completion and // unregistration that would otherwise let Dispose see a stale set. task.ContinueWith(t => { lock (_inFlightLock) { _inFlight.Remove(t); } }, CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default); } /// /// Test hook — triggers a shelving check synchronously without waiting for /// the 5-second timer. Allows tests that inject a controllable clock to advance /// time and immediately drive timed-shelve expiry. (Core.ScriptedAlarms-012) /// internal void RunShelvingCheckForTest() => RunShelvingCheck(); private async Task ShelvingCheckAsync(IReadOnlyList alarmIds, CancellationToken ct) { var pending = new List(0); try { await _evalGate.WaitAsync(ct).ConfigureAwait(false); try { // Re-check after acquiring the gate: Timer.Dispose() does not wait for // running callbacks, so a shelving-check callback that passed the _disposed // check in RunShelvingCheck can arrive here after Dispose() has returned. // Mutating _alarms or saving to a disposed store here is unsafe. // (Core.ScriptedAlarms-005) if (_disposed) return; var now = _clock(); foreach (var id in alarmIds) { if (!_alarms.TryGetValue(id, out var state)) continue; var result = Part9StateMachine.ApplyShelvingCheck(state.Condition, now); if (!ReferenceEquals(result.State, state.Condition)) { // Persist before updating in-memory so a store failure leaves // both sides at the prior state. (Core.ScriptedAlarms-007) await _store.SaveAsync(result.State, ct).ConfigureAwait(false); _alarms[id] = state with { Condition = result.State }; if (result.Emission != EmissionKind.None) { var evt = BuildEmission(state, result.State, result.Emission); if (evt is not null) pending.Add(evt); } } } } finally { _evalGate.Release(); } } catch (Exception ex) { _engineLogger.Warning(ex, "ScriptedAlarmEngine shelving-check failed"); return; } // Fire emissions OUTSIDE _evalGate. (Core.ScriptedAlarms-003) foreach (var evt in pending) FireEvent(evt); } private void UnsubscribeFromUpstream() { foreach (var s in _upstreamSubscriptions) { try { s.Dispose(); } catch { } } _upstreamSubscriptions.Clear(); } private void EnsureLoaded() { if (!_loaded) throw new InvalidOperationException( "ScriptedAlarmEngine not loaded. Call LoadAsync first."); } public void Dispose() { if (_disposed) return; _disposed = true; _shelvingTimer?.Dispose(); UnsubscribeFromUpstream(); // Drain any fire-and-forget background work (ReevaluateAsync from // OnUpstreamChange + ShelvingCheckAsync from the 5s timer) that started // before _disposed = true was visible. Without this, a SaveAsync in // flight can outlive the engine and write to a (possibly disposed) store // after Dispose() has returned. The tasks re-check _disposed after // acquiring the gate and bail out, but the await still has to complete. // (Core.ScriptedAlarms-006) Task[] toAwait; lock (_inFlightLock) { toAwait = [.. _inFlight]; } if (toAwait.Length > 0) { try { Task.WhenAll(toAwait).GetAwaiter().GetResult(); } catch (Exception ex) { // Background task failures already logged inside ReevaluateAsync / // ShelvingCheckAsync; surface here at debug so a parent shutdown is // not noisy. The key invariant is that the tasks have COMPLETED. _engineLogger.Debug(ex, "ScriptedAlarmEngine background task threw during shutdown drain"); } } // Safe to clear here: the Task.WhenAll drain above guaranteed no // ReevaluateAsync / ShelvingCheckAsync is mid-flight, and _disposed=true // prevents new background work from being queued (OnUpstreamChange bails on // line 334). Pre-Core.Scripting-016 the comment said "Do NOT clear _alarms", // but that was when the engine called ScriptEvaluator.Compile directly and // held the script ALCs through _alarms→AlarmState→TimedScriptEvaluator // forever — leaving them rooted defeated the -008 collectible-ALC unload. // Clearing now drops the delegate references so the cache's Dispose call // below can actually unload the emitted assemblies. (Core.ScriptedAlarms-005 // re-evaluated under -016.) _alarms.Clear(); _alarmsReferencing.Clear(); _scratchByAlarmId.Clear(); // Dispose every compiled-predicate ALC so the engine's shutdown actually // releases the emitted assemblies. The drain above ensures no evaluator is // mid-call; CompiledScriptCache.Dispose internally guards against use-after- // dispose. (Core.Scripting-016) _compileCache.Dispose(); } private sealed record AlarmState( ScriptedAlarmDefinition Definition, TimedScriptEvaluator Evaluator, IReadOnlySet Inputs, IReadOnlyList TemplateTokens, ILogger Logger, AlarmConditionState Condition); /// /// Per-alarm reusable evaluation scratch. The dictionary /// is the same instance across every evaluation of the owning alarm — it is /// cleared and refilled in on /// each call. wraps that dictionary by reference, so a /// refilled is what the predicate's /// ctx.GetTag(path) calls observe. (Core.ScriptedAlarms-009) /// /// /// Reuse is safe because serialises every /// evaluation under _evalGate: two threads can never observe the same /// scratch in a half-refilled state. /// private sealed class AlarmScratch { public Dictionary ReadCache { get; } public AlarmPredicateContext Context { get; } public AlarmScratch(IReadOnlySet inputs, ILogger logger, Func clock) { // Pre-size to the expected input count so the first refill doesn't pay the // dictionary-grow cost. The dictionary auto-grows if Inputs changes (it // cannot under the current contract — Inputs is fixed at LoadAsync — but // pre-sizing is defensive against future changes). ReadCache = new Dictionary(inputs.Count, StringComparer.Ordinal); // Context holds the read cache by reference. Refilling the dictionary // updates what the context (and the script) observes. Context = new AlarmPredicateContext(ReadCache, logger, clock); } } } /// /// One alarm emission the engine pushed to subscribers. Carries everything /// downstream consumers (OPC UA alarm-source adapter + historian sink) need to /// publish the event without re-querying the engine. /// public sealed record ScriptedAlarmEvent( string AlarmId, string EquipmentPath, string AlarmName, AlarmKind Kind, AlarmSeverity Severity, string Message, AlarmConditionState Condition, EmissionKind Emission, DateTime TimestampUtc); /// /// Upstream source abstraction — intentionally identical shape to the virtual-tag /// engine's so Stream G can compose them behind one driver bridge. /// public interface ITagUpstreamSource { DataValueSnapshot ReadTag(string path); IDisposable SubscribeTag(string path, Action observer); }