using System.Collections.Concurrent;
using Serilog;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Scripting;

namespace ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;

/// <summary>
/// Phase 7 scripted-alarm orchestrator. Compiles every configured alarm's predicate
/// against the Stream A sandbox, subscribes to the referenced upstream tags,
/// re-evaluates the predicate on every input change + on a shelving-check timer,
/// applies the resulting transition through <see cref="Part9StateMachine"/>,
/// persists state via <see cref="IAlarmStateStore"/>, and emits the resulting events
/// through <see cref="ScriptedAlarmEngine.OnEvent"/> (which wires into the existing
/// IAlarmSource fan-out).
/// </summary>
/// <remarks>
/// <para>
/// Scripted alarms are leaves in the evaluation DAG — no alarm's state drives
/// another alarm's predicate. The engine maintains only an inverse index from
/// upstream tag path → alarms referencing it; no topological sort needed
/// (unlike the virtual-tag engine).
/// </para>
/// <para>
/// Evaluation errors (script throws, timeout, coercion fail) surface as
/// structured errors in the dedicated scripts-*.log sink plus a WARN companion
/// in the main log. The alarm's ActiveState stays at its prior value — the
/// engine does NOT invent a clear transition just because the predicate broke.
/// Operators investigating a broken predicate shouldn't see a phantom
/// clear-event preceding the failure.
/// </para>
/// </remarks>
public sealed class ScriptedAlarmEngine : IDisposable
{
    // NOTE(review): the generic type arguments in this class (ScriptEvaluator<,>,
    // TimedScriptEvaluator<,>, TransitionResult, ...) were inferred from how the values
    // are used here — confirm them against Core.Scripting and Part9StateMachine.

    /// <summary>Period of the sweep that expires timed shelves.</summary>
    private static readonly TimeSpan ShelvingCheckInterval = TimeSpan.FromSeconds(5);

    private readonly ITagUpstreamSource _upstream;
    private readonly IAlarmStateStore _store;
    private readonly ScriptLoggerFactory _loggerFactory;
    private readonly ILogger _engineLogger;
    private readonly Func<DateTime> _clock;
    private readonly TimeSpan _scriptTimeout;

    // Mutated only while _evalGate is held (Dispose is the one exception).
    private readonly Dictionary<string, AlarmState> _alarms = new(StringComparer.Ordinal);
    private readonly ConcurrentDictionary<string, DataValueSnapshot> _valueCache = new(StringComparer.Ordinal);
    private readonly Dictionary<string, HashSet<string>> _alarmsReferencing = new(StringComparer.Ordinal); // tag path -> alarm ids
    private readonly List<IDisposable> _upstreamSubscriptions = [];
    private readonly SemaphoreSlim _evalGate = new(1, 1);
    private Timer? _shelvingTimer;
    private bool _loaded;
    private bool _disposed;

    /// <summary>Creates an engine; nothing is compiled or subscribed until <see cref="LoadAsync"/> runs.</summary>
    /// <param name="upstream">Source used to read and subscribe to the tags referenced by alarms.</param>
    /// <param name="store">Persistence for per-alarm condition state.</param>
    /// <param name="loggerFactory">Creates the per-alarm script logger.</param>
    /// <param name="engineLogger">Logger for engine-level diagnostics.</param>
    /// <param name="clock">Time source; defaults to <see cref="DateTime.UtcNow"/>.</param>
    /// <param name="scriptTimeout">Per-evaluation timeout; defaults to the timed evaluator's default.</param>
    /// <exception cref="ArgumentNullException">A required dependency is null.</exception>
    public ScriptedAlarmEngine(
        ITagUpstreamSource upstream,
        IAlarmStateStore store,
        ScriptLoggerFactory loggerFactory,
        ILogger engineLogger,
        Func<DateTime>? clock = null,
        TimeSpan? scriptTimeout = null)
    {
        _upstream = upstream ?? throw new ArgumentNullException(nameof(upstream));
        _store = store ?? throw new ArgumentNullException(nameof(store));
        _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
        _engineLogger = engineLogger ?? throw new ArgumentNullException(nameof(engineLogger));
        _clock = clock ?? (() => DateTime.UtcNow);
        _scriptTimeout = scriptTimeout ?? TimedScriptEvaluator<AlarmPredicateContext, bool>.DefaultTimeout;
    }

    /// <summary>Raised for every emission the Part9StateMachine produces that the engine should publish.</summary>
    public event EventHandler<ScriptedAlarmEvent>? OnEvent;

    /// <summary>Ids of the alarms currently held by the engine.</summary>
    public IReadOnlyCollection<string> LoadedAlarmIds => _alarms.Keys;

    /// <summary>
    /// Load a batch of alarm definitions. Compiles every predicate, aggregates any
    /// compile failures into one <see cref="InvalidOperationException"/>, subscribes
    /// to upstream input tags, seeds the value cache, loads persisted state from
    /// the store (falling back to Fresh for first-load alarms), and recomputes
    /// ActiveState per Phase 7 plan decision #14 (startup recovery).
    /// </summary>
    /// <remarks>
    /// Any previous load is torn down first (timer, subscriptions, alarm table, value
    /// cache). If the load fails at any step the engine is left unloaded rather than
    /// half-built, so operator actions cannot run against un-restored Fresh state.
    /// </remarks>
    /// <param name="definitions">Alarm definitions to load.</param>
    /// <param name="ct">Cancels waiting for the gate, store I/O, and predicate evaluation.</param>
    /// <exception cref="ObjectDisposedException">The engine has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="definitions"/> is null.</exception>
    /// <exception cref="InvalidOperationException">One or more alarms failed to compile.</exception>
    public async Task LoadAsync(IReadOnlyList<ScriptedAlarmDefinition> definitions, CancellationToken ct)
    {
        if (_disposed) throw new ObjectDisposedException(nameof(ScriptedAlarmEngine));
        ArgumentNullException.ThrowIfNull(definitions);

        await _evalGate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            ResetLoadedState();
            try
            {
                var compileFailures = CompileDefinitions(definitions);
                if (compileFailures.Count > 0)
                {
                    throw new InvalidOperationException(
                        $"ScriptedAlarmEngine load failed. {compileFailures.Count} alarm(s) did not compile:\n " +
                        string.Join("\n ", compileFailures));
                }

                // Seed the value cache with current upstream values + subscribe for changes.
                foreach (var path in _alarmsReferencing.Keys)
                {
                    _valueCache[path] = _upstream.ReadTag(path);
                    _upstreamSubscriptions.Add(_upstream.SubscribeTag(path, OnUpstreamChange));
                }

                // Restore persisted state, falling back to Fresh where nothing was saved,
                // then re-derive ActiveState from the current predicate per decision #14.
                // Iterate a snapshot: the loop body assigns back into _alarms.
                foreach (var (alarmId, state) in _alarms.ToArray())
                {
                    var persisted = await _store.LoadAsync(alarmId, ct).ConfigureAwait(false);
                    var seed = persisted ?? state.Condition;
                    var afterPredicate = await EvaluatePredicateToStateAsync(state, seed, nowUtc: _clock(), ct)
                        .ConfigureAwait(false);
                    _alarms[alarmId] = state with { Condition = afterPredicate };
                    await _store.SaveAsync(afterPredicate, ct).ConfigureAwait(false);
                }
            }
            catch
            {
                // Drop whatever was built so far; a partially loaded alarm table would let
                // later operations persist Fresh-derived state over what the store holds.
                ResetLoadedState();
                throw;
            }

            _loaded = true;
            _engineLogger.Information("ScriptedAlarmEngine loaded {Count} alarm(s)", _alarms.Count);

            // Start the shelving-check timer — ticks every 5s, expires any timed shelves
            // that have passed their UnshelveAtUtc.
            _shelvingTimer = new Timer(_ => RunShelvingCheck(), null, ShelvingCheckInterval, ShelvingCheckInterval);
        }
        finally
        {
            _evalGate.Release();
        }
    }

    /// <summary>
    /// Current persisted state for <paramref name="alarmId"/>. Returns null for
    /// unknown alarm. Mainly used for diagnostics + the Admin UI status page.
    /// </summary>
    public AlarmConditionState? GetState(string alarmId)
        => _alarms.TryGetValue(alarmId, out var s) ? s.Condition : null;

    /// <summary>Snapshot of the condition state of every loaded alarm.</summary>
    public IReadOnlyCollection<AlarmConditionState> GetAllStates()
        => _alarms.Values.Select(a => a.Condition).ToArray();

    /// <summary>Applies an acknowledge by <paramref name="user"/> to the alarm.</summary>
    public Task AcknowledgeAsync(string alarmId, string user, string? comment, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyAcknowledge(cur, user, comment, _clock()));

    /// <summary>Applies a confirm by <paramref name="user"/> to the alarm.</summary>
    public Task ConfirmAsync(string alarmId, string user, string? comment, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyConfirm(cur, user, comment, _clock()));

    /// <summary>Applies a one-shot shelve by <paramref name="user"/> to the alarm.</summary>
    public Task OneShotShelveAsync(string alarmId, string user, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyOneShotShelve(cur, user, _clock()));

    /// <summary>Applies a timed shelve that is due to expire at <paramref name="unshelveAtUtc"/>.</summary>
    public Task TimedShelveAsync(string alarmId, string user, DateTime unshelveAtUtc, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyTimedShelve(cur, user, unshelveAtUtc, _clock()));

    /// <summary>Applies an unshelve by <paramref name="user"/> to the alarm.</summary>
    public Task UnshelveAsync(string alarmId, string user, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyUnshelve(cur, user, _clock()));

    /// <summary>Applies an enable by <paramref name="user"/> to the alarm.</summary>
    public Task EnableAsync(string alarmId, string user, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyEnable(cur, user, _clock()));

    /// <summary>Applies a disable by <paramref name="user"/> to the alarm.</summary>
    public Task DisableAsync(string alarmId, string user, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyDisable(cur, user, _clock()));

    /// <summary>Adds a comment by <paramref name="user"/> to the alarm.</summary>
    public Task AddCommentAsync(string alarmId, string user, string text, CancellationToken ct)
        => ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyAddComment(cur, user, text, _clock()));

    /// <summary>
    /// Runs one operator-driven state-machine operation against an alarm: applies
    /// <paramref name="op"/> to the current condition, stores and persists the result,
    /// and publishes the emission if there is one.
    /// </summary>
    /// <exception cref="InvalidOperationException">The engine has not been loaded.</exception>
    /// <exception cref="ArgumentException"><paramref name="alarmId"/> is not a loaded alarm.</exception>
    private async Task ApplyAsync(string alarmId, CancellationToken ct, Func<AlarmConditionState, TransitionResult> op)
    {
        await _evalGate.WaitAsync(ct).ConfigureAwait(false);
        try
        {
            // Both checks happen under the gate: the condition must be read after any
            // queued re-evaluation has finished, otherwise this operation would be
            // applied to a stale snapshot and overwrite the newer transition.
            EnsureLoaded();
            if (!_alarms.TryGetValue(alarmId, out var state))
                throw new ArgumentException($"Unknown alarm {alarmId}", nameof(alarmId));

            var result = op(state.Condition);
            _alarms[alarmId] = state with { Condition = result.State };
            await _store.SaveAsync(result.State, ct).ConfigureAwait(false);
            if (result.Emission != EmissionKind.None) EmitEvent(state, result.State, result.Emission);
        }
        finally
        {
            _evalGate.Release();
        }
    }

    /// <summary>
    /// Upstream-change callback. Updates the value cache + enqueues predicate
    /// re-evaluation for every alarm referencing the changed path. Fire-and-forget
    /// so driver-side dispatch isn't blocked.
    /// </summary>
    internal void OnUpstreamChange(string path, DataValueSnapshot value)
    {
        _valueCache[path] = value;
        if (_alarmsReferencing.TryGetValue(path, out var alarmIds))
        {
            // Copy the id set so the background task does not enumerate the live index.
            _ = ReevaluateAsync(alarmIds.ToArray(), CancellationToken.None);
        }
    }

    /// <summary>
    /// Re-evaluates the given alarms under the gate and persists any alarm whose
    /// condition instance changed. Never throws; failures are logged.
    /// </summary>
    private async Task ReevaluateAsync(IReadOnlyList<string> alarmIds, CancellationToken ct)
    {
        try
        {
            await _evalGate.WaitAsync(ct).ConfigureAwait(false);
            try
            {
                foreach (var id in alarmIds)
                {
                    if (!_alarms.TryGetValue(id, out var state)) continue;

                    var newState = await EvaluatePredicateToStateAsync(
                        state, state.Condition, _clock(), ct).ConfigureAwait(false);
                    if (!ReferenceEquals(newState, state.Condition))
                    {
                        _alarms[id] = state with { Condition = newState };
                        await _store.SaveAsync(newState, ct).ConfigureAwait(false);
                    }
                }
            }
            finally
            {
                _evalGate.Release();
            }
        }
        catch (Exception ex)
        {
            _engineLogger.Error(ex, "ScriptedAlarmEngine reevaluate failed");
        }
    }

    /// <summary>
    /// Evaluate the predicate + apply the resulting state-machine transition.
    /// Returns the new condition state. Emits the appropriate event if the
    /// transition produces one.
    /// </summary>
    /// <remarks>
    /// A timeout or script exception is logged and <paramref name="seed"/> is returned
    /// unchanged; cancellation is rethrown.
    /// </remarks>
    private async Task<AlarmConditionState> EvaluatePredicateToStateAsync(
        AlarmState state, AlarmConditionState seed, DateTime nowUtc, CancellationToken ct)
    {
        var inputs = BuildReadCache(state.Inputs);
        var context = new AlarmPredicateContext(inputs, state.Logger, _clock);

        bool predicateTrue;
        try
        {
            predicateTrue = await state.Evaluator.RunAsync(context, ct).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            throw;
        }
        catch (ScriptTimeoutException tex)
        {
            state.Logger.Warning("Alarm predicate timed out after {Timeout} — state unchanged", tex.Timeout);
            return seed;
        }
        catch (Exception ex)
        {
            state.Logger.Error(ex, "Alarm predicate threw — state unchanged");
            return seed;
        }

        var result = Part9StateMachine.ApplyPredicate(seed, predicateTrue, nowUtc);
        if (result.Emission != EmissionKind.None)
            EmitEvent(state, result.State, result.Emission);
        return result.State;
    }

    /// <summary>
    /// Builds the per-evaluation input map, taking each path from the value cache and
    /// falling back to a direct upstream read when the path is not cached.
    /// </summary>
    private IReadOnlyDictionary<string, DataValueSnapshot> BuildReadCache(IReadOnlySet<string> inputs)
    {
        var d = new Dictionary<string, DataValueSnapshot>(StringComparer.Ordinal);
        foreach (var p in inputs)
            d[p] = _valueCache.TryGetValue(p, out var v) ? v : _upstream.ReadTag(p);
        return d;
    }

    /// <summary>
    /// Builds a <see cref="ScriptedAlarmEvent"/> for the transition and raises
    /// <see cref="OnEvent"/>. Subscriber exceptions are logged and swallowed.
    /// </summary>
    private void EmitEvent(AlarmState state, AlarmConditionState condition, EmissionKind kind)
    {
        // Suppressed kind means shelving ate the emission — we don't fire for subscribers
        // but the state record still advanced so startup recovery reflects reality.
        if (kind == EmissionKind.Suppressed || kind == EmissionKind.None) return;

        var message = MessageTemplate.Resolve(state.Definition.MessageTemplate, TryLookup);
        var evt = new ScriptedAlarmEvent(
            AlarmId: state.Definition.AlarmId,
            EquipmentPath: state.Definition.EquipmentPath,
            AlarmName: state.Definition.AlarmName,
            Kind: state.Definition.Kind,
            Severity: state.Definition.Severity,
            Message: message,
            Condition: condition,
            Emission: kind,
            TimestampUtc: _clock());

        try
        {
            OnEvent?.Invoke(this, evt);
        }
        catch (Exception ex)
        {
            _engineLogger.Warning(ex, "ScriptedAlarmEngine OnEvent subscriber threw for {AlarmId}",
                state.Definition.AlarmId);
        }
    }

    /// <summary>Cache-only lookup used for message-template resolution; null when the path is not cached.</summary>
    private DataValueSnapshot? TryLookup(string path)
        => _valueCache.TryGetValue(path, out var v) ? v : null;

    /// <summary>Timer callback: kicks off a shelving sweep without blocking the timer thread.</summary>
    private void RunShelvingCheck()
    {
        if (_disposed) return;
        _ = ShelvingCheckAsync(CancellationToken.None);
    }

    /// <summary>
    /// Applies the state machine's shelving check to every loaded alarm and persists
    /// and publishes any resulting change. Never throws; failures are logged.
    /// </summary>
    private async Task ShelvingCheckAsync(CancellationToken ct)
    {
        try
        {
            await _evalGate.WaitAsync(ct).ConfigureAwait(false);
            try
            {
                var now = _clock();

                // The id snapshot is taken under the gate so the timer thread never
                // enumerates _alarms while a load or transition is writing to it.
                foreach (var id in _alarms.Keys.ToArray())
                {
                    if (!_alarms.TryGetValue(id, out var state)) continue;

                    var result = Part9StateMachine.ApplyShelvingCheck(state.Condition, now);
                    if (!ReferenceEquals(result.State, state.Condition))
                    {
                        _alarms[id] = state with { Condition = result.State };
                        await _store.SaveAsync(result.State, ct).ConfigureAwait(false);
                        if (result.Emission != EmissionKind.None)
                            EmitEvent(state, result.State, result.Emission);
                    }
                }
            }
            finally
            {
                _evalGate.Release();
            }
        }
        catch (Exception ex)
        {
            _engineLogger.Warning(ex, "ScriptedAlarmEngine shelving-check failed");
        }
    }

    /// <summary>
    /// Compiles each definition into <see cref="_alarms"/> and indexes its input paths in
    /// <see cref="_alarmsReferencing"/>. Returns one message per definition that failed.
    /// </summary>
    private List<string> CompileDefinitions(IReadOnlyList<ScriptedAlarmDefinition> definitions)
    {
        var compileFailures = new List<string>();
        foreach (var def in definitions)
        {
            try
            {
                var extraction = DependencyExtractor.Extract(def.PredicateScriptSource);
                if (!extraction.IsValid)
                {
                    var joined = string.Join("; ", extraction.Rejections.Select(r => r.Message));
                    compileFailures.Add($"{def.AlarmId}: dependency extraction rejected — {joined}");
                    continue;
                }

                var evaluator = ScriptEvaluator<AlarmPredicateContext, bool>.Compile(def.PredicateScriptSource);
                var timed = new TimedScriptEvaluator<AlarmPredicateContext, bool>(evaluator, _scriptTimeout);
                var logger = _loggerFactory.Create(def.AlarmId);

                // Template tokens are subscribed too, so the message can be resolved
                // from the cache, but only the predicate reads are passed to the script.
                var templateTokens = MessageTemplate.ExtractTokenPaths(def.MessageTemplate);
                var allInputs = new HashSet<string>(extraction.Reads, StringComparer.Ordinal);
                foreach (var t in templateTokens) allInputs.Add(t);

                _alarms[def.AlarmId] = new AlarmState(def, timed, extraction.Reads, templateTokens, logger,
                    AlarmConditionState.Fresh(def.AlarmId, _clock()));

                foreach (var path in allInputs)
                {
                    if (!_alarmsReferencing.TryGetValue(path, out var set))
                        _alarmsReferencing[path] = set = new HashSet<string>(StringComparer.Ordinal);
                    set.Add(def.AlarmId);
                }
            }
            catch (Exception ex)
            {
                compileFailures.Add($"{def.AlarmId}: {ex.Message}");
            }
        }

        return compileFailures;
    }

    /// <summary>
    /// Returns the engine to the unloaded state: stops the shelving timer, drops the
    /// upstream subscriptions, and clears the alarm table, inverse index and value cache.
    /// </summary>
    private void ResetLoadedState()
    {
        _loaded = false;

        // Without this a reload would leave the previous timer ticking alongside the new one.
        _shelvingTimer?.Dispose();
        _shelvingTimer = null;

        UnsubscribeFromUpstream();
        _alarms.Clear();
        _alarmsReferencing.Clear();
        _valueCache.Clear();
    }

    /// <summary>Disposes every upstream subscription; a failing disposal is logged and skipped.</summary>
    private void UnsubscribeFromUpstream()
    {
        foreach (var s in _upstreamSubscriptions)
        {
            try
            {
                s.Dispose();
            }
            catch (Exception ex)
            {
                // Best effort: one misbehaving subscription must not keep the rest alive.
                _engineLogger.Warning(ex, "ScriptedAlarmEngine failed to dispose an upstream subscription");
            }
        }
        _upstreamSubscriptions.Clear();
    }

    /// <summary>Throws when no load has completed successfully.</summary>
    private void EnsureLoaded()
    {
        if (!_loaded) throw new InvalidOperationException(
            "ScriptedAlarmEngine not loaded. Call LoadAsync first.");
    }

    /// <summary>Stops the shelving timer, drops upstream subscriptions and clears the alarm tables.</summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;
        _shelvingTimer?.Dispose();
        _shelvingTimer = null;
        UnsubscribeFromUpstream();
        _alarms.Clear();
        _alarmsReferencing.Clear();
        // _evalGate is left undisposed: fire-and-forget work may still be about to release it.
    }

    /// <summary>Per-alarm runtime record: the definition, its compiled evaluator and the current condition.</summary>
    private sealed record AlarmState(
        ScriptedAlarmDefinition Definition,
        TimedScriptEvaluator<AlarmPredicateContext, bool> Evaluator,
        IReadOnlySet<string> Inputs,
        IReadOnlyList<string> TemplateTokens,
        ILogger Logger,
        AlarmConditionState Condition);
}

/// <summary>
/// One alarm emission the engine pushed to subscribers. Carries everything
/// downstream consumers (OPC UA alarm-source adapter + historian sink) need to
/// publish the event without re-querying the engine.
/// </summary>
/// <param name="AlarmId">Id of the alarm definition the event belongs to.</param>
/// <param name="EquipmentPath">Equipment path copied from the alarm definition.</param>
/// <param name="AlarmName">Alarm name copied from the alarm definition.</param>
/// <param name="Kind">Alarm kind copied from the alarm definition.</param>
/// <param name="Severity">Severity copied from the alarm definition.</param>
/// <param name="Message">The definition's message template after token resolution.</param>
/// <param name="Condition">Condition state after the transition that caused the event.</param>
/// <param name="Emission">Emission kind the state machine reported for the transition.</param>
/// <param name="TimestampUtc">Engine clock reading taken when the event was built.</param>
public sealed record ScriptedAlarmEvent(
    string AlarmId,
    string EquipmentPath,
    string AlarmName,
    AlarmKind Kind,
    AlarmSeverity Severity,
    string Message,
    AlarmConditionState Condition,
    EmissionKind Emission,
    DateTime TimestampUtc);

/// <summary>
/// Upstream source abstraction — intentionally identical shape to the virtual-tag
/// engine's so Stream G can compose them behind one driver bridge.
/// </summary>
public interface ITagUpstreamSource
{
    /// <summary>Reads the current snapshot for <paramref name="path"/>.</summary>
    DataValueSnapshot ReadTag(string path);

    /// <summary>
    /// Registers <paramref name="observer"/> for <paramref name="path"/>; the observer is
    /// passed a tag path and a value snapshot. Disposing the returned handle ends the
    /// subscription.
    /// </summary>
    IDisposable SubscribeTag(string path, Action<string, DataValueSnapshot> observer);
}