using System.Collections.Concurrent;
using Serilog;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.Scripting;
namespace ZB.MOM.WW.OtOpcUa.Core.ScriptedAlarms;
///
/// Phase 7 scripted-alarm orchestrator. Compiles every configured alarm's predicate
/// against the Stream A sandbox, subscribes to the referenced upstream tags,
/// re-evaluates the predicate on every input change + on a shelving-check timer,
/// applies the resulting transition through ,
/// persists state via , and emits the resulting events
/// through (which wires into the existing
/// IAlarmSource fan-out).
///
///
///
/// Scripted alarms are leaves in the evaluation DAG — no alarm's state drives
/// another alarm's predicate. The engine maintains only an inverse index from
/// upstream tag path → alarms referencing it; no topological sort needed
/// (unlike the virtual-tag engine).
///
///
/// Evaluation errors (script throws, timeout, coercion fail) surface as
/// structured errors in the dedicated scripts-*.log sink plus a WARN companion
/// in the main log. The alarm's ActiveState stays at its prior value — the
/// engine does NOT invent a clear transition just because the predicate broke.
/// Operators investigating a broken predicate shouldn't see a phantom
/// clear-event preceding the failure.
///
///
public sealed class ScriptedAlarmEngine : IDisposable
{
private readonly ITagUpstreamSource _upstream;
private readonly IAlarmStateStore _store;
private readonly ScriptLoggerFactory _loggerFactory;
private readonly ILogger _engineLogger;
private readonly Func _clock;
private readonly TimeSpan _scriptTimeout;
private readonly Dictionary _alarms = new(StringComparer.Ordinal);
private readonly ConcurrentDictionary _valueCache
= new(StringComparer.Ordinal);
private readonly Dictionary> _alarmsReferencing
= new(StringComparer.Ordinal); // tag path -> alarm ids
private readonly List _upstreamSubscriptions = [];
private readonly SemaphoreSlim _evalGate = new(1, 1);
private Timer? _shelvingTimer;
private bool _loaded;
private bool _disposed;
public ScriptedAlarmEngine(
ITagUpstreamSource upstream,
IAlarmStateStore store,
ScriptLoggerFactory loggerFactory,
ILogger engineLogger,
Func? clock = null,
TimeSpan? scriptTimeout = null)
{
_upstream = upstream ?? throw new ArgumentNullException(nameof(upstream));
_store = store ?? throw new ArgumentNullException(nameof(store));
_loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
_engineLogger = engineLogger ?? throw new ArgumentNullException(nameof(engineLogger));
_clock = clock ?? (() => DateTime.UtcNow);
_scriptTimeout = scriptTimeout ?? TimedScriptEvaluator.DefaultTimeout;
}
/// Raised for every emission the Part9StateMachine produces that the engine should publish.
public event EventHandler? OnEvent;
public IReadOnlyCollection LoadedAlarmIds => _alarms.Keys;
///
/// Load a batch of alarm definitions. Compiles every predicate, aggregates any
/// compile failures into one , subscribes
/// to upstream input tags, seeds the value cache, loads persisted state from
/// the store (falling back to Fresh for first-load alarms), and recomputes
/// ActiveState per Phase 7 plan decision #14 (startup recovery).
///
public async Task LoadAsync(IReadOnlyList definitions, CancellationToken ct)
{
if (_disposed) throw new ObjectDisposedException(nameof(ScriptedAlarmEngine));
if (definitions is null) throw new ArgumentNullException(nameof(definitions));
await _evalGate.WaitAsync(ct).ConfigureAwait(false);
try
{
UnsubscribeFromUpstream();
_alarms.Clear();
_alarmsReferencing.Clear();
var compileFailures = new List();
foreach (var def in definitions)
{
try
{
var extraction = DependencyExtractor.Extract(def.PredicateScriptSource);
if (!extraction.IsValid)
{
var joined = string.Join("; ", extraction.Rejections.Select(r => r.Message));
compileFailures.Add($"{def.AlarmId}: dependency extraction rejected — {joined}");
continue;
}
var evaluator = ScriptEvaluator.Compile(def.PredicateScriptSource);
var timed = new TimedScriptEvaluator(evaluator, _scriptTimeout);
var logger = _loggerFactory.Create(def.AlarmId);
var templateTokens = MessageTemplate.ExtractTokenPaths(def.MessageTemplate);
var allInputs = new HashSet(extraction.Reads, StringComparer.Ordinal);
foreach (var t in templateTokens) allInputs.Add(t);
_alarms[def.AlarmId] = new AlarmState(def, timed, extraction.Reads, templateTokens, logger,
AlarmConditionState.Fresh(def.AlarmId, _clock()));
foreach (var path in allInputs)
{
if (!_alarmsReferencing.TryGetValue(path, out var set))
_alarmsReferencing[path] = set = new HashSet(StringComparer.Ordinal);
set.Add(def.AlarmId);
}
}
catch (Exception ex)
{
compileFailures.Add($"{def.AlarmId}: {ex.Message}");
}
}
if (compileFailures.Count > 0)
{
throw new InvalidOperationException(
$"ScriptedAlarmEngine load failed. {compileFailures.Count} alarm(s) did not compile:\n "
+ string.Join("\n ", compileFailures));
}
// Seed the value cache with current upstream values + subscribe for changes.
foreach (var path in _alarmsReferencing.Keys)
{
_valueCache[path] = _upstream.ReadTag(path);
_upstreamSubscriptions.Add(_upstream.SubscribeTag(path, OnUpstreamChange));
}
// Restore persisted state, falling back to Fresh where nothing was saved,
// then re-derive ActiveState from the current predicate per decision #14.
foreach (var (alarmId, state) in _alarms)
{
var persisted = await _store.LoadAsync(alarmId, ct).ConfigureAwait(false);
var seed = persisted ?? state.Condition;
var afterPredicate = await EvaluatePredicateToStateAsync(state, seed, nowUtc: _clock(), ct)
.ConfigureAwait(false);
_alarms[alarmId] = state with { Condition = afterPredicate };
await _store.SaveAsync(afterPredicate, ct).ConfigureAwait(false);
}
_loaded = true;
_engineLogger.Information("ScriptedAlarmEngine loaded {Count} alarm(s)", _alarms.Count);
// Start the shelving-check timer — ticks every 5s, expires any timed shelves
// that have passed their UnshelveAtUtc.
_shelvingTimer = new Timer(_ => RunShelvingCheck(),
null, TimeSpan.FromSeconds(5), TimeSpan.FromSeconds(5));
}
finally
{
_evalGate.Release();
}
}
///
/// Current persisted state for . Returns null for
/// unknown alarm. Mainly used for diagnostics + the Admin UI status page.
///
public AlarmConditionState? GetState(string alarmId)
=> _alarms.TryGetValue(alarmId, out var s) ? s.Condition : null;
public IReadOnlyCollection GetAllStates()
=> _alarms.Values.Select(a => a.Condition).ToArray();
public Task AcknowledgeAsync(string alarmId, string user, string? comment, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyAcknowledge(cur, user, comment, _clock()));
public Task ConfirmAsync(string alarmId, string user, string? comment, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyConfirm(cur, user, comment, _clock()));
public Task OneShotShelveAsync(string alarmId, string user, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyOneShotShelve(cur, user, _clock()));
public Task TimedShelveAsync(string alarmId, string user, DateTime unshelveAtUtc, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyTimedShelve(cur, user, unshelveAtUtc, _clock()));
public Task UnshelveAsync(string alarmId, string user, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyUnshelve(cur, user, _clock()));
public Task EnableAsync(string alarmId, string user, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyEnable(cur, user, _clock()));
public Task DisableAsync(string alarmId, string user, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyDisable(cur, user, _clock()));
public Task AddCommentAsync(string alarmId, string user, string text, CancellationToken ct)
=> ApplyAsync(alarmId, ct, cur => Part9StateMachine.ApplyAddComment(cur, user, text, _clock()));
private async Task ApplyAsync(string alarmId, CancellationToken ct, Func op)
{
EnsureLoaded();
if (!_alarms.TryGetValue(alarmId, out var state))
throw new ArgumentException($"Unknown alarm {alarmId}", nameof(alarmId));
await _evalGate.WaitAsync(ct).ConfigureAwait(false);
try
{
var result = op(state.Condition);
_alarms[alarmId] = state with { Condition = result.State };
await _store.SaveAsync(result.State, ct).ConfigureAwait(false);
if (result.Emission != EmissionKind.None) EmitEvent(state, result.State, result.Emission);
}
finally { _evalGate.Release(); }
}
///
/// Upstream-change callback. Updates the value cache + enqueues predicate
/// re-evaluation for every alarm referencing the changed path. Fire-and-forget
/// so driver-side dispatch isn't blocked.
///
internal void OnUpstreamChange(string path, DataValueSnapshot value)
{
_valueCache[path] = value;
if (_alarmsReferencing.TryGetValue(path, out var alarmIds))
{
_ = ReevaluateAsync(alarmIds.ToArray(), CancellationToken.None);
}
}
private async Task ReevaluateAsync(IReadOnlyList alarmIds, CancellationToken ct)
{
try
{
await _evalGate.WaitAsync(ct).ConfigureAwait(false);
try
{
foreach (var id in alarmIds)
{
if (!_alarms.TryGetValue(id, out var state)) continue;
var newState = await EvaluatePredicateToStateAsync(
state, state.Condition, _clock(), ct).ConfigureAwait(false);
if (!ReferenceEquals(newState, state.Condition))
{
_alarms[id] = state with { Condition = newState };
await _store.SaveAsync(newState, ct).ConfigureAwait(false);
}
}
}
finally { _evalGate.Release(); }
}
catch (Exception ex)
{
_engineLogger.Error(ex, "ScriptedAlarmEngine reevaluate failed");
}
}
///
/// Evaluate the predicate + apply the resulting state-machine transition.
/// Returns the new condition state. Emits the appropriate event if the
/// transition produces one.
///
private async Task EvaluatePredicateToStateAsync(
AlarmState state, AlarmConditionState seed, DateTime nowUtc, CancellationToken ct)
{
var inputs = BuildReadCache(state.Inputs);
var context = new AlarmPredicateContext(inputs, state.Logger, _clock);
bool predicateTrue;
try
{
predicateTrue = await state.Evaluator.RunAsync(context, ct).ConfigureAwait(false);
}
catch (OperationCanceledException)
{
throw;
}
catch (ScriptTimeoutException tex)
{
state.Logger.Warning("Alarm predicate timed out after {Timeout} — state unchanged", tex.Timeout);
return seed;
}
catch (Exception ex)
{
state.Logger.Error(ex, "Alarm predicate threw — state unchanged");
return seed;
}
var result = Part9StateMachine.ApplyPredicate(seed, predicateTrue, nowUtc);
if (result.Emission != EmissionKind.None)
EmitEvent(state, result.State, result.Emission);
return result.State;
}
private IReadOnlyDictionary BuildReadCache(IReadOnlySet inputs)
{
var d = new Dictionary(StringComparer.Ordinal);
foreach (var p in inputs)
d[p] = _valueCache.TryGetValue(p, out var v) ? v : _upstream.ReadTag(p);
return d;
}
private void EmitEvent(AlarmState state, AlarmConditionState condition, EmissionKind kind)
{
// Suppressed kind means shelving ate the emission — we don't fire for subscribers
// but the state record still advanced so startup recovery reflects reality.
if (kind == EmissionKind.Suppressed || kind == EmissionKind.None) return;
var message = MessageTemplate.Resolve(state.Definition.MessageTemplate, TryLookup);
var evt = new ScriptedAlarmEvent(
AlarmId: state.Definition.AlarmId,
EquipmentPath: state.Definition.EquipmentPath,
AlarmName: state.Definition.AlarmName,
Kind: state.Definition.Kind,
Severity: state.Definition.Severity,
Message: message,
Condition: condition,
Emission: kind,
TimestampUtc: _clock());
try { OnEvent?.Invoke(this, evt); }
catch (Exception ex)
{
_engineLogger.Warning(ex, "ScriptedAlarmEngine OnEvent subscriber threw for {AlarmId}", state.Definition.AlarmId);
}
}
private DataValueSnapshot? TryLookup(string path)
=> _valueCache.TryGetValue(path, out var v) ? v : null;
private void RunShelvingCheck()
{
if (_disposed) return;
var ids = _alarms.Keys.ToArray();
_ = ShelvingCheckAsync(ids, CancellationToken.None);
}
private async Task ShelvingCheckAsync(IReadOnlyList alarmIds, CancellationToken ct)
{
try
{
await _evalGate.WaitAsync(ct).ConfigureAwait(false);
try
{
var now = _clock();
foreach (var id in alarmIds)
{
if (!_alarms.TryGetValue(id, out var state)) continue;
var result = Part9StateMachine.ApplyShelvingCheck(state.Condition, now);
if (!ReferenceEquals(result.State, state.Condition))
{
_alarms[id] = state with { Condition = result.State };
await _store.SaveAsync(result.State, ct).ConfigureAwait(false);
if (result.Emission != EmissionKind.None)
EmitEvent(state, result.State, result.Emission);
}
}
}
finally { _evalGate.Release(); }
}
catch (Exception ex)
{
_engineLogger.Warning(ex, "ScriptedAlarmEngine shelving-check failed");
}
}
private void UnsubscribeFromUpstream()
{
foreach (var s in _upstreamSubscriptions)
{
try { s.Dispose(); } catch { }
}
_upstreamSubscriptions.Clear();
}
private void EnsureLoaded()
{
if (!_loaded) throw new InvalidOperationException(
"ScriptedAlarmEngine not loaded. Call LoadAsync first.");
}
public void Dispose()
{
if (_disposed) return;
_disposed = true;
_shelvingTimer?.Dispose();
UnsubscribeFromUpstream();
_alarms.Clear();
_alarmsReferencing.Clear();
}
private sealed record AlarmState(
ScriptedAlarmDefinition Definition,
TimedScriptEvaluator Evaluator,
IReadOnlySet Inputs,
IReadOnlyList TemplateTokens,
ILogger Logger,
AlarmConditionState Condition);
}
///
/// One alarm emission the engine pushed to subscribers. Carries everything
/// downstream consumers (OPC UA alarm-source adapter + historian sink) need to
/// publish the event without re-querying the engine.
///
public sealed record ScriptedAlarmEvent(
string AlarmId,
string EquipmentPath,
string AlarmName,
AlarmKind Kind,
AlarmSeverity Severity,
string Message,
AlarmConditionState Condition,
EmissionKind Emission,
DateTime TimestampUtc);
///
/// Upstream source abstraction — intentionally identical shape to the virtual-tag
/// engine's so Stream G can compose them behind one driver bridge.
///
public interface ITagUpstreamSource
{
DataValueSnapshot ReadTag(string path);
IDisposable SubscribeTag(string path, Action observer);
}