781 lines
34 KiB
C#
781 lines
34 KiB
C#
using Akka.Actor;
|
||
using Microsoft.CodeAnalysis.Scripting;
|
||
using Microsoft.Extensions.DependencyInjection;
|
||
using Microsoft.Extensions.Logging;
|
||
using ZB.MOM.WW.ScadaBridge.Commons.Messages.Streaming;
|
||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Enums;
|
||
using ZB.MOM.WW.ScadaBridge.Commons.Types.Flattening;
|
||
using ZB.MOM.WW.ScadaBridge.HealthMonitoring;
|
||
using ZB.MOM.WW.ScadaBridge.SiteEventLogging;
|
||
using ZB.MOM.WW.ScadaBridge.SiteRuntime.Scripts;
|
||
using System.Globalization;
|
||
using System.Text.Json;
|
||
|
||
namespace ZB.MOM.WW.ScadaBridge.SiteRuntime.Actors;
|
||
|
||
/// <summary>
|
||
/// WP-16: Alarm Actor — coordinator actor, child of Instance Actor, peer to Script Actors.
|
||
/// Subscribes to attribute change notifications from Instance Actor.
|
||
///
|
||
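/// Evaluates alarm conditions:
/// - ValueMatch: attribute equals a specific value (or, with a "!=" prefix, differs from it)
/// - RangeViolation: attribute outside min/max range
/// - RateOfChange: attribute rate exceeds threshold (configurable window, default per-second)
/// - HiLo: attribute enters a LowLow/Low/High/HighHigh band (optional per-band deadband, priority, message)
/// - Expression: compiled boolean expression evaluated over a snapshot of all attributes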
|
||
///
|
||
/// State (active/normal) is in memory only, NOT persisted.
|
||
/// On restart: starts normal, re-evaluates from incoming values.
|
||
///
|
||
/// WP-21: AlarmExecutionActor CAN call Instance.CallScript() (ask to sibling Script Actor).
|
||
/// Instance scripts CANNOT call alarm on-trigger scripts (no Instance.CallAlarmScript API).
|
||
///
|
||
/// Supervision: Resume on exception; AlarmExecutionActor stopped on exception.
|
||
/// </summary>
|
||
public class AlarmActor : ReceiveActor
|
||
{
|
||
// ── Identity and collaborators (assigned once in the constructor) ──
private readonly string _alarmName;
private readonly string _instanceName;
private readonly IActorRef _instanceActor;
private readonly SharedScriptLibrary _sharedScriptLibrary;
private readonly SiteRuntimeOptions _options;
private readonly ILogger _logger;
private readonly ISiteHealthCollector? _healthCollector;
private readonly IServiceProvider? _serviceProvider;

/// <summary>
/// M1.5: optional site operational-event log. It is looked up a single time
/// in the constructor from the supplied service provider and then reused, so
/// alarm transitions never pay for a <c>GetService</c> call. Stays <c>null</c>
/// when no provider was supplied (the test/no-logging path), in which case
/// <see cref="LogAlarmEvent"/> does nothing.
/// </summary>
private readonly ISiteEventLogger? _siteEventLogger;

/// <summary>
/// M1.5: raise transitions whose priority is at or above this value are
/// written to the site event log as <c>Error</c>; lower priorities are
/// written as <c>Warning</c>. Mirrors the 0–1000 alarm-severity scale.
/// </summary>
private const int ErrorPriorityThreshold = 700;

// ── Mutable alarm state (in memory only) ──
private AlarmState _currentState = AlarmState.Normal;

/// <summary>
/// Remains <see cref="AlarmLevel.None"/> for the binary trigger types. For
/// <see cref="AlarmTriggerType.HiLo"/> it is the source of truth: the state
/// machine transitions whenever the computed level differs from it.
/// </summary>
private AlarmLevel _currentLevel = AlarmLevel.None;

// ── Parsed alarm configuration ──
private readonly AlarmTriggerType _triggerType;
private readonly AlarmEvalConfig _evalConfig;
private readonly int _priority;
private readonly string? _onTriggerScriptName;
private readonly Script<object?>? _onTriggerCompiledScript;

/// <summary>
/// M2.5 (#9): per-script execution timeout, in seconds, for the on-trigger
/// script; null means "use the global default". It is handed unchanged to
/// every spawned <see cref="AlarmExecutionActor"/>, which applies
/// <c>perScript ?? global</c> (treating ≤ 0 as "use global"). The value
/// originates from the referenced on-trigger script's
/// <see cref="ResolvedScript.ExecutionTimeoutSeconds"/>.
/// </summary>
private readonly int? _onTriggerExecutionTimeoutSeconds;

// Expression trigger support: the compiled expression and the attribute
// snapshot it is evaluated against. This field is the only place the
// compiled expression is held for the hot path.
private readonly Script<object?>? _compiledTriggerExpression;
private readonly Dictionary<string, object?> _attributeSnapshot = new Dictionary<string, object?>();

/// <summary>
/// SiteRuntime-017: the very dictionary instance that was passed in to seed
/// this actor. The Instance Actor is expected to hand over a private
/// snapshot rather than its live <c>_attributes</c> field; the reference is
/// exposed so regression tests can check that isolation contract.
/// </summary>
internal IReadOnlyDictionary<string, object?>? SeedAttributesReference { get; }

// ── Rate-of-change sliding window ──
private readonly Queue<(DateTimeOffset Timestamp, double Value)> _rateOfChangeWindow =
    new Queue<(DateTimeOffset Timestamp, double Value)>();
private readonly TimeSpan _rateOfChangeWindowDuration;

// Monotonic suffix used to give each spawned execution actor a unique name.
private int _executionCounter;
|
||
|
||
/// <summary>
/// Initializes a new <see cref="AlarmActor"/>: stores its collaborators, seeds the
/// trigger-expression attribute snapshot, parses the trigger type and trigger
/// configuration, and registers the message handlers for the alarm.
/// </summary>
/// <param name="alarmName">The canonical name of this alarm.</param>
/// <param name="instanceName">The name of the owning instance.</param>
/// <param name="instanceActor">Reference to the parent instance actor used for attribute access and script calls.</param>
/// <param name="alarmConfig">The resolved alarm configuration including trigger type, priority, and script references.</param>
/// <param name="onTriggerCompiledScript">Pre-compiled on-trigger script, or <c>null</c> if no script is defined.</param>
/// <param name="sharedScriptLibrary">Shared script library providing common utilities to executed scripts.</param>
/// <param name="options">Site runtime configuration options.</param>
/// <param name="logger">Logger for alarm diagnostics.</param>
/// <param name="compiledTriggerExpression">Pre-compiled trigger expression, or <c>null</c> for non-expression triggers.</param>
/// <param name="initialAttributes">Seed attribute snapshot so static attributes evaluate correctly at startup.</param>
/// <param name="healthCollector">Optional health collector for surfacing alarm execution metrics.</param>
/// <param name="serviceProvider">Optional DI service provider used to resolve the optional
/// <see cref="ISiteEventLogger"/> for M1.5 <c>alarm</c> operational events. Fire-and-forget;
/// a logging failure never affects alarm evaluation.</param>
/// <param name="onTriggerExecutionTimeoutSeconds">M2.5 (#9): the on-trigger script's per-script
/// execution timeout in seconds (from its <see cref="ResolvedScript.ExecutionTimeoutSeconds"/>),
/// or null/non-positive to use the global default.</param>
public AlarmActor(
    string alarmName,
    string instanceName,
    IActorRef instanceActor,
    ResolvedAlarm alarmConfig,
    Script<object?>? onTriggerCompiledScript,
    SharedScriptLibrary sharedScriptLibrary,
    SiteRuntimeOptions options,
    ILogger logger,
    Script<object?>? compiledTriggerExpression = null,
    IReadOnlyDictionary<string, object?>? initialAttributes = null,
    ISiteHealthCollector? healthCollector = null,
    IServiceProvider? serviceProvider = null,
    // M2.5 (#9): per-script timeout for the on-trigger script (null = global).
    int? onTriggerExecutionTimeoutSeconds = null)
{
    _alarmName = alarmName;
    _instanceName = instanceName;
    _instanceActor = instanceActor;
    _sharedScriptLibrary = sharedScriptLibrary;
    _options = options;
    _logger = logger;
    _healthCollector = healthCollector;
    _serviceProvider = serviceProvider;
    // M1.5: resolve the optional site event logger once and cache it,
    // rather than calling GetService on every alarm transition.
    _siteEventLogger = serviceProvider?.GetService<ISiteEventLogger>();
    _priority = alarmConfig.PriorityLevel;
    _onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
    _onTriggerCompiledScript = onTriggerCompiledScript;
    _onTriggerExecutionTimeoutSeconds = onTriggerExecutionTimeoutSeconds;
    _compiledTriggerExpression = compiledTriggerExpression;

    // Seed the trigger-expression attribute snapshot from the instance's
    // initial attribute set so static attributes (which never re-emit an
    // AttributeValueChanged after deploy) evaluate correctly at startup.
    SeedAttributesReference = initialAttributes;
    if (initialAttributes != null)
    {
        foreach (var kvp in initialAttributes)
        {
            _attributeSnapshot[kvp.Key] = kvp.Value;
        }
    }

    // Parse trigger type (case-insensitive). An unrecognized name still falls
    // back to ValueMatch, but the fallback is now logged so a misconfigured
    // alarm is visible instead of silently evaluating as a different type.
    if (Enum.TryParse<AlarmTriggerType>(alarmConfig.TriggerType, true, out var tt))
    {
        _triggerType = tt;
    }
    else
    {
        _triggerType = AlarmTriggerType.ValueMatch;
        _logger.LogWarning(
            "Alarm {Alarm} on {Instance} has unrecognized trigger type '{TriggerType}'; falling back to ValueMatch",
            _alarmName, _instanceName, alarmConfig.TriggerType);
    }

    // ParseEvalConfig reads _triggerType and _logger, so it must run after both
    // have been assigned above.
    _evalConfig = ParseEvalConfig(alarmConfig.TriggerConfiguration);
    _rateOfChangeWindowDuration = _evalConfig is RateOfChangeEvalConfig roc
        ? roc.WindowDuration
        : TimeSpan.FromSeconds(1);

    // Handle attribute value changes
    Receive<AttributeValueChanged>(HandleAttributeValueChanged);

    // Handle alarm execution completion (diagnostic logging only)
    Receive<AlarmExecutionCompleted>(_ =>
        _logger.LogDebug("Alarm {Alarm} execution completed on {Instance}", _alarmName, _instanceName));
}
|
||
|
||
/// <inheritdoc />
protected override void PreStart()
{
    // Let the base actor run its own start-up hook first.
    base.PreStart();

    // Announce which alarm started, for which instance, and how it triggers.
    const string startedTemplate =
        "AlarmActor {Alarm} started on instance {Instance}, trigger={TriggerType}";
    _logger.LogInformation(startedTemplate, _alarmName, _instanceName, _triggerType);
}
|
||
|
||
/// <inheritdoc />
protected override SupervisorStrategy SupervisorStrategy()
{
    // Any failure in a child AlarmExecutionActor is logged and the child is
    // stopped; it is never restarted or resumed.
    Directive StopFailedChild(Exception ex)
    {
        _logger.LogWarning(ex,
            "AlarmExecutionActor for {Alarm} on {Instance} failed, stopping",
            _alarmName, _instanceName);
        return Directive.Stop;
    }

    var decider = Decider.From(StopFailedChild);
    return new OneForOneStrategy(
        maxNrOfRetries: -1,
        withinTimeRange: TimeSpan.FromMinutes(1),
        decider: decider);
}
|
||
|
||
/// <summary>
/// Reacts to a single attribute change by re-evaluating the alarm condition and
/// driving the Normal↔Active state machine. Any exception raised during
/// evaluation is counted and logged; the actor keeps running.
/// </summary>
private void HandleAttributeValueChanged(AttributeValueChanged changed)
{
    if (_triggerType == AlarmTriggerType.Expression)
    {
        // Expression triggers look at every attribute, so the snapshot is
        // refreshed for each change and the monitored-attribute gate is skipped.
        _attributeSnapshot[changed.AttributeName] = changed.Value;
    }
    else if (!IsMonitoredAttribute(changed.AttributeName))
    {
        // Not the attribute this alarm watches — nothing to evaluate.
        return;
    }

    try
    {
        // HiLo has its own multi-level state machine.
        if (_triggerType == AlarmTriggerType.HiLo)
        {
            var level = EvaluateHiLo(changed.Value);
            HandleHiLoTransition(level);
            return;
        }

        bool conditionMet;
        switch (_triggerType)
        {
            case AlarmTriggerType.ValueMatch:
                conditionMet = EvaluateValueMatch(changed.Value);
                break;
            case AlarmTriggerType.RangeViolation:
                conditionMet = EvaluateRangeViolation(changed.Value);
                break;
            case AlarmTriggerType.RateOfChange:
                conditionMet = EvaluateRateOfChange(changed.Value, changed.Timestamp);
                break;
            case AlarmTriggerType.Expression:
                conditionMet = EvaluateExpression();
                break;
            default:
                conditionMet = false;
                break;
        }

        if (conditionMet && _currentState == AlarmState.Normal)
        {
            // Normal → Active
            _currentState = AlarmState.Active;
            _logger.LogInformation(
                "Alarm {Alarm} ACTIVATED on instance {Instance}",
                _alarmName, _instanceName);

            // Tell the Instance Actor about the new state.
            _instanceActor.Tell(new AlarmStateChanged(
                _instanceName, _alarmName, AlarmState.Active, _priority, DateTimeOffset.UtcNow));

            // M1.5: operational `alarm` event — raise. Severity by priority.
            LogAlarmEvent(RaiseSeverity(_priority), $"Alarm {_alarmName} activated (priority {_priority})");

            // Run the on-trigger script, if one was supplied.
            if (_onTriggerCompiledScript != null)
            {
                SpawnAlarmExecution(AlarmLevel.None, _priority, string.Empty);
            }

            return;
        }

        if (!conditionMet && _currentState == AlarmState.Active)
        {
            // Active → Normal (no script runs on clear)
            _currentState = AlarmState.Normal;
            _logger.LogInformation(
                "Alarm {Alarm} CLEARED on instance {Instance}",
                _alarmName, _instanceName);

            _instanceActor.Tell(new AlarmStateChanged(
                _instanceName, _alarmName, AlarmState.Normal, _priority, DateTimeOffset.UtcNow));

            // M1.5: operational `alarm` event — return to normal.
            LogAlarmEvent("Info", $"Alarm {_alarmName} cleared");
        }
    }
    catch (Exception ex)
    {
        // Evaluation errors are recorded and logged; the actor continues.
        _healthCollector?.IncrementAlarmError();
        _logger.LogError(ex,
            "Alarm {Alarm} evaluation error on {Instance}",
            _alarmName, _instanceName);
    }
}
|
||
|
||
/// <summary>
/// HiLo state machine step. Does nothing when the level is unchanged; otherwise
/// records the new level, notifies the Instance Actor with an
/// <c>AlarmStateChanged</c>, writes a site event, and — only when the alarm
/// leaves the normal band for an alarm band — spawns the on-trigger script.
/// Escalations such as Hi→HiHi or Low→LowLow do not spawn the script.
/// </summary>
private void HandleHiLoTransition(AlarmLevel newLevel)
{
    var previousLevel = _currentLevel;
    if (previousLevel == newLevel)
    {
        return;
    }

    var enteringNormal = newLevel == AlarmLevel.None;
    var leavingNormal = previousLevel == AlarmLevel.None;

    _currentLevel = newLevel;
    _currentState = enteringNormal ? AlarmState.Normal : AlarmState.Active;
    var priority = LevelPriority(newLevel);
    var message = LevelMessage(newLevel);

    _logger.LogInformation(
        "Alarm {Alarm} on {Instance} transitioned {Prev} → {New} (priority={Priority})",
        _alarmName, _instanceName, previousLevel, newLevel, priority);

    _instanceActor.Tell(
        new AlarmStateChanged(_instanceName, _alarmName, _currentState, priority, DateTimeOffset.UtcNow)
        {
            Level = newLevel,
            Message = message
        });

    // M1.5: operational `alarm` event. Returning to None is a clear; entering a
    // band from Normal is a raise (severity from the band's priority); any
    // other band-to-band move is an informational transition.
    string severity;
    string eventText;
    if (enteringNormal)
    {
        severity = "Info";
        eventText = $"Alarm {_alarmName} cleared ({previousLevel} → Normal)";
    }
    else if (leavingNormal)
    {
        severity = RaiseSeverity(priority);
        eventText = $"Alarm {_alarmName} activated at {newLevel} (priority {priority})";
    }
    else
    {
        severity = "Info";
        eventText = $"Alarm {_alarmName} transitioned {previousLevel} → {newLevel} (priority {priority})";
    }

    LogAlarmEvent(severity, eventText);

    if (leavingNormal && !enteringNormal && _onTriggerCompiledScript != null)
    {
        SpawnAlarmExecution(newLevel, priority, message);
    }
}
|
||
|
||
/// <summary>
/// M1.5: chooses the site-event severity for a <i>raise</i> transition from the
/// alarm priority (0–1000): <c>Error</c> when the priority is at or above
/// <see cref="ErrorPriorityThreshold"/>, <c>Warning</c> otherwise. Clears and
/// inter-band transitions are logged as <c>Info</c> by their callers.
/// </summary>
private static string RaiseSeverity(int priority)
{
    if (priority < ErrorPriorityThreshold)
    {
        return "Warning";
    }

    return "Error";
}
|
||
|
||
/// <summary>
/// M1.5: fire-and-forget an <c>alarm</c> operational event to the optional
/// <see cref="ISiteEventLogger"/> (resolved once at construction and cached
/// in <see cref="_siteEventLogger"/>). No-op when no logger is available.
/// The returned task is never awaited, and a synchronous failure of the
/// logging call is caught and reported to the diagnostic logger, so a logging
/// failure cannot affect alarm evaluation or skip the on-trigger script.
/// </summary>
/// <param name="severity">Site-event severity text (e.g. <c>Info</c>, <c>Warning</c>, <c>Error</c>).</param>
/// <param name="message">Human-readable description of the alarm transition.</param>
private void LogAlarmEvent(string severity, string message)
{
    try
    {
        _ = _siteEventLogger?.LogEventAsync(
            "alarm", severity, _instanceName, $"AlarmActor:{_alarmName}", message);
    }
    catch (Exception ex)
    {
        // The callers invoke this between the state notification and the
        // on-trigger spawn; letting the exception escape would abort the rest
        // of the transition and be miscounted as an evaluation error.
        _logger.LogWarning(ex,
            "Alarm {Alarm} on {Instance} failed to write site event log entry",
            _alarmName, _instanceName);
    }
}
|
||
|
||
/// <summary>
/// Resolves the priority to report for a HiLo level. A band uses its own
/// configured priority when one is set; otherwise — and for
/// <see cref="AlarmLevel.None"/> or any non-HiLo configuration — the
/// alarm-level <see cref="_priority"/> is returned.
/// </summary>
private int LevelPriority(AlarmLevel level)
{
    if (_evalConfig is HiLoEvalConfig bands)
    {
        int? bandPriority;
        switch (level)
        {
            case AlarmLevel.LowLow:
                bandPriority = bands.LoLoPriority;
                break;
            case AlarmLevel.Low:
                bandPriority = bands.LoPriority;
                break;
            case AlarmLevel.High:
                bandPriority = bands.HiPriority;
                break;
            case AlarmLevel.HighHigh:
                bandPriority = bands.HiHiPriority;
                break;
            default:
                bandPriority = null;
                break;
        }

        return bandPriority ?? _priority;
    }

    return _priority;
}
|
||
|
||
/// <summary>
/// Resolves the operator message for a HiLo level. Returns the empty string
/// when the band has no configured message, when the level is None (alarm
/// clear), or when the alarm is not a HiLo alarm.
/// </summary>
private string LevelMessage(AlarmLevel level)
{
    if (_evalConfig is HiLoEvalConfig bands)
    {
        string? bandMessage;
        switch (level)
        {
            case AlarmLevel.LowLow:
                bandMessage = bands.LoLoMessage;
                break;
            case AlarmLevel.Low:
                bandMessage = bands.LoMessage;
                break;
            case AlarmLevel.High:
                bandMessage = bands.HiMessage;
                break;
            case AlarmLevel.HighHigh:
                bandMessage = bands.HiHiMessage;
                break;
            default:
                bandMessage = null;
                break;
        }

        return bandMessage ?? string.Empty;
    }

    return string.Empty;
}
|
||
|
||
/// <summary>
/// True when <paramref name="attributeName"/> is exactly (ordinal,
/// case-sensitive) the attribute named in the evaluation config.
/// </summary>
private bool IsMonitoredAttribute(string attributeName) =>
    string.Equals(_evalConfig.MonitoredAttributeName, attributeName, StringComparison.Ordinal);
|
||
|
||
/// <summary>
/// ValueMatch evaluator. Compares the attribute value, rendered as text, with
/// the configured match value using an ordinal comparison.
/// <list type="bullet">
/// <item>A null match value matches only a null attribute value.</item>
/// <item>A match value starting with <c>!=</c> inverts the test: the alarm
/// triggers when the text differs from what follows the prefix.</item>
/// </list>
/// The value is rendered with <see cref="CultureInfo.InvariantCulture"/> so
/// numeric and date values compare the same way on every host locale,
/// consistent with the numeric evaluators (SiteRuntime-023).
/// </summary>
/// <param name="value">The new attribute value; may be null.</param>
/// <returns><c>true</c> when the alarm condition holds; <c>false</c> otherwise
/// or when the config is not a value-match config.</returns>
private bool EvaluateValueMatch(object? value)
{
    if (_evalConfig is not ValueMatchEvalConfig config) return false;
    if (config.MatchValue == null) return value == null;

    // Convert.ToString returns "" for null and honours IConvertible/IFormattable
    // with the invariant culture; other types fall back to ToString().
    var valueStr = Convert.ToString(value, CultureInfo.InvariantCulture) ?? "";

    // Support "!=X" for not-equal matching. Ordinal: the prefix is a fixed
    // token, not linguistic text.
    if (config.MatchValue.StartsWith("!=", StringComparison.Ordinal))
    {
        var expected = config.MatchValue[2..];
        return !string.Equals(valueStr, expected, StringComparison.Ordinal);
    }

    return string.Equals(valueStr, config.MatchValue, StringComparison.Ordinal);
}
|
||
|
||
/// <summary>
/// RangeViolation evaluator: true when the value, converted to a double, lies
/// strictly below the configured minimum or strictly above the configured
/// maximum. Null values, values that cannot be converted, and non-range
/// configurations all evaluate to false.
/// </summary>
private bool EvaluateRangeViolation(object? value)
{
    if (value == null || _evalConfig is not RangeViolationEvalConfig range)
    {
        return false;
    }

    double sample;
    try
    {
        // Invariant culture keeps string-typed attribute values parsing the
        // same way regardless of host locale (SiteRuntime-023).
        sample = Convert.ToDouble(value, CultureInfo.InvariantCulture);
    }
    catch
    {
        return false;
    }

    var belowMinimum = sample < range.Min;
    return belowMinimum || sample > range.Max;
}
|
||
|
||
/// <summary>
/// RateOfChange evaluator. Appends the sample to the sliding window, drops
/// samples older than the window duration, and compares the rate between the
/// oldest remaining sample and this one against the configured per-second
/// threshold, honouring the configured direction. Returns false for null or
/// unconvertible values, for fewer than two samples in the window, for a
/// non-positive time span, and on any exception.
/// </summary>
private bool EvaluateRateOfChange(object? value, DateTimeOffset timestamp)
{
    if (value == null || _evalConfig is not RateOfChangeEvalConfig rateConfig)
    {
        return false;
    }

    try
    {
        // Invariant culture keeps string-typed attribute values parsing the
        // same way regardless of host locale (SiteRuntime-023).
        var sample = Convert.ToDouble(value, CultureInfo.InvariantCulture);

        // Record the sample, then evict everything that fell out of the window.
        _rateOfChangeWindow.Enqueue((timestamp, sample));

        var windowStart = timestamp - _rateOfChangeWindowDuration;
        while (_rateOfChangeWindow.Count > 0)
        {
            if (_rateOfChangeWindow.Peek().Timestamp >= windowStart)
            {
                break;
            }

            _rateOfChangeWindow.Dequeue();
        }

        if (_rateOfChangeWindow.Count < 2)
        {
            return false;
        }

        var (baselineTime, baselineValue) = _rateOfChangeWindow.Peek();
        var elapsedSeconds = (timestamp - baselineTime).TotalSeconds;
        if (elapsedSeconds <= 0)
        {
            return false;
        }

        var ratePerSecond = (sample - baselineValue) / elapsedSeconds;
        switch (rateConfig.Direction)
        {
            case RateOfChangeDirection.Rising:
                return ratePerSecond > rateConfig.ThresholdPerSecond;
            case RateOfChangeDirection.Falling:
                return -ratePerSecond > rateConfig.ThresholdPerSecond;
            default:
                return Math.Abs(ratePerSecond) > rateConfig.ThresholdPerSecond;
        }
    }
    catch
    {
        return false;
    }
}
|
||
|
||
/// <summary>
/// Runs the compiled trigger expression over the current attribute snapshot
/// and reports whether it produced <c>true</c>. The result drives the binary
/// Normal↔Active path: the alarm is active while the expression is true. When
/// no expression is compiled the result is false. An expression that throws,
/// returns something other than a bool, or is cancelled by the timeout is
/// logged as an alarm error and treated as false, so the state machine still
/// runs and an Active alarm clears if its expression starts failing.
/// </summary>
private bool EvaluateExpression()
{
    if (_compiledTriggerExpression is not { } expression)
    {
        return false;
    }

    try
    {
        var globals = new TriggerExpressionGlobals(_attributeSnapshot);

        // Evaluation is bounded by a short timeout. The token only helps for
        // cooperative/async code; a pathological CPU-bound expression is not
        // fully interruptible. That is accepted because trigger expressions
        // are authored by trusted Design-role users and compile-checked
        // before deployment.
        using var timeout = new CancellationTokenSource(TimeSpan.FromSeconds(2));
        var pendingRun = expression.RunAsync(globals, cancellationToken: timeout.Token);
        var outcome = pendingRun.GetAwaiter().GetResult().ReturnValue;
        return outcome is true;
    }
    catch (Exception ex)
    {
        // A timeout surfaces here as OperationCanceledException and is handled
        // the same way as any other failure: counted, logged, false.
        _healthCollector?.IncrementAlarmError();
        _logger.LogError(ex,
            "Alarm {Alarm} trigger expression evaluation failed on {Instance}; treated as false",
            _alarmName, _instanceName);
        return false;
    }
}
|
||
|
||
/// <summary>
/// HiLo level evaluator. Returns the first matching band, testing HighHigh,
/// High, LowLow, Low in that order, so a value sitting exactly on Hi==HiHi
/// resolves to HighHigh. Setpoints that are not configured (null) are skipped,
/// which permits partial configurations such as HighHigh only. A null or
/// unconvertible value leaves the level unchanged; a non-HiLo config yields None.
///
/// Hysteresis: while the alarm is already inside a band, that band's exit
/// threshold is relaxed by its configured deadband (negative deadbands count
/// as zero) to avoid flapping at the boundary. Example: at HighHigh with
/// HiHi=100 and hiHiDeadband=5, the alarm stays HighHigh until the value drops
/// below 95.
/// </summary>
private AlarmLevel EvaluateHiLo(object? value)
{
    if (_evalConfig is not HiLoEvalConfig bands)
    {
        return AlarmLevel.None;
    }

    if (value == null)
    {
        return _currentLevel;
    }

    double sample;
    try
    {
        // Invariant culture keeps string-typed attribute values parsing the
        // same way regardless of host locale (SiteRuntime-023).
        sample = Convert.ToDouble(value, CultureInfo.InvariantCulture);
    }
    catch
    {
        return _currentLevel;
    }

    // Deadband to apply; unset deadbands contribute nothing.
    static double Slack(double? deadband) => Math.Max(0, deadband ?? 0);

    var inHighHigh = _currentLevel == AlarmLevel.HighHigh;
    var onHighSide = inHighHigh || _currentLevel == AlarmLevel.High;
    var inLowLow = _currentLevel == AlarmLevel.LowLow;
    var onLowSide = inLowLow || _currentLevel == AlarmLevel.Low;

    // Lifted arithmetic keeps an unset setpoint null, so it stays skipped below.
    double? highHighLimit = inHighHigh ? bands.HiHi - Slack(bands.HiHiDeadband) : bands.HiHi;
    double? highLimit = onHighSide ? bands.Hi - Slack(bands.HiDeadband) : bands.Hi;
    double? lowLowLimit = inLowLow ? bands.LoLo + Slack(bands.LoLoDeadband) : bands.LoLo;
    double? lowLimit = onLowSide ? bands.Lo + Slack(bands.LoDeadband) : bands.Lo;

    // Comparisons against a null limit are false, so unset bands never match.
    if (sample >= highHighLimit)
    {
        return AlarmLevel.HighHigh;
    }

    if (sample >= highLimit)
    {
        return AlarmLevel.High;
    }

    if (sample <= lowLowLimit)
    {
        return AlarmLevel.LowLow;
    }

    if (sample <= lowLimit)
    {
        return AlarmLevel.Low;
    }

    return AlarmLevel.None;
}
|
||
|
||
/// <summary>
/// Creates a child AlarmExecutionActor that runs the on-trigger script. The
/// firing level, priority and message are forwarded so the script can branch
/// on severity through the <c>Alarm</c> global. Does nothing when no
/// on-trigger script is compiled.
/// </summary>
/// <param name="level">The firing alarm severity level.</param>
/// <param name="priority">The firing alarm priority.</param>
/// <param name="message">The firing alarm message.</param>
/// <param name="parentExecutionId">
/// Audit Log #23 (M5.4 — ParentExecutionId tag-cascade): execution id of the
/// context that fired this alarm. It is recorded as the on-trigger run's
/// <c>ParentExecutionId</c> so the alarm-triggered run chains beneath its
/// firing context in the audit tree. The alarm subsystem has no Guid-typed
/// firing id today, so the call sites in this class pass <c>null</c> (the
/// on-trigger run is a root); the parameter exists so a future firing id can
/// flow through without changing the actor wiring.
/// </param>
private void SpawnAlarmExecution(
    AlarmLevel level, int priority, string message, Guid? parentExecutionId = null)
{
    var script = _onTriggerCompiledScript;
    if (script is null)
    {
        return;
    }

    // Each execution gets a unique child name from a per-actor counter.
    var sequence = _executionCounter++;
    var childName = $"{_alarmName}-alarm-exec-{sequence}";

    // SiteRuntime-009: the on-trigger script body runs on the dedicated
    // ScriptExecutionScheduler, not the shared .NET thread pool.
    var props = Props.Create(() => new AlarmExecutionActor(
        _alarmName,
        _instanceName,
        level,
        priority,
        message,
        script,
        _instanceActor,
        _sharedScriptLibrary,
        _options,
        _logger,
        // M2.5 (#9): per-script timeout from the on-trigger script (null = global).
        _onTriggerExecutionTimeoutSeconds,
        // Audit Log #23 (M5.4): the firing context's execution id (null today).
        parentExecutionId));

    Context.ActorOf(props, childName);
}
|
||
|
||
/// <summary>
/// Parses the alarm's trigger-configuration JSON into the evaluation config
/// that matches <see cref="_triggerType"/> (which must already be assigned).
/// </summary>
/// <remarks>
/// The monitored attribute is read from <c>attributeName</c> or, failing that,
/// <c>attribute</c>. Alternate keys are accepted for several settings
/// (<c>matchValue</c>/<c>value</c>, <c>min</c>/<c>low</c>, <c>max</c>/<c>high</c>).
/// Missing range bounds default to <see cref="double.MinValue"/> /
/// <see cref="double.MaxValue"/>; a missing rate threshold defaults to 10 per
/// second, a missing window to one second, and a missing direction to Either.
/// The parsed <see cref="JsonDocument"/> is disposed before returning; only
/// plain strings and numbers are copied out of it.
/// </remarks>
/// <param name="triggerConfigJson">Raw trigger configuration JSON; may be null or empty.</param>
/// <returns>
/// The parsed config. A null/empty input, malformed JSON, or a value of an
/// unexpected JSON type yields an empty value-match config (no monitored
/// attribute, null match value); parse failures are logged as a warning.
/// </returns>
private AlarmEvalConfig ParseEvalConfig(string? triggerConfigJson)
{
    if (string.IsNullOrEmpty(triggerConfigJson))
        return new ValueMatchEvalConfig("", null);

    try
    {
        // JsonDocument rents pooled buffers; dispose it so they are returned.
        using var doc = JsonDocument.Parse(triggerConfigJson);
        var root = doc.RootElement;

        // Support both "attributeName" and "attribute" keys
        var attr = root.TryGetProperty("attributeName", out var attrEl)
            ? attrEl.GetString() ?? ""
            : root.TryGetProperty("attribute", out var attrEl2)
                ? attrEl2.GetString() ?? ""
                : "";

        return _triggerType switch
        {
            AlarmTriggerType.ValueMatch => new ValueMatchEvalConfig(
                attr,
                root.TryGetProperty("matchValue", out var mv) ? mv.GetString()
                : root.TryGetProperty("value", out var mv2) ? mv2.GetString()
                : null),

            AlarmTriggerType.RangeViolation => new RangeViolationEvalConfig(
                attr,
                root.TryGetProperty("min", out var minEl) ? minEl.GetDouble()
                : root.TryGetProperty("low", out var lowEl) ? lowEl.GetDouble()
                : double.MinValue,
                root.TryGetProperty("max", out var maxEl) ? maxEl.GetDouble()
                : root.TryGetProperty("high", out var highEl) ? highEl.GetDouble()
                : double.MaxValue),

            AlarmTriggerType.RateOfChange => new RateOfChangeEvalConfig(
                attr,
                root.TryGetProperty("thresholdPerSecond", out var tps) ? tps.GetDouble() : 10.0,
                root.TryGetProperty("windowSeconds", out var ws)
                    ? TimeSpan.FromSeconds(ws.GetDouble())
                    : TimeSpan.FromSeconds(1),
                root.TryGetProperty("direction", out var dirEl)
                    ? ParseDirection(dirEl.GetString())
                    : RateOfChangeDirection.Either),

            AlarmTriggerType.HiLo => new HiLoEvalConfig(
                attr,
                LoLo: TryReadDouble(root, "loLo"),
                Lo: TryReadDouble(root, "lo"),
                Hi: TryReadDouble(root, "hi"),
                HiHi: TryReadDouble(root, "hiHi"),
                LoLoPriority: TryReadInt(root, "loLoPriority"),
                LoPriority: TryReadInt(root, "loPriority"),
                HiPriority: TryReadInt(root, "hiPriority"),
                HiHiPriority: TryReadInt(root, "hiHiPriority"),
                LoLoDeadband: TryReadDouble(root, "loLoDeadband"),
                LoDeadband: TryReadDouble(root, "loDeadband"),
                HiDeadband: TryReadDouble(root, "hiDeadband"),
                HiHiDeadband: TryReadDouble(root, "hiHiDeadband"),
                LoLoMessage: TryReadString(root, "loLoMessage"),
                LoMessage: TryReadString(root, "loMessage"),
                HiMessage: TryReadString(root, "hiMessage"),
                HiHiMessage: TryReadString(root, "hiHiMessage")),

            // Expression triggers have no single monitored attribute; they
            // evaluate the compiled expression (passed into the actor and
            // cached in _compiledTriggerExpression) over the full attribute
            // snapshot. MonitoredAttributeName is unused.
            AlarmTriggerType.Expression => new ExpressionEvalConfig(
                "",
                TriggerExpressionGlobals.ExtractExpression(triggerConfigJson) ?? ""),

            _ => new ValueMatchEvalConfig(attr, null)
        };
    }
    catch (Exception ex)
    {
        // Deliberate best-effort: a bad config disables matching rather than
        // failing actor construction.
        _logger.LogWarning(ex, "Failed to parse alarm trigger config for {Alarm}", _alarmName);
        return new ValueMatchEvalConfig("", null);
    }
}
|
||
|
||
/// <summary>
/// Maps a direction keyword (case-insensitive) to a
/// <see cref="RateOfChangeDirection"/>: "rising"/"up"/"positive" → Rising,
/// "falling"/"down"/"negative" → Falling, anything else (including null) → Either.
/// </summary>
private static RateOfChangeDirection ParseDirection(string? raw)
{
    switch (raw?.ToLowerInvariant())
    {
        case "rising":
        case "up":
        case "positive":
            return RateOfChangeDirection.Rising;

        case "falling":
        case "down":
        case "negative":
            return RateOfChangeDirection.Falling;

        default:
            return RateOfChangeDirection.Either;
    }
}
|
||
|
||
/// <summary>
/// Reads an optional double property from a JSON object. Accepts a JSON
/// number, or a JSON string holding a floating-point literal in invariant
/// culture. Returns null when the property is absent, of another kind, or a
/// string that does not parse.
/// </summary>
private static double? TryReadDouble(JsonElement el, string name)
{
    if (!el.TryGetProperty(name, out var property))
    {
        return null;
    }

    if (property.ValueKind == JsonValueKind.Number)
    {
        return property.GetDouble();
    }

    if (property.ValueKind == JsonValueKind.String
        && double.TryParse(property.GetString(), NumberStyles.Float, CultureInfo.InvariantCulture, out var parsed))
    {
        return parsed;
    }

    return null;
}
|
||
|
||
/// <summary>
/// Reads an optional 32-bit integer property from a JSON object.
/// </summary>
/// <remarks>
/// Accepts a JSON integer, a JSON number with a fractional part or exponent
/// (truncated toward zero), or a JSON string holding an integer literal in
/// invariant culture. A number whose truncated value does not fit in an
/// <see cref="int"/> yields null rather than an unspecified cast result.
/// </remarks>
/// <param name="el">The JSON object to read from.</param>
/// <param name="name">The property name.</param>
/// <returns>The integer value, or null when the property is absent, of another
/// kind, out of range, or a string that does not parse.</returns>
private static int? TryReadInt(JsonElement el, string name)
{
    if (!el.TryGetProperty(name, out var p)) return null;

    switch (p.ValueKind)
    {
        case JsonValueKind.Number:
            if (p.TryGetInt32(out var i)) return i;

            // Non-integer form (e.g. 7.9 or 1e3). Only cast when the truncated
            // result is representable; the bounds are exclusive so values such
            // as 2147483647.9 still truncate to int.MaxValue. NaN fails both
            // comparisons and falls through to null.
            var d = p.GetDouble();
            if (d > -2147483649.0 && d < 2147483648.0) return (int)d;
            return null;

        case JsonValueKind.String:
            if (int.TryParse(p.GetString(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var v)) return v;
            return null;

        default:
            return null;
    }
}
|
||
|
||
/// <summary>
/// Reads an optional string property from a JSON object. Returns null when the
/// property is absent or is not a JSON string.
/// </summary>
private static string? TryReadString(JsonElement el, string name)
{
    if (el.TryGetProperty(name, out var property) && property.ValueKind == JsonValueKind.String)
    {
        return property.GetString();
    }

    return null;
}
|
||
|
||
// ── Internal messages ──
|
||
/// <summary>
/// Message received by <see cref="AlarmActor"/> when an on-trigger execution
/// has finished; the actor's handler only writes a debug log entry.
/// NOTE(review): presumably sent by the spawned AlarmExecutionActor — the
/// sender is not visible in this file; confirm.
/// </summary>
internal record AlarmExecutionCompleted(string AlarmName, bool Success);
|
||
}
|
||
|
||
/// <summary>
/// Which direction of change a rate-of-change alarm reacts to: <c>Rising</c>
/// tests the signed rate, <c>Falling</c> tests the negated signed rate, and
/// <c>Either</c> tests the absolute rate against the threshold.
/// </summary>
internal enum RateOfChangeDirection { Either, Rising, Falling }
|
||
|
||
// ── Alarm evaluation config types ──
|
||
/// <summary>
/// Base type for parsed alarm evaluation settings. <c>MonitoredAttributeName</c>
/// is the attribute whose changes the alarm evaluates; it is compared exactly
/// against incoming attribute names and is empty for expression triggers.
/// </summary>
internal abstract record AlarmEvalConfig(string MonitoredAttributeName);
|
||
/// <summary>
/// Value-match settings. A null <c>MatchValue</c> matches only a null attribute
/// value; a <c>MatchValue</c> beginning with <c>!=</c> requests a not-equal
/// comparison against the text after the prefix.
/// </summary>
internal record ValueMatchEvalConfig(string MonitoredAttributeName, string? MatchValue) : AlarmEvalConfig(MonitoredAttributeName);
|
||
/// <summary>
/// Range-violation settings: the alarm condition holds when the numeric value
/// is below <c>Min</c> or above <c>Max</c> (values equal to a bound are in range).
/// </summary>
internal record RangeViolationEvalConfig(string MonitoredAttributeName, double Min, double Max) : AlarmEvalConfig(MonitoredAttributeName);
|
||
/// <summary>
/// Rate-of-change settings: the per-second rate threshold, the length of the
/// sliding window over which the rate is measured, and the direction of
/// change that counts as a violation.
/// </summary>
internal record RateOfChangeEvalConfig(
    string MonitoredAttributeName,
    double ThresholdPerSecond, TimeSpan WindowDuration, RateOfChangeDirection Direction)
    : AlarmEvalConfig(MonitoredAttributeName);
|
||
|
||
/// <summary>
/// Settings for an expression trigger: a read-only boolean C# expression that
/// is evaluated against the whole attribute snapshot rather than one
/// attribute, so <see cref="AlarmEvalConfig.MonitoredAttributeName"/> is left
/// empty. Only the expression text is kept here; the compiled form used on
/// the hot path is held by the actor in its <c>_compiledTriggerExpression</c>
/// field.
/// </summary>
internal record ExpressionEvalConfig(string MonitoredAttributeName, string Expression)
    : AlarmEvalConfig(MonitoredAttributeName);
|
||
|
||
/// <summary>
/// Settings for a HiLo alarm. Each of the four setpoints is optional; a null
/// setpoint means that band is not evaluated. A per-band priority, when set,
/// replaces the alarm-level priority on the AlarmStateChanged message emitted
/// for that band. Deadbands relax a band's exit threshold while the alarm is
/// inside it, and per-band messages are passed along as the operator message.
/// </summary>
internal record HiLoEvalConfig(
    string MonitoredAttributeName,
    // Setpoints (null = band not evaluated).
    double? LoLo, double? Lo, double? Hi, double? HiHi,
    // Per-band priority overrides (null = use the alarm-level priority).
    int? LoLoPriority, int? LoPriority, int? HiPriority, int? HiHiPriority,
    // Per-band hysteresis deadbands (null = none).
    double? LoLoDeadband = null, double? LoDeadband = null,
    double? HiDeadband = null, double? HiHiDeadband = null,
    // Per-band operator messages (null = no message).
    string? LoLoMessage = null, string? LoMessage = null,
    string? HiMessage = null, string? HiHiMessage = null)
    : AlarmEvalConfig(MonitoredAttributeName);
|