Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging
Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
This commit is contained in:
305
src/ScadaLink.SiteRuntime/Actors/AlarmActor.cs
Normal file
305
src/ScadaLink.SiteRuntime/Actors/AlarmActor.cs
Normal file
@@ -0,0 +1,305 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.CodeAnalysis.Scripting;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using ScadaLink.Commons.Messages.Streaming;
|
||||
using ScadaLink.Commons.Types.Enums;
|
||||
using ScadaLink.Commons.Types.Flattening;
|
||||
using ScadaLink.SiteRuntime.Scripts;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace ScadaLink.SiteRuntime.Actors;
|
||||
|
||||
/// <summary>
/// WP-16: Alarm Actor — coordinator actor, child of Instance Actor, peer to Script Actors.
/// Receives <see cref="AttributeValueChanged"/> notifications and evaluates one alarm condition
/// against the single attribute named in the trigger configuration.
///
/// Supported conditions:
///   - ValueMatch: the attribute's invariant-culture text equals the configured value
///   - RangeViolation: the attribute's numeric value is below min or above max
///   - RateOfChange: |newest - oldest| / elapsed seconds, over a sliding window, exceeds the threshold
///
/// State (active/normal) is in memory only, NOT persisted.
/// On restart: starts normal, re-evaluates from incoming values.
///
/// WP-21: AlarmExecutionActor CAN call Instance.CallScript() (ask to sibling Script Actor).
/// Instance scripts CANNOT call alarm on-trigger scripts (no Instance.CallAlarmScript API).
///
/// Supervision: evaluation errors inside this actor are caught and logged by the message handler;
/// child AlarmExecutionActors that throw are stopped (see <see cref="SupervisorStrategy"/>).
/// </summary>
public class AlarmActor : ReceiveActor
{
    /// <summary>Rate threshold used for RateOfChange alarms when the config omits "thresholdPerSecond".</summary>
    private const double DefaultThresholdPerSecond = 10.0;

    /// <summary>Sliding window used for RateOfChange alarms when the config omits "windowSeconds".</summary>
    private static readonly TimeSpan DefaultRateOfChangeWindow = TimeSpan.FromSeconds(1);

    private readonly string _alarmName;
    private readonly string _instanceName;
    private readonly IActorRef _instanceActor;
    private readonly SharedScriptLibrary _sharedScriptLibrary;
    private readonly SiteRuntimeOptions _options;
    private readonly ILogger _logger;

    private AlarmState _currentState = AlarmState.Normal;
    private readonly AlarmTriggerType _triggerType;
    private readonly AlarmEvalConfig _evalConfig;
    private readonly int _priority;
    private readonly string? _onTriggerScriptName;
    private readonly Script<object?>? _onTriggerCompiledScript;

    // Rate of change tracking: samples ordered oldest → newest, trimmed to the window on every update.
    private readonly Queue<(DateTimeOffset Timestamp, double Value)> _rateOfChangeWindow = new();
    private readonly TimeSpan _rateOfChangeWindowDuration;

    // Monotonic suffix that keeps child execution actor names unique.
    private int _executionCounter;

    /// <summary>
    /// Creates the alarm actor and registers its message handlers.
    /// </summary>
    /// <param name="alarmName">Alarm name; used in logs, notifications and child actor names.</param>
    /// <param name="instanceName">Name of the owning instance; used in logs and notifications.</param>
    /// <param name="instanceActor">Actor that receives <see cref="AlarmStateChanged"/> notifications.</param>
    /// <param name="alarmConfig">Resolved alarm definition (priority, trigger type, trigger configuration JSON, script name).</param>
    /// <param name="onTriggerCompiledScript">Compiled on-trigger script, or null when the alarm has none.</param>
    /// <param name="sharedScriptLibrary">Passed through to spawned AlarmExecutionActors.</param>
    /// <param name="options">Passed through to spawned AlarmExecutionActors.</param>
    /// <param name="logger">Logger used by this actor and handed to spawned AlarmExecutionActors.</param>
    public AlarmActor(
        string alarmName,
        string instanceName,
        IActorRef instanceActor,
        ResolvedAlarm alarmConfig,
        Script<object?>? onTriggerCompiledScript,
        SharedScriptLibrary sharedScriptLibrary,
        SiteRuntimeOptions options,
        ILogger logger)
    {
        _alarmName = alarmName;
        _instanceName = instanceName;
        _instanceActor = instanceActor;
        _sharedScriptLibrary = sharedScriptLibrary;
        _options = options;
        _logger = logger;
        _priority = alarmConfig.PriorityLevel;
        _onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
        _onTriggerCompiledScript = onTriggerCompiledScript;

        // Parse trigger type. An unrecognized value still falls back to ValueMatch, but the
        // fallback is logged so a misconfigured alarm does not go unnoticed.
        if (Enum.TryParse<AlarmTriggerType>(alarmConfig.TriggerType, true, out var parsedTriggerType))
        {
            _triggerType = parsedTriggerType;
        }
        else
        {
            _triggerType = AlarmTriggerType.ValueMatch;
            _logger.LogWarning(
                "Alarm {Alarm} on {Instance} has unrecognized trigger type '{TriggerType}', falling back to ValueMatch",
                _alarmName, _instanceName, alarmConfig.TriggerType);
        }

        // Must run after _triggerType, _alarmName and _logger are assigned: ParseEvalConfig reads all three.
        _evalConfig = ParseEvalConfig(alarmConfig.TriggerConfiguration);
        _rateOfChangeWindowDuration = _evalConfig is RateOfChangeEvalConfig roc
            ? roc.WindowDuration
            : DefaultRateOfChangeWindow;

        // Handle attribute value changes
        Receive<AttributeValueChanged>(HandleAttributeValueChanged);

        // Handle alarm execution completion
        Receive<AlarmExecutionCompleted>(_ =>
            _logger.LogDebug("Alarm {Alarm} execution completed on {Instance}", _alarmName, _instanceName));
    }

    /// <summary>Logs the alarm name, instance name and trigger type once the actor has started.</summary>
    protected override void PreStart()
    {
        base.PreStart();
        _logger.LogInformation(
            "AlarmActor {Alarm} started on instance {Instance}, trigger={TriggerType}",
            _alarmName, _instanceName, _triggerType);
    }

    /// <summary>
    /// Supervision of children: any exception thrown by a child (an AlarmExecutionActor)
    /// is logged as a warning and the child is stopped, not restarted.
    /// </summary>
    protected override SupervisorStrategy SupervisorStrategy()
    {
        return new OneForOneStrategy(
            maxNrOfRetries: -1,
            withinTimeRange: TimeSpan.FromMinutes(1),
            decider: Decider.From(ex =>
            {
                _logger.LogWarning(ex,
                    "AlarmExecutionActor for {Alarm} on {Instance} failed, stopping",
                    _alarmName, _instanceName);
                return Directive.Stop;
            }));
    }

    /// <summary>
    /// Evaluates the alarm condition for an attribute change and performs the
    /// Normal → Active or Active → Normal transition when the result flips.
    /// Any exception raised while evaluating or notifying is logged and swallowed
    /// so the actor keeps processing later messages.
    /// </summary>
    /// <param name="changed">The attribute change notification.</param>
    private void HandleAttributeValueChanged(AttributeValueChanged changed)
    {
        // Only evaluate if this change is for the attribute we're monitoring
        if (!IsMonitoredAttribute(changed.AttributeName))
            return;

        try
        {
            var isTriggered = _triggerType switch
            {
                AlarmTriggerType.ValueMatch => EvaluateValueMatch(changed.Value),
                AlarmTriggerType.RangeViolation => EvaluateRangeViolation(changed.Value),
                AlarmTriggerType.RateOfChange => EvaluateRateOfChange(changed.Value, changed.Timestamp),
                _ => false
            };

            if (isTriggered && _currentState == AlarmState.Normal)
            {
                // Transition: Normal → Active
                _currentState = AlarmState.Active;
                _logger.LogInformation(
                    "Alarm {Alarm} ACTIVATED on instance {Instance}",
                    _alarmName, _instanceName);

                NotifyStateChanged(AlarmState.Active);

                // Spawn AlarmExecutionActor if an on-trigger script is defined
                if (_onTriggerCompiledScript is not null)
                {
                    SpawnAlarmExecution();
                }
            }
            else if (!isTriggered && _currentState == AlarmState.Active)
            {
                // Transition: Active → Normal (no script on clear)
                _currentState = AlarmState.Normal;
                _logger.LogInformation(
                    "Alarm {Alarm} CLEARED on instance {Instance}",
                    _alarmName, _instanceName);

                NotifyStateChanged(AlarmState.Normal);
            }
        }
        catch (Exception ex)
        {
            // Alarm evaluation errors logged, actor continues
            _logger.LogError(ex,
                "Alarm {Alarm} evaluation error on {Instance}",
                _alarmName, _instanceName);
        }
    }

    /// <summary>
    /// Tells the instance actor that this alarm entered <paramref name="state"/>,
    /// stamped with the current UTC time.
    /// </summary>
    private void NotifyStateChanged(AlarmState state)
    {
        var alarmChanged = new AlarmStateChanged(
            _instanceName, _alarmName, state, _priority, DateTimeOffset.UtcNow);
        _instanceActor.Tell(alarmChanged);
    }

    /// <summary>Returns true when <paramref name="attributeName"/> is the attribute named in the trigger configuration.</summary>
    private bool IsMonitoredAttribute(string attributeName)
    {
        return _evalConfig.MonitoredAttributeName == attributeName;
    }

    /// <summary>
    /// ValueMatch condition: true when the value's text form equals the configured match value
    /// (ordinal comparison). A null value matches only a null match value.
    /// </summary>
    private bool EvaluateValueMatch(object? value)
    {
        if (_evalConfig is not ValueMatchEvalConfig config) return false;
        if (value is null) return config.MatchValue is null;

        // Format with the invariant culture so numeric values render the same ("1.5", never "1,5")
        // regardless of the host's regional settings.
        var text = Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture);
        return string.Equals(text, config.MatchValue, StringComparison.Ordinal);
    }

    /// <summary>
    /// RangeViolation condition: true when the numeric value is strictly below Min or strictly above Max.
    /// Null or non-numeric values never trigger.
    /// </summary>
    private bool EvaluateRangeViolation(object? value)
    {
        if (_evalConfig is not RangeViolationEvalConfig config) return false;
        if (!TryConvertToDouble(value, out var numericValue)) return false;

        return numericValue < config.Min || numericValue > config.Max;
    }

    /// <summary>
    /// RateOfChange condition: records the sample, drops samples older than the window, then compares
    /// |newest - oldest| / elapsed seconds against the configured threshold.
    /// Returns false for null or non-numeric values, when fewer than two samples remain,
    /// or when no time has elapsed between the oldest and newest sample.
    /// </summary>
    private bool EvaluateRateOfChange(object? value, DateTimeOffset timestamp)
    {
        if (_evalConfig is not RateOfChangeEvalConfig config) return false;
        if (!TryConvertToDouble(value, out var numericValue)) return false;

        // Add to window
        _rateOfChangeWindow.Enqueue((timestamp, numericValue));

        // Remove old entries outside the window
        var cutoff = timestamp - _rateOfChangeWindowDuration;
        while (_rateOfChangeWindow.Count > 0 && _rateOfChangeWindow.Peek().Timestamp < cutoff)
        {
            _rateOfChangeWindow.Dequeue();
        }

        if (_rateOfChangeWindow.Count < 2) return false;

        var oldest = _rateOfChangeWindow.Peek();
        var timeDelta = (timestamp - oldest.Timestamp).TotalSeconds;
        if (timeDelta <= 0) return false; // identical or out-of-order timestamps: rate is undefined

        var rate = Math.Abs(numericValue - oldest.Value) / timeDelta;
        return rate > config.ThresholdPerSecond;
    }

    /// <summary>
    /// Converts an attribute value to a double using the invariant culture.
    /// Returns false for null and for values that cannot be converted (wrong type, malformed text,
    /// out of range). Any other exception is left to propagate to the caller's logging handler
    /// instead of being silently discarded.
    /// </summary>
    private static bool TryConvertToDouble(object? value, out double result)
    {
        result = 0;
        if (value is null) return false;

        try
        {
            result = Convert.ToDouble(value, System.Globalization.CultureInfo.InvariantCulture);
            return true;
        }
        catch (Exception ex) when (ex is FormatException or InvalidCastException or OverflowException)
        {
            return false;
        }
    }

    /// <summary>
    /// Spawns an AlarmExecutionActor child to run the on-trigger script.
    /// Does nothing when no compiled script is available.
    /// </summary>
    private void SpawnAlarmExecution()
    {
        if (_onTriggerCompiledScript is null) return;

        // NOTE(review): the alarm name becomes part of the child actor name; presumably alarm names
        // are restricted upstream to characters valid in an actor path — confirm.
        var executionId = $"{_alarmName}-alarm-exec-{_executionCounter++}";

        // NOTE: In production, configure a dedicated blocking I/O dispatcher via HOCON.
        var props = Props.Create(() => new AlarmExecutionActor(
            _alarmName,
            _instanceName,
            _onTriggerCompiledScript,
            _instanceActor,
            _sharedScriptLibrary,
            _options,
            _logger));

        Context.ActorOf(props, executionId);
    }

    /// <summary>
    /// Builds the evaluation config for the current trigger type from the trigger configuration JSON.
    /// Missing properties take defaults (min = double.MinValue, max = double.MaxValue,
    /// thresholdPerSecond = 10, windowSeconds = 1). A null/empty or unparsable configuration yields a
    /// ValueMatch config with an empty attribute name and null match value; parse failures are logged.
    /// </summary>
    /// <param name="triggerConfigJson">JSON object text, or null/empty when no configuration exists.</param>
    private AlarmEvalConfig ParseEvalConfig(string? triggerConfigJson)
    {
        if (string.IsNullOrEmpty(triggerConfigJson))
            return new ValueMatchEvalConfig("", null);

        try
        {
            // JsonDocument is IDisposable (it rents pooled buffers); every value read below is
            // copied out as a string or double before the document is disposed.
            using var doc = JsonDocument.Parse(triggerConfigJson);
            var root = doc.RootElement;

            var attr = root.TryGetProperty("attributeName", out var attrEl)
                ? attrEl.GetString() ?? "" : "";

            return _triggerType switch
            {
                AlarmTriggerType.ValueMatch => new ValueMatchEvalConfig(
                    attr,
                    ReadMatchValue(root)),

                AlarmTriggerType.RangeViolation => new RangeViolationEvalConfig(
                    attr,
                    root.TryGetProperty("min", out var minEl) ? minEl.GetDouble() : double.MinValue,
                    root.TryGetProperty("max", out var maxEl) ? maxEl.GetDouble() : double.MaxValue),

                AlarmTriggerType.RateOfChange => new RateOfChangeEvalConfig(
                    attr,
                    root.TryGetProperty("thresholdPerSecond", out var tps) ? tps.GetDouble() : DefaultThresholdPerSecond,
                    root.TryGetProperty("windowSeconds", out var ws)
                        ? TimeSpan.FromSeconds(ws.GetDouble())
                        : DefaultRateOfChangeWindow),

                _ => new ValueMatchEvalConfig(attr, null)
            };
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse alarm trigger config for {Alarm}", _alarmName);
            return new ValueMatchEvalConfig("", null);
        }
    }

    /// <summary>
    /// Reads the optional "matchValue" property as the text that <see cref="EvaluateValueMatch"/> compares against.
    /// JSON strings and null are returned as-is. JSON numbers and booleans are rendered the way the
    /// corresponding .NET value is rendered with the invariant culture (e.g. 5.0 → "5", true → "True"),
    /// so a config author who writes an unquoted literal no longer invalidates the whole configuration.
    /// Objects and arrays are rejected with an exception, which the caller logs.
    /// </summary>
    private static string? ReadMatchValue(JsonElement root)
    {
        if (!root.TryGetProperty("matchValue", out var matchEl)) return null;

        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        return matchEl.ValueKind switch
        {
            JsonValueKind.String => matchEl.GetString(),
            JsonValueKind.Null => null,
            JsonValueKind.True => bool.TrueString,
            JsonValueKind.False => bool.FalseString,
            JsonValueKind.Number => matchEl.TryGetInt64(out var whole)
                ? whole.ToString(invariant)
                : matchEl.GetDouble().ToString(invariant),
            _ => throw new InvalidOperationException(
                $"Unsupported matchValue JSON kind '{matchEl.ValueKind}'.")
        };
    }

    // ── Internal messages ──

    /// <summary>Message handled by this actor to note that an on-trigger script execution finished.</summary>
    internal record AlarmExecutionCompleted(string AlarmName, bool Success);
}
|
||||
|
||||
// ── Alarm evaluation config types ──
|
||||
/// <summary>
/// Base type for parsed alarm trigger configurations.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes the alarm evaluates.</param>
internal abstract record AlarmEvalConfig(
    string MonitoredAttributeName);

/// <summary>
/// Configuration for a ValueMatch alarm.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes the alarm evaluates.</param>
/// <param name="MatchValue">Text the attribute value is compared against; may be null.</param>
internal record ValueMatchEvalConfig(
    string MonitoredAttributeName,
    string? MatchValue)
    : AlarmEvalConfig(MonitoredAttributeName);

/// <summary>
/// Configuration for a RangeViolation alarm.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes the alarm evaluates.</param>
/// <param name="Min">Lower bound of the permitted range.</param>
/// <param name="Max">Upper bound of the permitted range.</param>
internal record RangeViolationEvalConfig(
    string MonitoredAttributeName,
    double Min,
    double Max)
    : AlarmEvalConfig(MonitoredAttributeName);

/// <summary>
/// Configuration for a RateOfChange alarm.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes the alarm evaluates.</param>
/// <param name="ThresholdPerSecond">Rate, in value units per second, that the alarm compares against.</param>
/// <param name="WindowDuration">Length of the sliding window of samples used to compute the rate.</param>
internal record RateOfChangeEvalConfig(
    string MonitoredAttributeName,
    double ThresholdPerSecond,
    TimeSpan WindowDuration)
    : AlarmEvalConfig(MonitoredAttributeName);
|
||||
Reference in New Issue
Block a user