Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging

Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
This commit is contained in:
Joseph Doherty
2026-03-16 20:57:25 -04:00
parent a3bf0c43f3
commit 389f5a0378
97 changed files with 8308 additions and 127 deletions

View File

@@ -0,0 +1,305 @@
using Akka.Actor;
using Microsoft.CodeAnalysis.Scripting;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Messages.Streaming;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.Commons.Types.Flattening;
using ScadaLink.SiteRuntime.Scripts;
using System.Text.Json;
namespace ScadaLink.SiteRuntime.Actors;
/// <summary>
/// WP-16: Alarm Actor — coordinator actor, child of Instance Actor, peer to Script Actors.
/// Handles <see cref="AttributeValueChanged"/> notifications and evaluates one alarm
/// condition against the single attribute named in the trigger configuration.
///
/// Evaluates alarm conditions:
/// - ValueMatch: attribute (rendered as invariant-culture text) equals a specific value
/// - RangeViolation: attribute outside min/max range
/// - RateOfChange: attribute rate exceeds threshold (configurable window, default 1 second)
///
/// State (active/normal) is in memory only, NOT persisted.
/// On restart: starts normal, re-evaluates from incoming values.
///
/// WP-21: AlarmExecutionActor CAN call Instance.CallScript() (ask to sibling Script Actor).
/// Instance scripts CANNOT call alarm on-trigger scripts (no Instance.CallAlarmScript API).
///
/// Supervision: child AlarmExecutionActors are stopped on exception (see
/// <see cref="SupervisorStrategy"/>). Evaluation errors inside this actor are caught and
/// logged so the actor keeps running; how the parent supervises this actor is configured
/// outside this class.
/// </summary>
public class AlarmActor : ReceiveActor
{
    /// <summary>Sliding-window length used when the configuration supplies no usable value.</summary>
    private static readonly TimeSpan DefaultRateOfChangeWindow = TimeSpan.FromSeconds(1);

    /// <summary>
    /// Format provider for converting attribute values. Tag values and trigger configuration
    /// are machine data, so conversions must not depend on the host's current culture.
    /// Fully qualified so that no additional using directive is required.
    /// </summary>
    private static readonly IFormatProvider InvariantFormat =
        System.Globalization.CultureInfo.InvariantCulture;

    private readonly string _alarmName;
    private readonly string _instanceName;
    private readonly IActorRef _instanceActor;
    private readonly SharedScriptLibrary _sharedScriptLibrary;
    private readonly SiteRuntimeOptions _options;
    private readonly ILogger _logger;
    private AlarmState _currentState = AlarmState.Normal;
    private readonly AlarmTriggerType _triggerType;
    private readonly AlarmEvalConfig _evalConfig;
    private readonly int _priority;
    private readonly string? _onTriggerScriptName;
    private readonly Script<object?>? _onTriggerCompiledScript;

    // Rate of change tracking: samples inside the sliding window, oldest first.
    private readonly Queue<(DateTimeOffset Timestamp, double Value)> _rateOfChangeWindow = new();
    private readonly TimeSpan _rateOfChangeWindowDuration;

    // Monotonic suffix that keeps child execution actor names unique.
    private int _executionCounter;

    /// <summary>
    /// Creates the alarm actor and registers its message handlers.
    /// </summary>
    /// <param name="alarmName">Alarm name used in logs, notifications and child actor names.</param>
    /// <param name="instanceName">Name of the owning instance, used in logs and notifications.</param>
    /// <param name="instanceActor">Actor that receives <see cref="AlarmStateChanged"/> notifications.</param>
    /// <param name="alarmConfig">Resolved alarm definition (priority, trigger type, trigger configuration JSON, on-trigger script name).</param>
    /// <param name="onTriggerCompiledScript">Compiled on-trigger script, or null when the alarm has none.</param>
    /// <param name="sharedScriptLibrary">Passed through to spawned AlarmExecutionActors.</param>
    /// <param name="options">Passed through to spawned AlarmExecutionActors.</param>
    /// <param name="logger">Logger used by this actor and handed to spawned AlarmExecutionActors.</param>
    public AlarmActor(
        string alarmName,
        string instanceName,
        IActorRef instanceActor,
        ResolvedAlarm alarmConfig,
        Script<object?>? onTriggerCompiledScript,
        SharedScriptLibrary sharedScriptLibrary,
        SiteRuntimeOptions options,
        ILogger logger)
    {
        _alarmName = alarmName;
        _instanceName = instanceName;
        _instanceActor = instanceActor;
        _sharedScriptLibrary = sharedScriptLibrary;
        _options = options;
        _logger = logger;
        _priority = alarmConfig.PriorityLevel;
        _onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
        _onTriggerCompiledScript = onTriggerCompiledScript;

        // Parse trigger type (case-insensitive). Unknown values fall back to ValueMatch;
        // a non-blank unknown value is reported so a typo does not silently change behavior.
        if (Enum.TryParse<AlarmTriggerType>(alarmConfig.TriggerType, true, out var parsedTriggerType))
        {
            _triggerType = parsedTriggerType;
        }
        else
        {
            if (!string.IsNullOrWhiteSpace(alarmConfig.TriggerType))
            {
                _logger.LogWarning(
                    "Unrecognized trigger type '{TriggerType}' for alarm {Alarm}; defaulting to ValueMatch",
                    alarmConfig.TriggerType, _alarmName);
            }

            _triggerType = AlarmTriggerType.ValueMatch;
        }

        // Must run after _triggerType, _logger and _alarmName are assigned: ParseEvalConfig reads them.
        _evalConfig = ParseEvalConfig(alarmConfig.TriggerConfiguration);
        _rateOfChangeWindowDuration = _evalConfig is RateOfChangeEvalConfig roc
            ? roc.WindowDuration
            : DefaultRateOfChangeWindow;

        // Handle attribute value changes
        Receive<AttributeValueChanged>(HandleAttributeValueChanged);

        // Handle alarm execution completion
        Receive<AlarmExecutionCompleted>(_ =>
            _logger.LogDebug("Alarm {Alarm} execution completed on {Instance}", _alarmName, _instanceName));
    }

    /// <summary>Logs the actor start together with the effective trigger type.</summary>
    protected override void PreStart()
    {
        base.PreStart();
        _logger.LogInformation(
            "AlarmActor {Alarm} started on instance {Instance}, trigger={TriggerType}",
            _alarmName, _instanceName, _triggerType);
    }

    /// <summary>
    /// Supervision of child AlarmExecutionActors: any exception is logged and the failing
    /// child is stopped (never restarted).
    /// </summary>
    protected override SupervisorStrategy SupervisorStrategy()
    {
        return new OneForOneStrategy(
            maxNrOfRetries: -1,
            withinTimeRange: TimeSpan.FromMinutes(1),
            decider: Decider.From(ex =>
            {
                _logger.LogWarning(ex,
                    "AlarmExecutionActor for {Alarm} on {Instance} failed, stopping",
                    _alarmName, _instanceName);
                return Directive.Stop;
            }));
    }

    /// <summary>
    /// Evaluates the alarm condition on an attribute change and performs the
    /// Normal → Active or Active → Normal transition when the result differs from the
    /// current state. Evaluation errors are logged and the actor continues (does not crash).
    /// </summary>
    private void HandleAttributeValueChanged(AttributeValueChanged changed)
    {
        // Only evaluate if this change is for the attribute we're monitoring
        if (!IsMonitoredAttribute(changed.AttributeName))
        {
            return;
        }

        try
        {
            var isTriggered = _triggerType switch
            {
                AlarmTriggerType.ValueMatch => EvaluateValueMatch(changed.Value),
                AlarmTriggerType.RangeViolation => EvaluateRangeViolation(changed.Value),
                AlarmTriggerType.RateOfChange => EvaluateRateOfChange(changed.Value, changed.Timestamp),
                _ => false
            };

            if (isTriggered && _currentState == AlarmState.Normal)
            {
                // Transition: Normal → Active
                _currentState = AlarmState.Active;
                _logger.LogInformation(
                    "Alarm {Alarm} ACTIVATED on instance {Instance}",
                    _alarmName, _instanceName);

                // Notify Instance Actor of alarm state change
                var alarmChanged = new AlarmStateChanged(
                    _instanceName, _alarmName, AlarmState.Active, _priority, DateTimeOffset.UtcNow);
                _instanceActor.Tell(alarmChanged);

                // Spawn AlarmExecutionActor if on-trigger script defined
                if (_onTriggerCompiledScript != null)
                {
                    SpawnAlarmExecution();
                }
            }
            else if (!isTriggered && _currentState == AlarmState.Active)
            {
                // Transition: Active → Normal (no script on clear)
                _currentState = AlarmState.Normal;
                _logger.LogInformation(
                    "Alarm {Alarm} CLEARED on instance {Instance}",
                    _alarmName, _instanceName);

                var alarmChanged = new AlarmStateChanged(
                    _instanceName, _alarmName, AlarmState.Normal, _priority, DateTimeOffset.UtcNow);
                _instanceActor.Tell(alarmChanged);
            }
        }
        catch (Exception ex)
        {
            // Alarm evaluation errors logged, actor continues
            _logger.LogError(ex,
                "Alarm {Alarm} evaluation error on {Instance}",
                _alarmName, _instanceName);
        }
    }

    /// <summary>Returns true when <paramref name="attributeName"/> is the configured monitored attribute.</summary>
    private bool IsMonitoredAttribute(string attributeName)
    {
        return _evalConfig.MonitoredAttributeName == attributeName;
    }

    /// <summary>
    /// ValueMatch: a null value matches only a null configured value; otherwise the value's
    /// invariant-culture text is compared ordinally with the configured match value.
    /// </summary>
    private bool EvaluateValueMatch(object? value)
    {
        if (_evalConfig is not ValueMatchEvalConfig config)
        {
            return false;
        }

        if (value == null)
        {
            return config.MatchValue == null;
        }

        // Invariant formatting keeps e.g. 1.5 rendered as "1.5" regardless of host locale.
        var text = Convert.ToString(value, InvariantFormat);
        return string.Equals(text, config.MatchValue, StringComparison.Ordinal);
    }

    /// <summary>
    /// RangeViolation: true when the numeric value is below Min or above Max.
    /// Null or non-numeric values never trigger.
    /// </summary>
    private bool EvaluateRangeViolation(object? value)
    {
        if (_evalConfig is not RangeViolationEvalConfig config)
        {
            return false;
        }

        if (!TryConvertToDouble(value, out var numericValue))
        {
            return false;
        }

        return numericValue < config.Min || numericValue > config.Max;
    }

    /// <summary>
    /// RateOfChange: records the sample, drops samples older than the window, and compares
    /// the absolute change per second between the oldest retained sample and the new one
    /// with the configured threshold. Needs at least two samples with increasing timestamps.
    /// Null or non-numeric values never trigger and are not recorded.
    /// </summary>
    private bool EvaluateRateOfChange(object? value, DateTimeOffset timestamp)
    {
        if (_evalConfig is not RateOfChangeEvalConfig config)
        {
            return false;
        }

        if (!TryConvertToDouble(value, out var numericValue))
        {
            return false;
        }

        // Add to window
        _rateOfChangeWindow.Enqueue((timestamp, numericValue));

        // Remove old entries outside the window
        var cutoff = timestamp - _rateOfChangeWindowDuration;
        while (_rateOfChangeWindow.Count > 0 && _rateOfChangeWindow.Peek().Timestamp < cutoff)
        {
            _rateOfChangeWindow.Dequeue();
        }

        if (_rateOfChangeWindow.Count < 2)
        {
            return false;
        }

        var oldest = _rateOfChangeWindow.Peek();
        var timeDelta = (timestamp - oldest.Timestamp).TotalSeconds;
        if (timeDelta <= 0)
        {
            // Same-instant or out-of-order sample: no meaningful rate.
            return false;
        }

        var rate = Math.Abs(numericValue - oldest.Value) / timeDelta;
        return rate > config.ThresholdPerSecond;
    }

    /// <summary>
    /// Converts an attribute value to a double using the invariant culture.
    /// Returns false for null and for values that cannot be converted; only the
    /// conversion failures are swallowed, any other exception propagates to the caller.
    /// </summary>
    private static bool TryConvertToDouble(object? value, out double numericValue)
    {
        numericValue = 0;
        if (value == null)
        {
            return false;
        }

        try
        {
            numericValue = Convert.ToDouble(value, InvariantFormat);
            return true;
        }
        catch (Exception ex) when (ex is FormatException or InvalidCastException or OverflowException)
        {
            return false;
        }
    }

    /// <summary>
    /// Spawns an AlarmExecutionActor to run the on-trigger script.
    /// No-op when the alarm has no compiled on-trigger script.
    /// </summary>
    private void SpawnAlarmExecution()
    {
        if (_onTriggerCompiledScript == null)
        {
            return;
        }

        var executionId = $"{_alarmName}-alarm-exec-{_executionCounter++}";

        // NOTE: In production, configure a dedicated blocking I/O dispatcher via HOCON.
        var props = Props.Create(() => new AlarmExecutionActor(
            _alarmName,
            _instanceName,
            _onTriggerCompiledScript,
            _instanceActor,
            _sharedScriptLibrary,
            _options,
            _logger));
        Context.ActorOf(props, executionId);
    }

    /// <summary>
    /// Builds the evaluation configuration for the current trigger type from the trigger
    /// configuration JSON. Recognized properties: "attributeName", "matchValue", "min",
    /// "max", "thresholdPerSecond", "windowSeconds". Missing or empty JSON, or JSON that
    /// fails to parse, yields a ValueMatch configuration with an empty attribute name.
    /// </summary>
    private AlarmEvalConfig ParseEvalConfig(string? triggerConfigJson)
    {
        if (string.IsNullOrEmpty(triggerConfigJson))
        {
            return new ValueMatchEvalConfig("", null);
        }

        try
        {
            // JsonDocument rents pooled buffers; dispose it once all values are extracted.
            using var doc = JsonDocument.Parse(triggerConfigJson);
            var root = doc.RootElement;

            var attr = root.TryGetProperty("attributeName", out var attrEl)
                ? attrEl.GetString() ?? "" : "";

            return _triggerType switch
            {
                AlarmTriggerType.ValueMatch => new ValueMatchEvalConfig(
                    attr,
                    root.TryGetProperty("matchValue", out var mv) ? mv.GetString() : null),
                AlarmTriggerType.RangeViolation => new RangeViolationEvalConfig(
                    attr,
                    root.TryGetProperty("min", out var minEl) ? minEl.GetDouble() : double.MinValue,
                    root.TryGetProperty("max", out var maxEl) ? maxEl.GetDouble() : double.MaxValue),
                AlarmTriggerType.RateOfChange => new RateOfChangeEvalConfig(
                    attr,
                    root.TryGetProperty("thresholdPerSecond", out var tps) ? tps.GetDouble() : 10.0,
                    ReadWindowDuration(root)),
                _ => new ValueMatchEvalConfig(attr, null)
            };
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "Failed to parse alarm trigger config for {Alarm}", _alarmName);
            return new ValueMatchEvalConfig("", null);
        }
    }

    /// <summary>
    /// Reads "windowSeconds" from the trigger configuration. A missing property yields the
    /// default window. A zero or negative value would prune the window to fewer than two
    /// samples on every update (the alarm could never fire), so it is replaced by the
    /// default and reported.
    /// </summary>
    private TimeSpan ReadWindowDuration(JsonElement root)
    {
        if (!root.TryGetProperty("windowSeconds", out var windowEl))
        {
            return DefaultRateOfChangeWindow;
        }

        var seconds = windowEl.GetDouble();
        if (seconds > 0)
        {
            return TimeSpan.FromSeconds(seconds);
        }

        _logger.LogWarning(
            "Alarm {Alarm} has non-positive rate-of-change window ({WindowSeconds}s); using default of {DefaultSeconds}s",
            _alarmName, seconds, DefaultRateOfChangeWindow.TotalSeconds);
        return DefaultRateOfChangeWindow;
    }

    // ── Internal messages ──

    /// <summary>Message handled by this actor to log that an alarm script execution finished.</summary>
    internal record AlarmExecutionCompleted(string AlarmName, bool Success);
}
// ── Alarm evaluation config types ──

/// <summary>
/// Base type for parsed alarm trigger configuration.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes are evaluated.</param>
internal abstract record AlarmEvalConfig(
    string MonitoredAttributeName);

/// <summary>
/// Configuration for a value-match condition.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes are evaluated.</param>
/// <param name="MatchValue">Text the attribute value is compared against; may be null.</param>
internal record ValueMatchEvalConfig(
    string MonitoredAttributeName,
    string? MatchValue)
    : AlarmEvalConfig(MonitoredAttributeName);

/// <summary>
/// Configuration for a range-violation condition.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes are evaluated.</param>
/// <param name="Min">Lower bound of the allowed range.</param>
/// <param name="Max">Upper bound of the allowed range.</param>
internal record RangeViolationEvalConfig(
    string MonitoredAttributeName,
    double Min,
    double Max)
    : AlarmEvalConfig(MonitoredAttributeName);

/// <summary>
/// Configuration for a rate-of-change condition.
/// </summary>
/// <param name="MonitoredAttributeName">Name of the attribute whose changes are evaluated.</param>
/// <param name="ThresholdPerSecond">Rate threshold, expressed per second.</param>
/// <param name="WindowDuration">Length of the sliding window of samples.</param>
internal record RateOfChangeEvalConfig(
    string MonitoredAttributeName,
    double ThresholdPerSecond,
    TimeSpan WindowDuration)
    : AlarmEvalConfig(MonitoredAttributeName);