Phase 3B: Site I/O & Observability — Communication, DCL, Script/Alarm actors, Health, Event Logging

Communication Layer (WP-1–5):
- 8 message patterns with correlation IDs, per-pattern timeouts
- Central/Site communication actors, transport heartbeat config
- Connection failure handling (no central buffering, debug streams killed)

Data Connection Layer (WP-6–14, WP-34):
- Connection actor with Become/Stash lifecycle (Connecting/Connected/Reconnecting)
- OPC UA + LmxProxy adapters behind IDataConnection
- Auto-reconnect, bad quality propagation, transparent re-subscribe
- Write-back, tag path resolution with retry, health reporting
- Protocol extensibility via DataConnectionFactory

Site Runtime (WP-15–25, WP-32–33):
- ScriptActor/ScriptExecutionActor (triggers, concurrent execution, blocking I/O dispatcher)
- AlarmActor/AlarmExecutionActor (ValueMatch/RangeViolation/RateOfChange, in-memory state)
- SharedScriptLibrary (inline execution), ScriptRuntimeContext (API)
- ScriptCompilationService (Roslyn, forbidden API enforcement, execution timeout)
- Recursion limit (default 10), call direction enforcement
- SiteStreamManager (per-subscriber bounded buffers, fire-and-forget)
- Debug view backend (snapshot + stream), concurrency serialization
- Local artifact storage (4 SQLite tables)

Health Monitoring (WP-26–28):
- SiteHealthCollector (thread-safe counters, connection state)
- HealthReportSender (30s interval, monotonic sequence numbers)
- CentralHealthAggregator (offline detection 60s, online recovery)

Site Event Logging (WP-29–31):
- SiteEventLogger (SQLite, 6 event categories, ISO 8601 UTC)
- EventLogPurgeService (30-day retention, 1GB cap)
- EventLogQueryService (filters, keyword search, keyset pagination)

541 tests pass, zero warnings.
This commit is contained in:
Joseph Doherty
2026-03-16 20:57:25 -04:00
parent a3bf0c43f3
commit 389f5a0378
97 changed files with 8308 additions and 127 deletions

View File

@@ -1,9 +1,15 @@
using Akka.Actor;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Messages.DebugView;
using ScadaLink.Commons.Messages.Instance;
using ScadaLink.Commons.Messages.Lifecycle;
using ScadaLink.Commons.Messages.ScriptExecution;
using ScadaLink.Commons.Messages.Streaming;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.Commons.Types.Flattening;
using ScadaLink.SiteRuntime.Persistence;
using ScadaLink.SiteRuntime.Scripts;
using ScadaLink.SiteRuntime.Streaming;
using System.Text.Json;
namespace ScadaLink.SiteRuntime.Actors;
@@ -13,24 +19,48 @@ namespace ScadaLink.SiteRuntime.Actors;
/// (loaded from FlattenedConfiguration + static overrides from SQLite).
///
/// The Instance Actor is the single source of truth for runtime instance state.
/// All state mutations are serialized through the actor mailbox.
/// WP-24: All state mutations are serialized through the actor mailbox.
/// Multiple Script Execution Actors run concurrently; their state mutations are routed through this actor.
///
/// WP-15/16: Creates child Script Actors and Alarm Actors on startup.
/// WP-22: Tell for tag value updates, attribute notifications, stream publishing.
/// Ask for CallScript, debug snapshot.
/// WP-25: Debug view backend — snapshot + stream subscription.
/// </summary>
public class InstanceActor : ReceiveActor
{
// Unique name of the instance this actor represents; used in log messages and outgoing messages.
private readonly string _instanceUniqueName;
// Site storage used to persist static attribute overrides.
private readonly SiteStorageService _storage;
// Compiles script code when child Script/Alarm actors are created.
private readonly ScriptCompilationService _compilationService;
// Handed to every child Script Actor and Alarm Actor at creation.
private readonly SharedScriptLibrary _sharedScriptLibrary;
// Optional site-wide stream; all publish/subscribe calls are skipped when this is null.
private readonly SiteStreamManager? _streamManager;
// Runtime options, handed to every child Script Actor and Alarm Actor at creation.
private readonly SiteRuntimeOptions _options;
private readonly ILogger _logger;
// Current attribute values, keyed by attribute name.
private readonly Dictionary<string, object?> _attributes = new();
// Last state reported by each Alarm Actor, keyed by alarm name.
private readonly Dictionary<string, AlarmState> _alarmStates = new();
// Child Script Actors, keyed by script canonical name.
private readonly Dictionary<string, IActorRef> _scriptActors = new();
// Child Alarm Actors, keyed by alarm canonical name.
private readonly Dictionary<string, IActorRef> _alarmActors = new();
// Flattened configuration for this instance; child actor creation is skipped while this is null.
private FlattenedConfiguration? _configuration;
// WP-25: Debug view subscribers, keyed by the correlation id of their subscribe request.
private readonly Dictionary<string, IActorRef> _debugSubscribers = new();
public InstanceActor(
string instanceUniqueName,
string configJson,
SiteStorageService storage,
ScriptCompilationService compilationService,
SharedScriptLibrary sharedScriptLibrary,
SiteStreamManager? streamManager,
SiteRuntimeOptions options,
ILogger logger)
{
_instanceUniqueName = instanceUniqueName;
_storage = storage;
_compilationService = compilationService;
_sharedScriptLibrary = sharedScriptLibrary;
_streamManager = streamManager;
_options = options;
_logger = logger;
// Deserialize the flattened configuration
@@ -45,7 +75,7 @@ public class InstanceActor : ReceiveActor
}
}
// Handle attribute queries (Tell pattern sender gets response)
// Handle attribute queries (Tell pattern -- sender gets response)
Receive<GetAttributeRequest>(HandleGetAttribute);
// Handle static attribute writes
@@ -55,7 +85,6 @@ public class InstanceActor : ReceiveActor
Receive<DisableInstanceCommand>(_ =>
{
_logger.LogInformation("Instance {Instance} received disable command", _instanceUniqueName);
// Disable handled by parent DeploymentManagerActor
Sender.Tell(new InstanceLifecycleResponse(
_.CommandId, _instanceUniqueName, true, null, DateTimeOffset.UtcNow));
});
@@ -67,6 +96,19 @@ public class InstanceActor : ReceiveActor
_.CommandId, _instanceUniqueName, true, null, DateTimeOffset.UtcNow));
});
// WP-15: Handle script call requests — route to appropriate Script Actor (Ask pattern)
Receive<ScriptCallRequest>(HandleScriptCallRequest);
// WP-22/23: Handle attribute value changes from DCL (Tell pattern)
Receive<AttributeValueChanged>(HandleAttributeValueChanged);
// WP-16: Handle alarm state changes from Alarm Actors (Tell pattern)
Receive<AlarmStateChanged>(HandleAlarmStateChanged);
// WP-25: Debug view subscribe/unsubscribe (Ask pattern for snapshot)
Receive<SubscribeDebugViewRequest>(HandleSubscribeDebugView);
Receive<UnsubscribeDebugViewRequest>(HandleUnsubscribeDebugView);
// Handle internal messages
Receive<LoadOverridesResult>(HandleOverridesLoaded);
}
@@ -84,6 +126,26 @@ public class InstanceActor : ReceiveActor
return new LoadOverridesResult(t.Result, null);
return new LoadOverridesResult(new Dictionary<string, string>(), t.Exception?.GetBaseException().Message);
}).PipeTo(self);
// Create child Script Actors and Alarm Actors from configuration
CreateChildActors();
}
/// <summary>
/// Supervision policy for the child coordinator actors (Script/Alarm Actors).
/// Every child failure is logged as a warning and answered with
/// <see cref="Directive.Resume"/>, so that the coordinators keep their state.
/// </summary>
protected override SupervisorStrategy SupervisorStrategy()
{
    // Single decision rule: log the failure, then resume the failing child.
    Directive ResumeAfterLogging(Exception failure)
    {
        _logger.LogWarning(failure,
            "Child actor on instance {Instance} threw exception, resuming",
            _instanceUniqueName);
        return Directive.Resume;
    }

    var resumeAlways = Decider.From(ResumeAfterLogging);

    return new OneForOneStrategy(
        maxNrOfRetries: -1,
        withinTimeRange: TimeSpan.FromMinutes(1),
        decider: resumeAlways);
}
/// <summary>
@@ -103,12 +165,24 @@ public class InstanceActor : ReceiveActor
/// <summary>
/// Updates a static attribute in memory and persists the override to SQLite.
/// WP-24: State mutation serialized through this actor's mailbox.
/// </summary>
private void HandleSetStaticAttribute(SetStaticAttributeCommand command)
{
_attributes[command.AttributeName] = command.Value;
// Persist asynchronously — fire and forget since the actor is the source of truth
// Publish attribute change to stream (WP-23) and notify children
var changed = new AttributeValueChanged(
_instanceUniqueName,
command.AttributeName,
command.AttributeName,
command.Value,
"Good",
DateTimeOffset.UtcNow);
PublishAndNotifyChildren(changed);
// Persist asynchronously -- fire and forget since the actor is the source of truth
var self = Self;
var sender = Sender;
_storage.SetStaticOverrideAsync(_instanceUniqueName, command.AttributeName, command.Value)
@@ -131,6 +205,138 @@ public class InstanceActor : ReceiveActor
}).PipeTo(sender);
}
/// <summary>
/// WP-15: Dispatches a script call to the child Script Actor registered under the
/// requested script name (Ask pattern, WP-22). When no such Script Actor exists the
/// sender immediately receives a failed <see cref="ScriptCallResult"/>.
/// </summary>
private void HandleScriptCallRequest(ScriptCallRequest request)
{
    if (!_scriptActors.TryGetValue(request.ScriptName, out var target))
    {
        // Unknown script: reply with a failure carrying the caller's correlation id.
        var notFound = new ScriptCallResult(
            request.CorrelationId,
            false,
            null,
            $"Script '{request.ScriptName}' not found on instance '{_instanceUniqueName}'.");
        Sender.Tell(notFound);
        return;
    }

    // Forward rather than Tell so the Script Actor sees the original caller as sender.
    target.Forward(request);
}
/// <summary>
/// WP-22/23: Applies an attribute value change (from DCL or a static write).
/// Stores the new value under the attribute's name, then publishes the change to the
/// stream and to the child actors via <see cref="PublishAndNotifyChildren"/>.
/// </summary>
private void HandleAttributeValueChanged(AttributeValueChanged changed)
{
    var attributeName = changed.AttributeName;
    var latestValue = changed.Value;

    // WP-24: the in-memory state is only ever mutated from this actor's message handlers.
    _attributes[attributeName] = latestValue;

    PublishAndNotifyChildren(changed);
}
/// <summary>
/// WP-16: Records an alarm state change reported by an Alarm Actor.
/// The new state replaces the stored one for that alarm name, is published to the
/// site-wide stream (when a stream manager is present) and is relayed to every
/// registered debug view subscriber.
/// </summary>
private void HandleAlarmStateChanged(AlarmStateChanged changed)
{
    _alarmStates[changed.AlarmName] = changed.State;

    // WP-23: site-wide stream publication is optional.
    if (_streamManager is not null)
    {
        _streamManager.PublishAlarmStateChanged(changed);
    }

    // Relay to debug view subscribers.
    foreach (var (_, subscriber) in _debugSubscribers)
    {
        subscriber.Tell(changed);
    }
}
/// <summary>
/// WP-25: Debug view subscribe. Registers the sender under the request's correlation id,
/// replies with a <see cref="DebugViewSnapshot"/> of the current attribute values and
/// alarm states, and subscribes the sender to the stream manager (when present).
/// </summary>
/// <param name="request">Subscribe request; its correlation id becomes the subscription id.</param>
private void HandleSubscribeDebugView(SubscribeDebugViewRequest request)
{
    // Capture the sender once so the registration, the snapshot reply and the stream
    // subscription are guaranteed to target the same actor reference.
    var subscriber = Sender;
    var subscriptionId = request.CorrelationId;
    _debugSubscribers[subscriptionId] = subscriber;

    // Sample the clock once: every item in the snapshot and the snapshot itself carry the
    // same timestamp, so the snapshot is internally consistent.
    var capturedAt = DateTimeOffset.UtcNow;

    // Build snapshot from current state.
    // NOTE(review): only values are stored in _attributes, so every attribute is reported
    // with quality "Good" regardless of the quality it was last received with — confirm
    // this is acceptable for the debug view.
    var attributeValues = _attributes.Select(kvp => new AttributeValueChanged(
        _instanceUniqueName,
        kvp.Key,
        kvp.Key,
        kvp.Value,
        "Good",
        capturedAt)).ToList();

    var alarmStates = _alarmStates.Select(kvp => new AlarmStateChanged(
        _instanceUniqueName,
        kvp.Key,
        kvp.Value,
        0, // Priority not tracked in _alarmStates; would need separate tracking
        capturedAt)).ToList();

    var snapshot = new DebugViewSnapshot(
        _instanceUniqueName,
        attributeValues,
        alarmStates,
        capturedAt);

    subscriber.Tell(snapshot);

    // Also register with stream manager for filtered events.
    // NOTE(review): the subscriber is also kept in _debugSubscribers and is told about
    // changes directly by this actor; if the stream manager delivers the same events,
    // the subscriber may see each change twice — verify against SiteStreamManager.
    _streamManager?.Subscribe(_instanceUniqueName, subscriber);

    _logger.LogDebug(
        "Debug view subscriber added for {Instance}, subscriptionId={Id}",
        _instanceUniqueName, subscriptionId);
}
/// <summary>
/// WP-25: Debug view unsubscribe. Drops the subscription stored under the request's
/// correlation id and removes the corresponding actor from the stream manager.
/// </summary>
/// <param name="request">Unsubscribe request; its correlation id identifies the subscription.</param>
private void HandleUnsubscribeDebugView(UnsubscribeDebugViewRequest request)
{
    // Remove the actor that was registered at subscribe time rather than whoever sent
    // this message: the two can differ (e.g. an Ask uses a temporary sender), and removing
    // the wrong reference would leave the real subscriber attached to the stream.
    // For an unknown correlation id, fall back to the current sender.
    var subscriber = _debugSubscribers.Remove(request.CorrelationId, out var registered)
        ? registered
        : Sender;

    _streamManager?.RemoveSubscriber(subscriber);

    _logger.LogDebug(
        "Debug view subscriber removed for {Instance}, correlationId={Id}",
        _instanceUniqueName, request.CorrelationId);
}
/// <summary>
/// Fans an attribute change out to every interested party: the site-wide stream
/// (WP-23, when a stream manager is present), then the Script Actors, the Alarm Actors
/// and finally the debug view subscribers.
/// WP-22: all notifications use Tell (fire-and-forget).
/// </summary>
private void PublishAndNotifyChildren(AttributeValueChanged changed)
{
    _streamManager?.PublishAttributeValueChanged(changed);

    // Recipient groups in delivery order: scripts (value-change / conditional triggers),
    // alarms (alarm evaluation), debug subscribers.
    var recipientGroups = new[]
    {
        _scriptActors.Values,
        _alarmActors.Values,
        _debugSubscribers.Values,
    };

    foreach (var group in recipientGroups)
    {
        foreach (var recipient in group)
        {
            recipient.Tell(changed);
        }
    }
}
/// <summary>
/// Applies static overrides loaded from SQLite on top of default values.
/// </summary>
@@ -154,11 +360,105 @@ public class InstanceActor : ReceiveActor
result.Overrides.Count, _instanceUniqueName);
}
/// <summary>
/// Creates child Script Actors and Alarm Actors from the flattened configuration.
/// Does nothing when no configuration is available.
/// WP-15: Script Actors spawned per script definition.
/// WP-16: Alarm Actors spawned per alarm definition, as peers to Script Actors.
/// WP-32: A script that fails to compile is logged and skipped; this actor and the
/// remaining children are still created.
/// </summary>
private void CreateChildActors()
{
    if (_configuration == null) return;

    SpawnScriptActors(_configuration);
    SpawnAlarmActors(_configuration);

    _logger.LogInformation(
        "Instance {Instance}: created {Scripts} script actors and {Alarms} alarm actors",
        _instanceUniqueName, _scriptActors.Count, _alarmActors.Count);
}

/// <summary>
/// Compiles every script definition and spawns one Script Actor per successfully compiled script.
/// </summary>
private void SpawnScriptActors(FlattenedConfiguration configuration)
{
    foreach (var script in configuration.Scripts)
    {
        var compilationResult = _compilationService.Compile(script.CanonicalName, script.Code);
        if (!compilationResult.IsSuccess)
        {
            // A broken script must not prevent the other children from starting.
            _logger.LogError(
                "Script '{Script}' on instance '{Instance}' failed to compile: {Errors}",
                script.CanonicalName, _instanceUniqueName,
                string.Join("; ", compilationResult.Errors));
            continue;
        }

        var props = Props.Create(() => new ScriptActor(
            script.CanonicalName,
            _instanceUniqueName,
            Self,
            compilationResult.CompiledScript,
            script,
            _sharedScriptLibrary,
            _options,
            _logger));

        var actorRef = Context.ActorOf(props, $"script-{script.CanonicalName}");
        _scriptActors[script.CanonicalName] = actorRef;
    }
}

/// <summary>
/// Spawns one Alarm Actor per alarm definition, attaching its compiled on-trigger script when available.
/// </summary>
private void SpawnAlarmActors(FlattenedConfiguration configuration)
{
    foreach (var alarm in configuration.Alarms)
    {
        // Null when the alarm has no on-trigger script, or the script is missing or does not compile.
        var onTriggerScript = CompileAlarmTriggerScript(
            configuration, alarm.CanonicalName, alarm.OnTriggerScriptCanonicalName);

        var props = Props.Create(() => new AlarmActor(
            alarm.CanonicalName,
            _instanceUniqueName,
            Self,
            alarm,
            onTriggerScript,
            _sharedScriptLibrary,
            _options,
            _logger));

        var actorRef = Context.ActorOf(props, $"alarm-{alarm.CanonicalName}");
        _alarmActors[alarm.CanonicalName] = actorRef;
    }
}

/// <summary>
/// Looks up and compiles the on-trigger script of an alarm; returns null (after logging a
/// warning where something is wrong) when no usable script is available.
/// </summary>
private Microsoft.CodeAnalysis.Scripting.Script<object?>? CompileAlarmTriggerScript(
    FlattenedConfiguration configuration,
    string alarmName,
    string? triggerScriptName)
{
    // No on-trigger script configured for this alarm: nothing to compile, nothing to report.
    if (string.IsNullOrEmpty(triggerScriptName)) return null;

    var triggerScriptDef = configuration.Scripts
        .FirstOrDefault(s => s.CanonicalName == triggerScriptName);
    if (triggerScriptDef == null)
    {
        // Previously ignored silently; a dangling reference is a configuration problem worth surfacing.
        _logger.LogWarning(
            "Alarm {Alarm} on {Instance} references on-trigger script '{Script}' which is not defined; no on-trigger script attached",
            alarmName, _instanceUniqueName, triggerScriptName);
        return null;
    }

    var result = _compilationService.Compile(
        $"alarm-trigger-{alarmName}", triggerScriptDef.Code);
    if (result.IsSuccess)
    {
        return result.CompiledScript;
    }

    // Include the compiler errors, consistent with the script compilation failure log.
    _logger.LogWarning(
        "Alarm trigger script for {Alarm} on {Instance} failed to compile: {Errors}",
        alarmName, _instanceUniqueName,
        string.Join("; ", result.Errors));
    return null;
}
/// <summary>
/// Number of attributes currently held in memory (read-only; for testing/diagnostics).
/// </summary>
public int AttributeCount
{
    get { return _attributes.Count; }
}
/// <summary>
/// Number of child Script Actors currently registered (read-only; for testing/diagnostics).
/// </summary>
public int ScriptActorCount
{
    get { return _scriptActors.Count; }
}
/// <summary>
/// Number of child Alarm Actors currently registered (read-only; for testing/diagnostics).
/// </summary>
public int AlarmActorCount
{
    get { return _alarmActors.Count; }
}
/// <summary>
/// Internal message for async override loading result.
/// </summary>