From 17e24ddd2075ac65b2a16c4e62210afda14afcf8 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Fri, 15 May 2026 12:04:59 -0400 Subject: [PATCH] fix(site-event-log): record script errors and route queries to the active node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Script execution failures were only written to Serilog, never to the site event log — SiteRuntime did not reference the SiteEventLogging project. ScriptExecutionActor now resolves ISiteEventLogger and emits a 'script'/'Error' event on timeout and exception. The event-log query handler was a per-node actor bound to that node's local SQLite. A ClusterClient query could land on the standby (which records no events) and return nothing. The handler is now a cluster singleton with a proxy, so queries always reach the active node. --- .../Actors/AkkaHostedService.cs | 25 +++++++++++++++---- .../Actors/ScriptExecutionActor.cs | 13 +++++++++- .../ScadaLink.SiteRuntime.csproj | 1 + 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/ScadaLink.Host/Actors/AkkaHostedService.cs b/src/ScadaLink.Host/Actors/AkkaHostedService.cs index 49de15a..c8aac60 100644 --- a/src/ScadaLink.Host/Actors/AkkaHostedService.cs +++ b/src/ScadaLink.Host/Actors/AkkaHostedService.cs @@ -306,14 +306,29 @@ akka {{ // Register local handlers with SiteCommunicationActor siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.Artifacts, dmProxy)); - // Event log handler — bridges Akka to IEventLogQueryService + // Event log handler — cluster singleton so queries always reach the + // active node. The event log is node-local SQLite and is not + // replicated; only the active node records events. A per-node handler + // would let a ClusterClient query land on the standby and find nothing. 
var eventLogQueryService = _serviceProvider.GetService<IEventLogQueryService>(); if (eventLogQueryService != null) { - var eventLogHandler = _actorSystem.ActorOf( - Props.Create(() => new SiteEventLogging.EventLogHandlerActor(eventLogQueryService)), - "event-log-handler"); - siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.EventLog, eventLogHandler)); + var eventLogSingletonProps = ClusterSingletonManager.Props( + singletonProps: Props.Create(() => new SiteEventLogging.EventLogHandlerActor(eventLogQueryService)), + terminationMessage: PoisonPill.Instance, + settings: ClusterSingletonManagerSettings.Create(_actorSystem) + .WithRole(siteRole) + .WithSingletonName("event-log-handler")); + _actorSystem.ActorOf(eventLogSingletonProps, "event-log-handler-singleton"); + + var eventLogProxyProps = ClusterSingletonProxy.Props( + singletonManagerPath: "/user/event-log-handler-singleton", + settings: ClusterSingletonProxySettings.Create(_actorSystem) + .WithRole(siteRole) + .WithSingletonName("event-log-handler")); + var eventLogProxy = _actorSystem.ActorOf(eventLogProxyProps, "event-log-handler-proxy"); + + siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.EventLog, eventLogProxy)); } // Parked message handler — bridges Akka to StoreAndForwardService diff --git a/src/ScadaLink.SiteRuntime/Actors/ScriptExecutionActor.cs b/src/ScadaLink.SiteRuntime/Actors/ScriptExecutionActor.cs index f969afc..0038252 100644 --- a/src/ScadaLink.SiteRuntime/Actors/ScriptExecutionActor.cs +++ b/src/ScadaLink.SiteRuntime/Actors/ScriptExecutionActor.cs @@ -6,6 +6,7 @@ using ScadaLink.Commons.Interfaces.Services; using ScadaLink.Commons.Messages.ScriptExecution; using ScadaLink.Commons.Types; using ScadaLink.HealthMonitoring; +using ScadaLink.SiteEventLogging; using ScadaLink.SiteRuntime.Scripts; namespace ScadaLink.SiteRuntime.Actors; @@ -71,6 +72,9 @@ public class ScriptExecutionActor : ReceiveActor _ = Task.Run(async () => { IServiceScope? 
serviceScope = null; + // ISiteEventLogger is a singleton; resolve from the root provider so + // it is available to the catch blocks regardless of scope state. + var siteEventLogger = serviceProvider?.GetService<ISiteEventLogger>(); using var cts = new CancellationTokenSource(timeout); try { @@ -125,6 +129,10 @@ public class ScriptExecutionActor : ReceiveActor var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' timed out after {timeout.TotalSeconds}s"; logger.LogWarning(errorMsg); + // WP-32: Failures recorded to site event log; script NOT disabled after failure. + _ = siteEventLogger?.LogEventAsync( + "script", "Error", instanceName, $"ScriptActor:{scriptName}", errorMsg); + if (!replyTo.IsNobody()) { replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg)); @@ -135,10 +143,13 @@ public class ScriptExecutionActor : ReceiveActor catch (Exception ex) { healthCollector?.IncrementScriptError(); - // WP-32: Failures logged to site event log; script NOT disabled after failure + // WP-32: Failures recorded to site event log; script NOT disabled after failure. var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' failed: {ex.Message}"; logger.LogError(ex, "Script execution failed: {Script} on {Instance}", scriptName, instanceName); + _ = siteEventLogger?.LogEventAsync( + "script", "Error", instanceName, $"ScriptActor:{scriptName}", errorMsg, ex.ToString()); + if (!replyTo.IsNobody()) { replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg)); diff --git a/src/ScadaLink.SiteRuntime/ScadaLink.SiteRuntime.csproj b/src/ScadaLink.SiteRuntime/ScadaLink.SiteRuntime.csproj index 8c2c9d5..f54462c 100644 --- a/src/ScadaLink.SiteRuntime/ScadaLink.SiteRuntime.csproj +++ b/src/ScadaLink.SiteRuntime/ScadaLink.SiteRuntime.csproj @@ -24,6 +24,7 @@ +