fix(site-event-log): record script errors and route queries to the active node
Script execution failures were only written to Serilog, never to the site event log — SiteRuntime did not reference the SiteEventLogging project. ScriptExecutionActor now resolves ISiteEventLogger and emits a 'script'/'Error' event on timeout and exception. The event-log query handler was a per-node actor bound to that node's local SQLite. A ClusterClient query could land on the standby (which records no events) and return nothing. The handler is now a cluster singleton with a proxy, so queries always reach the active node.
This commit is contained in:
@@ -6,6 +6,7 @@ using ScadaLink.Commons.Interfaces.Services;
|
||||
using ScadaLink.Commons.Messages.ScriptExecution;
|
||||
using ScadaLink.Commons.Types;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using ScadaLink.SiteEventLogging;
|
||||
using ScadaLink.SiteRuntime.Scripts;
|
||||
|
||||
namespace ScadaLink.SiteRuntime.Actors;
|
||||
@@ -71,6 +72,9 @@ public class ScriptExecutionActor : ReceiveActor
|
||||
_ = Task.Run(async () =>
|
||||
{
|
||||
IServiceScope? serviceScope = null;
|
||||
// ISiteEventLogger is a singleton; resolve from the root provider so
|
||||
// it is available to the catch blocks regardless of scope state.
|
||||
var siteEventLogger = serviceProvider?.GetService<ISiteEventLogger>();
|
||||
using var cts = new CancellationTokenSource(timeout);
|
||||
try
|
||||
{
|
||||
@@ -125,6 +129,10 @@ public class ScriptExecutionActor : ReceiveActor
|
||||
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' timed out after {timeout.TotalSeconds}s";
|
||||
logger.LogWarning(errorMsg);
|
||||
|
||||
// WP-32: Failures recorded to site event log; script NOT disabled after failure.
|
||||
_ = siteEventLogger?.LogEventAsync(
|
||||
"script", "Error", instanceName, $"ScriptActor:{scriptName}", errorMsg);
|
||||
|
||||
if (!replyTo.IsNobody())
|
||||
{
|
||||
replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg));
|
||||
@@ -135,10 +143,13 @@ public class ScriptExecutionActor : ReceiveActor
|
||||
catch (Exception ex)
|
||||
{
|
||||
healthCollector?.IncrementScriptError();
|
||||
// WP-32: Failures logged to site event log; script NOT disabled after failure
|
||||
// WP-32: Failures recorded to site event log; script NOT disabled after failure.
|
||||
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' failed: {ex.Message}";
|
||||
logger.LogError(ex, "Script execution failed: {Script} on {Instance}", scriptName, instanceName);
|
||||
|
||||
_ = siteEventLogger?.LogEventAsync(
|
||||
"script", "Error", instanceName, $"ScriptActor:{scriptName}", errorMsg, ex.ToString());
|
||||
|
||||
if (!replyTo.IsNobody())
|
||||
{
|
||||
replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg));
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.Communication/ScadaLink.Communication.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.SiteEventLogging/ScadaLink.SiteEventLogging.csproj" />
|
||||
<ProjectReference Include="../ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user