fix(site-event-log): record script errors and route queries to the active node
Script execution failures were only written to Serilog, never to the site event log, because SiteRuntime did not reference the SiteEventLogging project. ScriptExecutionActor now resolves ISiteEventLogger and emits a 'script'/'Error' event on both timeout and exception.

The event-log query handler was a per-node actor bound to that node's local SQLite database. A ClusterClient query could therefore land on the standby node (which records no events) and return nothing. The handler is now a cluster singleton fronted by a proxy, so queries always reach the active node.
This commit is contained in:
@@ -306,14 +306,29 @@ akka {{
|
|||||||
// Register local handlers with SiteCommunicationActor
|
// Register local handlers with SiteCommunicationActor
|
||||||
siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.Artifacts, dmProxy));
|
siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.Artifacts, dmProxy));
|
||||||
|
|
||||||
// Event log handler — bridges Akka to IEventLogQueryService
|
// Event log handler — cluster singleton so queries always reach the
|
||||||
|
// active node. The event log is node-local SQLite and is not
|
||||||
|
// replicated; only the active node records events. A per-node handler
|
||||||
|
// would let a ClusterClient query land on the standby and find nothing.
|
||||||
var eventLogQueryService = _serviceProvider.GetService<SiteEventLogging.IEventLogQueryService>();
|
var eventLogQueryService = _serviceProvider.GetService<SiteEventLogging.IEventLogQueryService>();
|
||||||
if (eventLogQueryService != null)
|
if (eventLogQueryService != null)
|
||||||
{
|
{
|
||||||
var eventLogHandler = _actorSystem.ActorOf(
|
var eventLogSingletonProps = ClusterSingletonManager.Props(
|
||||||
Props.Create(() => new SiteEventLogging.EventLogHandlerActor(eventLogQueryService)),
|
singletonProps: Props.Create(() => new SiteEventLogging.EventLogHandlerActor(eventLogQueryService)),
|
||||||
"event-log-handler");
|
terminationMessage: PoisonPill.Instance,
|
||||||
siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.EventLog, eventLogHandler));
|
settings: ClusterSingletonManagerSettings.Create(_actorSystem)
|
||||||
|
.WithRole(siteRole)
|
||||||
|
.WithSingletonName("event-log-handler"));
|
||||||
|
_actorSystem.ActorOf(eventLogSingletonProps, "event-log-handler-singleton");
|
||||||
|
|
||||||
|
var eventLogProxyProps = ClusterSingletonProxy.Props(
|
||||||
|
singletonManagerPath: "/user/event-log-handler-singleton",
|
||||||
|
settings: ClusterSingletonProxySettings.Create(_actorSystem)
|
||||||
|
.WithRole(siteRole)
|
||||||
|
.WithSingletonName("event-log-handler"));
|
||||||
|
var eventLogProxy = _actorSystem.ActorOf(eventLogProxyProps, "event-log-handler-proxy");
|
||||||
|
|
||||||
|
siteCommActor.Tell(new RegisterLocalHandler(LocalHandlerType.EventLog, eventLogProxy));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parked message handler — bridges Akka to StoreAndForwardService
|
// Parked message handler — bridges Akka to StoreAndForwardService
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ using ScadaLink.Commons.Interfaces.Services;
|
|||||||
using ScadaLink.Commons.Messages.ScriptExecution;
|
using ScadaLink.Commons.Messages.ScriptExecution;
|
||||||
using ScadaLink.Commons.Types;
|
using ScadaLink.Commons.Types;
|
||||||
using ScadaLink.HealthMonitoring;
|
using ScadaLink.HealthMonitoring;
|
||||||
|
using ScadaLink.SiteEventLogging;
|
||||||
using ScadaLink.SiteRuntime.Scripts;
|
using ScadaLink.SiteRuntime.Scripts;
|
||||||
|
|
||||||
namespace ScadaLink.SiteRuntime.Actors;
|
namespace ScadaLink.SiteRuntime.Actors;
|
||||||
@@ -71,6 +72,9 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
_ = Task.Run(async () =>
|
_ = Task.Run(async () =>
|
||||||
{
|
{
|
||||||
IServiceScope? serviceScope = null;
|
IServiceScope? serviceScope = null;
|
||||||
|
// ISiteEventLogger is a singleton; resolve from the root provider so
|
||||||
|
// it is available to the catch blocks regardless of scope state.
|
||||||
|
var siteEventLogger = serviceProvider?.GetService<ISiteEventLogger>();
|
||||||
using var cts = new CancellationTokenSource(timeout);
|
using var cts = new CancellationTokenSource(timeout);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@@ -125,6 +129,10 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' timed out after {timeout.TotalSeconds}s";
|
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' timed out after {timeout.TotalSeconds}s";
|
||||||
logger.LogWarning(errorMsg);
|
logger.LogWarning(errorMsg);
|
||||||
|
|
||||||
|
// WP-32: Failures recorded to site event log; script NOT disabled after failure.
|
||||||
|
_ = siteEventLogger?.LogEventAsync(
|
||||||
|
"script", "Error", instanceName, $"ScriptActor:{scriptName}", errorMsg);
|
||||||
|
|
||||||
if (!replyTo.IsNobody())
|
if (!replyTo.IsNobody())
|
||||||
{
|
{
|
||||||
replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg));
|
replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg));
|
||||||
@@ -135,10 +143,13 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
healthCollector?.IncrementScriptError();
|
healthCollector?.IncrementScriptError();
|
||||||
// WP-32: Failures logged to site event log; script NOT disabled after failure
|
// WP-32: Failures recorded to site event log; script NOT disabled after failure.
|
||||||
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' failed: {ex.Message}";
|
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' failed: {ex.Message}";
|
||||||
logger.LogError(ex, "Script execution failed: {Script} on {Instance}", scriptName, instanceName);
|
logger.LogError(ex, "Script execution failed: {Script} on {Instance}", scriptName, instanceName);
|
||||||
|
|
||||||
|
_ = siteEventLogger?.LogEventAsync(
|
||||||
|
"script", "Error", instanceName, $"ScriptActor:{scriptName}", errorMsg, ex.ToString());
|
||||||
|
|
||||||
if (!replyTo.IsNobody())
|
if (!replyTo.IsNobody())
|
||||||
{
|
{
|
||||||
replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg));
|
replyTo.Tell(new ScriptCallResult(correlationId, false, null, errorMsg));
|
||||||
|
|||||||
@@ -24,6 +24,7 @@
|
|||||||
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
||||||
<ProjectReference Include="../ScadaLink.Communication/ScadaLink.Communication.csproj" />
|
<ProjectReference Include="../ScadaLink.Communication/ScadaLink.Communication.csproj" />
|
||||||
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
||||||
|
<ProjectReference Include="../ScadaLink.SiteEventLogging/ScadaLink.SiteEventLogging.csproj" />
|
||||||
<ProjectReference Include="../ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
|
<ProjectReference Include="../ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user