feat: wire all health metrics and add instance counts to dashboard

Wired ISiteHealthCollector calls for script errors (ScriptExecutionActor),
alarm evaluation errors (AlarmActor), and dead letters
(DeadLetterMonitorActor), plus a placeholder for S&F buffer depth. Added
instance count tracking (deployed/enabled/disabled) to SiteHealthReport via
DeploymentManagerActor. Updated the Health Dashboard UI to show instance
counts per site. All metrics flow through the existing health report
pipeline via ClusterClient.
This commit is contained in:
Joseph Doherty
2026-03-18 00:57:49 -04:00
parent 88b5f6cb54
commit f165ca2774
18 changed files with 151 additions and 28 deletions

View File

@@ -2,6 +2,7 @@ using Akka.Actor;
using Microsoft.CodeAnalysis.Scripting;
using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Messages.ScriptExecution;
using ScadaLink.HealthMonitoring;
using ScadaLink.SiteRuntime.Scripts;
namespace ScadaLink.SiteRuntime.Actors;
@@ -28,7 +29,8 @@ public class ScriptExecutionActor : ReceiveActor
SiteRuntimeOptions options,
IActorRef replyTo,
string correlationId,
ILogger logger)
ILogger logger,
ISiteHealthCollector? healthCollector = null)
{
// Immediately begin execution
var self = Self;
@@ -37,7 +39,7 @@ public class ScriptExecutionActor : ReceiveActor
ExecuteScript(
scriptName, instanceName, compiledScript, parameters, callDepth,
instanceActor, sharedScriptLibrary, options, replyTo, correlationId,
self, parent, logger);
self, parent, logger, healthCollector);
}
private static void ExecuteScript(
@@ -53,7 +55,8 @@ public class ScriptExecutionActor : ReceiveActor
string correlationId,
IActorRef self,
IActorRef parent,
ILogger logger)
ILogger logger,
ISiteHealthCollector? healthCollector)
{
var timeout = TimeSpan.FromSeconds(options.ScriptExecutionTimeoutSeconds);
@@ -93,6 +96,7 @@ public class ScriptExecutionActor : ReceiveActor
}
catch (OperationCanceledException)
{
healthCollector?.IncrementScriptError();
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' timed out after {timeout.TotalSeconds}s";
logger.LogWarning(errorMsg);
@@ -105,6 +109,7 @@ public class ScriptExecutionActor : ReceiveActor
}
catch (Exception ex)
{
healthCollector?.IncrementScriptError();
// WP-32: Failures logged to site event log; script NOT disabled after failure
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' failed: {ex.Message}";
logger.LogError(ex, "Script execution failed: {Script} on {Instance}", scriptName, instanceName);