feat: wire all health metrics and add instance counts to dashboard

Wired ISiteHealthCollector calls for script errors (ScriptExecutionActor),
alarm evaluation errors (AlarmActor), and dead letters
(DeadLetterMonitorActor), and added a placeholder for S&F buffer depth.
Added instance count tracking (deployed/enabled/disabled) to
SiteHealthReport via DeploymentManagerActor. Updated the Health Dashboard
UI to show instance counts per site. All metrics flow through the existing
health report pipeline via ClusterClient.
This commit is contained in:
Joseph Doherty
2026-03-18 00:57:49 -04:00
parent 88b5f6cb54
commit f165ca2774
18 changed files with 151 additions and 28 deletions

View File

@@ -4,6 +4,7 @@ using Microsoft.Extensions.Logging;
using ScadaLink.Commons.Messages.Streaming;
using ScadaLink.Commons.Types.Enums;
using ScadaLink.Commons.Types.Flattening;
using ScadaLink.HealthMonitoring;
using ScadaLink.SiteRuntime.Scripts;
using System.Text.Json;
@@ -34,6 +35,7 @@ public class AlarmActor : ReceiveActor
private readonly SharedScriptLibrary _sharedScriptLibrary;
private readonly SiteRuntimeOptions _options;
private readonly ILogger _logger;
private readonly ISiteHealthCollector? _healthCollector;
private AlarmState _currentState = AlarmState.Normal;
private readonly AlarmTriggerType _triggerType;
@@ -56,7 +58,8 @@ public class AlarmActor : ReceiveActor
Script<object?>? onTriggerCompiledScript,
SharedScriptLibrary sharedScriptLibrary,
SiteRuntimeOptions options,
ILogger logger)
ILogger logger,
ISiteHealthCollector? healthCollector = null)
{
_alarmName = alarmName;
_instanceName = instanceName;
@@ -64,6 +67,7 @@ public class AlarmActor : ReceiveActor
_sharedScriptLibrary = sharedScriptLibrary;
_options = options;
_logger = logger;
_healthCollector = healthCollector;
_priority = alarmConfig.PriorityLevel;
_onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
_onTriggerCompiledScript = onTriggerCompiledScript;
@@ -164,6 +168,7 @@ public class AlarmActor : ReceiveActor
}
catch (Exception ex)
{
_healthCollector?.IncrementAlarmError();
// Alarm evaluation errors logged, actor continues
_logger.LogError(ex,
"Alarm {Alarm} evaluation error on {Instance}",