feat: wire all health metrics and add instance counts to dashboard
Wired ISiteHealthCollector calls for script errors (ScriptExecutionActor), alarm eval errors (AlarmActor), dead letters (DeadLetterMonitorActor), and S&F buffer depth placeholder. Added instance count tracking (deployed/enabled/disabled) to SiteHealthReport via DeploymentManagerActor. Updated Health Dashboard UI to show instance counts per site. All metrics flow through the existing health report pipeline via ClusterClient.
This commit is contained in:
@@ -100,6 +100,23 @@
|
|||||||
}
|
}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@* Instances *@
|
||||||
|
<div class="col-md-4">
|
||||||
|
<h6 class="text-muted small mb-2">Instances</h6>
|
||||||
|
<div class="d-flex justify-content-between mb-1">
|
||||||
|
<span class="small">Deployed</span>
|
||||||
|
<span>@report.DeployedInstanceCount</span>
|
||||||
|
</div>
|
||||||
|
<div class="d-flex justify-content-between mb-1">
|
||||||
|
<span class="small">Enabled</span>
|
||||||
|
<span class="text-success">@report.EnabledInstanceCount</span>
|
||||||
|
</div>
|
||||||
|
<div class="d-flex justify-content-between mb-1">
|
||||||
|
<span class="small">Disabled</span>
|
||||||
|
<span class="text-warning">@report.DisabledInstanceCount</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
@* Error Counts *@
|
@* Error Counts *@
|
||||||
<div class="col-md-4">
|
<div class="col-md-4">
|
||||||
<h6 class="text-muted mb-2">Error Counts</h6>
|
<h6 class="text-muted mb-2">Error Counts</h6>
|
||||||
|
|||||||
@@ -11,4 +11,7 @@ public record SiteHealthReport(
|
|||||||
int ScriptErrorCount,
|
int ScriptErrorCount,
|
||||||
int AlarmEvaluationErrorCount,
|
int AlarmEvaluationErrorCount,
|
||||||
IReadOnlyDictionary<string, int> StoreAndForwardBufferDepths,
|
IReadOnlyDictionary<string, int> StoreAndForwardBufferDepths,
|
||||||
int DeadLetterCount);
|
int DeadLetterCount,
|
||||||
|
int DeployedInstanceCount,
|
||||||
|
int EnabledInstanceCount,
|
||||||
|
int DisabledInstanceCount);
|
||||||
|
|||||||
@@ -49,6 +49,10 @@ public class HealthReportSender : BackgroundService
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
// TODO: Wire S&F buffer depths when StoreAndForward service is available in DI
|
||||||
|
// e.g., var depths = await _bufferDepthProvider.GetDepthsAsync();
|
||||||
|
// _collector.SetStoreAndForwardDepths(depths);
|
||||||
|
|
||||||
var seq = Interlocked.Increment(ref _sequenceNumber);
|
var seq = Interlocked.Increment(ref _sequenceNumber);
|
||||||
var report = _collector.CollectReport(_siteId);
|
var report = _collector.CollectReport(_siteId);
|
||||||
|
|
||||||
|
|||||||
@@ -15,5 +15,7 @@ public interface ISiteHealthCollector
|
|||||||
void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
|
void UpdateConnectionHealth(string connectionName, ConnectionHealth health);
|
||||||
void RemoveConnection(string connectionName);
|
void RemoveConnection(string connectionName);
|
||||||
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
void UpdateTagResolution(string connectionName, int totalSubscribed, int successfullyResolved);
|
||||||
|
void SetStoreAndForwardDepths(IReadOnlyDictionary<string, int> depths);
|
||||||
|
void SetInstanceCounts(int deployed, int enabled, int disabled);
|
||||||
SiteHealthReport CollectReport(string siteId);
|
SiteHealthReport CollectReport(string siteId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
private int _deadLetterCount;
|
private int _deadLetterCount;
|
||||||
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
|
private readonly ConcurrentDictionary<string, ConnectionHealth> _connectionStatuses = new();
|
||||||
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
private readonly ConcurrentDictionary<string, TagResolutionStatus> _tagResolutionCounts = new();
|
||||||
|
private IReadOnlyDictionary<string, int> _sfBufferDepths = new Dictionary<string, int>();
|
||||||
|
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Increment the script error counter. Covers unhandled exceptions,
|
/// Increment the script error counter. Covers unhandled exceptions,
|
||||||
@@ -68,6 +70,26 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
_tagResolutionCounts[connectionName] = new TagResolutionStatus(totalSubscribed, successfullyResolved);
|
_tagResolutionCounts[connectionName] = new TagResolutionStatus(totalSubscribed, successfullyResolved);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Replace the store-and-forward buffer depth snapshot that the next
/// <c>CollectReport</c> call will include in the health report.
/// </summary>
/// <param name="depths">
/// Current buffer depth per store-and-forward buffer name. The contents are
/// copied, so the caller may keep mutating its own dictionary afterwards.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="depths"/> is <c>null</c>. Rejecting it here keeps the stored
/// snapshot valid; a stored null would instead make every later report
/// collection fail when it copies the snapshot.
/// </exception>
public void SetStoreAndForwardDepths(IReadOnlyDictionary<string, int> depths)
{
    System.ArgumentNullException.ThrowIfNull(depths);

    // Take a private copy so a caller mutating its dictionary cannot race with
    // the copy made during report collection.
    var snapshot = new Dictionary<string, int>(depths);

    // Publish the fully built snapshot so a reader on another thread never
    // observes a partially initialised dictionary.
    Volatile.Write(ref _sfBufferDepths, snapshot);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Record the latest instance totals for this site.
/// Invoked by the Deployment Manager whenever instance state changes.
/// </summary>
/// <param name="deployed">Number of deployed instances to store.</param>
/// <param name="enabled">Number of enabled instances to store.</param>
/// <param name="disabled">Number of disabled instances to store.</param>
/// <remarks>
/// Each value is stored with its own interlocked exchange; the three stores
/// are individually atomic but are not applied as a single atomic group.
/// </remarks>
public void SetInstanceCounts(int deployed, int enabled, int disabled)
{
    // Previous values are not needed, so the exchange results are discarded.
    _ = Interlocked.Exchange(ref _deployedInstanceCount, deployed);
    _ = Interlocked.Exchange(ref _enabledInstanceCount, enabled);
    _ = Interlocked.Exchange(ref _disabledInstanceCount, disabled);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Collect the current health report for the site and reset interval counters.
|
/// Collect the current health report for the site and reset interval counters.
|
||||||
/// Connection statuses and tag resolution counts are NOT reset (they reflect current state).
|
/// Connection statuses and tag resolution counts are NOT reset (they reflect current state).
|
||||||
@@ -84,8 +106,8 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
var connectionStatuses = new Dictionary<string, ConnectionHealth>(_connectionStatuses);
|
var connectionStatuses = new Dictionary<string, ConnectionHealth>(_connectionStatuses);
|
||||||
var tagResolution = new Dictionary<string, TagResolutionStatus>(_tagResolutionCounts);
|
var tagResolution = new Dictionary<string, TagResolutionStatus>(_tagResolutionCounts);
|
||||||
|
|
||||||
// S&F buffer depth: placeholder (Phase 3C)
|
// Snapshot current S&F buffer depths
|
||||||
var sfBufferDepths = new Dictionary<string, int>();
|
var sfBufferDepths = new Dictionary<string, int>(_sfBufferDepths);
|
||||||
|
|
||||||
return new SiteHealthReport(
|
return new SiteHealthReport(
|
||||||
SiteId: siteId,
|
SiteId: siteId,
|
||||||
@@ -96,6 +118,9 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
ScriptErrorCount: scriptErrors,
|
ScriptErrorCount: scriptErrors,
|
||||||
AlarmEvaluationErrorCount: alarmErrors,
|
AlarmEvaluationErrorCount: alarmErrors,
|
||||||
StoreAndForwardBufferDepths: sfBufferDepths,
|
StoreAndForwardBufferDepths: sfBufferDepths,
|
||||||
DeadLetterCount: deadLetters);
|
DeadLetterCount: deadLetters,
|
||||||
|
DeployedInstanceCount: _deployedInstanceCount,
|
||||||
|
EnabledInstanceCount: _enabledInstanceCount,
|
||||||
|
DisabledInstanceCount: _disabledInstanceCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -118,8 +118,9 @@ akka {{
|
|||||||
// Register the dead letter monitor actor
|
// Register the dead letter monitor actor
|
||||||
var loggerFactory = _serviceProvider.GetRequiredService<ILoggerFactory>();
|
var loggerFactory = _serviceProvider.GetRequiredService<ILoggerFactory>();
|
||||||
var dlmLogger = loggerFactory.CreateLogger<DeadLetterMonitorActor>();
|
var dlmLogger = loggerFactory.CreateLogger<DeadLetterMonitorActor>();
|
||||||
|
var dlmHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
|
||||||
_actorSystem.ActorOf(
|
_actorSystem.ActorOf(
|
||||||
Props.Create(() => new DeadLetterMonitorActor(dlmLogger)),
|
Props.Create(() => new DeadLetterMonitorActor(dlmLogger, dlmHealthCollector)),
|
||||||
"dead-letter-monitor");
|
"dead-letter-monitor");
|
||||||
|
|
||||||
// Register role-specific actors
|
// Register role-specific actors
|
||||||
@@ -227,6 +228,9 @@ akka {{
|
|||||||
_logger.LogInformation("Data Connection Layer manager actor created");
|
_logger.LogInformation("Data Connection Layer manager actor created");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolve the health collector for the Deployment Manager
|
||||||
|
var siteHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
|
||||||
|
|
||||||
// Create the Deployment Manager as a cluster singleton
|
// Create the Deployment Manager as a cluster singleton
|
||||||
var singletonProps = ClusterSingletonManager.Props(
|
var singletonProps = ClusterSingletonManager.Props(
|
||||||
singletonProps: Props.Create(() => new DeploymentManagerActor(
|
singletonProps: Props.Create(() => new DeploymentManagerActor(
|
||||||
@@ -236,7 +240,8 @@ akka {{
|
|||||||
streamManager,
|
streamManager,
|
||||||
siteRuntimeOptionsValue,
|
siteRuntimeOptionsValue,
|
||||||
dmLogger,
|
dmLogger,
|
||||||
dclManager)),
|
dclManager,
|
||||||
|
siteHealthCollector)),
|
||||||
terminationMessage: PoisonPill.Instance,
|
terminationMessage: PoisonPill.Instance,
|
||||||
settings: ClusterSingletonManagerSettings.Create(_actorSystem!)
|
settings: ClusterSingletonManagerSettings.Create(_actorSystem!)
|
||||||
.WithRole(siteRole)
|
.WithRole(siteRole)
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
using Akka.Actor;
|
using Akka.Actor;
|
||||||
using Akka.Event;
|
using Akka.Event;
|
||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
|
|
||||||
namespace ScadaLink.Host.Actors;
|
namespace ScadaLink.Host.Actors;
|
||||||
|
|
||||||
@@ -11,12 +12,16 @@ namespace ScadaLink.Host.Actors;
|
|||||||
public class DeadLetterMonitorActor : ReceiveActor
|
public class DeadLetterMonitorActor : ReceiveActor
|
||||||
{
|
{
|
||||||
private long _deadLetterCount;
|
private long _deadLetterCount;
|
||||||
|
private readonly ISiteHealthCollector? _healthCollector;
|
||||||
|
|
||||||
public DeadLetterMonitorActor(ILogger<DeadLetterMonitorActor> logger)
|
public DeadLetterMonitorActor(ILogger<DeadLetterMonitorActor> logger, ISiteHealthCollector? healthCollector = null)
|
||||||
{
|
{
|
||||||
|
_healthCollector = healthCollector;
|
||||||
|
|
||||||
Receive<DeadLetter>(dl =>
|
Receive<DeadLetter>(dl =>
|
||||||
{
|
{
|
||||||
_deadLetterCount++;
|
_deadLetterCount++;
|
||||||
|
_healthCollector?.IncrementDeadLetter();
|
||||||
logger.LogWarning(
|
logger.LogWarning(
|
||||||
"Dead letter: {MessageType} from {Sender} to {Recipient}",
|
"Dead letter: {MessageType} from {Sender} to {Recipient}",
|
||||||
dl.Message.GetType().Name,
|
dl.Message.GetType().Name,
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ using Microsoft.Extensions.Logging;
|
|||||||
using ScadaLink.Commons.Messages.Streaming;
|
using ScadaLink.Commons.Messages.Streaming;
|
||||||
using ScadaLink.Commons.Types.Enums;
|
using ScadaLink.Commons.Types.Enums;
|
||||||
using ScadaLink.Commons.Types.Flattening;
|
using ScadaLink.Commons.Types.Flattening;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
using ScadaLink.SiteRuntime.Scripts;
|
using ScadaLink.SiteRuntime.Scripts;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
|
|
||||||
@@ -34,6 +35,7 @@ public class AlarmActor : ReceiveActor
|
|||||||
private readonly SharedScriptLibrary _sharedScriptLibrary;
|
private readonly SharedScriptLibrary _sharedScriptLibrary;
|
||||||
private readonly SiteRuntimeOptions _options;
|
private readonly SiteRuntimeOptions _options;
|
||||||
private readonly ILogger _logger;
|
private readonly ILogger _logger;
|
||||||
|
private readonly ISiteHealthCollector? _healthCollector;
|
||||||
|
|
||||||
private AlarmState _currentState = AlarmState.Normal;
|
private AlarmState _currentState = AlarmState.Normal;
|
||||||
private readonly AlarmTriggerType _triggerType;
|
private readonly AlarmTriggerType _triggerType;
|
||||||
@@ -56,7 +58,8 @@ public class AlarmActor : ReceiveActor
|
|||||||
Script<object?>? onTriggerCompiledScript,
|
Script<object?>? onTriggerCompiledScript,
|
||||||
SharedScriptLibrary sharedScriptLibrary,
|
SharedScriptLibrary sharedScriptLibrary,
|
||||||
SiteRuntimeOptions options,
|
SiteRuntimeOptions options,
|
||||||
ILogger logger)
|
ILogger logger,
|
||||||
|
ISiteHealthCollector? healthCollector = null)
|
||||||
{
|
{
|
||||||
_alarmName = alarmName;
|
_alarmName = alarmName;
|
||||||
_instanceName = instanceName;
|
_instanceName = instanceName;
|
||||||
@@ -64,6 +67,7 @@ public class AlarmActor : ReceiveActor
|
|||||||
_sharedScriptLibrary = sharedScriptLibrary;
|
_sharedScriptLibrary = sharedScriptLibrary;
|
||||||
_options = options;
|
_options = options;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
|
_healthCollector = healthCollector;
|
||||||
_priority = alarmConfig.PriorityLevel;
|
_priority = alarmConfig.PriorityLevel;
|
||||||
_onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
|
_onTriggerScriptName = alarmConfig.OnTriggerScriptCanonicalName;
|
||||||
_onTriggerCompiledScript = onTriggerCompiledScript;
|
_onTriggerCompiledScript = onTriggerCompiledScript;
|
||||||
@@ -164,6 +168,7 @@ public class AlarmActor : ReceiveActor
|
|||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
_healthCollector?.IncrementAlarmError();
|
||||||
// Alarm evaluation errors logged, actor continues
|
// Alarm evaluation errors logged, actor continues
|
||||||
_logger.LogError(ex,
|
_logger.LogError(ex,
|
||||||
"Alarm {Alarm} evaluation error on {Instance}",
|
"Alarm {Alarm} evaluation error on {Instance}",
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ using ScadaLink.Commons.Messages.DebugView;
|
|||||||
using ScadaLink.Commons.Messages.Deployment;
|
using ScadaLink.Commons.Messages.Deployment;
|
||||||
using ScadaLink.Commons.Messages.Lifecycle;
|
using ScadaLink.Commons.Messages.Lifecycle;
|
||||||
using ScadaLink.Commons.Types.Enums;
|
using ScadaLink.Commons.Types.Enums;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
using ScadaLink.SiteRuntime.Persistence;
|
using ScadaLink.SiteRuntime.Persistence;
|
||||||
using ScadaLink.SiteRuntime.Scripts;
|
using ScadaLink.SiteRuntime.Scripts;
|
||||||
using ScadaLink.SiteRuntime.Streaming;
|
using ScadaLink.SiteRuntime.Streaming;
|
||||||
@@ -30,7 +31,9 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
private readonly SiteRuntimeOptions _options;
|
private readonly SiteRuntimeOptions _options;
|
||||||
private readonly ILogger<DeploymentManagerActor> _logger;
|
private readonly ILogger<DeploymentManagerActor> _logger;
|
||||||
private readonly IActorRef? _dclManager;
|
private readonly IActorRef? _dclManager;
|
||||||
|
private readonly ISiteHealthCollector? _healthCollector;
|
||||||
private readonly Dictionary<string, IActorRef> _instanceActors = new();
|
private readonly Dictionary<string, IActorRef> _instanceActors = new();
|
||||||
|
private int _totalDeployedCount;
|
||||||
|
|
||||||
public ITimerScheduler Timers { get; set; } = null!;
|
public ITimerScheduler Timers { get; set; } = null!;
|
||||||
|
|
||||||
@@ -41,7 +44,8 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
SiteStreamManager? streamManager,
|
SiteStreamManager? streamManager,
|
||||||
SiteRuntimeOptions options,
|
SiteRuntimeOptions options,
|
||||||
ILogger<DeploymentManagerActor> logger,
|
ILogger<DeploymentManagerActor> logger,
|
||||||
IActorRef? dclManager = null)
|
IActorRef? dclManager = null,
|
||||||
|
ISiteHealthCollector? healthCollector = null)
|
||||||
{
|
{
|
||||||
_storage = storage;
|
_storage = storage;
|
||||||
_compilationService = compilationService;
|
_compilationService = compilationService;
|
||||||
@@ -49,6 +53,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
_streamManager = streamManager;
|
_streamManager = streamManager;
|
||||||
_options = options;
|
_options = options;
|
||||||
_dclManager = dclManager;
|
_dclManager = dclManager;
|
||||||
|
_healthCollector = healthCollector;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
|
|
||||||
// Lifecycle commands
|
// Lifecycle commands
|
||||||
@@ -123,9 +128,11 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
}
|
}
|
||||||
|
|
||||||
var enabledConfigs = msg.Configs.Where(c => c.IsEnabled).ToList();
|
var enabledConfigs = msg.Configs.Where(c => c.IsEnabled).ToList();
|
||||||
|
_totalDeployedCount = msg.Configs.Count;
|
||||||
_logger.LogInformation(
|
_logger.LogInformation(
|
||||||
"Loaded {Total} deployed configs ({Enabled} enabled) from SQLite",
|
"Loaded {Total} deployed configs ({Enabled} enabled) from SQLite",
|
||||||
msg.Configs.Count, enabledConfigs.Count);
|
msg.Configs.Count, enabledConfigs.Count);
|
||||||
|
UpdateInstanceCounts();
|
||||||
|
|
||||||
if (enabledConfigs.Count == 0)
|
if (enabledConfigs.Count == 0)
|
||||||
return;
|
return;
|
||||||
@@ -200,6 +207,8 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
|
|
||||||
// Create the Instance Actor immediately (no existing actor to replace)
|
// Create the Instance Actor immediately (no existing actor to replace)
|
||||||
CreateInstanceActor(instanceName, command.FlattenedConfigurationJson);
|
CreateInstanceActor(instanceName, command.FlattenedConfigurationJson);
|
||||||
|
_totalDeployedCount++;
|
||||||
|
UpdateInstanceCounts();
|
||||||
|
|
||||||
// Persist to SQLite and clear static overrides asynchronously
|
// Persist to SQLite and clear static overrides asynchronously
|
||||||
var sender = Sender;
|
var sender = Sender;
|
||||||
@@ -257,6 +266,8 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
_instanceActors.Remove(instanceName);
|
_instanceActors.Remove(instanceName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
UpdateInstanceCounts();
|
||||||
|
|
||||||
var sender = Sender;
|
var sender = Sender;
|
||||||
_storage.SetInstanceEnabledAsync(instanceName, false).ContinueWith(t =>
|
_storage.SetInstanceEnabledAsync(instanceName, false).ContinueWith(t =>
|
||||||
{
|
{
|
||||||
@@ -313,6 +324,7 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
{
|
{
|
||||||
CreateInstanceActor(instanceName, result.Config.ConfigJson);
|
CreateInstanceActor(instanceName, result.Config.ConfigJson);
|
||||||
}
|
}
|
||||||
|
UpdateInstanceCounts();
|
||||||
|
|
||||||
result.OriginalSender.Tell(new InstanceLifecycleResponse(
|
result.OriginalSender.Tell(new InstanceLifecycleResponse(
|
||||||
result.Command.CommandId, instanceName, true, null, DateTimeOffset.UtcNow));
|
result.Command.CommandId, instanceName, true, null, DateTimeOffset.UtcNow));
|
||||||
@@ -333,6 +345,8 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
Context.Stop(actor);
|
Context.Stop(actor);
|
||||||
_instanceActors.Remove(instanceName);
|
_instanceActors.Remove(instanceName);
|
||||||
}
|
}
|
||||||
|
_totalDeployedCount = Math.Max(0, _totalDeployedCount - 1);
|
||||||
|
UpdateInstanceCounts();
|
||||||
|
|
||||||
var sender = Sender;
|
var sender = Sender;
|
||||||
_storage.RemoveDeployedConfigAsync(instanceName).ContinueWith(t =>
|
_storage.RemoveDeployedConfigAsync(instanceName).ContinueWith(t =>
|
||||||
@@ -536,7 +550,8 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
_streamManager,
|
_streamManager,
|
||||||
_options,
|
_options,
|
||||||
loggerFactory.CreateLogger<InstanceActor>(),
|
loggerFactory.CreateLogger<InstanceActor>(),
|
||||||
_dclManager));
|
_dclManager,
|
||||||
|
_healthCollector));
|
||||||
|
|
||||||
var actorRef = Context.ActorOf(props, instanceName);
|
var actorRef = Context.ActorOf(props, instanceName);
|
||||||
_instanceActors[instanceName] = actorRef;
|
_instanceActors[instanceName] = actorRef;
|
||||||
@@ -549,6 +564,18 @@ public class DeploymentManagerActor : ReceiveActor, IWithTimers
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
internal int InstanceActorCount => _instanceActors.Count;
|
internal int InstanceActorCount => _instanceActors.Count;
|
||||||
|
|
||||||
|
/// <summary>
/// Pushes the current instance counts to the health collector, if one was supplied.
/// Enabled = number of running instance actors; deployed = the tracked deployed
/// total, raised to the running count if the tracked total has fallen behind;
/// disabled = deployed minus enabled.
/// </summary>
/// <remarks>
/// The tracked total is maintained incrementally and can drift below the number
/// of running actors. Normalising here guarantees the reported values satisfy
/// deployed = enabled + disabled with no negative count.
/// </remarks>
private void UpdateInstanceCounts()
{
    var running = _instanceActors.Count;

    // A running instance is by definition deployed, so never report fewer
    // deployed instances than running ones; this also keeps "disabled" >= 0.
    var deployed = Math.Max(_totalDeployedCount, running);

    _healthCollector?.SetInstanceCounts(
        deployed: deployed,
        enabled: running,
        disabled: deployed - running);
}
|
||||||
|
|
||||||
// ── Internal messages ──
|
// ── Internal messages ──
|
||||||
|
|
||||||
internal record StartupConfigsLoaded(List<DeployedInstance> Configs, string? Error);
|
internal record StartupConfigsLoaded(List<DeployedInstance> Configs, string? Error);
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ using ScadaLink.Commons.Messages.ScriptExecution;
|
|||||||
using ScadaLink.Commons.Messages.Streaming;
|
using ScadaLink.Commons.Messages.Streaming;
|
||||||
using ScadaLink.Commons.Types.Enums;
|
using ScadaLink.Commons.Types.Enums;
|
||||||
using ScadaLink.Commons.Types.Flattening;
|
using ScadaLink.Commons.Types.Flattening;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
using ScadaLink.SiteRuntime.Persistence;
|
using ScadaLink.SiteRuntime.Persistence;
|
||||||
using ScadaLink.SiteRuntime.Scripts;
|
using ScadaLink.SiteRuntime.Scripts;
|
||||||
using ScadaLink.SiteRuntime.Streaming;
|
using ScadaLink.SiteRuntime.Streaming;
|
||||||
@@ -37,6 +38,7 @@ public class InstanceActor : ReceiveActor
|
|||||||
private readonly SiteStreamManager? _streamManager;
|
private readonly SiteStreamManager? _streamManager;
|
||||||
private readonly SiteRuntimeOptions _options;
|
private readonly SiteRuntimeOptions _options;
|
||||||
private readonly ILogger _logger;
|
private readonly ILogger _logger;
|
||||||
|
private readonly ISiteHealthCollector? _healthCollector;
|
||||||
private readonly Dictionary<string, object?> _attributes = new();
|
private readonly Dictionary<string, object?> _attributes = new();
|
||||||
private readonly Dictionary<string, string> _attributeQualities = new();
|
private readonly Dictionary<string, string> _attributeQualities = new();
|
||||||
private readonly Dictionary<string, AlarmState> _alarmStates = new();
|
private readonly Dictionary<string, AlarmState> _alarmStates = new();
|
||||||
@@ -61,7 +63,8 @@ public class InstanceActor : ReceiveActor
|
|||||||
SiteStreamManager? streamManager,
|
SiteStreamManager? streamManager,
|
||||||
SiteRuntimeOptions options,
|
SiteRuntimeOptions options,
|
||||||
ILogger logger,
|
ILogger logger,
|
||||||
IActorRef? dclManager = null)
|
IActorRef? dclManager = null,
|
||||||
|
ISiteHealthCollector? healthCollector = null)
|
||||||
{
|
{
|
||||||
_instanceUniqueName = instanceUniqueName;
|
_instanceUniqueName = instanceUniqueName;
|
||||||
_storage = storage;
|
_storage = storage;
|
||||||
@@ -71,6 +74,7 @@ public class InstanceActor : ReceiveActor
|
|||||||
_options = options;
|
_options = options;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
_dclManager = dclManager;
|
_dclManager = dclManager;
|
||||||
|
_healthCollector = healthCollector;
|
||||||
|
|
||||||
// Deserialize the flattened configuration
|
// Deserialize the flattened configuration
|
||||||
_configuration = JsonSerializer.Deserialize<FlattenedConfiguration>(configJson);
|
_configuration = JsonSerializer.Deserialize<FlattenedConfiguration>(configJson);
|
||||||
@@ -474,7 +478,8 @@ public class InstanceActor : ReceiveActor
|
|||||||
script,
|
script,
|
||||||
_sharedScriptLibrary,
|
_sharedScriptLibrary,
|
||||||
_options,
|
_options,
|
||||||
_logger));
|
_logger,
|
||||||
|
_healthCollector));
|
||||||
|
|
||||||
var actorRef = Context.ActorOf(props, $"script-{script.CanonicalName}");
|
var actorRef = Context.ActorOf(props, $"script-{script.CanonicalName}");
|
||||||
_scriptActors[script.CanonicalName] = actorRef;
|
_scriptActors[script.CanonicalName] = actorRef;
|
||||||
@@ -516,7 +521,8 @@ public class InstanceActor : ReceiveActor
|
|||||||
onTriggerScript,
|
onTriggerScript,
|
||||||
_sharedScriptLibrary,
|
_sharedScriptLibrary,
|
||||||
_options,
|
_options,
|
||||||
_logger));
|
_logger,
|
||||||
|
_healthCollector));
|
||||||
|
|
||||||
var actorRef = Context.ActorOf(props, $"alarm-{alarm.CanonicalName}");
|
var actorRef = Context.ActorOf(props, $"alarm-{alarm.CanonicalName}");
|
||||||
_alarmActors[alarm.CanonicalName] = actorRef;
|
_alarmActors[alarm.CanonicalName] = actorRef;
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ using Microsoft.Extensions.Logging;
|
|||||||
using ScadaLink.Commons.Messages.ScriptExecution;
|
using ScadaLink.Commons.Messages.ScriptExecution;
|
||||||
using ScadaLink.Commons.Messages.Streaming;
|
using ScadaLink.Commons.Messages.Streaming;
|
||||||
using ScadaLink.Commons.Types.Flattening;
|
using ScadaLink.Commons.Types.Flattening;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
using ScadaLink.SiteRuntime.Scripts;
|
using ScadaLink.SiteRuntime.Scripts;
|
||||||
using System.Text.Json;
|
using System.Text.Json;
|
||||||
|
|
||||||
@@ -29,6 +30,7 @@ public class ScriptActor : ReceiveActor, IWithTimers
|
|||||||
private readonly SharedScriptLibrary _sharedScriptLibrary;
|
private readonly SharedScriptLibrary _sharedScriptLibrary;
|
||||||
private readonly SiteRuntimeOptions _options;
|
private readonly SiteRuntimeOptions _options;
|
||||||
private readonly ILogger _logger;
|
private readonly ILogger _logger;
|
||||||
|
private readonly ISiteHealthCollector? _healthCollector;
|
||||||
|
|
||||||
private Script<object?>? _compiledScript;
|
private Script<object?>? _compiledScript;
|
||||||
private ScriptTriggerConfig? _triggerConfig;
|
private ScriptTriggerConfig? _triggerConfig;
|
||||||
@@ -46,7 +48,8 @@ public class ScriptActor : ReceiveActor, IWithTimers
|
|||||||
ResolvedScript scriptConfig,
|
ResolvedScript scriptConfig,
|
||||||
SharedScriptLibrary sharedScriptLibrary,
|
SharedScriptLibrary sharedScriptLibrary,
|
||||||
SiteRuntimeOptions options,
|
SiteRuntimeOptions options,
|
||||||
ILogger logger)
|
ILogger logger,
|
||||||
|
ISiteHealthCollector? healthCollector = null)
|
||||||
{
|
{
|
||||||
_scriptName = scriptName;
|
_scriptName = scriptName;
|
||||||
_instanceName = instanceName;
|
_instanceName = instanceName;
|
||||||
@@ -55,6 +58,7 @@ public class ScriptActor : ReceiveActor, IWithTimers
|
|||||||
_sharedScriptLibrary = sharedScriptLibrary;
|
_sharedScriptLibrary = sharedScriptLibrary;
|
||||||
_options = options;
|
_options = options;
|
||||||
_logger = logger;
|
_logger = logger;
|
||||||
|
_healthCollector = healthCollector;
|
||||||
_minTimeBetweenRuns = scriptConfig.MinTimeBetweenRuns;
|
_minTimeBetweenRuns = scriptConfig.MinTimeBetweenRuns;
|
||||||
|
|
||||||
// Parse trigger configuration
|
// Parse trigger configuration
|
||||||
@@ -207,7 +211,8 @@ public class ScriptActor : ReceiveActor, IWithTimers
|
|||||||
_options,
|
_options,
|
||||||
replyTo,
|
replyTo,
|
||||||
correlationId,
|
correlationId,
|
||||||
_logger));
|
_logger,
|
||||||
|
_healthCollector));
|
||||||
|
|
||||||
Context.ActorOf(props, executionId);
|
Context.ActorOf(props, executionId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ using Akka.Actor;
|
|||||||
using Microsoft.CodeAnalysis.Scripting;
|
using Microsoft.CodeAnalysis.Scripting;
|
||||||
using Microsoft.Extensions.Logging;
|
using Microsoft.Extensions.Logging;
|
||||||
using ScadaLink.Commons.Messages.ScriptExecution;
|
using ScadaLink.Commons.Messages.ScriptExecution;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
using ScadaLink.SiteRuntime.Scripts;
|
using ScadaLink.SiteRuntime.Scripts;
|
||||||
|
|
||||||
namespace ScadaLink.SiteRuntime.Actors;
|
namespace ScadaLink.SiteRuntime.Actors;
|
||||||
@@ -28,7 +29,8 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
SiteRuntimeOptions options,
|
SiteRuntimeOptions options,
|
||||||
IActorRef replyTo,
|
IActorRef replyTo,
|
||||||
string correlationId,
|
string correlationId,
|
||||||
ILogger logger)
|
ILogger logger,
|
||||||
|
ISiteHealthCollector? healthCollector = null)
|
||||||
{
|
{
|
||||||
// Immediately begin execution
|
// Immediately begin execution
|
||||||
var self = Self;
|
var self = Self;
|
||||||
@@ -37,7 +39,7 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
ExecuteScript(
|
ExecuteScript(
|
||||||
scriptName, instanceName, compiledScript, parameters, callDepth,
|
scriptName, instanceName, compiledScript, parameters, callDepth,
|
||||||
instanceActor, sharedScriptLibrary, options, replyTo, correlationId,
|
instanceActor, sharedScriptLibrary, options, replyTo, correlationId,
|
||||||
self, parent, logger);
|
self, parent, logger, healthCollector);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void ExecuteScript(
|
private static void ExecuteScript(
|
||||||
@@ -53,7 +55,8 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
string correlationId,
|
string correlationId,
|
||||||
IActorRef self,
|
IActorRef self,
|
||||||
IActorRef parent,
|
IActorRef parent,
|
||||||
ILogger logger)
|
ILogger logger,
|
||||||
|
ISiteHealthCollector? healthCollector)
|
||||||
{
|
{
|
||||||
var timeout = TimeSpan.FromSeconds(options.ScriptExecutionTimeoutSeconds);
|
var timeout = TimeSpan.FromSeconds(options.ScriptExecutionTimeoutSeconds);
|
||||||
|
|
||||||
@@ -93,6 +96,7 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
}
|
}
|
||||||
catch (OperationCanceledException)
|
catch (OperationCanceledException)
|
||||||
{
|
{
|
||||||
|
healthCollector?.IncrementScriptError();
|
||||||
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' timed out after {timeout.TotalSeconds}s";
|
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' timed out after {timeout.TotalSeconds}s";
|
||||||
logger.LogWarning(errorMsg);
|
logger.LogWarning(errorMsg);
|
||||||
|
|
||||||
@@ -105,6 +109,7 @@ public class ScriptExecutionActor : ReceiveActor
|
|||||||
}
|
}
|
||||||
catch (Exception ex)
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
healthCollector?.IncrementScriptError();
|
||||||
// WP-32: Failures logged to site event log; script NOT disabled after failure
|
// WP-32: Failures logged to site event log; script NOT disabled after failure
|
||||||
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' failed: {ex.Message}";
|
var errorMsg = $"Script '{scriptName}' on instance '{instanceName}' failed: {ex.Message}";
|
||||||
logger.LogError(ex, "Script execution failed: {Script} on {Instance}", scriptName, instanceName);
|
logger.LogError(ex, "Script execution failed: {Script} on {Instance}", scriptName, instanceName);
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
||||||
|
<ProjectReference Include="../ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ public class MessageConventionTests
|
|||||||
},
|
},
|
||||||
0, 0,
|
0, 0,
|
||||||
new Dictionary<string, int> { ["queue1"] = 5 },
|
new Dictionary<string, int> { ["queue1"] = 5 },
|
||||||
0);
|
0, 0, 0, 0);
|
||||||
|
|
||||||
var json = JsonSerializer.Serialize(msg);
|
var json = JsonSerializer.Serialize(msg);
|
||||||
var deserialized = JsonSerializer.Deserialize<ScadaLink.Commons.Messages.Health.SiteHealthReport>(json);
|
var deserialized = JsonSerializer.Deserialize<ScadaLink.Commons.Messages.Health.SiteHealthReport>(json);
|
||||||
|
|||||||
@@ -50,7 +50,10 @@ public class CentralHealthAggregatorTests
|
|||||||
ScriptErrorCount: 0,
|
ScriptErrorCount: 0,
|
||||||
AlarmEvaluationErrorCount: 0,
|
AlarmEvaluationErrorCount: 0,
|
||||||
StoreAndForwardBufferDepths: new Dictionary<string, int>(),
|
StoreAndForwardBufferDepths: new Dictionary<string, int>(),
|
||||||
DeadLetterCount: 0);
|
DeadLetterCount: 0,
|
||||||
|
DeployedInstanceCount: 0,
|
||||||
|
EnabledInstanceCount: 0,
|
||||||
|
DisabledInstanceCount: 0);
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void ProcessReport_StoresState_ForNewSite()
|
public void ProcessReport_StoresState_ForNewSite()
|
||||||
|
|||||||
@@ -100,7 +100,10 @@ public class ObservabilityTests : IClassFixture<ScadaLinkWebApplicationFactory>
|
|||||||
["ext-system"] = 15,
|
["ext-system"] = 15,
|
||||||
["notification"] = 2
|
["notification"] = 2
|
||||||
},
|
},
|
||||||
DeadLetterCount: 5);
|
DeadLetterCount: 5,
|
||||||
|
DeployedInstanceCount: 0,
|
||||||
|
EnabledInstanceCount: 0,
|
||||||
|
DisabledInstanceCount: 0);
|
||||||
|
|
||||||
// Metric type 1: Data connection health
|
// Metric type 1: Data connection health
|
||||||
Assert.Equal(2, report.DataConnectionStatuses.Count);
|
Assert.Equal(2, report.DataConnectionStatuses.Count);
|
||||||
@@ -141,7 +144,7 @@ public class ObservabilityTests : IClassFixture<ScadaLinkWebApplicationFactory>
|
|||||||
"site-01", 1, DateTimeOffset.UtcNow,
|
"site-01", 1, DateTimeOffset.UtcNow,
|
||||||
new Dictionary<string, ConnectionHealth>(),
|
new Dictionary<string, ConnectionHealth>(),
|
||||||
new Dictionary<string, TagResolutionStatus>(),
|
new Dictionary<string, TagResolutionStatus>(),
|
||||||
0, 0, new Dictionary<string, int>(), 0));
|
0, 0, new Dictionary<string, int>(), 0, 0, 0, 0));
|
||||||
|
|
||||||
var state = aggregator.GetSiteState("site-01");
|
var state = aggregator.GetSiteState("site-01");
|
||||||
Assert.NotNull(state);
|
Assert.NotNull(state);
|
||||||
@@ -152,7 +155,7 @@ public class ObservabilityTests : IClassFixture<ScadaLinkWebApplicationFactory>
|
|||||||
"site-01", 2, DateTimeOffset.UtcNow,
|
"site-01", 2, DateTimeOffset.UtcNow,
|
||||||
new Dictionary<string, ConnectionHealth>(),
|
new Dictionary<string, ConnectionHealth>(),
|
||||||
new Dictionary<string, TagResolutionStatus>(),
|
new Dictionary<string, TagResolutionStatus>(),
|
||||||
3, 0, new Dictionary<string, int>(), 0));
|
3, 0, new Dictionary<string, int>(), 0, 0, 0, 0));
|
||||||
|
|
||||||
state = aggregator.GetSiteState("site-01");
|
state = aggregator.GetSiteState("site-01");
|
||||||
Assert.Equal(2, state!.LastSequenceNumber);
|
Assert.Equal(2, state!.LastSequenceNumber);
|
||||||
@@ -174,7 +177,7 @@ public class ObservabilityTests : IClassFixture<ScadaLinkWebApplicationFactory>
|
|||||||
"site-01", seq, DateTimeOffset.UtcNow,
|
"site-01", seq, DateTimeOffset.UtcNow,
|
||||||
new Dictionary<string, ConnectionHealth>(),
|
new Dictionary<string, ConnectionHealth>(),
|
||||||
new Dictionary<string, TagResolutionStatus>(),
|
new Dictionary<string, TagResolutionStatus>(),
|
||||||
seq, 0, new Dictionary<string, int>(), 0));
|
seq, 0, new Dictionary<string, int>(), 0, 0, 0, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
var state = aggregator.GetSiteState("site-01");
|
var state = aggregator.GetSiteState("site-01");
|
||||||
|
|||||||
@@ -53,7 +53,10 @@ public class HealthAggregationTests
|
|||||||
{
|
{
|
||||||
["ext-system"] = i * 2
|
["ext-system"] = i * 2
|
||||||
},
|
},
|
||||||
DeadLetterCount: 0);
|
DeadLetterCount: 0,
|
||||||
|
DeployedInstanceCount: 0,
|
||||||
|
EnabledInstanceCount: 0,
|
||||||
|
DisabledInstanceCount: 0);
|
||||||
|
|
||||||
_aggregator.ProcessReport(report);
|
_aggregator.ProcessReport(report);
|
||||||
}
|
}
|
||||||
@@ -83,7 +86,10 @@ public class HealthAggregationTests
|
|||||||
ScriptErrorCount: seq % 5 == 0 ? 1 : 0,
|
ScriptErrorCount: seq % 5 == 0 ? 1 : 0,
|
||||||
AlarmEvaluationErrorCount: 0,
|
AlarmEvaluationErrorCount: 0,
|
||||||
StoreAndForwardBufferDepths: new Dictionary<string, int>(),
|
StoreAndForwardBufferDepths: new Dictionary<string, int>(),
|
||||||
DeadLetterCount: 0);
|
DeadLetterCount: 0,
|
||||||
|
DeployedInstanceCount: 0,
|
||||||
|
EnabledInstanceCount: 0,
|
||||||
|
DisabledInstanceCount: 0);
|
||||||
|
|
||||||
_aggregator.ProcessReport(report);
|
_aggregator.ProcessReport(report);
|
||||||
}
|
}
|
||||||
@@ -111,14 +117,14 @@ public class HealthAggregationTests
|
|||||||
siteId, 10, DateTimeOffset.UtcNow,
|
siteId, 10, DateTimeOffset.UtcNow,
|
||||||
new Dictionary<string, ConnectionHealth>(),
|
new Dictionary<string, ConnectionHealth>(),
|
||||||
new Dictionary<string, TagResolutionStatus>(),
|
new Dictionary<string, TagResolutionStatus>(),
|
||||||
5, 0, new Dictionary<string, int>(), 0));
|
5, 0, new Dictionary<string, int>(), 0, 0, 0, 0));
|
||||||
|
|
||||||
// Send stale report with seq 5 — should be rejected
|
// Send stale report with seq 5 — should be rejected
|
||||||
_aggregator.ProcessReport(new SiteHealthReport(
|
_aggregator.ProcessReport(new SiteHealthReport(
|
||||||
siteId, 5, DateTimeOffset.UtcNow,
|
siteId, 5, DateTimeOffset.UtcNow,
|
||||||
new Dictionary<string, ConnectionHealth>(),
|
new Dictionary<string, ConnectionHealth>(),
|
||||||
new Dictionary<string, TagResolutionStatus>(),
|
new Dictionary<string, TagResolutionStatus>(),
|
||||||
99, 0, new Dictionary<string, int>(), 0));
|
99, 0, new Dictionary<string, int>(), 0, 0, 0, 0));
|
||||||
|
|
||||||
var state = _aggregator.GetSiteState(siteId);
|
var state = _aggregator.GetSiteState(siteId);
|
||||||
Assert.NotNull(state);
|
Assert.NotNull(state);
|
||||||
|
|||||||
@@ -26,6 +26,7 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ProjectReference Include="../../src/ScadaLink.SiteRuntime/ScadaLink.SiteRuntime.csproj" />
|
<ProjectReference Include="../../src/ScadaLink.SiteRuntime/ScadaLink.SiteRuntime.csproj" />
|
||||||
<ProjectReference Include="../../src/ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
<ProjectReference Include="../../src/ScadaLink.Commons/ScadaLink.Commons.csproj" />
|
||||||
|
<ProjectReference Include="../../src/ScadaLink.HealthMonitoring/ScadaLink.HealthMonitoring.csproj" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
Reference in New Issue
Block a user