feat(health): wire up NodeHostname, ConnectionEndpoint, TagQuality, ParkedMessageCount collectors

- AkkaHostedService: SetNodeHostname from NodeOptions
- DataConnectionActor: UpdateConnectionEndpoint on state transitions,
  track per-tag quality counts and UpdateTagQuality on value changes
- HealthReportSender: query StoreAndForwardStorage for parked message count
- StoreAndForwardStorage: add GetParkedMessageCountAsync()
This commit is contained in:
Joseph Doherty
2026-03-23 10:57:57 -04:00
parent e84a831a02
commit 65cc7b69cd
5 changed files with 69 additions and 1 deletions

View File

@@ -66,6 +66,11 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
private int _totalSubscribed; private int _totalSubscribed;
private int _resolvedTags; private int _resolvedTags;
private int _tagsGoodQuality;
private int _tagsBadQuality;
private int _tagsUncertainQuality;
private readonly Dictionary<string, QualityCode> _lastTagQuality = new();
private IDictionary<string, string> _connectionDetails; private IDictionary<string, string> _connectionDetails;
private readonly IDictionary<string, string> _primaryConfig; private readonly IDictionary<string, string> _primaryConfig;
private readonly IDictionary<string, string>? _backupConfig; private readonly IDictionary<string, string>? _backupConfig;
@@ -144,6 +149,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
{ {
_log.Info("[{0}] Entering Connecting state", _connectionName); _log.Info("[{0}] Entering Connecting state", _connectionName);
_healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Connecting); _healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Connecting);
_healthCollector.UpdateConnectionEndpoint(_connectionName, "Connecting");
Become(Connecting); Become(Connecting);
Self.Tell(new AttemptConnect()); Self.Tell(new AttemptConnect());
} }
@@ -179,6 +185,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Info("[{0}] Entering Connected state", _connectionName); _log.Info("[{0}] Entering Connected state", _connectionName);
_healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Connected); _healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Connected);
_healthCollector.UpdateTagResolution(_connectionName, _totalSubscribed, _resolvedTags); _healthCollector.UpdateTagResolution(_connectionName, _totalSubscribed, _resolvedTags);
var endpointLabel = _backupConfig == null ? "Connected" : $"Connected to {_activeEndpoint.ToString().ToLower()}";
_healthCollector.UpdateConnectionEndpoint(_connectionName, endpointLabel);
Become(Connected); Become(Connected);
Stash.UnstashAll(); Stash.UnstashAll();
} }
@@ -226,6 +234,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
{ {
_log.Warning("[{0}] Entering Reconnecting state", _connectionName); _log.Warning("[{0}] Entering Reconnecting state", _connectionName);
_healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Disconnected); _healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Disconnected);
_healthCollector.UpdateConnectionEndpoint(_connectionName, "Disconnected");
Become(Reconnecting); Become(Reconnecting);
// WP-9: Push bad quality for all subscribed tags on disconnect // WP-9: Push bad quality for all subscribed tags on disconnect
@@ -552,6 +561,14 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
subscriber.Tell(new ConnectionQualityChanged(_connectionName, QualityCode.Bad, now)); subscriber.Tell(new ConnectionQualityChanged(_connectionName, QualityCode.Bad, now));
} }
// All tags now bad quality
_tagsGoodQuality = 0;
_tagsUncertainQuality = 0;
_tagsBadQuality = _lastTagQuality.Count;
foreach (var key in _lastTagQuality.Keys.ToList())
_lastTagQuality[key] = QualityCode.Bad;
_healthCollector.UpdateTagQuality(_connectionName, _tagsGoodQuality, _tagsBadQuality, _tagsUncertainQuality);
} }
// ── Re-subscribe (WP-10) ── // ── Re-subscribe (WP-10) ──
@@ -646,6 +663,27 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_connectionName, msg.TagPath, msg.Value.Value, msg.Value.Quality, msg.Value.Timestamp)); _connectionName, msg.TagPath, msg.Value.Value, msg.Value.Quality, msg.Value.Timestamp));
} }
} }
// Track quality transitions
if (_lastTagQuality.TryGetValue(msg.TagPath, out var prevQuality))
{
// Decrement old quality bucket
switch (prevQuality)
{
case QualityCode.Good: _tagsGoodQuality--; break;
case QualityCode.Bad: _tagsBadQuality--; break;
case QualityCode.Uncertain: _tagsUncertainQuality--; break;
}
}
// Increment new quality bucket
switch (msg.Value.Quality)
{
case QualityCode.Good: _tagsGoodQuality++; break;
case QualityCode.Bad: _tagsBadQuality++; break;
case QualityCode.Uncertain: _tagsUncertainQuality++; break;
}
_lastTagQuality[msg.TagPath] = msg.Value.Quality;
_healthCollector.UpdateTagQuality(_connectionName, _tagsGoodQuality, _tagsBadQuality, _tagsUncertainQuality);
} }
// ── Internal messages ── // ── Internal messages ──

View File

@@ -2,6 +2,7 @@ using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options; using Microsoft.Extensions.Options;
using ScadaLink.Commons.Messages.Health; using ScadaLink.Commons.Messages.Health;
using ScadaLink.StoreAndForward;
namespace ScadaLink.HealthMonitoring; namespace ScadaLink.HealthMonitoring;
@@ -16,6 +17,7 @@ public class HealthReportSender : BackgroundService
private readonly HealthMonitoringOptions _options; private readonly HealthMonitoringOptions _options;
private readonly ILogger<HealthReportSender> _logger; private readonly ILogger<HealthReportSender> _logger;
private readonly string _siteId; private readonly string _siteId;
private readonly StoreAndForwardStorage? _sfStorage;
private long _sequenceNumber; private long _sequenceNumber;
public HealthReportSender( public HealthReportSender(
@@ -23,13 +25,15 @@ public class HealthReportSender : BackgroundService
IHealthReportTransport transport, IHealthReportTransport transport,
IOptions<HealthMonitoringOptions> options, IOptions<HealthMonitoringOptions> options,
ILogger<HealthReportSender> logger, ILogger<HealthReportSender> logger,
ISiteIdentityProvider siteIdentityProvider) ISiteIdentityProvider siteIdentityProvider,
StoreAndForwardStorage? sfStorage = null)
{ {
_collector = collector; _collector = collector;
_transport = transport; _transport = transport;
_options = options.Value; _options = options.Value;
_logger = logger; _logger = logger;
_siteId = siteIdentityProvider.SiteId; _siteId = siteIdentityProvider.SiteId;
_sfStorage = sfStorage;
} }
/// <summary> /// <summary>
@@ -54,6 +58,16 @@ public class HealthReportSender : BackgroundService
if (!_collector.IsActiveNode) if (!_collector.IsActiveNode)
continue; continue;
if (_sfStorage != null)
{
try
{
var parkedCount = await _sfStorage.GetParkedMessageCountAsync();
_collector.SetParkedMessageCount(parkedCount);
}
catch { /* Non-fatal — parked count will be 0 */ }
}
var seq = Interlocked.Increment(ref _sequenceNumber); var seq = Interlocked.Increment(ref _sequenceNumber);
var report = _collector.CollectReport(_siteId); var report = _collector.CollectReport(_siteId);

View File

@@ -16,6 +16,7 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" /> <ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
<ProjectReference Include="../ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@@ -241,6 +241,7 @@ akka {{
// Resolve the health collector for the Deployment Manager // Resolve the health collector for the Deployment Manager
var siteHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>(); var siteHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
siteHealthCollector?.SetNodeHostname(_nodeOptions.NodeHostname);
// Create SiteReplicationActor on every node (not a singleton) // Create SiteReplicationActor on every node (not a singleton)
var sfStorage = _serviceProvider.GetRequiredService<StoreAndForwardStorage>(); var sfStorage = _serviceProvider.GetRequiredService<StoreAndForwardStorage>();

View File

@@ -297,6 +297,20 @@ public class StoreAndForwardStorage
return messages.FirstOrDefault(); return messages.FirstOrDefault();
} }
/// <summary>
/// Gets the count of parked messages (for health reporting).
/// </summary>
/// <param name="cancellationToken">
/// Optional token used to abandon the connection open and the query, e.g. when the
/// calling service is shutting down. Defaults to <c>default</c> (no cancellation),
/// so existing parameterless callers keep working unchanged.
/// </param>
/// <returns>
/// The number of rows in <c>sf_messages</c> whose <c>status</c> column equals
/// <see cref="StoreAndForwardMessageStatus.Parked"/>.
/// </returns>
public async Task<int> GetParkedMessageCountAsync(
    System.Threading.CancellationToken cancellationToken = default)
{
    // A fresh connection is opened per call and disposed when the method exits.
    await using var conn = new SqliteConnection(_connectionString);
    await conn.OpenAsync(cancellationToken);

    await using var cmd = conn.CreateCommand();
    cmd.CommandText = "SELECT COUNT(*) FROM sf_messages WHERE status = @parked";
    // The status is bound as a parameter using the enum's integer value.
    cmd.Parameters.AddWithValue("@parked", (int)StoreAndForwardMessageStatus.Parked);

    var result = await cmd.ExecuteScalarAsync(cancellationToken);
    return Convert.ToInt32(result);
}
/// <summary> /// <summary>
/// Gets total message count by status. /// Gets total message count by status.
/// </summary> /// </summary>