feat(health): wire up NodeHostname, ConnectionEndpoint, TagQuality, ParkedMessageCount collectors

- AkkaHostedService: SetNodeHostname from NodeOptions
- DataConnectionActor: UpdateConnectionEndpoint on state transitions,
  track per-tag quality counts and UpdateTagQuality on value changes
- HealthReportSender: query StoreAndForwardStorage for parked message count
- StoreAndForwardStorage: add GetParkedMessageCountAsync()
This commit is contained in:
Joseph Doherty
2026-03-23 10:57:57 -04:00
parent e84a831a02
commit 65cc7b69cd
5 changed files with 69 additions and 1 deletions

View File

@@ -66,6 +66,11 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
private int _totalSubscribed;
private int _resolvedTags;
private int _tagsGoodQuality;
private int _tagsBadQuality;
private int _tagsUncertainQuality;
private readonly Dictionary<string, QualityCode> _lastTagQuality = new();
private IDictionary<string, string> _connectionDetails;
private readonly IDictionary<string, string> _primaryConfig;
private readonly IDictionary<string, string>? _backupConfig;
@@ -144,6 +149,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
{
_log.Info("[{0}] Entering Connecting state", _connectionName);
_healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Connecting);
_healthCollector.UpdateConnectionEndpoint(_connectionName, "Connecting");
Become(Connecting);
Self.Tell(new AttemptConnect());
}
@@ -179,6 +185,8 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_log.Info("[{0}] Entering Connected state", _connectionName);
_healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Connected);
_healthCollector.UpdateTagResolution(_connectionName, _totalSubscribed, _resolvedTags);
// Without a backup configuration the label is just "Connected"; otherwise it names
// the active endpoint. ToLowerInvariant keeps the label identical regardless of the
// host's current culture (plain ToLower() is culture-sensitive).
var endpointLabel = _backupConfig == null
    ? "Connected"
    : $"Connected to {_activeEndpoint.ToString().ToLowerInvariant()}";
_healthCollector.UpdateConnectionEndpoint(_connectionName, endpointLabel);
Become(Connected);
Stash.UnstashAll();
}
@@ -226,6 +234,7 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
{
_log.Warning("[{0}] Entering Reconnecting state", _connectionName);
_healthCollector.UpdateConnectionHealth(_connectionName, ConnectionHealth.Disconnected);
_healthCollector.UpdateConnectionEndpoint(_connectionName, "Disconnected");
Become(Reconnecting);
// WP-9: Push bad quality for all subscribed tags on disconnect
@@ -552,6 +561,14 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
subscriber.Tell(new ConnectionQualityChanged(_connectionName, QualityCode.Bad, now));
}
// All tags now bad quality
_tagsGoodQuality = 0;
_tagsUncertainQuality = 0;
_tagsBadQuality = _lastTagQuality.Count;
foreach (var key in _lastTagQuality.Keys.ToList())
_lastTagQuality[key] = QualityCode.Bad;
_healthCollector.UpdateTagQuality(_connectionName, _tagsGoodQuality, _tagsBadQuality, _tagsUncertainQuality);
}
// ── Re-subscribe (WP-10) ──
@@ -646,6 +663,27 @@ public class DataConnectionActor : UntypedActor, IWithStash, IWithTimers
_connectionName, msg.TagPath, msg.Value.Value, msg.Value.Quality, msg.Value.Timestamp));
}
}
// Track quality transitions
if (_lastTagQuality.TryGetValue(msg.TagPath, out var prevQuality))
{
// Decrement old quality bucket
switch (prevQuality)
{
case QualityCode.Good: _tagsGoodQuality--; break;
case QualityCode.Bad: _tagsBadQuality--; break;
case QualityCode.Uncertain: _tagsUncertainQuality--; break;
}
}
// Increment new quality bucket
switch (msg.Value.Quality)
{
case QualityCode.Good: _tagsGoodQuality++; break;
case QualityCode.Bad: _tagsBadQuality++; break;
case QualityCode.Uncertain: _tagsUncertainQuality++; break;
}
_lastTagQuality[msg.TagPath] = msg.Value.Quality;
_healthCollector.UpdateTagQuality(_connectionName, _tagsGoodQuality, _tagsBadQuality, _tagsUncertainQuality);
}
// ── Internal messages ──

View File

@@ -2,6 +2,7 @@ using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using ScadaLink.Commons.Messages.Health;
using ScadaLink.StoreAndForward;
namespace ScadaLink.HealthMonitoring;
@@ -16,6 +17,7 @@ public class HealthReportSender : BackgroundService
private readonly HealthMonitoringOptions _options;
private readonly ILogger<HealthReportSender> _logger;
private readonly string _siteId;
private readonly StoreAndForwardStorage? _sfStorage;
private long _sequenceNumber;
public HealthReportSender(
@@ -23,13 +25,15 @@ public class HealthReportSender : BackgroundService
IHealthReportTransport transport,
IOptions<HealthMonitoringOptions> options,
ILogger<HealthReportSender> logger,
ISiteIdentityProvider siteIdentityProvider)
ISiteIdentityProvider siteIdentityProvider,
StoreAndForwardStorage? sfStorage = null)
{
_collector = collector;
_transport = transport;
_options = options.Value;
_logger = logger;
_siteId = siteIdentityProvider.SiteId;
_sfStorage = sfStorage;
}
/// <summary>
@@ -54,6 +58,16 @@ public class HealthReportSender : BackgroundService
if (!_collector.IsActiveNode)
continue;
// Refresh the parked-message count from store-and-forward storage, when a storage
// instance was supplied, before the report is collected.
if (_sfStorage != null)
{
    try
    {
        var parkedCount = await _sfStorage.GetParkedMessageCountAsync();
        _collector.SetParkedMessageCount(parkedCount);
    }
    catch (Exception ex)
    {
        // Non-fatal: the collector's parked count is simply not updated this cycle
        // and execution continues to report collection. Log instead of swallowing
        // silently so a persistently failing store is visible to operators.
        _logger.LogWarning(ex, "Failed to query parked message count for health report");
    }
}
var seq = Interlocked.Increment(ref _sequenceNumber);
var report = _collector.CollectReport(_siteId);

View File

@@ -16,6 +16,7 @@
<ItemGroup>
<ProjectReference Include="../ScadaLink.Commons/ScadaLink.Commons.csproj" />
<ProjectReference Include="../ScadaLink.StoreAndForward/ScadaLink.StoreAndForward.csproj" />
</ItemGroup>
<ItemGroup>

View File

@@ -241,6 +241,7 @@ akka {{
// Resolve the health collector for the Deployment Manager
var siteHealthCollector = _serviceProvider.GetService<ScadaLink.HealthMonitoring.ISiteHealthCollector>();
siteHealthCollector?.SetNodeHostname(_nodeOptions.NodeHostname);
// Create SiteReplicationActor on every node (not a singleton)
var sfStorage = _serviceProvider.GetRequiredService<StoreAndForwardStorage>();

View File

@@ -297,6 +297,20 @@ public class StoreAndForwardStorage
return messages.FirstOrDefault();
}
/// <summary>
/// Gets the count of parked messages (for health reporting).
/// </summary>
/// <param name="cancellationToken">
/// Optional token that cancels opening the connection and executing the query.
/// </param>
/// <returns>
/// The number of rows in <c>sf_messages</c> whose <c>status</c> equals
/// <see cref="StoreAndForwardMessageStatus.Parked"/>.
/// </returns>
public async Task<int> GetParkedMessageCountAsync(
    System.Threading.CancellationToken cancellationToken = default)
{
    await using var conn = new SqliteConnection(_connectionString);
    await conn.OpenAsync(cancellationToken);

    await using var cmd = conn.CreateCommand();
    cmd.CommandText = "SELECT COUNT(*) FROM sf_messages WHERE status = @parked";
    cmd.Parameters.AddWithValue("@parked", (int)StoreAndForwardMessageStatus.Parked);

    // The scalar comes back boxed as object; Convert.ToInt32 narrows it to int.
    var result = await cmd.ExecuteScalarAsync(cancellationToken);
    return Convert.ToInt32(result);
}
/// <summary>
/// Gets total message count by status.
/// </summary>