feat(health): show all cluster nodes (online/offline, primary/standby) in health dashboard

Add NodeStatus record, IClusterNodeProvider interface, and AkkaClusterNodeProvider
that queries Akka cluster membership for all site-role nodes. HealthReportSender
populates ClusterNodes before each report. UI shows a row per node with
hostname, Online/Offline badge, and Primary/Standby badge. Falls back to
single-node display if ClusterNodes is not populated.
This commit is contained in:
Joseph Doherty
2026-03-23 14:54:59 -04:00
parent 65cc7b69cd
commit 02a7e8abc6
9 changed files with 127 additions and 8 deletions

View File

@@ -90,11 +90,25 @@
<h6 class="text-muted mb-2 border-bottom pb-1">Nodes</h6>
<table class="table table-sm table-borderless mb-0">
<tbody>
@if (report.ClusterNodes is { Count: > 0 })
{
@foreach (var node in report.ClusterNodes)
{
<tr>
<td class="small">@node.Hostname</td>
<td><span class="badge @(node.IsOnline ? "bg-success" : "bg-danger")">@(node.IsOnline ? "Online" : "Offline")</span></td>
<td><span class="badge @(node.Role == "Primary" ? "bg-primary" : "bg-secondary")">@node.Role</span></td>
</tr>
}
}
else
{
<tr>
<td class="small">@(report.NodeHostname != "" ? report.NodeHostname : "Node")</td>
<td><span class="badge @(state.IsOnline ? "bg-success" : "bg-danger")">@(state.IsOnline ? "Online" : "Offline")</span></td>
<td><span class="badge @(report.NodeRole == "Active" ? "bg-primary" : "bg-secondary")">@(report.NodeRole == "Active" ? "Primary" : "Standby")</span></td>
</tr>
}
</tbody>
</table>
</div>

View File

@@ -0,0 +1,3 @@
namespace ScadaLink.Commons.Messages.Health;
/// <summary>
/// Status of a single cluster node as carried in a site health report.
/// </summary>
/// <param name="Hostname">Host name (or address string) identifying the node; displayed as-is in the health dashboard.</param>
/// <param name="IsOnline"><c>true</c> when the node is considered online; rendered as an Online/Offline badge.</param>
/// <param name="Role">Node role label. The dashboard compares this against the literal <c>"Primary"</c>; any other value is rendered with the secondary badge style.</param>
public record NodeStatus(string Hostname, bool IsOnline, string Role);

View File

@@ -19,4 +19,5 @@ public record SiteHealthReport(
string NodeHostname = "",
IReadOnlyDictionary<string, string>? DataConnectionEndpoints = null,
IReadOnlyDictionary<string, TagQualityCounts>? DataConnectionTagQuality = null,
int ParkedMessageCount = 0);
int ParkedMessageCount = 0,
IReadOnlyList<NodeStatus>? ClusterNodes = null);

View File

@@ -18,6 +18,7 @@ public class HealthReportSender : BackgroundService
private readonly ILogger<HealthReportSender> _logger;
private readonly string _siteId;
private readonly StoreAndForwardStorage? _sfStorage;
private readonly IClusterNodeProvider? _clusterNodeProvider;
private long _sequenceNumber;
public HealthReportSender(
@@ -26,7 +27,8 @@ public class HealthReportSender : BackgroundService
IOptions<HealthMonitoringOptions> options,
ILogger<HealthReportSender> logger,
ISiteIdentityProvider siteIdentityProvider,
StoreAndForwardStorage? sfStorage = null)
StoreAndForwardStorage? sfStorage = null,
IClusterNodeProvider? clusterNodeProvider = null)
{
_collector = collector;
_transport = transport;
@@ -34,6 +36,7 @@ public class HealthReportSender : BackgroundService
_logger = logger;
_siteId = siteIdentityProvider.SiteId;
_sfStorage = sfStorage;
_clusterNodeProvider = clusterNodeProvider;
}
/// <summary>
@@ -58,6 +61,15 @@ public class HealthReportSender : BackgroundService
if (!_collector.IsActiveNode)
continue;
if (_clusterNodeProvider != null)
{
    try
    {
        _collector.SetClusterNodes(_clusterNodeProvider.GetClusterNodes());
    }
    catch (Exception ex)
    {
        // Non-fatal: the report is still sent, using whatever node list the
        // collector already holds. Log instead of swallowing silently so a
        // persistently failing provider is visible to operators.
        _logger.LogWarning(
            ex,
            "Failed to refresh cluster node statuses for site {SiteId}; continuing with the health report",
            _siteId);
    }
}
if (_sfStorage != null)
{
try

View File

@@ -0,0 +1,12 @@
using ScadaLink.Commons.Messages.Health;
namespace ScadaLink.HealthMonitoring;
/// <summary>
/// Provides cluster node status information for health reporting.
/// Implemented by the Host project which has access to the Akka.NET actor system.
/// </summary>
public interface IClusterNodeProvider
{
    /// <summary>
    /// Returns the current status of each cluster node to include in the health report.
    /// </summary>
    /// <returns>
    /// One <see cref="NodeStatus"/> per known node. The health report sender calls this
    /// before each report and treats a thrown exception as non-fatal.
    /// </returns>
    IReadOnlyList<NodeStatus> GetClusterNodes();
}

View File

@@ -21,6 +21,7 @@ public interface ISiteHealthCollector
void SetInstanceCounts(int deployed, int enabled, int disabled);
void SetParkedMessageCount(int count);
void SetNodeHostname(string hostname);
/// <summary>
/// Sets the cluster node statuses to be included in subsequently collected reports.
/// </summary>
void SetClusterNodes(IReadOnlyList<Commons.Messages.Health.NodeStatus> nodes);
void SetActiveNode(bool isActive);
bool IsActiveNode { get; }
SiteHealthReport CollectReport(string siteId);

View File

@@ -21,6 +21,7 @@ public class SiteHealthCollector : ISiteHealthCollector
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
private int _parkedMessageCount;
private volatile string _nodeHostname = "";
private volatile IReadOnlyList<Commons.Messages.Health.NodeStatus>? _clusterNodes;
private volatile bool _isActiveNode;
/// <summary>
@@ -94,6 +95,8 @@ public class SiteHealthCollector : ISiteHealthCollector
public void SetNodeHostname(string hostname) => _nodeHostname = hostname;
/// <summary>
/// Replaces the stored cluster node list. The stored list is copied into the
/// <c>ClusterNodes</c> value of the report when a report is collected.
/// </summary>
public void SetClusterNodes(IReadOnlyList<Commons.Messages.Health.NodeStatus> nodes) => _clusterNodes = nodes;
/// <summary>
/// Set the current store-and-forward buffer depths snapshot.
/// Called before report collection with data from the S&amp;F service.
@@ -159,6 +162,7 @@ public class SiteHealthCollector : ISiteHealthCollector
NodeHostname: _nodeHostname,
DataConnectionEndpoints: connectionEndpoints,
DataConnectionTagQuality: tagQuality,
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0));
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
ClusterNodes: _clusterNodes?.ToList());
}
}

View File

@@ -0,0 +1,62 @@
using Akka.Actor;
using Akka.Cluster;
using ScadaLink.Commons.Messages.Health;
using ScadaLink.HealthMonitoring;
using ScadaLink.Host.Actors;
namespace ScadaLink.Host.Health;
/// <summary>
/// Provides cluster node statuses from Akka.NET cluster membership for health reporting.
/// </summary>
/// <remarks>
/// Only members that carry the configured site role are reported. A node is reported
/// online when its member status is <see cref="MemberStatus.Up"/> and it is not in the
/// cluster's unreachable set. The member whose address equals the cluster leader is
/// labelled "Primary"; every other member is labelled "Standby".
/// </remarks>
public class AkkaClusterNodeProvider : IClusterNodeProvider
{
    private const string PrimaryRole = "Primary";
    private const string StandbyRole = "Standby";

    private readonly AkkaHostedService _akkaService;
    private readonly string _siteRole;

    /// <summary>
    /// Creates a provider bound to the hosted Akka service.
    /// </summary>
    /// <param name="akkaService">Hosted service exposing the actor system (may not be started yet).</param>
    /// <param name="siteRole">Cluster role name used to select which members are reported.</param>
    public AkkaClusterNodeProvider(AkkaHostedService akkaService, string siteRole)
    {
        _akkaService = akkaService;
        _siteRole = siteRole;
    }

    /// <summary>
    /// Builds the node status list from the current cluster membership.
    /// </summary>
    /// <returns>
    /// One entry per member with the site role; an empty list when the actor system
    /// is not available.
    /// </returns>
    public IReadOnlyList<NodeStatus> GetClusterNodes()
    {
        var system = _akkaService.ActorSystem;
        if (system == null) return [];

        // Read the cluster state once so members, unreachable set and leader
        // all come from the same snapshot.
        var state = Cluster.Get(system).State;
        var leader = state.Leader;
        var unreachable = state.Unreachable;

        var nodes = new List<NodeStatus>();
        var seenAddresses = new HashSet<Address>();

        foreach (var member in state.Members)
        {
            if (!member.HasRole(_siteRole))
                continue;

            seenAddresses.Add(member.Address);

            // An unreachable member stays in Members with its last status (typically Up),
            // so status alone is not enough to call it online.
            var isOnline = member.Status == MemberStatus.Up && !unreachable.Contains(member);
            var role = member.Address.Equals(leader) ? PrimaryRole : StandbyRole;

            nodes.Add(new NodeStatus(HostnameOf(member), isOnline, role));
        }

        // Defensive: report any unreachable site member that was not listed in Members.
        foreach (var member in unreachable)
        {
            if (!member.HasRole(_siteRole))
                continue;

            // Add returns false when the address was already reported above.
            if (!seenAddresses.Add(member.Address))
                continue;

            nodes.Add(new NodeStatus(HostnameOf(member), false, StandbyRole));
        }

        return nodes;
    }

    /// <summary>Host part of the member address, or the full address string when no host is set.</summary>
    private static string HostnameOf(Member member) =>
        member.Address.Host ?? member.Address.ToString();
}

View File

@@ -4,6 +4,7 @@ using ScadaLink.DataConnectionLayer;
using ScadaLink.ExternalSystemGateway;
using ScadaLink.HealthMonitoring;
using ScadaLink.Host.Actors;
using ScadaLink.Host.Health;
using ScadaLink.NotificationService;
using ScadaLink.SiteEventLogging;
using ScadaLink.SiteRuntime;
@@ -42,6 +43,15 @@ public static class SiteServiceRegistration
services.AddSingleton<AkkaHostedService>();
services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());
// Cluster node status provider for health reports
services.AddSingleton<IClusterNodeProvider>(sp =>
{
var akkaService = sp.GetRequiredService<AkkaHostedService>();
var nodeOptions = sp.GetRequiredService<Microsoft.Extensions.Options.IOptions<NodeOptions>>().Value;
var siteRole = $"site-{nodeOptions.SiteId}";
return new AkkaClusterNodeProvider(akkaService, siteRole);
});
// Options binding
BindSharedOptions(services, config);
services.Configure<SiteRuntimeOptions>(config.GetSection("ScadaLink:SiteRuntime"));