feat(health): show all cluster nodes (online/offline, primary/standby) in health dashboard
Add NodeStatus record, IClusterNodeProvider interface, and AkkaClusterNodeProvider that queries Akka cluster membership for all site-role nodes. HealthReportSender populates ClusterNodes before each report. UI shows a row per node with hostname, Online/Offline badge, and Primary/Standby badge. Falls back to single-node display if ClusterNodes is not populated.
This commit is contained in:
@@ -90,11 +90,25 @@
|
||||
<h6 class="text-muted mb-2 border-bottom pb-1">Nodes</h6>
|
||||
<table class="table table-sm table-borderless mb-0">
|
||||
<tbody>
|
||||
@if (report.ClusterNodes is { Count: > 0 })
|
||||
{
|
||||
@foreach (var node in report.ClusterNodes)
|
||||
{
|
||||
<tr>
|
||||
<td class="small">@node.Hostname</td>
|
||||
<td><span class="badge @(node.IsOnline ? "bg-success" : "bg-danger")">@(node.IsOnline ? "Online" : "Offline")</span></td>
|
||||
<td><span class="badge @(node.Role == "Primary" ? "bg-primary" : "bg-secondary")">@node.Role</span></td>
|
||||
</tr>
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
<tr>
|
||||
<td class="small">@(report.NodeHostname != "" ? report.NodeHostname : "Node")</td>
|
||||
<td><span class="badge @(state.IsOnline ? "bg-success" : "bg-danger")">@(state.IsOnline ? "Online" : "Offline")</span></td>
|
||||
<td><span class="badge @(report.NodeRole == "Active" ? "bg-primary" : "bg-secondary")">@(report.NodeRole == "Active" ? "Primary" : "Standby")</span></td>
|
||||
</tr>
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
3
src/ScadaLink.Commons/Messages/Health/NodeStatus.cs
Normal file
3
src/ScadaLink.Commons/Messages/Health/NodeStatus.cs
Normal file
@@ -0,0 +1,3 @@
|
||||
namespace ScadaLink.Commons.Messages.Health;
|
||||
|
||||
/// <summary>
/// Status of a single cluster node, carried in site health reports and rendered
/// as one row of the "Nodes" table in the health dashboard.
/// </summary>
/// <param name="Hostname">Host name identifying the node (providers may fall back to the full node address).</param>
/// <param name="IsOnline">Whether the node is currently considered online.</param>
/// <param name="Role">Display role of the node; the Akka-based provider emits "Primary" or "Standby".</param>
public record NodeStatus(string Hostname, bool IsOnline, string Role);
|
||||
@@ -19,4 +19,5 @@ public record SiteHealthReport(
|
||||
string NodeHostname = "",
|
||||
IReadOnlyDictionary<string, string>? DataConnectionEndpoints = null,
|
||||
IReadOnlyDictionary<string, TagQualityCounts>? DataConnectionTagQuality = null,
|
||||
int ParkedMessageCount = 0);
|
||||
int ParkedMessageCount = 0,
|
||||
IReadOnlyList<NodeStatus>? ClusterNodes = null);
|
||||
|
||||
@@ -18,6 +18,7 @@ public class HealthReportSender : BackgroundService
|
||||
private readonly ILogger<HealthReportSender> _logger;
|
||||
private readonly string _siteId;
|
||||
private readonly StoreAndForwardStorage? _sfStorage;
|
||||
private readonly IClusterNodeProvider? _clusterNodeProvider;
|
||||
private long _sequenceNumber;
|
||||
|
||||
public HealthReportSender(
|
||||
@@ -26,7 +27,8 @@ public class HealthReportSender : BackgroundService
|
||||
IOptions<HealthMonitoringOptions> options,
|
||||
ILogger<HealthReportSender> logger,
|
||||
ISiteIdentityProvider siteIdentityProvider,
|
||||
StoreAndForwardStorage? sfStorage = null)
|
||||
StoreAndForwardStorage? sfStorage = null,
|
||||
IClusterNodeProvider? clusterNodeProvider = null)
|
||||
{
|
||||
_collector = collector;
|
||||
_transport = transport;
|
||||
@@ -34,6 +36,7 @@ public class HealthReportSender : BackgroundService
|
||||
_logger = logger;
|
||||
_siteId = siteIdentityProvider.SiteId;
|
||||
_sfStorage = sfStorage;
|
||||
_clusterNodeProvider = clusterNodeProvider;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -58,6 +61,15 @@ public class HealthReportSender : BackgroundService
|
||||
if (!_collector.IsActiveNode)
|
||||
continue;
|
||||
|
||||
if (_clusterNodeProvider != null)
|
||||
{
|
||||
try
|
||||
{
|
||||
_collector.SetClusterNodes(_clusterNodeProvider.GetClusterNodes());
|
||||
}
|
||||
catch { /* Non-fatal */ }
|
||||
}
|
||||
|
||||
if (_sfStorage != null)
|
||||
{
|
||||
try
|
||||
|
||||
12
src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs
Normal file
12
src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs
Normal file
@@ -0,0 +1,12 @@
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
|
||||
namespace ScadaLink.HealthMonitoring;
|
||||
|
||||
/// <summary>
/// Source of per-node cluster status used when building health reports.
/// The implementation lives in the Host project, which has access to the
/// Akka.NET actor system.
/// </summary>
public interface IClusterNodeProvider
{
    /// <summary>
    /// Returns the status of the cluster nodes currently known to the provider.
    /// </summary>
    /// <returns>
    /// One <see cref="NodeStatus"/> entry per node; may be empty when no
    /// membership information is available.
    /// </returns>
    IReadOnlyList<NodeStatus> GetClusterNodes();
}
|
||||
@@ -21,6 +21,7 @@ public interface ISiteHealthCollector
|
||||
void SetInstanceCounts(int deployed, int enabled, int disabled);
|
||||
void SetParkedMessageCount(int count);
|
||||
void SetNodeHostname(string hostname);
|
||||
void SetClusterNodes(IReadOnlyList<Commons.Messages.Health.NodeStatus> nodes);
|
||||
void SetActiveNode(bool isActive);
|
||||
bool IsActiveNode { get; }
|
||||
SiteHealthReport CollectReport(string siteId);
|
||||
|
||||
@@ -21,6 +21,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
|
||||
private int _parkedMessageCount;
|
||||
private volatile string _nodeHostname = "";
|
||||
private volatile IReadOnlyList<Commons.Messages.Health.NodeStatus>? _clusterNodes;
|
||||
private volatile bool _isActiveNode;
|
||||
|
||||
/// <summary>
|
||||
@@ -94,6 +95,8 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
|
||||
public void SetNodeHostname(string hostname)
{
    // Stored in a volatile field; emitted as NodeHostname when a report is collected.
    _nodeHostname = hostname;
}
|
||||
|
||||
/// <summary>
/// Replaces the cluster node snapshot that the next collected report will
/// expose as <c>ClusterNodes</c>.
/// </summary>
/// <param name="nodes">Current status of the cluster nodes.</param>
public void SetClusterNodes(IReadOnlyList<Commons.Messages.Health.NodeStatus> nodes)
{
    // The field is volatile and the reference is swapped as a whole.
    _clusterNodes = nodes;
}
|
||||
|
||||
/// <summary>
|
||||
/// Set the current store-and-forward buffer depths snapshot.
|
||||
/// Called before report collection with data from the S&F service.
|
||||
@@ -159,6 +162,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
||||
NodeHostname: _nodeHostname,
|
||||
DataConnectionEndpoints: connectionEndpoints,
|
||||
DataConnectionTagQuality: tagQuality,
|
||||
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0));
|
||||
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
|
||||
ClusterNodes: _clusterNodes?.ToList());
|
||||
}
|
||||
}
|
||||
|
||||
62
src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs
Normal file
62
src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs
Normal file
@@ -0,0 +1,62 @@
|
||||
using Akka.Actor;
|
||||
using Akka.Cluster;
|
||||
using ScadaLink.Commons.Messages.Health;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using ScadaLink.Host.Actors;
|
||||
|
||||
namespace ScadaLink.Host.Health;
|
||||
|
||||
/// <summary>
/// Provides cluster node statuses from Akka.NET cluster membership for health reporting.
/// </summary>
/// <remarks>
/// Only members that carry the configured site role are reported. A node is
/// reported as online only when its member status is <see cref="MemberStatus.Up"/>
/// and it is not in the cluster's unreachable set; the member whose address
/// equals the current cluster leader is labelled "Primary", all others "Standby".
/// </remarks>
public class AkkaClusterNodeProvider : IClusterNodeProvider
{
    private const string PrimaryRole = "Primary";
    private const string StandbyRole = "Standby";

    private readonly AkkaHostedService _akkaService;
    private readonly string _siteRole;

    /// <summary>
    /// Creates a provider bound to the hosted Akka service.
    /// </summary>
    /// <param name="akkaService">Hosted service that exposes the actor system.</param>
    /// <param name="siteRole">Cluster role a member must have to be included in the result.</param>
    public AkkaClusterNodeProvider(AkkaHostedService akkaService, string siteRole)
    {
        _akkaService = akkaService;
        _siteRole = siteRole;
    }

    /// <summary>
    /// Builds the node list from the current cluster state.
    /// </summary>
    /// <returns>
    /// One entry per member with the site role; an empty list when the actor
    /// system is not available.
    /// </returns>
    public IReadOnlyList<NodeStatus> GetClusterNodes()
    {
        var system = _akkaService.ActorSystem;
        if (system == null) return [];

        // Read the state once so leader, members and reachability all come
        // from the same snapshot.
        var state = Cluster.Get(system).State;
        var leader = state.Leader;

        // A member can keep status Up while being unreachable, so reachability
        // has to be checked in addition to the member status.
        var unreachableAddresses = new HashSet<Address>();
        foreach (var member in state.Unreachable)
            unreachableAddresses.Add(member.Address);

        // Track nodes by address rather than hostname so that two nodes on the
        // same host (different ports) are not collapsed into one.
        var reportedAddresses = new HashSet<Address>();
        var nodes = new List<NodeStatus>();

        foreach (var member in state.Members)
        {
            if (!member.HasRole(_siteRole))
                continue;

            var isOnline = member.Status == MemberStatus.Up
                && !unreachableAddresses.Contains(member.Address);
            var role = member.Address.Equals(leader) ? PrimaryRole : StandbyRole;

            nodes.Add(new NodeStatus(HostnameOf(member), isOnline, role));
            reportedAddresses.Add(member.Address);
        }

        // Safety net: report unreachable members that were not part of the
        // member list as offline standby nodes.
        foreach (var unreachable in state.Unreachable)
        {
            if (!unreachable.HasRole(_siteRole))
                continue;

            if (!reportedAddresses.Add(unreachable.Address))
                continue;

            nodes.Add(new NodeStatus(HostnameOf(unreachable), false, StandbyRole));
        }

        return nodes;
    }

    // Host part of the member address, falling back to the full address string.
    private static string HostnameOf(Member member) =>
        member.Address.Host ?? member.Address.ToString();
}
|
||||
@@ -4,6 +4,7 @@ using ScadaLink.DataConnectionLayer;
|
||||
using ScadaLink.ExternalSystemGateway;
|
||||
using ScadaLink.HealthMonitoring;
|
||||
using ScadaLink.Host.Actors;
|
||||
using ScadaLink.Host.Health;
|
||||
using ScadaLink.NotificationService;
|
||||
using ScadaLink.SiteEventLogging;
|
||||
using ScadaLink.SiteRuntime;
|
||||
@@ -42,6 +43,15 @@ public static class SiteServiceRegistration
|
||||
services.AddSingleton<AkkaHostedService>();
|
||||
services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());
|
||||
|
||||
// Cluster node status provider for health reports
|
||||
services.AddSingleton<IClusterNodeProvider>(sp =>
|
||||
{
|
||||
var akkaService = sp.GetRequiredService<AkkaHostedService>();
|
||||
var nodeOptions = sp.GetRequiredService<Microsoft.Extensions.Options.IOptions<NodeOptions>>().Value;
|
||||
var siteRole = $"site-{nodeOptions.SiteId}";
|
||||
return new AkkaClusterNodeProvider(akkaService, siteRole);
|
||||
});
|
||||
|
||||
// Options binding
|
||||
BindSharedOptions(services, config);
|
||||
services.Configure<SiteRuntimeOptions>(config.GetSection("ScadaLink:SiteRuntime"));
|
||||
|
||||
Reference in New Issue
Block a user