feat(health): show all cluster nodes (online/offline, primary/standby) in health dashboard
Add NodeStatus record, IClusterNodeProvider interface, and AkkaClusterNodeProvider that queries Akka cluster membership for all site-role nodes. HealthReportSender populates ClusterNodes before each report. UI shows a row per node with hostname, Online/Offline badge, and Primary/Standby badge. Falls back to single-node display if ClusterNodes is not populated.
This commit is contained in:
@@ -90,11 +90,25 @@
|
|||||||
<h6 class="text-muted mb-2 border-bottom pb-1">Nodes</h6>
|
<h6 class="text-muted mb-2 border-bottom pb-1">Nodes</h6>
|
||||||
<table class="table table-sm table-borderless mb-0">
|
<table class="table table-sm table-borderless mb-0">
|
||||||
<tbody>
|
<tbody>
|
||||||
|
@if (report.ClusterNodes is { Count: > 0 })
|
||||||
|
{
|
||||||
|
@foreach (var node in report.ClusterNodes)
|
||||||
|
{
|
||||||
|
<tr>
|
||||||
|
<td class="small">@node.Hostname</td>
|
||||||
|
<td><span class="badge @(node.IsOnline ? "bg-success" : "bg-danger")">@(node.IsOnline ? "Online" : "Offline")</span></td>
|
||||||
|
<td><span class="badge @(node.Role == "Primary" ? "bg-primary" : "bg-secondary")">@node.Role</span></td>
|
||||||
|
</tr>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
<tr>
|
<tr>
|
||||||
<td class="small">@(report.NodeHostname != "" ? report.NodeHostname : "Node")</td>
|
<td class="small">@(report.NodeHostname != "" ? report.NodeHostname : "Node")</td>
|
||||||
<td><span class="badge @(state.IsOnline ? "bg-success" : "bg-danger")">@(state.IsOnline ? "Online" : "Offline")</span></td>
|
<td><span class="badge @(state.IsOnline ? "bg-success" : "bg-danger")">@(state.IsOnline ? "Online" : "Offline")</span></td>
|
||||||
<td><span class="badge @(report.NodeRole == "Active" ? "bg-primary" : "bg-secondary")">@(report.NodeRole == "Active" ? "Primary" : "Standby")</span></td>
|
<td><span class="badge @(report.NodeRole == "Active" ? "bg-primary" : "bg-secondary")">@(report.NodeRole == "Active" ? "Primary" : "Standby")</span></td>
|
||||||
</tr>
|
</tr>
|
||||||
|
}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
3
src/ScadaLink.Commons/Messages/Health/NodeStatus.cs
Normal file
3
src/ScadaLink.Commons/Messages/Health/NodeStatus.cs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
namespace ScadaLink.Commons.Messages.Health;
|
||||||
|
|
||||||
|
public record NodeStatus(string Hostname, bool IsOnline, string Role);
|
||||||
@@ -19,4 +19,5 @@ public record SiteHealthReport(
|
|||||||
string NodeHostname = "",
|
string NodeHostname = "",
|
||||||
IReadOnlyDictionary<string, string>? DataConnectionEndpoints = null,
|
IReadOnlyDictionary<string, string>? DataConnectionEndpoints = null,
|
||||||
IReadOnlyDictionary<string, TagQualityCounts>? DataConnectionTagQuality = null,
|
IReadOnlyDictionary<string, TagQualityCounts>? DataConnectionTagQuality = null,
|
||||||
int ParkedMessageCount = 0);
|
int ParkedMessageCount = 0,
|
||||||
|
IReadOnlyList<NodeStatus>? ClusterNodes = null);
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ public class HealthReportSender : BackgroundService
|
|||||||
private readonly ILogger<HealthReportSender> _logger;
|
private readonly ILogger<HealthReportSender> _logger;
|
||||||
private readonly string _siteId;
|
private readonly string _siteId;
|
||||||
private readonly StoreAndForwardStorage? _sfStorage;
|
private readonly StoreAndForwardStorage? _sfStorage;
|
||||||
|
private readonly IClusterNodeProvider? _clusterNodeProvider;
|
||||||
private long _sequenceNumber;
|
private long _sequenceNumber;
|
||||||
|
|
||||||
public HealthReportSender(
|
public HealthReportSender(
|
||||||
@@ -26,7 +27,8 @@ public class HealthReportSender : BackgroundService
|
|||||||
IOptions<HealthMonitoringOptions> options,
|
IOptions<HealthMonitoringOptions> options,
|
||||||
ILogger<HealthReportSender> logger,
|
ILogger<HealthReportSender> logger,
|
||||||
ISiteIdentityProvider siteIdentityProvider,
|
ISiteIdentityProvider siteIdentityProvider,
|
||||||
StoreAndForwardStorage? sfStorage = null)
|
StoreAndForwardStorage? sfStorage = null,
|
||||||
|
IClusterNodeProvider? clusterNodeProvider = null)
|
||||||
{
|
{
|
||||||
_collector = collector;
|
_collector = collector;
|
||||||
_transport = transport;
|
_transport = transport;
|
||||||
@@ -34,6 +36,7 @@ public class HealthReportSender : BackgroundService
|
|||||||
_logger = logger;
|
_logger = logger;
|
||||||
_siteId = siteIdentityProvider.SiteId;
|
_siteId = siteIdentityProvider.SiteId;
|
||||||
_sfStorage = sfStorage;
|
_sfStorage = sfStorage;
|
||||||
|
_clusterNodeProvider = clusterNodeProvider;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -58,6 +61,15 @@ public class HealthReportSender : BackgroundService
|
|||||||
if (!_collector.IsActiveNode)
|
if (!_collector.IsActiveNode)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (_clusterNodeProvider != null)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
_collector.SetClusterNodes(_clusterNodeProvider.GetClusterNodes());
|
||||||
|
}
|
||||||
|
catch { /* Non-fatal */ }
|
||||||
|
}
|
||||||
|
|
||||||
if (_sfStorage != null)
|
if (_sfStorage != null)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
|
|||||||
12
src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs
Normal file
12
src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
using ScadaLink.Commons.Messages.Health;
|
||||||
|
|
||||||
|
namespace ScadaLink.HealthMonitoring;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Provides cluster node status information for health reporting.
|
||||||
|
/// Implemented by the Host project which has access to the Akka.NET actor system.
|
||||||
|
/// </summary>
|
||||||
|
public interface IClusterNodeProvider
|
||||||
|
{
|
||||||
|
IReadOnlyList<NodeStatus> GetClusterNodes();
|
||||||
|
}
|
||||||
@@ -21,6 +21,7 @@ public interface ISiteHealthCollector
|
|||||||
void SetInstanceCounts(int deployed, int enabled, int disabled);
|
void SetInstanceCounts(int deployed, int enabled, int disabled);
|
||||||
void SetParkedMessageCount(int count);
|
void SetParkedMessageCount(int count);
|
||||||
void SetNodeHostname(string hostname);
|
void SetNodeHostname(string hostname);
|
||||||
|
void SetClusterNodes(IReadOnlyList<Commons.Messages.Health.NodeStatus> nodes);
|
||||||
void SetActiveNode(bool isActive);
|
void SetActiveNode(bool isActive);
|
||||||
bool IsActiveNode { get; }
|
bool IsActiveNode { get; }
|
||||||
SiteHealthReport CollectReport(string siteId);
|
SiteHealthReport CollectReport(string siteId);
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
|
private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount;
|
||||||
private int _parkedMessageCount;
|
private int _parkedMessageCount;
|
||||||
private volatile string _nodeHostname = "";
|
private volatile string _nodeHostname = "";
|
||||||
|
private volatile IReadOnlyList<Commons.Messages.Health.NodeStatus>? _clusterNodes;
|
||||||
private volatile bool _isActiveNode;
|
private volatile bool _isActiveNode;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -94,6 +95,8 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
|
|
||||||
public void SetNodeHostname(string hostname) => _nodeHostname = hostname;
|
public void SetNodeHostname(string hostname) => _nodeHostname = hostname;
|
||||||
|
|
||||||
|
public void SetClusterNodes(IReadOnlyList<Commons.Messages.Health.NodeStatus> nodes) => _clusterNodes = nodes;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Set the current store-and-forward buffer depths snapshot.
|
/// Set the current store-and-forward buffer depths snapshot.
|
||||||
/// Called before report collection with data from the S&F service.
|
/// Called before report collection with data from the S&F service.
|
||||||
@@ -159,6 +162,7 @@ public class SiteHealthCollector : ISiteHealthCollector
|
|||||||
NodeHostname: _nodeHostname,
|
NodeHostname: _nodeHostname,
|
||||||
DataConnectionEndpoints: connectionEndpoints,
|
DataConnectionEndpoints: connectionEndpoints,
|
||||||
DataConnectionTagQuality: tagQuality,
|
DataConnectionTagQuality: tagQuality,
|
||||||
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0));
|
ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0),
|
||||||
|
ClusterNodes: _clusterNodes?.ToList());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
62
src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs
Normal file
62
src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
using Akka.Actor;
|
||||||
|
using Akka.Cluster;
|
||||||
|
using ScadaLink.Commons.Messages.Health;
|
||||||
|
using ScadaLink.HealthMonitoring;
|
||||||
|
using ScadaLink.Host.Actors;
|
||||||
|
|
||||||
|
namespace ScadaLink.Host.Health;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Provides cluster node statuses from Akka.NET cluster membership for health reporting.
|
||||||
|
/// </summary>
|
||||||
|
public class AkkaClusterNodeProvider : IClusterNodeProvider
|
||||||
|
{
|
||||||
|
private readonly AkkaHostedService _akkaService;
|
||||||
|
private readonly string _siteRole;
|
||||||
|
|
||||||
|
public AkkaClusterNodeProvider(AkkaHostedService akkaService, string siteRole)
|
||||||
|
{
|
||||||
|
_akkaService = akkaService;
|
||||||
|
_siteRole = siteRole;
|
||||||
|
}
|
||||||
|
|
||||||
|
public IReadOnlyList<NodeStatus> GetClusterNodes()
|
||||||
|
{
|
||||||
|
var system = _akkaService.ActorSystem;
|
||||||
|
if (system == null) return [];
|
||||||
|
|
||||||
|
var cluster = Cluster.Get(system);
|
||||||
|
var selfAddress = cluster.SelfAddress;
|
||||||
|
var leader = cluster.State.Leader;
|
||||||
|
|
||||||
|
var nodes = new List<NodeStatus>();
|
||||||
|
foreach (var member in cluster.State.Members)
|
||||||
|
{
|
||||||
|
if (!member.HasRole(_siteRole))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
var hostname = member.Address.Host ?? member.Address.ToString();
|
||||||
|
var isOnline = member.Status == MemberStatus.Up;
|
||||||
|
var isLeader = member.Address.Equals(leader);
|
||||||
|
var role = isLeader ? "Primary" : "Standby";
|
||||||
|
|
||||||
|
nodes.Add(new NodeStatus(hostname, isOnline, role));
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have unreachable members, add them as offline
|
||||||
|
foreach (var unreachable in cluster.State.Unreachable)
|
||||||
|
{
|
||||||
|
if (!unreachable.HasRole(_siteRole))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Don't duplicate if already in members list
|
||||||
|
if (nodes.Any(n => n.Hostname == (unreachable.Address.Host ?? unreachable.Address.ToString())))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
var hostname = unreachable.Address.Host ?? unreachable.Address.ToString();
|
||||||
|
nodes.Add(new NodeStatus(hostname, false, "Standby"));
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -4,6 +4,7 @@ using ScadaLink.DataConnectionLayer;
|
|||||||
using ScadaLink.ExternalSystemGateway;
|
using ScadaLink.ExternalSystemGateway;
|
||||||
using ScadaLink.HealthMonitoring;
|
using ScadaLink.HealthMonitoring;
|
||||||
using ScadaLink.Host.Actors;
|
using ScadaLink.Host.Actors;
|
||||||
|
using ScadaLink.Host.Health;
|
||||||
using ScadaLink.NotificationService;
|
using ScadaLink.NotificationService;
|
||||||
using ScadaLink.SiteEventLogging;
|
using ScadaLink.SiteEventLogging;
|
||||||
using ScadaLink.SiteRuntime;
|
using ScadaLink.SiteRuntime;
|
||||||
@@ -42,6 +43,15 @@ public static class SiteServiceRegistration
|
|||||||
services.AddSingleton<AkkaHostedService>();
|
services.AddSingleton<AkkaHostedService>();
|
||||||
services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());
|
services.AddHostedService(sp => sp.GetRequiredService<AkkaHostedService>());
|
||||||
|
|
||||||
|
// Cluster node status provider for health reports
|
||||||
|
services.AddSingleton<IClusterNodeProvider>(sp =>
|
||||||
|
{
|
||||||
|
var akkaService = sp.GetRequiredService<AkkaHostedService>();
|
||||||
|
var nodeOptions = sp.GetRequiredService<Microsoft.Extensions.Options.IOptions<NodeOptions>>().Value;
|
||||||
|
var siteRole = $"site-{nodeOptions.SiteId}";
|
||||||
|
return new AkkaClusterNodeProvider(akkaService, siteRole);
|
||||||
|
});
|
||||||
|
|
||||||
// Options binding
|
// Options binding
|
||||||
BindSharedOptions(services, config);
|
BindSharedOptions(services, config);
|
||||||
services.Configure<SiteRuntimeOptions>(config.GetSection("ScadaLink:SiteRuntime"));
|
services.Configure<SiteRuntimeOptions>(config.GetSection("ScadaLink:SiteRuntime"));
|
||||||
|
|||||||
Reference in New Issue
Block a user