From 02a7e8abc6890a34d55cbb8b778f9c26ab7adf4d Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 23 Mar 2026 14:54:59 -0400 Subject: [PATCH] feat(health): show all cluster nodes (online/offline, primary/standby) in health dashboard Add NodeStatus record, IClusterNodeProvider interface, and AkkaClusterNodeProvider that queries Akka cluster membership for all site-role nodes. HealthReportSender populates ClusterNodes before each report. UI shows a row per node with hostname, Online/Offline badge, and Primary/Standby badge. Falls back to single-node display if ClusterNodes is not populated. --- .../Components/Pages/Monitoring/Health.razor | 24 +++++-- .../Messages/Health/NodeStatus.cs | 3 + .../Messages/Health/SiteHealthReport.cs | 3 +- .../HealthReportSender.cs | 14 ++++- .../IClusterNodeProvider.cs | 12 ++++ .../ISiteHealthCollector.cs | 1 + .../SiteHealthCollector.cs | 6 +- .../Health/AkkaClusterNodeProvider.cs | 62 +++++++++++++++++++ src/ScadaLink.Host/SiteServiceRegistration.cs | 10 +++ 9 files changed, 127 insertions(+), 8 deletions(-) create mode 100644 src/ScadaLink.Commons/Messages/Health/NodeStatus.cs create mode 100644 src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs create mode 100644 src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs diff --git a/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor b/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor index 2aa846a..f57bae8 100644 --- a/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor +++ b/src/ScadaLink.CentralUI/Components/Pages/Monitoring/Health.razor @@ -90,11 +90,25 @@
Nodes
- - - - - + @if (report.ClusterNodes is { Count: > 0 }) + { + @foreach (var node in report.ClusterNodes) + { + + + + + + } + } + else + { + + + + + + }
@(report.NodeHostname != "" ? report.NodeHostname : "Node")@(state.IsOnline ? "Online" : "Offline")@(report.NodeRole == "Active" ? "Primary" : "Standby")
@node.Hostname@(node.IsOnline ? "Online" : "Offline")@node.Role
@(report.NodeHostname != "" ? report.NodeHostname : "Node")@(state.IsOnline ? "Online" : "Offline")@(report.NodeRole == "Active" ? "Primary" : "Standby")
diff --git a/src/ScadaLink.Commons/Messages/Health/NodeStatus.cs b/src/ScadaLink.Commons/Messages/Health/NodeStatus.cs new file mode 100644 index 0000000..0881891 --- /dev/null +++ b/src/ScadaLink.Commons/Messages/Health/NodeStatus.cs @@ -0,0 +1,3 @@ +namespace ScadaLink.Commons.Messages.Health; + +public record NodeStatus(string Hostname, bool IsOnline, string Role); diff --git a/src/ScadaLink.Commons/Messages/Health/SiteHealthReport.cs b/src/ScadaLink.Commons/Messages/Health/SiteHealthReport.cs index 39da31c..793c5fd 100644 --- a/src/ScadaLink.Commons/Messages/Health/SiteHealthReport.cs +++ b/src/ScadaLink.Commons/Messages/Health/SiteHealthReport.cs @@ -19,4 +19,5 @@ public record SiteHealthReport( string NodeHostname = "", IReadOnlyDictionary? DataConnectionEndpoints = null, IReadOnlyDictionary? DataConnectionTagQuality = null, - int ParkedMessageCount = 0); + int ParkedMessageCount = 0, + IReadOnlyList? ClusterNodes = null); diff --git a/src/ScadaLink.HealthMonitoring/HealthReportSender.cs b/src/ScadaLink.HealthMonitoring/HealthReportSender.cs index 2ec652b..6806571 100644 --- a/src/ScadaLink.HealthMonitoring/HealthReportSender.cs +++ b/src/ScadaLink.HealthMonitoring/HealthReportSender.cs @@ -18,6 +18,7 @@ public class HealthReportSender : BackgroundService private readonly ILogger _logger; private readonly string _siteId; private readonly StoreAndForwardStorage? _sfStorage; + private readonly IClusterNodeProvider? _clusterNodeProvider; private long _sequenceNumber; public HealthReportSender( @@ -26,7 +27,8 @@ public class HealthReportSender : BackgroundService IOptions options, ILogger logger, ISiteIdentityProvider siteIdentityProvider, - StoreAndForwardStorage? sfStorage = null) + StoreAndForwardStorage? sfStorage = null, + IClusterNodeProvider? clusterNodeProvider = null) { _collector = collector; _transport = transport; @@ -34,6 +36,7 @@ public class HealthReportSender : BackgroundService _logger = logger; _siteId = siteIdentityProvider.SiteId; _sfStorage = sfStorage; + _clusterNodeProvider = clusterNodeProvider; } /// @@ -58,6 +61,15 @@ public class HealthReportSender : BackgroundService if (!_collector.IsActiveNode) continue; + if (_clusterNodeProvider != null) + { + try + { + _collector.SetClusterNodes(_clusterNodeProvider.GetClusterNodes()); + } + catch { /* Non-fatal */ } + } + if (_sfStorage != null) { try diff --git a/src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs b/src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs new file mode 100644 index 0000000..c5de246 --- /dev/null +++ b/src/ScadaLink.HealthMonitoring/IClusterNodeProvider.cs @@ -0,0 +1,12 @@ +using ScadaLink.Commons.Messages.Health; + +namespace ScadaLink.HealthMonitoring; + +/// +/// Provides cluster node status information for health reporting. +/// Implemented by the Host project which has access to the Akka.NET actor system. +/// +public interface IClusterNodeProvider +{ + IReadOnlyList GetClusterNodes(); +} diff --git a/src/ScadaLink.HealthMonitoring/ISiteHealthCollector.cs b/src/ScadaLink.HealthMonitoring/ISiteHealthCollector.cs index 7a2236e..1210833 100644 --- a/src/ScadaLink.HealthMonitoring/ISiteHealthCollector.cs +++ b/src/ScadaLink.HealthMonitoring/ISiteHealthCollector.cs @@ -21,6 +21,7 @@ public interface ISiteHealthCollector void SetInstanceCounts(int deployed, int enabled, int disabled); void SetParkedMessageCount(int count); void SetNodeHostname(string hostname); + void SetClusterNodes(IReadOnlyList nodes); void SetActiveNode(bool isActive); bool IsActiveNode { get; } SiteHealthReport CollectReport(string siteId); diff --git a/src/ScadaLink.HealthMonitoring/SiteHealthCollector.cs b/src/ScadaLink.HealthMonitoring/SiteHealthCollector.cs index 68cceec..8dc02a7 100644 --- a/src/ScadaLink.HealthMonitoring/SiteHealthCollector.cs +++ b/src/ScadaLink.HealthMonitoring/SiteHealthCollector.cs @@ -21,6 +21,7 @@ public class SiteHealthCollector : ISiteHealthCollector private int _deployedInstanceCount, _enabledInstanceCount, _disabledInstanceCount; private int _parkedMessageCount; private volatile string _nodeHostname = ""; + private volatile IReadOnlyList? _clusterNodes; private volatile bool _isActiveNode; /// @@ -94,6 +95,8 @@ public class SiteHealthCollector : ISiteHealthCollector public void SetNodeHostname(string hostname) => _nodeHostname = hostname; + public void SetClusterNodes(IReadOnlyList nodes) => _clusterNodes = nodes; + /// /// Set the current store-and-forward buffer depths snapshot. /// Called before report collection with data from the S&F service. @@ -159,6 +162,7 @@ public class SiteHealthCollector : ISiteHealthCollector NodeHostname: _nodeHostname, DataConnectionEndpoints: connectionEndpoints, DataConnectionTagQuality: tagQuality, - ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0)); + ParkedMessageCount: Interlocked.CompareExchange(ref _parkedMessageCount, 0, 0), + ClusterNodes: _clusterNodes?.ToList()); } } diff --git a/src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs b/src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs new file mode 100644 index 0000000..6caa325 --- /dev/null +++ b/src/ScadaLink.Host/Health/AkkaClusterNodeProvider.cs @@ -0,0 +1,62 @@ +using Akka.Actor; +using Akka.Cluster; +using ScadaLink.Commons.Messages.Health; +using ScadaLink.HealthMonitoring; +using ScadaLink.Host.Actors; + +namespace ScadaLink.Host.Health; + +/// +/// Provides cluster node statuses from Akka.NET cluster membership for health reporting. +/// +public class AkkaClusterNodeProvider : IClusterNodeProvider +{ + private readonly AkkaHostedService _akkaService; + private readonly string _siteRole; + + public AkkaClusterNodeProvider(AkkaHostedService akkaService, string siteRole) + { + _akkaService = akkaService; + _siteRole = siteRole; + } + + public IReadOnlyList GetClusterNodes() + { + var system = _akkaService.ActorSystem; + if (system == null) return []; + + var cluster = Cluster.Get(system); + var selfAddress = cluster.SelfAddress; + var leader = cluster.State.Leader; + + var nodes = new List(); + foreach (var member in cluster.State.Members) + { + if (!member.HasRole(_siteRole)) + continue; + + var hostname = member.Address.Host ?? member.Address.ToString(); + var isOnline = member.Status == MemberStatus.Up; + var isLeader = member.Address.Equals(leader); + var role = isLeader ? "Primary" : "Standby"; + + nodes.Add(new NodeStatus(hostname, isOnline, role)); + } + + // If we have unreachable members, add them as offline + foreach (var unreachable in cluster.State.Unreachable) + { + if (!unreachable.HasRole(_siteRole)) + continue; + + // Don't duplicate if already in members list + if (nodes.Any(n => n.Hostname == (unreachable.Address.Host ?? unreachable.Address.ToString()))) + continue; + + var hostname = unreachable.Address.Host ?? unreachable.Address.ToString(); + nodes.Add(new NodeStatus(hostname, false, "Standby")); + } + + return nodes; + } +} diff --git a/src/ScadaLink.Host/SiteServiceRegistration.cs b/src/ScadaLink.Host/SiteServiceRegistration.cs index 9031ae9..9567723 100644 --- a/src/ScadaLink.Host/SiteServiceRegistration.cs +++ b/src/ScadaLink.Host/SiteServiceRegistration.cs @@ -4,6 +4,7 @@ using ScadaLink.DataConnectionLayer; using ScadaLink.ExternalSystemGateway; using ScadaLink.HealthMonitoring; using ScadaLink.Host.Actors; +using ScadaLink.Host.Health; using ScadaLink.NotificationService; using ScadaLink.SiteEventLogging; using ScadaLink.SiteRuntime; @@ -42,6 +43,15 @@ public static class SiteServiceRegistration services.AddSingleton(); services.AddHostedService(sp => sp.GetRequiredService()); + // Cluster node status provider for health reports + services.AddSingleton(sp => + { + var akkaService = sp.GetRequiredService(); + var nodeOptions = sp.GetRequiredService>().Value; + var siteRole = $"site-{nodeOptions.SiteId}"; + return new AkkaClusterNodeProvider(akkaService, siteRole); + }); + // Options binding BindSharedOptions(services, config); services.Configure(config.GetSection("ScadaLink:SiteRuntime"));