6f1f6b8467
CentralHealthAggregator is a per-node hosted singleton, but site health reports flow through ClusterClient which round-robins each report to one central node only. The other node's aggregator never saw those reports and marked sites offline at the 60s threshold — sites constantly flapped between online and offline on the monitoring page. On receive, the active CentralCommunicationActor now republishes a SiteHealthReportReplica wrapper on a DistributedPubSub topic. Both central nodes subscribe to the topic and process replicas through a dedicated path that updates the local aggregator without re-broadcasting (avoids fan-out loops). The aggregator's existing sequence-number idempotency makes self-delivery a cheap no-op. DistributedPubSubExtensionProvider is now listed in the HOCON `akka.extensions` block so the mediator is initialised at cluster start, eliminating a race where the first Subscribe arrived before the extension was loaded.
34 lines
1.3 KiB
C#
34 lines
1.3 KiB
C#
using ScadaLink.Commons.Types.Enums;
|
|
|
|
namespace ScadaLink.Commons.Messages.Health;
|
|
|
|
/// <summary>
/// Immutable message carrying one health report for a single site.
/// All members are positional; the trailing six are optional and fall back to
/// the defaults shown in the signature when a sender omits them.
/// </summary>
/// <param name="SiteId">Identifier of the site the report belongs to.</param>
/// <param name="SequenceNumber">
/// Sequence number of this report. NOTE(review): consumers appear to rely on it
/// for idempotent processing of duplicate deliveries — confirm against the aggregator.
/// </param>
/// <param name="ReportTimestamp">Timestamp attached to the report.</param>
/// <param name="DataConnectionStatuses">
/// Health per data connection. Presumably keyed by connection name — verify against the producer.
/// </param>
/// <param name="TagResolutionCounts">
/// Tag resolution status per string key. Presumably keyed like
/// <paramref name="DataConnectionStatuses"/> — verify.
/// </param>
/// <param name="ScriptErrorCount">Number of script errors reported.</param>
/// <param name="AlarmEvaluationErrorCount">Number of alarm evaluation errors reported.</param>
/// <param name="StoreAndForwardBufferDepths">Store-and-forward buffer depth per string key.</param>
/// <param name="DeadLetterCount">Number of dead letters reported.</param>
/// <param name="DeployedInstanceCount">Number of deployed instances.</param>
/// <param name="EnabledInstanceCount">Number of enabled instances.</param>
/// <param name="DisabledInstanceCount">Number of disabled instances.</param>
/// <param name="NodeRole">Role of the reporting node; defaults to <c>"Unknown"</c>.</param>
/// <param name="NodeHostname">Hostname of the reporting node; defaults to the empty string.</param>
/// <param name="DataConnectionEndpoints">Optional endpoint string per data connection; <c>null</c> when not supplied.</param>
/// <param name="DataConnectionTagQuality">Optional tag quality counts per data connection; <c>null</c> when not supplied.</param>
/// <param name="ParkedMessageCount">Number of parked messages; defaults to <c>0</c>.</param>
/// <param name="ClusterNodes">Optional list of cluster node statuses; <c>null</c> when not supplied.</param>
public record SiteHealthReport(
    string SiteId,
    long SequenceNumber,
    DateTimeOffset ReportTimestamp,
    IReadOnlyDictionary<string, ConnectionHealth> DataConnectionStatuses,
    IReadOnlyDictionary<string, TagResolutionStatus> TagResolutionCounts,
    int ScriptErrorCount,
    int AlarmEvaluationErrorCount,
    IReadOnlyDictionary<string, int> StoreAndForwardBufferDepths,
    int DeadLetterCount,
    int DeployedInstanceCount,
    int EnabledInstanceCount,
    int DisabledInstanceCount,
    string NodeRole = "Unknown",
    string NodeHostname = "",
    IReadOnlyDictionary<string, string>? DataConnectionEndpoints = null,
    IReadOnlyDictionary<string, TagQualityCounts>? DataConnectionTagQuality = null,
    int ParkedMessageCount = 0,
    IReadOnlyList<NodeStatus>? ClusterNodes = null);
|
|
|
|
/// <summary>
/// Envelope exchanged between central nodes so that each node's
/// <c>CentralHealthAggregator</c> ends up with the same view of site health.
/// ClusterClient delivers every incoming <c>SiteHealthReport</c> to only one
/// central node; the receiving node wraps the report in this type and
/// re-publishes it on a DistributedPubSub topic, letting the peer node's
/// aggregator process it as well. Processing is idempotent — sequence numbers
/// prevent a report from being counted twice.
/// </summary>
/// <param name="Report">The site health report being replicated to the peer node.</param>
public record SiteHealthReportReplica(SiteHealthReport Report);
|