Files
scadalink-design/tests/ScadaLink.Communication.Tests/CentralCommunicationActorTests.cs
Joseph Doherty f66dc031a4 fix(health): route site heartbeats into the aggregator
CentralCommunicationActor.HandleHeartbeat was forwarding each incoming
HeartbeatMessage to Context.Parent, which resolves to the /user
guardian — a non-actor. Every site heartbeat went straight to dead
letters (~1026 per central node per 30 minutes at the default ~2s
interval across three sites).

The aggregator now exposes MarkHeartbeat(siteId, receivedAt) which
bumps LastReportReceivedAt on already-known sites (and clears IsOnline
if it had flipped) without touching LatestReport. Heartbeats from
unregistered sites are dropped — first registration still happens on
the first full report. CentralCommunicationActor calls this in place
of the no-op Tell.

The result: heartbeats now serve their stated health-monitoring
purpose (per CLAUDE.md) by keeping a site marked online between the
30s full reports if a single report is briefly delayed, and the dead
letter noise disappears entirely.
2026-05-13 08:11:43 -04:00

219 lines
8.5 KiB
C#

using System.Collections.Immutable;
using Akka.Actor;
using Akka.Cluster.Tools.Client;
using Akka.TestKit.Xunit2;
using Microsoft.Extensions.DependencyInjection;
using NSubstitute;
using ScadaLink.Commons.Entities.Sites;
using ScadaLink.Commons.Interfaces.Repositories;
using ScadaLink.Commons.Messages.Communication;
using ScadaLink.Commons.Messages.Deployment;
using ScadaLink.Commons.Messages.DebugView;
using ScadaLink.Commons.Messages.Health;
using ScadaLink.Communication.Actors;
using ScadaLink.HealthMonitoring;
using Akka.TestKit;
namespace ScadaLink.Communication.Tests;
/// <summary>
/// Tests for CentralCommunicationActor with per-site ClusterClient routing.
/// WP-4: Message routing via ClusterClient instances created per site.
/// WP-5: Connection failure and failover handling.
/// </summary>
public class CentralCommunicationActorTests : TestKit
{
    /// <summary>
    /// Fixed pause used by tests that must let the actor finish loading site addresses
    /// before they send it messages.
    /// </summary>
    private static readonly TimeSpan SiteRefreshSettleTime = TimeSpan.FromMilliseconds(1000);

    public CentralCommunicationActorTests() : base(@"akka.loglevel = DEBUG") { }

    /// <summary>
    /// Blocks the test thread for <see cref="SiteRefreshSettleTime"/> so the actor's
    /// site-address refresh can complete. The original inline notes state that PreStart
    /// schedules the first refresh with a zero initial delay.
    /// </summary>
    /// <remarks>
    /// NOTE(review): a fixed sleep is timing-sensitive; replace with polling once the actor
    /// exposes an observable "addresses loaded" signal.
    /// </remarks>
    private static void WaitForSiteRefresh() => Thread.Sleep(SiteRefreshSettleTime);

    /// <summary>
    /// Creates a <see cref="CentralCommunicationActor"/> backed by a substitute repository that
    /// returns <paramref name="sites"/> (or an empty list when null).
    /// </summary>
    /// <param name="sites">Sites the substitute repository should return; null means none.</param>
    /// <returns>
    /// The actor, the substitute repository, and the dictionary that collects one
    /// <see cref="TestProbe"/> per site id as the client factory is invoked.
    /// </returns>
    private (IActorRef actor, ISiteRepository mockRepo, Dictionary<string, TestProbe> siteProbes) CreateActorWithMockRepo(
        IEnumerable<Site>? sites = null)
    {
        // Same wiring as CreateActorWithFactory; this overload simply hides the factory.
        var (actor, mockRepo, siteProbes, _) = CreateActorWithFactory(sites);
        return (actor, mockRepo, siteProbes);
    }

    /// <summary>
    /// Creates a <see cref="CentralCommunicationActor"/> and also exposes the substitute
    /// <see cref="ISiteClientFactory"/> so tests can verify how it was called.
    /// </summary>
    /// <param name="sites">Sites the substitute repository should return; null means none.</param>
    /// <returns>
    /// The actor, the substitute repository, the per-site probe dictionary, and the substitute factory.
    /// </returns>
    private (IActorRef actor, ISiteRepository mockRepo, Dictionary<string, TestProbe> siteProbes, ISiteClientFactory mockFactory) CreateActorWithFactory(
        IEnumerable<Site>? sites = null)
    {
        var mockRepo = Substitute.For<ISiteRepository>();
        mockRepo.GetAllSitesAsync(Arg.Any<CancellationToken>())
            .Returns(sites?.ToList() ?? new List<Site>());

        var services = new ServiceCollection();
        services.AddScoped(_ => mockRepo);
        var sp = services.BuildServiceProvider();

        // Each factory call hands back a fresh TestProbe standing in for the site's
        // ClusterClient, recorded under the site id passed as the second argument.
        var siteProbes = new Dictionary<string, TestProbe>();
        var mockFactory = Substitute.For<ISiteClientFactory>();
        mockFactory.Create(Arg.Any<ActorSystem>(), Arg.Any<string>(), Arg.Any<ImmutableHashSet<ActorPath>>())
            .Returns(callInfo =>
            {
                var siteId = callInfo.ArgAt<string>(1);
                var probe = CreateTestProbe();
                siteProbes[siteId] = probe;
                return probe.Ref;
            });

        var actor = Sys.ActorOf(Props.Create(() => new CentralCommunicationActor(sp, mockFactory)));
        return (actor, mockRepo, siteProbes, mockFactory);
    }

    /// <summary>
    /// Builds a <see cref="Site"/> named "Test Site" with the given identifier and node addresses.
    /// </summary>
    private static Site CreateSite(string identifier, string? nodeAAddress, string? nodeBAddress = null) =>
        new("Test Site", identifier) { NodeAAddress = nodeAAddress, NodeBAddress = nodeBAddress };

    /// <summary>
    /// A <see cref="SiteEnvelope"/> for a configured site is forwarded to that site's client
    /// as a <see cref="ClusterClient.Send"/> addressed to "/user/site-communication".
    /// </summary>
    [Fact]
    public void ClusterClientRouting_RoutesToConfiguredSite()
    {
        var site = CreateSite("site1", "akka.tcp://scadalink@host:8082");
        var (actor, _, siteProbes) = CreateActorWithMockRepo(new[] { site });

        WaitForSiteRefresh();

        var command = new DeployInstanceCommand(
            "dep1", "inst1", "hash1", "{}", "admin", DateTimeOffset.UtcNow);
        actor.Tell(new SiteEnvelope("site1", command));

        // The site1 probe (acting as ClusterClient) should receive a ClusterClient.Send
        var msg = siteProbes["site1"].ExpectMsg<ClusterClient.Send>();
        Assert.Equal("/user/site-communication", msg.Path);

        // Assert.IsType returns the typed value, so no separate cast is needed.
        var routed = Assert.IsType<DeployInstanceCommand>(msg.Message);
        Assert.Equal("dep1", routed.DeploymentId);
    }

    /// <summary>
    /// An envelope addressed to a site the repository does not know produces no message
    /// back to the test actor.
    /// </summary>
    [Fact]
    public void UnconfiguredSite_MessageIsDropped()
    {
        var (actor, _, _) = CreateActorWithMockRepo();

        WaitForSiteRefresh();

        var command = new DeployInstanceCommand(
            "dep1", "inst1", "hash1", "{}", "admin", DateTimeOffset.UtcNow);
        actor.Tell(new SiteEnvelope("unknown-site", command));

        ExpectNoMsg(TimeSpan.FromMilliseconds(200));
    }

    /// <summary>
    /// When a site is reported as disconnected, a subscriber that previously requested a
    /// debug view for that site receives <see cref="DebugStreamTerminated"/>.
    /// </summary>
    [Fact]
    public void ConnectionLost_DebugStreamsKilled()
    {
        var site = CreateSite("site1", "akka.tcp://scadalink@host:8082");
        var (actor, _, siteProbes) = CreateActorWithMockRepo(new[] { site });

        WaitForSiteRefresh();

        // Subscribe to debug view (tracks the subscription)
        var subscriberProbe = CreateTestProbe();
        var subRequest = new SubscribeDebugViewRequest("inst1", "corr-123");
        actor.Tell(new SiteEnvelope("site1", subRequest), subscriberProbe.Ref);

        // The ClusterClient probe receives the routed message
        siteProbes["site1"].ExpectMsg<ClusterClient.Send>();

        // Simulate site disconnection
        actor.Tell(new ConnectionStateChanged("site1", false, DateTimeOffset.UtcNow));

        // The subscriber should receive a DebugStreamTerminated notification
        subscriberProbe.ExpectMsg<DebugStreamTerminated>(
            msg => msg.SiteId == "site1" && msg.CorrelationId == "corr-123");
    }

    /// <summary>
    /// A <see cref="HeartbeatMessage"/> results in exactly one
    /// <c>MarkHeartbeat(siteId, timestamp)</c> call on the registered aggregator.
    /// </summary>
    [Fact]
    public void Heartbeat_BumpsAggregatorTimestamp()
    {
        var mockRepo = Substitute.For<ISiteRepository>();
        mockRepo.GetAllSitesAsync(Arg.Any<CancellationToken>())
            .Returns(new List<Site>());
        var aggregator = Substitute.For<ICentralHealthAggregator>();

        // The aggregator is registered in the provider handed to the actor.
        var services = new ServiceCollection();
        services.AddScoped(_ => mockRepo);
        services.AddSingleton(aggregator);
        var sp = services.BuildServiceProvider();

        var siteClientFactory = Substitute.For<ISiteClientFactory>();
        var centralActor = Sys.ActorOf(
            Props.Create(() => new CentralCommunicationActor(sp, siteClientFactory)));

        var timestamp = DateTimeOffset.UtcNow;
        centralActor.Tell(new HeartbeatMessage("site1", "host1", true, timestamp));

        // Message handling is asynchronous, so poll until the call has been recorded.
        AwaitAssert(() => aggregator.Received(1).MarkHeartbeat("site1", timestamp));
    }

    /// <summary>
    /// After <see cref="RefreshSiteAddresses"/> is sent, a site newly returned by the
    /// repository becomes routable.
    /// </summary>
    [Fact]
    public void RefreshSiteAddresses_UpdatesCache()
    {
        var site1 = CreateSite("site1", "akka.tcp://scadalink@host1:8082");
        var (actor, mockRepo, siteProbes) = CreateActorWithMockRepo(new[] { site1 });

        // Wait for initial load
        WaitForSiteRefresh();

        // Verify routing to site1 works
        var cmd1 = new DeployInstanceCommand(
            "dep1", "inst1", "hash1", "{}", "admin", DateTimeOffset.UtcNow);
        actor.Tell(new SiteEnvelope("site1", cmd1));
        var msg1 = siteProbes["site1"].ExpectMsg<ClusterClient.Send>();
        var routed1 = Assert.IsType<DeployInstanceCommand>(msg1.Message);
        Assert.Equal("dep1", routed1.DeploymentId);

        // Update mock repo to return both sites
        var site2 = CreateSite("site2", "akka.tcp://scadalink@host2:8082");
        mockRepo.GetAllSitesAsync(Arg.Any<CancellationToken>())
            .Returns(new List<Site> { site1, site2 });

        // Refresh again
        actor.Tell(new RefreshSiteAddresses());
        WaitForSiteRefresh();

        // Verify routing to site2 now works
        var cmd2 = new DeployInstanceCommand(
            "dep2", "inst2", "hash2", "{}", "admin", DateTimeOffset.UtcNow);
        actor.Tell(new SiteEnvelope("site2", cmd2));
        var msg2 = siteProbes["site2"].ExpectMsg<ClusterClient.Send>();
        var routed2 = Assert.IsType<DeployInstanceCommand>(msg2.Message);
        Assert.Equal("dep2", routed2.DeploymentId);
    }

    /// <summary>
    /// A site with both node addresses set causes a single client-factory call that
    /// carries two contact paths.
    /// </summary>
    [Fact]
    public void BothContactPoints_UsedInSingleClient()
    {
        var site = CreateSite("site1",
            "akka.tcp://scadalink@host1:8082",
            "akka.tcp://scadalink@host2:8082");
        var (_, _, _, mockFactory) = CreateActorWithFactory(new[] { site });

        // Poll for the factory call instead of sleeping a fixed interval; the verification
        // is retried until it passes or the TestKit timeout elapses.
        AwaitAssert(() =>
        {
            mockFactory.Received(1).Create(
                Arg.Any<ActorSystem>(),
                Arg.Is("site1"),
                Arg.Is<ImmutableHashSet<ActorPath>>(paths => paths.Count == 2));
        });
    }
}