fix(redundancy): periodic heartbeat re-publish so late subscribers learn their role

This commit is contained in:
Joseph Doherty
2026-06-11 10:06:46 -04:00
parent e241332a24
commit 7891e28b52
2 changed files with 62 additions and 4 deletions
@@ -43,6 +43,29 @@ public sealed class RedundancyStateActorTests : ControlPlaneActorTestBase
probe.ExpectNoMsg(TimeSpan.FromMilliseconds(500));
}
/// <summary>
/// Guards the periodic heartbeat: with a 300 ms heartbeat interval configured and no cluster
/// event sent by the test, the actor must still hand a second <c>RedundancyStateChanged</c> to the
/// broadcast callback. This is what lets a subscriber that missed the change-driven publish
/// catch up within one heartbeat interval. The first message received is expected to be the
/// self-join publish; the second one is the evidence that the recurring heartbeat fired.
/// </summary>
[Fact]
public void Heartbeat_republishes_current_snapshot_without_cluster_events()
{
    // Each publish gets the same generous window; the heartbeat itself ticks far faster.
    var publishTimeout = TimeSpan.FromSeconds(2);
    var listener = CreateTestProbe("heartbeat-listener");

    // Every snapshot the actor broadcasts is forwarded straight to the probe.
    var heartbeatProps = RedundancyStateActor.Props(
        broadcast: msg => listener.Ref.Tell(msg),
        heartbeatInterval: TimeSpan.FromMilliseconds(300));
    Sys.ActorOf(heartbeatProps, "redundancy-heartbeat");

    // Publish #1: the change-driven snapshot emitted on self-join.
    listener.ExpectMsg<RedundancyStateChanged>(publishTimeout);

    // Publish #2: the test sends NO cluster event here, so this must be the heartbeat re-publish.
    listener.ExpectMsg<RedundancyStateChanged>(publishTimeout);
}
/// <summary>
/// Regression guard: the snapshot node id MUST be the canonical <c>host:port</c> form (matching
/// ClusterRoleInfo.LocalNode/ToNodeId), or every consumer's