test(host): deploy happy-path + idempotency integration tests (Task 59)
DeployHappyPathTests exercises the full deploy pipeline on the 2-node harness:
AdminOperationsActor → ConfigPublishCoordinator → DistributedPubSub →
DriverHostActor on both nodes → ApplyAck → coordinator seals. Verifies both
NodeDeploymentState rows reach Applied and Deployment.Status reaches Sealed.
Exposed + fixed two production bugs along the way:
1. Coordinator was publishing DispatchDeployment on the "deployments" topic but
never subscribed to anything — DriverHostActor ACKs published on the same
topic could not reach it. Added dedicated "deployment-acks" topic with
coordinator subscription in PreStart, and DriverHostActor publishes ACKs
there.
2. NodeId derivation used member.Address.Host only — two cluster members on a
shared loopback host (test harness, dev VMs) collided to one identity. The
coordinator's expected-ack set became {1} and the system sealed after only
half the nodes acked. Switched to host:port everywhere (ClusterRoleInfo +
coordinator) so loopback nodes stay distinct and production identities are
harmlessly more specific.
Tests: 95 v2 tests pass (was 93 + 2 deploy tests), 0 skipped.
Failover scenarios (design §8 cases 3-7: node-kill-mid-apply, split-brain,
restart-during-deploy) deferred — they need controlled node-down primitives
on the harness. Tracked as F22 (failover scenario test cases).
This commit is contained in:
@@ -24,6 +24,7 @@ namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Coordinators;
|
||||
public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
{
|
||||
public const string DeploymentsTopic = "deployments";
|
||||
public const string DeploymentAcksTopic = "deployment-acks";
|
||||
public static readonly TimeSpan DefaultApplyDeadline = TimeSpan.FromMinutes(2);
|
||||
|
||||
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
|
||||
@@ -50,6 +51,7 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
Receive<DispatchDeployment>(HandleDispatch);
|
||||
Receive<ApplyAck>(HandleAck);
|
||||
Receive<DeadlineElapsed>(HandleDeadline);
|
||||
Receive<SubscribeAck>(_ => { /* DPS subscribe confirmation */ });
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -59,6 +61,10 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
/// </summary>
|
||||
protected override void PreStart()
|
||||
{
|
||||
// Subscribe to per-node ApplyAck broadcasts so DriverHostActors on remote members can
|
||||
// route their ACKs to whichever node currently hosts this singleton.
|
||||
DistributedPubSub.Get(Context.System).Mediator.Tell(new Subscribe(DeploymentAcksTopic, Self));
|
||||
|
||||
using var db = _dbFactory.CreateDbContext();
|
||||
var inflight = db.Deployments
|
||||
.Where(d => d.Status == DeploymentStatus.Dispatching || d.Status == DeploymentStatus.AwaitingApplyAcks)
|
||||
@@ -239,7 +245,9 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
if (!member.Roles.Contains("driver")) continue;
|
||||
var host = member.Address.Host;
|
||||
if (string.IsNullOrWhiteSpace(host)) continue;
|
||||
nodes.Add(NodeId.Parse(host));
|
||||
// Match ClusterRoleInfo's NodeId derivation (host:port) so DriverHostActor's
|
||||
// self-identification and the coordinator's expected-ack set agree.
|
||||
nodes.Add(NodeId.Parse($"{host}:{member.Address.Port ?? 0}"));
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
|
||||
public sealed class DriverHostActor : ReceiveActor, IWithTimers
|
||||
{
|
||||
public const string DeploymentsTopic = "deployments";
|
||||
public const string DeploymentAcksTopic = "deployment-acks";
|
||||
public static readonly TimeSpan ReconnectInterval = TimeSpan.FromSeconds(30);
|
||||
|
||||
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
|
||||
@@ -276,9 +277,10 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
|
||||
}
|
||||
else
|
||||
{
|
||||
// No direct coordinator handle — publish back through DistributedPubSub so the
|
||||
// singleton routes it. The coordinator subscribes to its own incoming topic.
|
||||
DistributedPubSub.Get(Context.System).Mediator.Tell(new Publish(DeploymentsTopic, ack));
|
||||
// No direct coordinator handle — publish on the dedicated ACK topic. The coordinator
|
||||
// singleton subscribes there in PreStart so the ACK reaches whichever admin node hosts
|
||||
// it without an actor-path lookup.
|
||||
DistributedPubSub.Get(Context.System).Mediator.Tell(new Publish(DeploymentAcksTopic, ack));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user