test(host): deploy happy-path + idempotency integration tests (Task 59)

DeployHappyPathTests exercises the full deploy pipeline on the 2-node harness:
AdminOperationsActor → ConfigPublishCoordinator → DistributedPubSub →
DriverHostActor on both nodes → ApplyAck → coordinator seals. Verifies both
NodeDeploymentState rows reach Applied and Deployment.Status reaches Sealed.

Exposed + fixed two production bugs along the way:

1. Coordinator was publishing DispatchDeployment on the "deployments" topic but
   never subscribed to anything — DriverHostActor ACKs published on the same
   topic could not reach it. Added a dedicated "deployment-acks" topic: the
   coordinator now subscribes to it in PreStart, and DriverHostActor publishes
   its ACKs there.

2. NodeId derivation used member.Address.Host only — two cluster members on a
   shared loopback host (test harness, dev VMs) collided to one identity. The
   coordinator's expected-ack set collapsed to a single entry, so the
   coordinator sealed the deployment after only half the nodes had acked.
   Switched to host:port everywhere (ClusterRoleInfo + coordinator) so loopback
   nodes stay distinct and production identities are harmlessly more specific.

Tests: 95 v2 tests pass (93 existing + 2 new deploy tests), 0 skipped.

Failover scenarios (design §8 cases 3-7: node-kill-mid-apply, split-brain,
restart-during-deploy) deferred — they need controlled node-down primitives
on the harness. Tracked as F22 (failover scenario test cases).
This commit is contained in:
Joseph Doherty
2026-05-26 06:34:36 -04:00
parent 62e3cd6599
commit 5cfbe8b5dd
5 changed files with 158 additions and 18 deletions

View File

@@ -24,6 +24,7 @@ namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Coordinators;
public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
{
public const string DeploymentsTopic = "deployments";
public const string DeploymentAcksTopic = "deployment-acks";
public static readonly TimeSpan DefaultApplyDeadline = TimeSpan.FromMinutes(2);
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
@@ -50,6 +51,7 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
Receive<DispatchDeployment>(HandleDispatch);
Receive<ApplyAck>(HandleAck);
Receive<DeadlineElapsed>(HandleDeadline);
Receive<SubscribeAck>(_ => { /* DPS subscribe confirmation */ });
}
/// <summary>
@@ -59,6 +61,10 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
/// </summary>
protected override void PreStart()
{
// Subscribe to per-node ApplyAck broadcasts so DriverHostActors on remote members can
// route their ACKs to whichever node currently hosts this singleton.
DistributedPubSub.Get(Context.System).Mediator.Tell(new Subscribe(DeploymentAcksTopic, Self));
using var db = _dbFactory.CreateDbContext();
var inflight = db.Deployments
.Where(d => d.Status == DeploymentStatus.Dispatching || d.Status == DeploymentStatus.AwaitingApplyAcks)
@@ -239,7 +245,9 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
if (!member.Roles.Contains("driver")) continue;
var host = member.Address.Host;
if (string.IsNullOrWhiteSpace(host)) continue;
nodes.Add(NodeId.Parse(host));
// Match ClusterRoleInfo's NodeId derivation (host:port) so DriverHostActor's
// self-identification and the coordinator's expected-ack set agree.
nodes.Add(NodeId.Parse($"{host}:{member.Address.Port ?? 0}"));
}
return nodes;
}

View File

@@ -30,6 +30,7 @@ namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
public sealed class DriverHostActor : ReceiveActor, IWithTimers
{
public const string DeploymentsTopic = "deployments";
public const string DeploymentAcksTopic = "deployment-acks";
public static readonly TimeSpan ReconnectInterval = TimeSpan.FromSeconds(30);
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
@@ -276,9 +277,10 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
}
else
{
// No direct coordinator handle — publish back through DistributedPubSub so the
// singleton routes it. The coordinator subscribes to its own incoming topic.
DistributedPubSub.Get(Context.System).Mediator.Tell(new Publish(DeploymentsTopic, ack));
// No direct coordinator handle — publish on the dedicated ACK topic. The coordinator
// singleton subscribes there in PreStart so the ACK reaches whichever admin node hosts
// it without an actor-path lookup.
DistributedPubSub.Get(Context.System).Mediator.Tell(new Publish(DeploymentAcksTopic, ack));
}
}
}