test(host): deploy happy-path + idempotency integration tests (Task 59)
DeployHappyPathTests exercises the full deploy pipeline on the 2-node harness:
AdminOperationsActor → ConfigPublishCoordinator → DistributedPubSub →
DriverHostActor on both nodes → ApplyAck → coordinator seals. Verifies both
NodeDeploymentState rows reach Applied and Deployment.Status reaches Sealed.
Exposed and fixed two production bugs along the way:
1. Coordinator was publishing DispatchDeployment on the "deployments" topic but
never subscribed to anything, so the ACKs that DriverHostActor published on
that same topic could not reach it. Added a dedicated "deployment-acks" topic:
the coordinator subscribes to it in PreStart, and DriverHostActor now
publishes its ACKs there.
2. NodeId derivation used member.Address.Host only, so two cluster members on a
shared loopback host (test harness, dev VMs) collapsed into one identity. The
coordinator's expected-ack set then held a single entry, and the coordinator
sealed the deployment after only half the nodes had acked. Switched to
host:port everywhere (ClusterRoleInfo + coordinator) so loopback nodes stay
distinct; in production the port suffix is harmlessly more specific.
Tests: 95 v2 tests pass (93 existing + 2 new deploy tests), 0 skipped.
Failover scenarios (design §8 cases 3-7: node-kill-mid-apply, split-brain,
restart-during-deploy) are deferred because they need controlled node-down
primitives on the harness. Tracked as F22 (failover scenario test cases).
This commit is contained in:
@@ -29,7 +29,10 @@ public sealed class ClusterRoleInfo : IClusterRoleInfo, IDisposable
|
||||
{
|
||||
_cluster = Akka.Cluster.Cluster.Get(system);
|
||||
_logger = logger;
|
||||
_localNode = CommonsNodeId.Parse(options.Value.PublicHostname);
|
||||
// NodeId encodes host:port so cluster members on shared hosts (test loopback, dev VMs
|
||||
// sharing a bind IP) stay distinct. Production hosts have unique DNS names so the port
|
||||
// suffix is harmless redundancy.
|
||||
_localNode = CommonsNodeId.Parse($"{options.Value.PublicHostname}:{options.Value.Port}");
|
||||
_localRoles = new HashSet<string>(options.Value.Roles, StringComparer.Ordinal);
|
||||
|
||||
SeedFromCurrentState();
|
||||
@@ -48,7 +51,7 @@ public sealed class ClusterRoleInfo : IClusterRoleInfo, IDisposable
|
||||
{
|
||||
if (!_membersByRole.TryGetValue(role, out var members)) return Array.Empty<CommonsNodeId>();
|
||||
return members
|
||||
.Select(m => CommonsNodeId.Parse(m.Address.Host ?? string.Empty))
|
||||
.Select(m => ToNodeId(m.Address))
|
||||
.ToArray();
|
||||
}
|
||||
}
|
||||
@@ -58,7 +61,7 @@ public sealed class ClusterRoleInfo : IClusterRoleInfo, IDisposable
|
||||
lock (_lock)
|
||||
{
|
||||
return _roleLeaders.TryGetValue(role, out var leader) && leader is not null
|
||||
? CommonsNodeId.Parse(leader.Address.Host ?? string.Empty)
|
||||
? ToNodeId(leader.Address)
|
||||
: (CommonsNodeId?)null;
|
||||
}
|
||||
}
|
||||
@@ -121,7 +124,7 @@ public sealed class ClusterRoleInfo : IClusterRoleInfo, IDisposable
|
||||
{
|
||||
_roleLeaders.TryGetValue(evt.Role, out var prevMember);
|
||||
if (prevMember is not null)
|
||||
previous = CommonsNodeId.Parse(prevMember.Address.Host ?? string.Empty);
|
||||
previous = ToNodeId(prevMember.Address);
|
||||
|
||||
var nextMember = evt.Leader is null
|
||||
? null
|
||||
@@ -129,7 +132,7 @@ public sealed class ClusterRoleInfo : IClusterRoleInfo, IDisposable
|
||||
|
||||
_roleLeaders[evt.Role] = nextMember;
|
||||
if (nextMember is not null)
|
||||
next = CommonsNodeId.Parse(nextMember.Address.Host ?? string.Empty);
|
||||
next = ToNodeId(nextMember.Address);
|
||||
|
||||
raise = !Nullable.Equals(previous, next);
|
||||
}
|
||||
@@ -150,6 +153,9 @@ public sealed class ClusterRoleInfo : IClusterRoleInfo, IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
private static CommonsNodeId ToNodeId(Akka.Actor.Address address) =>
|
||||
CommonsNodeId.Parse($"{address.Host ?? string.Empty}:{address.Port ?? 0}");
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_subscriber?.Tell(PoisonPill.Instance);
|
||||
|
||||
@@ -24,6 +24,7 @@ namespace ZB.MOM.WW.OtOpcUa.ControlPlane.Coordinators;
|
||||
public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
{
|
||||
public const string DeploymentsTopic = "deployments";
|
||||
public const string DeploymentAcksTopic = "deployment-acks";
|
||||
public static readonly TimeSpan DefaultApplyDeadline = TimeSpan.FromMinutes(2);
|
||||
|
||||
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
|
||||
@@ -50,6 +51,7 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
Receive<DispatchDeployment>(HandleDispatch);
|
||||
Receive<ApplyAck>(HandleAck);
|
||||
Receive<DeadlineElapsed>(HandleDeadline);
|
||||
Receive<SubscribeAck>(_ => { /* DPS subscribe confirmation */ });
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -59,6 +61,10 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
/// </summary>
|
||||
protected override void PreStart()
|
||||
{
|
||||
// Subscribe to per-node ApplyAck broadcasts so DriverHostActors on remote members can
|
||||
// route their ACKs to whichever node currently hosts this singleton.
|
||||
DistributedPubSub.Get(Context.System).Mediator.Tell(new Subscribe(DeploymentAcksTopic, Self));
|
||||
|
||||
using var db = _dbFactory.CreateDbContext();
|
||||
var inflight = db.Deployments
|
||||
.Where(d => d.Status == DeploymentStatus.Dispatching || d.Status == DeploymentStatus.AwaitingApplyAcks)
|
||||
@@ -239,7 +245,9 @@ public sealed class ConfigPublishCoordinator : ReceiveActor, IWithTimers
|
||||
if (!member.Roles.Contains("driver")) continue;
|
||||
var host = member.Address.Host;
|
||||
if (string.IsNullOrWhiteSpace(host)) continue;
|
||||
nodes.Add(NodeId.Parse(host));
|
||||
// Match ClusterRoleInfo's NodeId derivation (host:port) so DriverHostActor's
|
||||
// self-identification and the coordinator's expected-ack set agree.
|
||||
nodes.Add(NodeId.Parse($"{host}:{member.Address.Port ?? 0}"));
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ namespace ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
|
||||
public sealed class DriverHostActor : ReceiveActor, IWithTimers
|
||||
{
|
||||
public const string DeploymentsTopic = "deployments";
|
||||
public const string DeploymentAcksTopic = "deployment-acks";
|
||||
public static readonly TimeSpan ReconnectInterval = TimeSpan.FromSeconds(30);
|
||||
|
||||
private readonly IDbContextFactory<OtOpcUaConfigDbContext> _dbFactory;
|
||||
@@ -276,9 +277,10 @@ public sealed class DriverHostActor : ReceiveActor, IWithTimers
|
||||
}
|
||||
else
|
||||
{
|
||||
// No direct coordinator handle — publish back through DistributedPubSub so the
|
||||
// singleton routes it. The coordinator subscribes to its own incoming topic.
|
||||
DistributedPubSub.Get(Context.System).Mediator.Tell(new Publish(DeploymentsTopic, ack));
|
||||
// No direct coordinator handle — publish on the dedicated ACK topic. The coordinator
|
||||
// singleton subscribes there in PreStart so the ACK reaches whichever admin node hosts
|
||||
// it without an actor-path lookup.
|
||||
DistributedPubSub.Get(Context.System).Mediator.Tell(new Publish(DeploymentAcksTopic, ack));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user