test(host): deploy happy-path + idempotency integration tests (Task 59)

DeployHappyPathTests exercises the full deploy pipeline on the 2-node harness:
AdminOperationsActor → ConfigPublishCoordinator → DistributedPubSub →
DriverHostActor on both nodes → ApplyAck → coordinator seals. Verifies both
NodeDeploymentState rows reach Applied and Deployment.Status reaches Sealed.

Exposed + fixed two production bugs along the way:

1. Coordinator was publishing DispatchDeployment on the "deployments" topic but
   never subscribed to anything — DriverHostActor ACKs published on the same
   topic could not reach it. Added dedicated "deployment-acks" topic with
   coordinator subscription in PreStart, and DriverHostActor publishes ACKs
   there.

2. NodeId derivation used member.Address.Host only — two cluster members on a
   shared loopback host (test harness, dev VMs) collided to one identity. The
   coordinator's expected-ack set became {1} and the system sealed after only
   half the nodes acked. Switched to host:port everywhere (ClusterRoleInfo +
   coordinator) so loopback nodes stay distinct and production identities are
   harmlessly more specific.

Tests: 95 v2 tests pass (was 93 + 2 deploy tests), 0 skipped.

Failover scenarios (design §8 cases 3-7: node-kill-mid-apply, split-brain,
restart-during-deploy) deferred — they need controlled node-down primitives
on the harness. Tracked as F22 (failover scenario test cases).
This commit is contained in:
Joseph Doherty
2026-05-26 06:34:36 -04:00
parent 62e3cd6599
commit 5cfbe8b5dd
5 changed files with 158 additions and 18 deletions

View File

@@ -0,0 +1,115 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
/// <summary>
/// End-to-end deploy: AdminOperationsActor → ConfigPublishCoordinator → DriverHostActor on both
/// nodes → ApplyAck → coordinator seals. Verifies the cluster singleton + DistributedPubSub
/// dispatch path actually works against the real Akka cluster the harness builds.
/// </summary>
public sealed class DeployHappyPathTests
{
private static CancellationToken Ct => TestContext.Current.CancellationToken;
[Fact]
public async Task StartDeployment_seals_after_both_nodes_apply()
{
await using var harness = await TwoNodeClusterHarness.StartAsync();
await using var scope = harness.NodeA.Services.CreateAsyncScope();
var client = scope.ServiceProvider.GetRequiredService<IAdminOperationsClient>();
var result = await client.StartDeploymentAsync(createdBy: "alice@test", Ct);
result.Outcome.ShouldBe(StartDeploymentOutcome.Accepted);
result.DeploymentId.ShouldNotBeNull();
var deploymentId = result.DeploymentId!.Value.Value;
// Wait up to 15s for coordinator to seal after both DriverHostActors ack.
await WaitForAsync(async () =>
{
await using var db = await CreateDbAsync(harness);
var d = await db.Deployments.AsNoTracking().FirstOrDefaultAsync(d => d.DeploymentId == deploymentId, Ct);
return d?.Status == DeploymentStatus.Sealed;
}, TimeSpan.FromSeconds(15));
await using var db = await CreateDbAsync(harness);
var deployment = await db.Deployments.AsNoTracking()
.FirstAsync(d => d.DeploymentId == deploymentId, Ct);
deployment.Status.ShouldBe(DeploymentStatus.Sealed);
deployment.SealedAtUtc.ShouldNotBeNull();
var nodeStates = await db.NodeDeploymentStates.AsNoTracking()
.Where(s => s.DeploymentId == deploymentId)
.ToListAsync(Ct);
nodeStates.Count.ShouldBe(2);
nodeStates.ShouldAllBe(s => s.Status == NodeDeploymentStatus.Applied);
}
[Fact]
public async Task Replaying_dispatch_to_same_revision_is_idempotent_no_op()
{
await using var harness = await TwoNodeClusterHarness.StartAsync();
await using var scope = harness.NodeA.Services.CreateAsyncScope();
var client = scope.ServiceProvider.GetRequiredService<IAdminOperationsClient>();
var first = await client.StartDeploymentAsync(createdBy: "alice@test", Ct);
first.Outcome.ShouldBe(StartDeploymentOutcome.Accepted);
await WaitForAsync(async () =>
{
await using var db = await CreateDbAsync(harness);
var d = await db.Deployments.AsNoTracking()
.FirstOrDefaultAsync(d => d.DeploymentId == first.DeploymentId!.Value.Value, Ct);
return d?.Status == DeploymentStatus.Sealed;
}, TimeSpan.FromSeconds(15));
// Same DB state → same revision hash → AdminOperations short-circuits with NoChanges,
// OR (if it accepts) the second dispatch is idempotent (DriverHostActor recognises
// _currentRevision == msg.RevisionHash and acks Applied without re-applying).
var second = await client.StartDeploymentAsync(createdBy: "alice@test", Ct);
if (second.Outcome == StartDeploymentOutcome.Accepted)
{
// Second deployment row should also seal quickly because both drivers immediate-ack.
await WaitForAsync(async () =>
{
await using var db = await CreateDbAsync(harness);
var d = await db.Deployments.AsNoTracking()
.FirstOrDefaultAsync(d => d.DeploymentId == second.DeploymentId!.Value.Value, Ct);
return d?.Status == DeploymentStatus.Sealed;
}, TimeSpan.FromSeconds(15));
second.RevisionHash!.Value.Value.ShouldBe(first.RevisionHash!.Value.Value);
}
else
{
second.Outcome.ShouldBeOneOf(StartDeploymentOutcome.NoChanges, StartDeploymentOutcome.AnotherDeploymentInFlight);
}
}
private static async Task<OtOpcUaConfigDbContext> CreateDbAsync(TwoNodeClusterHarness harness)
{
var factory = harness.NodeA.Services.GetRequiredService<IDbContextFactory<OtOpcUaConfigDbContext>>();
return await factory.CreateDbContextAsync();
}
private static async Task WaitForAsync(Func<Task<bool>> condition, TimeSpan timeout)
{
var deadline = DateTime.UtcNow + timeout;
while (DateTime.UtcNow < deadline)
{
if (await condition()) return;
await Task.Delay(200);
}
throw new TimeoutException($"Condition not met within {timeout}");
}
}

View File

@@ -43,7 +43,7 @@ namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
public sealed class TwoNodeClusterHarness : IAsyncDisposable
{
public const string TestRoles = "admin,driver";
public static readonly string SharedDbName = $"two-node-cluster-{Guid.NewGuid():N}";
public string SharedDbName { get; } = $"two-node-cluster-{Guid.NewGuid():N}";
public WebApplication NodeA { get; private set; } = null!;
public WebApplication NodeB { get; private set; } = null!;
@@ -51,6 +51,10 @@ public sealed class TwoNodeClusterHarness : IAsyncDisposable
public int NodeAAkkaPort { get; private set; }
public int NodeBAkkaPort { get; private set; }
// Both nodes bind to 127.0.0.1 — ClusterRoleInfo + ConfigPublishCoordinator encode
// host:port into NodeId so the cluster membership stays distinct on different ports.
public const string LoopbackHost = "127.0.0.1";
public ActorSystem NodeASystem => NodeA.Services.GetRequiredService<ActorSystem>();
public ActorSystem NodeBSystem => NodeB.Services.GetRequiredService<ActorSystem>();
@@ -63,14 +67,18 @@ public sealed class TwoNodeClusterHarness : IAsyncDisposable
// Node A boots first as the seed.
harness.NodeA = await BuildNodeAsync(
host: LoopbackHost,
akkaPort: harness.NodeAAkkaPort,
seedHost: LoopbackHost,
seedAkkaPort: harness.NodeAAkkaPort,
dbName: SharedDbName);
dbName: harness.SharedDbName);
harness.NodeB = await BuildNodeAsync(
host: LoopbackHost,
akkaPort: harness.NodeBAkkaPort,
seedHost: LoopbackHost,
seedAkkaPort: harness.NodeAAkkaPort,
dbName: SharedDbName);
dbName: harness.SharedDbName);
await WaitForClusterFormationAsync(
harness.NodeASystem,
@@ -80,18 +88,19 @@ public sealed class TwoNodeClusterHarness : IAsyncDisposable
return harness;
}
private static async Task<WebApplication> BuildNodeAsync(int akkaPort, int seedAkkaPort, string dbName)
private static async Task<WebApplication> BuildNodeAsync(
string host, int akkaPort, string seedHost, int seedAkkaPort, string dbName)
{
var builder = WebApplication.CreateBuilder(new WebApplicationOptions { Args = [] });
builder.WebHost.UseKestrel(o => o.Listen(System.Net.IPAddress.Loopback, 0));
builder.WebHost.UseKestrel(o => o.Listen(System.Net.IPAddress.Parse(host), 0));
builder.Configuration.AddInMemoryCollection(new Dictionary<string, string?>
{
["ConnectionStrings:ConfigDb"] = "Server=test;Database=test;Trusted_Connection=True;TrustServerCertificate=True;",
["Cluster:Hostname"] = "127.0.0.1",
["Cluster:Hostname"] = host,
["Cluster:Port"] = akkaPort.ToString(),
["Cluster:PublicHostname"] = "127.0.0.1",
["Cluster:SeedNodes:0"] = $"akka.tcp://otopcua@127.0.0.1:{seedAkkaPort}",
["Cluster:PublicHostname"] = host,
["Cluster:SeedNodes:0"] = $"akka.tcp://otopcua@{seedHost}:{seedAkkaPort}",
["Cluster:Roles:0"] = "admin",
["Cluster:Roles:1"] = "driver",
["Security:Jwt:SigningKey"] = "two-node-harness-test-signing-key-with-enough-bytes-for-hs256",
@@ -149,7 +158,7 @@ public sealed class TwoNodeClusterHarness : IAsyncDisposable
private static int AllocateFreePort()
{
using var listener = new TcpListener(System.Net.IPAddress.Loopback, 0);
using var listener = new TcpListener(System.Net.IPAddress.Parse(LoopbackHost), 0);
listener.Start();
var port = ((System.Net.IPEndPoint)listener.LocalEndpoint).Port;
listener.Stop();