DeployHappyPathTests exercises the full deploy pipeline on the 2-node harness:
AdminOperationsActor → ConfigPublishCoordinator → DistributedPubSub →
DriverHostActor on both nodes → ApplyAck → coordinator seals. Verifies both
NodeDeploymentState rows reach Applied and Deployment.Status reaches Sealed.
Exposed + fixed two production bugs along the way:
1. Coordinator was publishing DispatchDeployment on the "deployments" topic but
never subscribed to anything — DriverHostActor ACKs published on the same
topic could not reach it. Added dedicated "deployment-acks" topic with
coordinator subscription in PreStart, and DriverHostActor publishes ACKs
there.
2. NodeId derivation used member.Address.Host only, so two cluster members on a
shared loopback host (test harness, dev VMs) collapsed into one identity. The
coordinator's expected-ack set shrank to a single entry, and the deployment was
sealed after only half the nodes had acked. Switched to host:port everywhere
(ClusterRoleInfo + coordinator) so loopback nodes stay distinct; production
identities simply become more specific, which is harmless.
Tests: 95 v2 tests pass (93 existing + 2 new deploy tests), 0 skipped.
Failover scenarios (design §8 cases 3-7: node-kill-mid-apply, split-brain,
restart-during-deploy) deferred — they need controlled node-down primitives
on the harness. Tracked as F22 (failover scenario test cases).
186 lines
7.8 KiB
C#
186 lines
7.8 KiB
C#
using System.Net.Sockets;
|
|
using Akka.Actor;
|
|
using Akka.Cluster;
|
|
using Akka.Hosting;
|
|
using Microsoft.AspNetCore.Builder;
|
|
using Microsoft.AspNetCore.Hosting;
|
|
using Microsoft.EntityFrameworkCore;
|
|
using Microsoft.Extensions.Configuration;
|
|
using Microsoft.Extensions.DependencyInjection;
|
|
using Microsoft.Extensions.Hosting;
|
|
using ZB.MOM.WW.OtOpcUa.AdminUI;
|
|
using ZB.MOM.WW.OtOpcUa.AdminUI.Clients;
|
|
using ZB.MOM.WW.OtOpcUa.AdminUI.Hubs;
|
|
using ZB.MOM.WW.OtOpcUa.Cluster;
|
|
using ZB.MOM.WW.OtOpcUa.Configuration;
|
|
using ZB.MOM.WW.OtOpcUa.ControlPlane;
|
|
using ZB.MOM.WW.OtOpcUa.Host.Health;
|
|
using ZB.MOM.WW.OtOpcUa.Runtime;
|
|
using ZB.MOM.WW.OtOpcUa.Security;
|
|
using ZB.MOM.WW.OtOpcUa.Security.Endpoints;
|
|
using ZB.MOM.WW.OtOpcUa.Security.Ldap;
|
|
|
|
namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
|
|
|
|
/// <summary>
/// Spins up two in-process <c>OtOpcUa.Host</c>-equivalent <see cref="WebApplication"/> instances
/// that share an in-memory <see cref="OtOpcUaConfigDbContext"/> and form a 2-member Akka cluster.
/// Both nodes carry the <c>admin</c> + <c>driver</c> roles, matching design §8's failover-test
/// 2-node profile.
/// </summary>
/// <remarks>
/// Why not <c>WebApplicationFactory&lt;Program&gt;</c>?
/// Program.cs reads <c>OTOPCUA_ROLES</c> from process env (shared across in-process WAF
/// instances) and writes both Serilog file sinks + Akka cluster TCP listener to the host
/// process — neither survives two parallel WAFs cleanly. This harness instead replays the
/// Program.cs DI graph from a clean <see cref="WebApplicationBuilder"/> per node with
/// per-node config overrides. The production wiring is the same set of extensions
/// (<see cref="ServiceCollectionExtensions.AddOtOpcUaConfigDb"/>,
/// <see cref="AkkaCluster.ServiceCollectionExtensions.AddOtOpcUaCluster"/>,
/// <see cref="AddOtOpcUaAuth"/>, <see cref="AddOtOpcUaHealth"/>,
/// <see cref="WithOtOpcUaControlPlaneSingletons"/>,
/// <see cref="WithOtOpcUaRuntimeActors"/>).
/// </remarks>
public sealed class TwoNodeClusterHarness : IAsyncDisposable
{
    /// <summary>Comma-separated form of the two roles (<c>admin</c>, <c>driver</c>) each node is configured with.</summary>
    public const string TestRoles = "admin,driver";

    // Both nodes bind to 127.0.0.1 — ClusterRoleInfo + ConfigPublishCoordinator encode
    // host:port into NodeId so the cluster membership stays distinct on different ports.
    public const string LoopbackHost = "127.0.0.1";

    /// <summary>Name of the EF Core in-memory database both nodes are pointed at; unique per harness instance.</summary>
    public string SharedDbName { get; } = $"two-node-cluster-{Guid.NewGuid():N}";

    /// <summary>First node; also the cluster seed. Assigned by <see cref="StartAsync"/>.</summary>
    public WebApplication NodeA { get; private set; } = null!;

    /// <summary>Second node; joins via node A's address. Assigned by <see cref="StartAsync"/>.</summary>
    public WebApplication NodeB { get; private set; } = null!;

    /// <summary>Akka remoting port configured for node A.</summary>
    public int NodeAAkkaPort { get; private set; }

    /// <summary>Akka remoting port configured for node B.</summary>
    public int NodeBAkkaPort { get; private set; }

    /// <summary>The <see cref="ActorSystem"/> resolved from node A's service provider.</summary>
    public ActorSystem NodeASystem => NodeA.Services.GetRequiredService<ActorSystem>();

    /// <summary>The <see cref="ActorSystem"/> resolved from node B's service provider.</summary>
    public ActorSystem NodeBSystem => NodeB.Services.GetRequiredService<ActorSystem>();

    /// <summary>Boots both nodes and waits up to <paramref name="formationTimeout"/> for cluster convergence.</summary>
    /// <param name="formationTimeout">Maximum time to wait for both nodes to see two Up members; defaults to 20 seconds.</param>
    /// <returns>A started harness; the caller owns it and must dispose it.</returns>
    /// <exception cref="TimeoutException">The cluster did not converge in time. Any node already started is disposed first.</exception>
    public static async Task<TwoNodeClusterHarness> StartAsync(TimeSpan? formationTimeout = null)
    {
        var harness = new TwoNodeClusterHarness();
        try
        {
            // Reserve both ports while both probe listeners are open so the OS cannot
            // hand the same ephemeral port out twice.
            var ports = AllocateFreePorts(2);
            harness.NodeAAkkaPort = ports[0];
            harness.NodeBAkkaPort = ports[1];

            // Node A boots first as the seed.
            harness.NodeA = await BuildNodeAsync(
                host: LoopbackHost,
                akkaPort: harness.NodeAAkkaPort,
                seedHost: LoopbackHost,
                seedAkkaPort: harness.NodeAAkkaPort,
                dbName: harness.SharedDbName);

            harness.NodeB = await BuildNodeAsync(
                host: LoopbackHost,
                akkaPort: harness.NodeBAkkaPort,
                seedHost: LoopbackHost,
                seedAkkaPort: harness.NodeAAkkaPort,
                dbName: harness.SharedDbName);

            await WaitForClusterFormationAsync(
                harness.NodeASystem,
                harness.NodeBSystem,
                formationTimeout ?? TimeSpan.FromSeconds(20));

            return harness;
        }
        catch
        {
            // The caller never receives the harness on failure, so whatever was started
            // has to be torn down here or it keeps running for the rest of the test process.
            try
            {
                await harness.DisposeAsync();
            }
            catch
            {
                // Deliberate best-effort: a cleanup failure must not mask the startup failure rethrown below.
            }

            throw;
        }
    }

    /// <summary>
    /// Builds and starts one node: a <see cref="WebApplication"/> listening on an OS-assigned HTTP port
    /// on <paramref name="host"/>, with Akka configured for <paramref name="akkaPort"/> and seeded from
    /// <paramref name="seedHost"/>:<paramref name="seedAkkaPort"/>.
    /// </summary>
    /// <param name="host">Address used for Kestrel and for the cluster hostname/public hostname settings.</param>
    /// <param name="akkaPort">Value written to <c>Cluster:Port</c>.</param>
    /// <param name="seedHost">Host part of the single seed-node address.</param>
    /// <param name="seedAkkaPort">Port part of the single seed-node address.</param>
    /// <param name="dbName">Name of the in-memory database to use.</param>
    /// <returns>The started application.</returns>
    private static async Task<WebApplication> BuildNodeAsync(
        string host, int akkaPort, string seedHost, int seedAkkaPort, string dbName)
    {
        var builder = WebApplication.CreateBuilder(new WebApplicationOptions { Args = [] });

        // Port 0 lets the OS choose the HTTP port.
        builder.WebHost.UseKestrel(o => o.Listen(System.Net.IPAddress.Parse(host), 0));
        builder.Configuration.AddInMemoryCollection(new Dictionary<string, string?>
        {
            ["ConnectionStrings:ConfigDb"] = "Server=test;Database=test;Trusted_Connection=True;TrustServerCertificate=True;",
            ["Cluster:Hostname"] = host,
            ["Cluster:Port"] = akkaPort.ToString(System.Globalization.CultureInfo.InvariantCulture),
            ["Cluster:PublicHostname"] = host,
            ["Cluster:SeedNodes:0"] = $"akka.tcp://otopcua@{seedHost}:{seedAkkaPort}",
            ["Cluster:Roles:0"] = "admin",
            ["Cluster:Roles:1"] = "driver",
            ["Security:Jwt:SigningKey"] = "two-node-harness-test-signing-key-with-enough-bytes-for-hs256",
            ["Security:Jwt:Issuer"] = "otopcua-test",
            ["Security:Jwt:Audience"] = "otopcua-test",
        });

        // Replicate Program.cs role wiring with the harness-shared in-memory ConfigDb.
        builder.Services.AddDbContextFactory<OtOpcUaConfigDbContext>(opt => opt.UseInMemoryDatabase(dbName));
        builder.Services.AddDbContext<OtOpcUaConfigDbContext>(opt => opt.UseInMemoryDatabase(dbName));
        builder.Services.AddOtOpcUaCluster(builder.Configuration);

        builder.Services.AddAkka("otopcua", (ab, sp) =>
        {
            ab.WithOtOpcUaClusterBootstrap(sp);
            ab.WithOtOpcUaControlPlaneSingletons();
            ab.WithOtOpcUaRuntimeActors();
        });

        builder.Services.AddOtOpcUaAuth(builder.Configuration);
        builder.Services.AddSingleton<ILdapAuthService, StubLdapAuthService>();
        builder.Services.AddAdminUI();
        builder.Services.AddSignalR();
        builder.Services.AddOtOpcUaAdminClients();
        builder.Services.AddOtOpcUaHealth();

        var app = builder.Build();
        try
        {
            app.UseAuthentication();
            app.UseAuthorization();
            app.MapOtOpcUaAuth();
            app.MapOtOpcUaHubs();
            app.MapOtOpcUaHealth();

            await app.StartAsync();
            return app;
        }
        catch
        {
            // The application is never handed to the caller on failure, so release it here.
            await app.DisposeAsync();
            throw;
        }
    }

    /// <summary>
    /// Polls both actor systems every 200 ms until each reports at least two members with
    /// <see cref="MemberStatus.Up"/>, or until <paramref name="timeout"/> elapses.
    /// </summary>
    /// <param name="a">First actor system to inspect.</param>
    /// <param name="b">Second actor system to inspect.</param>
    /// <param name="timeout">Maximum time to keep polling.</param>
    /// <exception cref="TimeoutException">Either system still saw fewer than two Up members at the deadline.</exception>
    private static async Task WaitForClusterFormationAsync(ActorSystem a, ActorSystem b, TimeSpan timeout)
    {
        static int CountUpMembers(ActorSystem system) =>
            Akka.Cluster.Cluster.Get(system).State.Members.Count(m => m.Status == MemberStatus.Up);

        // Stopwatch is monotonic, so a wall-clock adjustment cannot stretch or cut the wait.
        var elapsed = System.Diagnostics.Stopwatch.StartNew();
        while (true)
        {
            if (CountUpMembers(a) >= 2 && CountUpMembers(b) >= 2)
            {
                return;
            }

            // Checking the deadline after the probe guarantees at least one probe, and one
            // final probe after the last delay.
            if (elapsed.Elapsed >= timeout)
            {
                break;
            }

            await Task.Delay(200);
        }

        throw new TimeoutException(
            $"Cluster did not form within {timeout}. " +
            $"A up={CountUpMembers(a)}, " +
            $"B up={CountUpMembers(b)}");
    }

    /// <summary>
    /// Asks the OS for <paramref name="count"/> free TCP ports on <see cref="LoopbackHost"/>.
    /// All probe listeners stay open until every port has been read, so the returned ports are distinct.
    /// </summary>
    /// <param name="count">Number of ports to reserve.</param>
    /// <returns>The port numbers, in allocation order.</returns>
    /// <remarks>
    /// The ports are released before returning, so another process could still take one before
    /// the node binds it; that window cannot be closed from here.
    /// </remarks>
    private static int[] AllocateFreePorts(int count)
    {
        var probes = new TcpListener?[count];
        var ports = new int[count];
        try
        {
            for (var i = 0; i < count; i++)
            {
                var probe = new TcpListener(System.Net.IPAddress.Parse(LoopbackHost), 0);
                probes[i] = probe;
                probe.Start();
                ports[i] = ((System.Net.IPEndPoint)probe.LocalEndpoint).Port;
            }
        }
        finally
        {
            foreach (var probe in probes)
            {
                probe?.Stop();
            }
        }

        return ports;
    }

    /// <summary>Allocates a single free TCP port on <see cref="LoopbackHost"/>.</summary>
    private static int AllocateFreePort() => AllocateFreePorts(1)[0];

    /// <summary>
    /// Disposes node B, then node A. Node A is disposed even when disposing node B throws.
    /// Nodes that were never started are skipped.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        try
        {
            if (NodeB is not null)
            {
                await NodeB.DisposeAsync();
            }
        }
        finally
        {
            if (NodeA is not null)
            {
                await NodeA.DisposeAsync();
            }
        }
    }

    /// <summary>
    /// LDAP stand-in registered for the harness: authentication succeeds only for the password
    /// <c>valid-password</c>, and every result carries the <c>FleetAdmin</c> group and role.
    /// </summary>
    private sealed class StubLdapAuthService : ILdapAuthService
    {
        /// <summary>Returns a completed result whose success flag depends solely on <paramref name="password"/>.</summary>
        public Task<LdapAuthResult> AuthenticateAsync(string username, string password, CancellationToken ct = default)
            => Task.FromResult(new LdapAuthResult(
                Success: password == "valid-password",
                DisplayName: username,
                Username: username,
                Groups: ["FleetAdmin"],
                Roles: ["FleetAdmin"],
                Error: null));
    }
}
|