test(host): 2-node integration test harness + consolidate to one ActorSystem (Task 58)

Builds TwoNodeClusterHarness: two in-process Host-equivalent nodes sharing
an in-memory ConfigDb. Forms a 2-member Akka cluster. ClusterFormationTests
proves both nodes see each other as admin+driver role members.

Fixes a real production bug uncovered while wiring the harness — Program.cs
ran two separate ActorSystems (one from AddOtOpcUaCluster.AkkaHostedService
with cluster HOCON, one from Akka.Hosting.AddAkka with bare HOCON). Cluster
singletons landed on the bare ActorSystem and could not actually form a
cluster ("Configuration does not contain `akka.cluster` node").

Consolidation:
- AddOtOpcUaCluster now only binds AkkaClusterOptions + registers IClusterRoleInfo
- New WithOtOpcUaClusterBootstrap pushes embedded HOCON + Remote/Cluster options
  into Akka.Hosting's AkkaConfigurationBuilder
- AkkaHostedService.cs deleted — Akka.Hosting now owns the lifecycle
- Program.cs + harness call WithOtOpcUaClusterBootstrap inside AddAkka

Why not WebApplicationFactory<Program>? Program.cs reads OTOPCUA_ROLES from
process env (shared across in-process WAFs); the harness replays Program.cs's
DI graph from a clean WebApplicationBuilder per node with per-node config
overrides. Same production extensions, isolated config + Kestrel + Akka ports.

Tests: 93 v2 tests pass (was 91 + 2 new cluster formation), 0 skipped.
This commit is contained in:
Joseph Doherty
2026-05-26 06:27:04 -04:00
parent bb353c4d43
commit d6fac2d81d
7 changed files with 305 additions and 106 deletions

View File

@@ -1,97 +0,0 @@
using Akka.Actor;
using Akka.Configuration;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace ZB.MOM.WW.OtOpcUa.Cluster;
/// <summary>
/// Hosted service that owns the node-local <see cref="ActorSystem"/>. On start it layers an
/// overlay generated from <see cref="AkkaClusterOptions"/> on top of the base HOCON returned by
/// <c>HoconLoader.LoadBaseConfig()</c>, creates the system, and joins the configured seed nodes.
/// On stop it runs <c>CoordinatedShutdown</c> with <c>ClusterLeavingReason</c>, falling back to
/// a plain <c>Terminate()</c> if that does not finish in time.
/// </summary>
public sealed class AkkaHostedService : IHostedService
{
    private readonly AkkaClusterOptions _options;
    private readonly ILogger<AkkaHostedService> _logger;
    private ActorSystem? _actorSystem;

    /// <summary>Captures the bound cluster options and the logger.</summary>
    /// <param name="options">Options wrapper; its <c>Value</c> is read once, here.</param>
    /// <param name="logger">Logger used for start/stop diagnostics.</param>
    public AkkaHostedService(IOptions<AkkaClusterOptions> options, ILogger<AkkaHostedService> logger)
    {
        _options = options.Value;
        _logger = logger;
    }

    /// <summary>The running actor system.</summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown when read before <see cref="StartAsync"/> has created the system.
    /// </exception>
    public ActorSystem ActorSystem =>
        _actorSystem ?? throw new InvalidOperationException(
            "ActorSystem requested before AkkaHostedService.StartAsync ran.");

    /// <summary>Builds the effective configuration, creates the actor system and joins the seeds.</summary>
    /// <param name="cancellationToken">Not observed; all work here is synchronous.</param>
    /// <returns>An already-completed task.</returns>
    public Task StartAsync(CancellationToken cancellationToken)
    {
        // Parse the seed addresses before anything is started: a malformed address must fail
        // the start without leaving behind a running ActorSystem that nothing will terminate.
        var seeds = _options.SeedNodes.Select(Address.Parse).ToList();

        var overlay = BuildOverlay(_options);
        var baseConfig = ConfigurationFactory.ParseString(HoconLoader.LoadBaseConfig());

        // The overlay is the primary config; the base HOCON only supplies what the overlay lacks.
        var config = ConfigurationFactory.ParseString(overlay).WithFallback(baseConfig);

        _logger.LogInformation(
            "Starting ActorSystem '{System}' on {Host}:{Port} with roles=[{Roles}]",
            _options.SystemName, _options.PublicHostname, _options.Port,
            string.Join(",", _options.Roles));

        _actorSystem = ActorSystem.Create(_options.SystemName, config);

        // NOTE(review): the same seeds are also written to akka.cluster.seed-nodes by
        // BuildOverlay, so this explicit join may be redundant — confirm before removing.
        if (seeds.Count > 0)
        {
            Akka.Cluster.Cluster.Get(_actorSystem).JoinSeedNodes(seeds);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Leaves the cluster via <c>CoordinatedShutdown</c>. The wait is bounded by 30 seconds or by
    /// <paramref name="cancellationToken"/>, whichever fires first; after that the actor system
    /// is terminated directly.
    /// </summary>
    /// <param name="cancellationToken">Host shutdown token, linked into the 30 second timeout.</param>
    public async Task StopAsync(CancellationToken cancellationToken)
    {
        // Nothing to stop if StartAsync never created the system.
        if (_actorSystem is null) return;

        _logger.LogInformation("Initiating cluster-leave CoordinatedShutdown");
        var shutdown = CoordinatedShutdown.Get(_actorSystem);

        using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
        cts.CancelAfter(TimeSpan.FromSeconds(30));

        try
        {
            await shutdown.Run(CoordinatedShutdown.ClusterLeavingReason.Instance)
                .WaitAsync(cts.Token).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // WaitAsync only abandons the wait; the shutdown task itself keeps running.
            _logger.LogWarning("Cluster leave timed out after 30s; forcing terminate");
            await _actorSystem.Terminate().ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Renders the HOCON overlay carrying the remoting endpoint, seed nodes and roles taken
    /// from <paramref name="o"/>.
    /// </summary>
    private static string BuildOverlay(AkkaClusterOptions o)
    {
        var seeds = string.Join(",", o.SeedNodes.Select(Quote));
        var roles = string.Join(",", o.Roles.Select(Quote));
        return $@"
akka {{
remote.dot-netty.tcp {{
hostname = {Quote(o.Hostname)}
port = {o.Port}
public-hostname = {Quote(o.PublicHostname)}
}}
cluster {{
seed-nodes = [{seeds}]
roles = [{roles}]
}}
}}";
    }

    /// <summary>
    /// Wraps <paramref name="value"/> in double quotes, escaping backslashes and embedded
    /// double quotes; <c>null</c> is rendered as an empty quoted string.
    /// </summary>
    private static string Quote(string? value)
    {
        var escaped = (value ?? string.Empty).Replace("\\", "\\\\").Replace("\"", "\\\"");
        return $"\"{escaped}\"";
    }
}

View File

@@ -1,7 +1,9 @@
using Akka.Actor;
using Akka.Cluster.Hosting;
using Akka.Hosting;
using Akka.Remote.Hosting;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Options;
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
namespace ZB.MOM.WW.OtOpcUa.Cluster;
@@ -9,20 +11,57 @@ namespace ZB.MOM.WW.OtOpcUa.Cluster;
public static class ServiceCollectionExtensions
{
    /// <summary>
    /// Binds <see cref="AkkaClusterOptions"/> and registers <see cref="IClusterRoleInfo"/>. The
    /// actual ActorSystem + cluster bootstrap is layered on inside the host's <c>AddAkka(...)</c>
    /// configurator via <see cref="WithOtOpcUaClusterBootstrap"/> — keeping the entire Akka graph
    /// under Akka.Hosting's management so cluster singletons land on the same ActorSystem.
    /// </summary>
    /// <param name="services">Service collection to register into.</param>
    /// <param name="configuration">
    /// Configuration root; the section named <c>AkkaClusterOptions.SectionName</c> is bound to
    /// <see cref="AkkaClusterOptions"/>.
    /// </param>
    /// <returns>The same <paramref name="services"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="services"/> or <paramref name="configuration"/> is <c>null</c>.
    /// </exception>
    public static IServiceCollection AddOtOpcUaCluster(this IServiceCollection services, IConfiguration configuration)
    {
        ArgumentNullException.ThrowIfNull(services);
        ArgumentNullException.ThrowIfNull(configuration);

        services.AddOptions<AkkaClusterOptions>()
            .Bind(configuration.GetSection(AkkaClusterOptions.SectionName));

        // No hosted service and no ActorSystem registration here: creating a system in this
        // method in addition to the one created by AddAkka would yield two ActorSystems.
        services.AddSingleton<IClusterRoleInfo, ClusterRoleInfo>();
        return services;
    }

    /// <summary>
    /// Configures the Akka.Hosting builder with the embedded OtOpcUa HOCON (split-brain resolver,
    /// pinned dispatcher, failure detector tuning) + remote endpoint + cluster bootstrap derived
    /// from <see cref="AkkaClusterOptions"/>.
    ///
    /// Wire from Program.cs:
    /// <code>
    /// services.AddAkka("otopcua", (ab, sp) =>
    /// {
    ///     ab.WithOtOpcUaClusterBootstrap(sp);
    ///     if (hasAdmin) ab.WithOtOpcUaControlPlaneSingletons();
    ///     if (hasDriver) ab.WithOtOpcUaRuntimeActors();
    /// });
    /// </code>
    /// </summary>
    /// <param name="builder">The Akka.Hosting configuration builder to extend.</param>
    /// <param name="serviceProvider">
    /// Provider used to resolve <c>IOptions&lt;AkkaClusterOptions&gt;</c>.
    /// </param>
    /// <returns>The same <paramref name="builder"/> instance, for chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="builder"/> or <paramref name="serviceProvider"/> is <c>null</c>.
    /// </exception>
    public static AkkaConfigurationBuilder WithOtOpcUaClusterBootstrap(
        this AkkaConfigurationBuilder builder,
        IServiceProvider serviceProvider)
    {
        ArgumentNullException.ThrowIfNull(builder);
        ArgumentNullException.ThrowIfNull(serviceProvider);

        var options = serviceProvider.GetRequiredService<IOptions<AkkaClusterOptions>>().Value;

        // Appended rather than prepended, so the base HOCON sits behind configuration added
        // with higher precedence (see Akka.Hosting's HoconAddMode).
        builder.AddHocon(HoconLoader.LoadBaseConfig(), HoconAddMode.Append);

        builder.WithRemoting(new RemoteOptions
        {
            HostName = options.Hostname,
            Port = options.Port,
            PublicHostName = options.PublicHostname,
        });

        builder.WithClustering(new ClusterOptions
        {
            SeedNodes = options.SeedNodes,
            Roles = options.Roles,
        });

        return builder;
    }
}

View File

@@ -40,8 +40,9 @@ builder.Services.AddOtOpcUaCluster(builder.Configuration);
// Akka cluster bootstrap. Role-specific singletons are registered on the AkkaConfigurationBuilder
// from inside the configurator lambda. AddAkka spins the ActorSystem at host start.
builder.Services.AddAkka("otopcua", (ab, _) =>
builder.Services.AddAkka("otopcua", (ab, sp) =>
{
ab.WithOtOpcUaClusterBootstrap(sp);
if (hasAdmin)
ab.WithOtOpcUaControlPlaneSingletons();
if (hasDriver)