test(adminui): E2E deployed-driver Healthy->Reconnecting->Healthy transition on Reconnect
This commit is contained in:
@@ -1,8 +1,15 @@
|
||||
using Akka.Actor;
|
||||
using Microsoft.AspNetCore.SignalR;
|
||||
using Microsoft.Extensions.DependencyInjection;
|
||||
using Moq;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.AdminUI.Hubs;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
|
||||
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Drivers;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Entities;
|
||||
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
|
||||
|
||||
@@ -10,17 +17,20 @@ namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
|
||||
/// E2E integration coverage for the <c>ReconnectDriver</c> command path through
|
||||
/// <see cref="IAdminOperationsClient"/>.
|
||||
///
|
||||
/// <para><b>Scope note:</b> wiring a live <c>DriverInstanceActor</c> for the full
|
||||
/// Healthy → Reconnecting → Healthy health-transition assertion requires a deployed
|
||||
/// driver row in the config DB, a real fixture endpoint, and the
|
||||
/// <c>DriverHostActor</c> to have registered the instance — substantially more
|
||||
/// harness complexity than the two-node cluster setup alone provides. That deeper
|
||||
/// fixture is tracked as a follow-up. This suite instead verifies the message
|
||||
/// round-trip through the <c>AdminOperationsActor</c> singleton: the command is
|
||||
/// <para>The first two tests verify the message round-trip through the
|
||||
/// <c>AdminOperationsActor</c> singleton against a non-deployed instance id: the command is
|
||||
/// accepted, persisted as a <c>ConfigEdit</c> audit row, and the reply carries
|
||||
/// <c>Ok = true</c> with the matching <c>CorrelationId</c>. The DPS broadcast
|
||||
/// that triggers the actor-side reconnect is exercised by the control-plane unit
|
||||
/// tests that mock <c>IActorRef</c>.</para>
|
||||
/// <c>Ok = true</c> with the matching <c>CorrelationId</c>.</para>
|
||||
///
|
||||
/// <para><see cref="Reconnect_DeployedDriver_TransitionsThroughReconnectingBackToHealthy"/>
|
||||
/// goes the full distance: it deploys a real driver (via the opt-in
|
||||
/// <see cref="FakeReconnectDriverFactory"/> wired into the harness) so the
|
||||
/// <c>DriverHostActor</c> spawns a managed <c>DriverInstanceActor</c>, then drives the
|
||||
/// end-to-end reconnect path —
|
||||
/// <c>ReconnectDriver → AdminOperationsActor → DriverHostActor.HandleReconnectDriver →
|
||||
/// DriverInstanceActor.ForceReconnect (FSM) → PublishHealthSnapshot → driver-health DPS topic →
|
||||
/// DriverStatusSignalRBridge → snapshot store / hub push</c> — and asserts the published health
|
||||
/// transitions Healthy → Reconnecting → Healthy.</para>
|
||||
/// </summary>
|
||||
[Trait("Category", "Integration")]
|
||||
public sealed class DriverReconnectE2eTests
|
||||
@@ -83,4 +93,175 @@ public sealed class DriverReconnectE2eTests
|
||||
r1.CorrelationId.ShouldBe(first.CorrelationId);
|
||||
r2.CorrelationId.ShouldBe(second.CorrelationId);
|
||||
}
|
||||
|
||||
private const string ClusterId = "RECONNECT-E2E";
|
||||
private const string DriverId = "drv-modbus";
|
||||
|
||||
/// <summary>
/// Full-stack reconnect: deploys a real driver (the in-process
/// <see cref="FakeReconnectDriverFactory"/>), proves it reaches Healthy on the driver-health DPS
/// topic, simulates a lost connection (<see cref="FakeReconnectDriver.ReportReconnecting"/>), issues
/// a <see cref="ReconnectDriver"/> through <see cref="IAdminOperationsClient"/>, and asserts the
/// published health walks Healthy → Reconnecting → Healthy — captured at the
/// <see cref="DriverStatusSignalRBridge"/> hub-push seam. Confirms the operator Reconnect threads
/// the whole cluster path and genuinely re-initialises the driver (<c>InitializeCount ≥ 2</c>).
/// </summary>
[Fact]
public async Task Reconnect_DeployedDriver_TransitionsThroughReconnectingBackToHealthy()
{
    var factory = new FakeReconnectDriverFactory();
    await using var harness = await TwoNodeClusterHarness.StartAsync(driverFactory: factory);

    var store = harness.NodeA.Services.GetRequiredService<IDriverStatusSnapshotStore>();

    // Capture every DriverHealthChanged the bridge pushes to the hub (the first SendCoreAsync arg).
    // The callback runs on the bridge actor's thread, so every access to `captured` — here and in
    // the assertions below — must hold `captureLock`.
    var captured = new List<DriverHealthChanged>();
    var captureLock = new object();
    var mockClients = new Mock<IHubClients>();
    var mockClientProxy = new Mock<IClientProxy>();
    mockClients.Setup(c => c.Group(It.IsAny<string>())).Returns(mockClientProxy.Object);
    mockClientProxy
        .Setup(p => p.SendCoreAsync(It.IsAny<string>(), It.IsAny<object?[]>(), It.IsAny<CancellationToken>()))
        .Callback<string, object?[], CancellationToken>((_, args, _) =>
        {
            if (args.FirstOrDefault() is DriverHealthChanged hc)
            {
                lock (captureLock)
                {
                    captured.Add(hc);
                }
            }
        })
        .Returns(Task.CompletedTask);
    var mockHub = new Mock<IHubContext<DriverStatusHub>>();
    mockHub.Setup(h => h.Clients).Returns(mockClients.Object);

    // Spawn the bridge BEFORE deploying and give it a fixed grace period to complete its DPS
    // subscription, so it catches the initial Healthy publish (DPS is fire-and-forget with no
    // replay, and repeat publishes are deduped). NOTE: this is a time-based wait, not an
    // observed SubscribeAck.
    var bridge = harness.NodeASystem.ActorOf(
        DriverStatusSignalRBridge.Props(mockHub.Object, store),
        $"test-reconnect-bridge-{Guid.NewGuid():N}");
    await Task.Delay(TimeSpan.FromSeconds(2), Ct);

    try
    {
        // Validator-clean seed: a single cluster bound to NodeA with one enabled Modbus driver, no
        // equipment/tags (tags would trip DraftValidator → deploy Rejected).
        await SeedSingleDriverClusterAsync(harness);

        await using var scope = harness.NodeA.Services.CreateAsyncScope();
        var client = scope.ServiceProvider.GetRequiredService<IAdminOperationsClient>();

        var deploy = await client.StartDeploymentAsync(createdBy: "e2e", Ct);
        deploy.Outcome.ShouldBe(StartDeploymentOutcome.Accepted, $"Deploy not accepted: {deploy.Message}");

        // Wait until the driver is spawned (factory recorded it) AND reached Healthy in the store.
        await WaitForAsync(() => Task.FromResult(
                factory.Created.TryGetValue(DriverId, out _)
                && store.TryGet(DriverId, out var s) && s.State == "Healthy"),
            TimeSpan.FromSeconds(20));

        var driver = factory.Created[DriverId];

        // Simulate the lost connection the operator Reconnect responds to.
        driver.ReportReconnecting();

        var result = await client.AskAsync<ReconnectDriverResult>(
            new ReconnectDriver(ClusterId, DriverId, "e2e", Guid.NewGuid()), Ct);
        result.Ok.ShouldBeTrue($"ReconnectDriver failed: {result.Message}");

        // The published health must walk Reconnecting → (later) Healthy for this driver.
        await WaitForAsync(() =>
        {
            lock (captureLock)
            {
                return Task.FromResult(HasReconnectThenHealthy(captured));
            }
        }, TimeSpan.FromSeconds(20));

        // Take one consistent copy under the lock and assert on that copy only: the bridge may
        // still be appending to `captured`, and the failure message must describe exactly the
        // sequence that was evaluated.
        List<DriverHealthChanged> snapshot;
        lock (captureLock)
        {
            snapshot = captured.Where(c => c.DriverInstanceId == DriverId).ToList();
        }

        HasReconnectThenHealthy(snapshot).ShouldBeTrue(
            "Expected a Reconnecting push followed by a later Healthy push for the deployed driver. " +
            $"States seen: [{string.Join(", ", snapshot.Select(c => c.State))}]");

        store.TryGet(DriverId, out var final).ShouldBeTrue();
        final.State.ShouldBe("Healthy");

        // ≥ 2 proves the command genuinely re-initialised the driver via the full cluster path
        // (initial connect + at least one reconnect retry). The counter is incremented with
        // Interlocked on another thread, so read it with a matching volatile read.
        Volatile.Read(ref driver.InitializeCount).ShouldBeGreaterThanOrEqualTo(2);
    }
    finally
    {
        harness.NodeASystem.Stop(bridge);
    }
}
|
||||
|
||||
/// <summary>
/// Scans the captured hub pushes for <see cref="DriverId"/> in order and reports whether a
/// <c>Reconnecting</c> push is followed, at any strictly later position, by a <c>Healthy</c> push.
/// Pushes for other driver instances are ignored.
/// </summary>
/// <param name="captured">The pushes recorded so far, in arrival order.</param>
/// <returns><c>true</c> when the ordered Reconnecting → Healthy sub-sequence is present.</returns>
private static bool HasReconnectThenHealthy(List<DriverHealthChanged> captured)
{
    var reconnectingSeen = false;

    foreach (var push in captured)
    {
        if (push.DriverInstanceId != DriverId)
        {
            continue;
        }

        if (reconnectingSeen)
        {
            // Only a Healthy that arrives after the first Reconnecting counts.
            if (push.State == "Healthy")
            {
                return true;
            }
        }
        else if (push.State == "Reconnecting")
        {
            reconnectingSeen = true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Writes the minimal configuration the deployed-driver test needs: one cluster, one equipment
/// namespace, one cluster node using NodeA's node id, and one enabled Modbus
/// <see cref="DriverInstance"/> — with no equipment or tags. The intent is that
/// <c>StartDeploymentAsync</c> returns <c>Accepted</c> and NodeA's <c>DriverHostActor</c> spawns the
/// driver as a managed child. Mirrors <c>MultiClusterScopingTests</c>'s validator-clean entity shapes.
/// </summary>
/// <param name="harness">The running two-node harness whose config database is seeded.</param>
private static async Task SeedSingleDriverClusterAsync(TwoNodeClusterHarness harness)
{
    // Shared by the namespace row and the driver row that references it.
    const string equipmentNamespaceId = "RECONNECT-E2E-equipment";

    var cluster = new ServerCluster
    {
        ClusterId = ClusterId,
        Name = "Reconnect E2E Cluster",
        Enterprise = "zb",
        Site = "central",
        NodeCount = 1,
        RedundancyMode = RedundancyMode.None,
        CreatedBy = "test",
    };

    var equipmentNamespace = new Namespace
    {
        NamespaceId = equipmentNamespaceId,
        ClusterId = ClusterId,
        Kind = NamespaceKind.Equipment,
        NamespaceUri = "urn:zb:reconnect-e2e:equipment",
    };

    var nodeA = new ClusterNode
    {
        NodeId = harness.NodeANodeId,
        ClusterId = ClusterId,
        Host = TwoNodeClusterHarness.LoopbackHost,
        ApplicationUri = "urn:zb:reconnect-e2e:node-a",
        CreatedBy = "test",
    };

    var modbusDriver = new DriverInstance
    {
        DriverInstanceId = DriverId,
        ClusterId = ClusterId,
        NamespaceId = equipmentNamespaceId,
        Name = DriverId,
        DriverType = "Modbus",
        Enabled = true,
        DriverConfig = "{}",
    };

    await using var db = await harness.CreateConfigDbContextAsync();

    db.ServerClusters.Add(cluster);
    db.Namespaces.Add(equipmentNamespace);
    db.ClusterNodes.Add(nodeA);
    db.DriverInstances.Add(modbusDriver);

    await db.SaveChangesAsync(Ct);
}
|
||||
|
||||
/// <summary>
/// Polls <paramref name="condition"/> roughly every 100 ms until it returns <c>true</c> or
/// <paramref name="timeout"/> elapses.
/// </summary>
/// <remarks>
/// The condition is always evaluated at least once, and once more after the final sleep, so a
/// condition that becomes true right at the deadline is not misreported as a timeout. Elapsed time
/// is measured with a monotonic stopwatch, so wall-clock adjustments cannot shorten or stretch the wait.
/// </remarks>
/// <param name="condition">Asynchronous predicate to poll.</param>
/// <param name="timeout">Maximum total time to keep polling.</param>
/// <param name="cancellationToken">Optional token that aborts the wait between polls.</param>
/// <exception cref="TimeoutException">The condition was still false once the timeout elapsed.</exception>
/// <exception cref="OperationCanceledException"><paramref name="cancellationToken"/> was cancelled.</exception>
private static async Task WaitForAsync(
    Func<Task<bool>> condition,
    TimeSpan timeout,
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(condition);

    var pollInterval = TimeSpan.FromMilliseconds(100);
    var elapsed = System.Diagnostics.Stopwatch.StartNew();

    while (true)
    {
        if (await condition())
        {
            return;
        }

        var remaining = timeout - elapsed.Elapsed;
        if (remaining <= TimeSpan.Zero)
        {
            break;
        }

        // Never sleep past the deadline; the loop then performs one last check before giving up.
        await Task.Delay(remaining < pollInterval ? remaining : pollInterval, cancellationToken);
    }

    throw new TimeoutException($"Condition not met within {timeout}");
}
|
||||
}
|
||||
|
||||
+71
-9
@@ -1,3 +1,4 @@
|
||||
using System.Collections.Concurrent;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
|
||||
@@ -15,24 +16,38 @@ namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
|
||||
/// initialize/connect succeeds, so the wrapping <c>DriverInstanceActor</c> walks
|
||||
/// <c>Connecting → Connected → Healthy</c> and (on <c>ReconnectDriver</c>) drives
|
||||
/// <c>ForceReconnect → Reconnecting → re-initialize → Connected</c> without any fault injection.
|
||||
/// Each created driver is recorded in <see cref="Created"/> so a test can reach in and flip its
|
||||
/// health to <see cref="DriverState.Reconnecting"/> just before issuing the reconnect command.
|
||||
/// </remarks>
|
||||
public sealed class FakeReconnectDriverFactory : IDriverFactory
|
||||
{
|
||||
/// <summary>The single driver type this fake factory materialises.</summary>
|
||||
public const string FakeDriverType = "Modbus";
|
||||
|
||||
/// <summary>
|
||||
/// Drivers this factory has created, keyed by <c>driverInstanceId</c>, so a test can retrieve the
|
||||
/// live instance (e.g. to call <see cref="FakeReconnectDriver.ReportReconnecting"/> before
|
||||
/// dispatching a reconnect command). Concurrent because the factory is invoked on the spawning
|
||||
/// actor thread while the test reads on its own thread.
|
||||
/// </summary>
|
||||
public ConcurrentDictionary<string, FakeReconnectDriver> Created { get; } = new();
|
||||
|
||||
/// <summary>
/// Returns a <see cref="FakeReconnectDriver"/> when <paramref name="driverType"/> is
/// <see cref="FakeDriverType"/>; otherwise <c>null</c> (the host logs + skips the row). Records the
/// created driver in <see cref="Created"/> keyed by <paramref name="driverInstanceId"/>.
/// </summary>
/// <param name="driverType">The driver type name from the deployed <c>DriverInstance</c> row.</param>
/// <param name="driverInstanceId">The stable driver-instance identifier.</param>
/// <param name="driverConfigJson">The driver configuration as a JSON string (ignored by the fake).</param>
/// <returns>A new <see cref="FakeReconnectDriver"/>, or <c>null</c> for an unsupported type.</returns>
public IDriver? TryCreate(string driverType, string driverInstanceId, string driverConfigJson)
{
    if (driverType != FakeDriverType)
    {
        return null;
    }

    var driver = new FakeReconnectDriver(driverInstanceId, driverType);

    // Indexer assignment: a later creation for the same instance id replaces the earlier entry,
    // so the test always reaches the most recently created driver.
    Created[driverInstanceId] = driver;
    return driver;
}
|
||||
|
||||
/// <summary>Gets the driver-type names this factory can materialise.</summary>
|
||||
public IReadOnlyCollection<string> SupportedTypes { get; } = new[] { FakeDriverType };
|
||||
@@ -43,6 +58,14 @@ public sealed class FakeReconnectDriverFactory : IDriverFactory
|
||||
/// wrapping <c>DriverInstanceActor</c> reaches <c>Connected</c> and publishes a
|
||||
/// <see cref="DriverState.Healthy"/> snapshot. Reads/writes/subscribes are benign no-ops; this exists
|
||||
/// only to let a deployed driver walk the Healthy ↔ Reconnecting FSM in-process for E2E tests.
|
||||
///
|
||||
/// <para>The driver's reported health is controllable: a test calls <see cref="ReportReconnecting"/>
|
||||
/// to make <see cref="GetHealth"/> return <see cref="DriverState.Reconnecting"/> (simulating a lost
|
||||
/// connection — the realistic trigger for an operator Reconnect). The <c>DriverInstanceActor</c>'s
|
||||
/// <c>ForceReconnect</c> handler POLLS <see cref="GetHealth"/> right after entering its Reconnecting
|
||||
/// state, so that snapshot surfaces <see cref="DriverState.Reconnecting"/> on the driver-health topic.
|
||||
/// The subsequent retry calls <see cref="InitializeAsync"/>, which clears the flag back to
|
||||
/// <see cref="DriverState.Healthy"/>, so the next snapshot returns to Healthy.</para>
|
||||
/// </summary>
|
||||
public sealed class FakeReconnectDriver : IDriver
|
||||
{
|
||||
@@ -63,16 +86,48 @@ public sealed class FakeReconnectDriver : IDriver
|
||||
/// <summary>Gets the driver type name (e.g. "Modbus").</summary>
|
||||
public string DriverType { get; }
|
||||
|
||||
/// <summary>
|
||||
/// When <c>true</c>, <see cref="GetHealth"/> reports <see cref="DriverState.Reconnecting"/>.
|
||||
/// <c>volatile</c> because the actor polls <see cref="GetHealth"/> from its own thread while the
|
||||
/// test flips this from another via <see cref="ReportReconnecting"/>.
|
||||
/// </summary>
|
||||
private volatile bool _reconnecting;
|
||||
|
||||
/// <summary>Timestamp of the most recent successful initialize; surfaced as the last successful read.</summary>
|
||||
private DateTime _lastSuccess = DateTime.UtcNow;
|
||||
|
||||
/// <summary>
|
||||
/// Number of times <see cref="InitializeAsync"/> has been invoked. Read by the test to prove a
|
||||
/// reconnect genuinely re-initialised the driver through the full cluster path (≥ 2 means the
|
||||
/// initial connect plus at least one reconnect retry). Mutated via <see cref="Interlocked"/> since
|
||||
/// the actor's retry path runs on a thread-pool thread.
|
||||
/// </summary>
|
||||
public int InitializeCount;
|
||||
|
||||
/// <summary>
/// Flags the driver as having lost its connection: from now until the next
/// <see cref="InitializeAsync"/> call, <see cref="GetHealth"/> reports
/// <see cref="DriverState.Reconnecting"/>. Intended to be called by the test immediately before it
/// dispatches the reconnect command.
/// </summary>
public void ReportReconnecting()
{
    _reconnecting = true;
}
|
||||
|
||||
/// <summary>
/// Connect/initialize path — always succeeds (returns a completed task), so the actor self-Tells
/// <c>InitializeSucceeded</c> and becomes <c>Connected</c>. This is the method that makes connect
/// succeed; the FSM's reconnect path re-invokes it and it succeeds again. Increments
/// <see cref="InitializeCount"/> and clears the reconnecting flag (initialize succeeded → healthy
/// again).
/// </summary>
/// <param name="driverConfigJson">The driver configuration JSON (ignored).</param>
/// <param name="cancellationToken">Cancellation token for the operation (not observed by the fake).</param>
/// <returns>A completed task — initialization always succeeds.</returns>
public Task InitializeAsync(string driverConfigJson, CancellationToken cancellationToken)
{
    Interlocked.Increment(ref InitializeCount);

    // Record the success time before clearing the flag: the flag is volatile, so a reader that
    // observes the cleared flag also observes the refreshed timestamp.
    _lastSuccess = DateTime.UtcNow;
    _reconnecting = false;

    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>Applies a config change in place — a no-op that always succeeds.</summary>
|
||||
/// <param name="driverConfigJson">The driver configuration JSON (ignored).</param>
|
||||
@@ -87,9 +142,16 @@ public sealed class FakeReconnectDriver : IDriver
|
||||
public Task ShutdownAsync(CancellationToken cancellationToken)
|
||||
=> Task.CompletedTask;
|
||||
|
||||
/// <summary>
/// Returns a <see cref="DriverState.Reconnecting"/> snapshot when the test has flagged a lost
/// connection via <see cref="ReportReconnecting"/>; otherwise a <see cref="DriverState.Healthy"/>
/// snapshot. While reconnecting, the snapshot carries the time of the last successful initialize;
/// while healthy, it carries the current UTC time.
/// </summary>
/// <returns>A <see cref="DriverHealth"/> reflecting the controllable connection state.</returns>
public DriverHealth GetHealth()
{
    if (_reconnecting)
    {
        return new DriverHealth(DriverState.Reconnecting, _lastSuccess, null);
    }

    return new DriverHealth(DriverState.Healthy, DateTime.UtcNow, null);
}
|
||||
|
||||
/// <summary>Returns a zero memory footprint (the fake holds no driver-attributable caches).</summary>
|
||||
/// <returns>Always <c>0</c>.</returns>
|
||||
|
||||
Reference in New Issue
Block a user