2 Commits

Author SHA1 Message Date
Joseph Doherty 686138123f feat(runtime): F11 — HistorianAdapterActor wired to IAlarmHistorianSink
v2-ci / unit-tests (tests/Core/ZB.MOM.WW.OtOpcUa.Cluster.Tests) (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.ControlPlane.Tests) (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.OpcUaServer.Tests) (push) Has been cancelled
v2-ci / build (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests) (push) Has been cancelled
v2-ci / unit-tests (tests/Server/ZB.MOM.WW.OtOpcUa.Security.Tests) (push) Has been cancelled
v2-ci / integration (push) Has been cancelled
Reshapes the placeholder buffered-counter actor into a thin fire-and-forget
bridge over the existing IAlarmHistorianSink contract. Default sink is
NullAlarmHistorianSink; production deployments override the DI binding to
SqliteStoreAndForwardSink wrapping WonderwareHistorianClient (the v1
components in src/Drivers/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware*
are reused verbatim — actor is just a mailbox-friendly entry point).

- HistorianAdapterActor.Props(IAlarmHistorianSink?) — null defaults to NullAlarmHistorianSink
- Receive<AlarmHistorianEvent>: fire-and-forget sink.EnqueueAsync
- Receive<GetStatus>: returns sink.GetStatus() (queue depth + drain state)
- ServiceCollectionExtensions.AddOtOpcUaRuntime registers the default sink
- WithOtOpcUaRuntimeActors spawns the actor + registers HistorianAdapterActorKey
- Program.cs calls AddOtOpcUaRuntime when hasDriver

Tests: 2 new (forward-to-sink + GetStatus). Runtime suite 17 → 18.
2026-05-26 07:18:08 -04:00
Joseph Doherty cd5540cb1a test(integration): F22 — failover scenario tests + harness Stop/Restart primitives
Extends TwoNodeClusterHarness with three lifecycle primitives:
- StopNodeBAsync()      — graceful CoordinatedShutdown (Cluster.Leave)
- RestartNodeBAsync()   — rebuild node B on same Akka port + same in-memory DB
- WaitForClusterSizeAsync(n) — converge assertion helper

Adds three failover scenario tests:
- Stopping node B shrinks cluster to 1 Up member
- Restarted node B rejoins on the same Akka port
- Deployment started with B down seals with a single NodeDeploymentState
  (validates ConfigPublishCoordinator.DiscoverDriverNodes snapshots
   membership at dispatch time)

Closes follow-up F22. Integration test count: 6 → 9 (+3).
2026-05-26 07:13:14 -04:00
8 changed files with 293 additions and 29 deletions
@@ -38,6 +38,9 @@ builder.Host.UseSerilog((ctx, lc) => lc
builder.Services.AddOtOpcUaConfigDb(builder.Configuration);
builder.Services.AddOtOpcUaCluster(builder.Configuration);
if (hasDriver)
builder.Services.AddOtOpcUaRuntime();
// Akka cluster bootstrap. Role-specific singletons are registered on the AkkaConfigurationBuilder
// from inside the configurator lambda. AddAkka spins the ActorSystem at host start.
builder.Services.AddAkka("otopcua", (ab, sp) =>
@@ -1,32 +1,58 @@
using Akka.Actor;
using Akka.Event;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
namespace ZB.MOM.WW.OtOpcUa.Runtime.Historian;
/// <summary>
/// Wraps the named-pipe IPC to the Wonderware historian sidecar with a store-and-forward
/// SQLite buffer for pipe outages. Engine wiring (named-pipe client + <c>SqliteStoreAndForwardSink</c>)
/// is staged for follow-up F11.
/// Thin actor wrapper around <see cref="IAlarmHistorianSink"/>. Engine code (ScriptedAlarmActor,
/// Galaxy native alarm bridge, AB CIP ALMD reader) tells <see cref="AlarmHistorianEvent"/>s to this
/// actor; the actor enqueues them on the sink fire-and-forget. Production deployments register
/// <see cref="SqliteStoreAndForwardSink"/> against <c>IAlarmHistorianSink</c>; the sink owns the
/// durable queue + drain-to-Wonderware-pipe loop. The actor here owns nothing operational beyond
/// the message contract — its job is to keep the engine actors on Akka's mailbox without blocking
/// them on disk I/O or pipe handshakes.
///
/// Query queue depth + drain health via <see cref="GetStatus"/>.
/// </summary>
public sealed class HistorianAdapterActor : ReceiveActor
{
public sealed record HistoryRow(string Source, string AttributeId, object? Value, DateTime TimestampUtc);
private readonly ILoggingAdapter _log = Context.GetLogger();
private int _buffered;
public int BufferedCount => _buffered;
public static Props Props() => Akka.Actor.Props.Create(() => new HistorianAdapterActor());
public HistorianAdapterActor()
public sealed record GetStatus
{
Receive<HistoryRow>(row =>
public static readonly GetStatus Instance = new();
}
private readonly IAlarmHistorianSink _sink;
private readonly ILoggingAdapter _log = Context.GetLogger();
public static Props Props(IAlarmHistorianSink? sink = null) =>
Akka.Actor.Props.Create(() => new HistorianAdapterActor(sink ?? NullAlarmHistorianSink.Instance));
public HistorianAdapterActor(IAlarmHistorianSink sink)
{
_sink = sink;
Receive<AlarmHistorianEvent>(evt =>
{
// F11: dispatch to named-pipe sink; on disconnect → buffer in SQLite.
Interlocked.Increment(ref _buffered);
_log.Debug("Historian: buffered row for {Source}/{Attr} (sink wiring staged for F11)",
row.Source, row.AttributeId);
// Fire-and-forget: SqliteStoreAndForwardSink persists to local SQLite synchronously
// inside EnqueueAsync (it returns once the row is committed), so we don't block on
// network/pipe latency. Failures are surfaced via GetStatus's LastError + drain state.
_ = EnqueueAsync(evt);
});
Receive<GetStatus>(_ => Sender.Tell(_sink.GetStatus()));
}
private async Task EnqueueAsync(AlarmHistorianEvent evt)
{
try
{
await _sink.EnqueueAsync(evt, CancellationToken.None);
}
catch (Exception ex)
{
_log.Error(ex, "Historian sink rejected event for {AlarmId} at {Ts}",
evt.AlarmId, evt.TimestampUtc);
}
}
}
@@ -1,10 +1,14 @@
using Akka.Actor;
using Akka.Hosting;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Runtime.Drivers;
using ZB.MOM.WW.OtOpcUa.Runtime.Health;
using ZB.MOM.WW.OtOpcUa.Runtime.Historian;
namespace ZB.MOM.WW.OtOpcUa.Runtime;
@@ -14,11 +18,25 @@ public static class ServiceCollectionExtensions
public const string DriverHostActorName = "driver-host";
public const string DbHealthProbeActorName = "db-health";
public const string HistorianAdapterActorName = "historian-adapter";
/// <summary>
/// Registers shared runtime services. Currently binds <see cref="IAlarmHistorianSink"/>
/// to <see cref="NullAlarmHistorianSink"/> as the default; production deployments
/// override this with <c>SqliteStoreAndForwardSink</c> wrapping <c>WonderwareHistorianClient</c>.
/// Call this BEFORE <c>AddAkka</c>.
/// </summary>
public static IServiceCollection AddOtOpcUaRuntime(this IServiceCollection services)
{
services.TryAddSingleton<IAlarmHistorianSink>(NullAlarmHistorianSink.Instance);
return services;
}
/// <summary>
/// Spawns the per-node driver-role actors on the host's <see cref="ActorSystem"/>:
/// <see cref="DriverHostActor"/> (one per node) and <see cref="DbHealthProbeActor"/>
/// (consumed by the health endpoint + redundancy calc).
/// <see cref="DriverHostActor"/> (one per node), <see cref="DbHealthProbeActor"/>
/// (consumed by the health endpoint + redundancy calc), and
/// <see cref="HistorianAdapterActor"/> wrapping the registered <see cref="IAlarmHistorianSink"/>.
///
/// Mirror of <c>WithOtOpcUaControlPlaneSingletons</c> for the driver role. Both must
/// be registered on the same <see cref="AkkaConfigurationBuilder"/> as the cluster
@@ -26,8 +44,8 @@ public static class ServiceCollectionExtensions
///
/// Wire from the fused Host's Program.cs when the node carries the <c>driver</c> role:
/// <code>
/// if (hasDriver)
/// ab.WithOtOpcUaRuntimeActors();
/// services.AddOtOpcUaRuntime();
/// services.AddAkka("otopcua", (ab, sp) => { ab.WithOtOpcUaClusterBootstrap(sp); if (hasDriver) ab.WithOtOpcUaRuntimeActors(); });
/// </code>
/// </summary>
public static AkkaConfigurationBuilder WithOtOpcUaRuntimeActors(this AkkaConfigurationBuilder builder)
@@ -36,6 +54,8 @@ public static class ServiceCollectionExtensions
{
var dbFactory = resolver.GetService<IDbContextFactory<OtOpcUaConfigDbContext>>();
var roleInfo = resolver.GetService<IClusterRoleInfo>();
// Fallback to NullAlarmHistorianSink if AddOtOpcUaRuntime wasn't called (e.g., test harnesses).
var historianSink = resolver.GetService<IAlarmHistorianSink>() ?? NullAlarmHistorianSink.Instance;
var dbHealth = system.ActorOf(
DbHealthProbeActor.Props(dbFactory),
@@ -46,6 +66,11 @@ public static class ServiceCollectionExtensions
DriverHostActor.Props(dbFactory, roleInfo.LocalNode),
DriverHostActorName);
registry.Register<DriverHostActorKey>(driverHost);
var historian = system.ActorOf(
HistorianAdapterActor.Props(historianSink),
HistorianAdapterActorName);
registry.Register<HistorianAdapterActorKey>(historian);
});
return builder;
@@ -55,3 +80,4 @@ public static class ServiceCollectionExtensions
/// <summary>Marker key types used by <c>Akka.Hosting</c> to resolve runtime actors from the registry.</summary>
public sealed class DriverHostActorKey { }
public sealed class DbHealthProbeActorKey { }
public sealed class HistorianAdapterActorKey { }
@@ -24,6 +24,7 @@
the reflective-load design.
-->
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.Abstractions\ZB.MOM.WW.OtOpcUa.Core.Abstractions.csproj"/>
<ProjectReference Include="..\..\Core\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian\ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian.csproj"/>
</ItemGroup>
<ItemGroup>
@@ -0,0 +1,101 @@
using Akka.Cluster;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.DependencyInjection;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.Interfaces;
using ZB.MOM.WW.OtOpcUa.Commons.Messages.Admin;
using ZB.MOM.WW.OtOpcUa.Configuration;
using ZB.MOM.WW.OtOpcUa.Configuration.Enums;
namespace ZB.MOM.WW.OtOpcUa.Host.IntegrationTests;
/// <summary>
/// Failover scenarios layered on <see cref="TwoNodeClusterHarness"/> Stop/Restart primitives.
/// Covers graceful node loss, rejoin on the same Akka port, and deployment under reduced membership.
/// </summary>
public sealed class FailoverScenarioTests
{
private static CancellationToken Ct => TestContext.Current.CancellationToken;
[Fact]
public async Task Stopping_node_b_shrinks_cluster_to_one_up_member()
{
await using var harness = await TwoNodeClusterHarness.StartAsync();
Akka.Cluster.Cluster.Get(harness.NodeASystem).State.Members
.Count(m => m.Status == MemberStatus.Up).ShouldBe(2);
await harness.StopNodeBAsync();
await harness.WaitForClusterSizeAsync(1, TimeSpan.FromSeconds(20));
Akka.Cluster.Cluster.Get(harness.NodeASystem).State.Members
.Count(m => m.Status == MemberStatus.Up).ShouldBe(1);
}
[Fact]
public async Task Restarted_node_b_rejoins_cluster_on_same_port()
{
await using var harness = await TwoNodeClusterHarness.StartAsync();
await harness.StopNodeBAsync();
await harness.WaitForClusterSizeAsync(1, TimeSpan.FromSeconds(20));
await harness.RestartNodeBAsync();
Akka.Cluster.Cluster.Get(harness.NodeASystem).State.Members
.Count(m => m.Status == MemberStatus.Up).ShouldBe(2);
Akka.Cluster.Cluster.Get(harness.NodeBSystem).State.Members
.Count(m => m.Status == MemberStatus.Up).ShouldBe(2);
}
[Fact]
public async Task Deployment_started_with_node_b_down_seals_with_one_node_state()
{
// Establishes that ConfigPublishCoordinator.DiscoverDriverNodes snapshots membership at
// dispatch time — when only node A is Up, only one ApplyAck is expected and the
// deployment seals without B ever participating.
await using var harness = await TwoNodeClusterHarness.StartAsync();
await harness.StopNodeBAsync();
await harness.WaitForClusterSizeAsync(1, TimeSpan.FromSeconds(20));
await using var scope = harness.NodeA.Services.CreateAsyncScope();
var client = scope.ServiceProvider.GetRequiredService<IAdminOperationsClient>();
var result = await client.StartDeploymentAsync(createdBy: "alice@test", Ct);
result.Outcome.ShouldBe(StartDeploymentOutcome.Accepted);
var deploymentId = result.DeploymentId!.Value.Value;
await WaitForAsync(async () =>
{
await using var db = await CreateDbAsync(harness);
var d = await db.Deployments.AsNoTracking()
.FirstOrDefaultAsync(d => d.DeploymentId == deploymentId, Ct);
return d?.Status == DeploymentStatus.Sealed;
}, TimeSpan.FromSeconds(15));
await using var db = await CreateDbAsync(harness);
var nodeStates = await db.NodeDeploymentStates.AsNoTracking()
.Where(s => s.DeploymentId == deploymentId)
.ToListAsync(Ct);
nodeStates.Count.ShouldBe(1);
nodeStates[0].Status.ShouldBe(NodeDeploymentStatus.Applied);
}
private static async Task<OtOpcUaConfigDbContext> CreateDbAsync(TwoNodeClusterHarness harness)
{
var factory = harness.NodeA.Services.GetRequiredService<IDbContextFactory<OtOpcUaConfigDbContext>>();
return await factory.CreateDbContextAsync();
}
private static async Task WaitForAsync(Func<Task<bool>> condition, TimeSpan timeout)
{
var deadline = DateTime.UtcNow + timeout;
while (DateTime.UtcNow < deadline)
{
if (await condition()) return;
await Task.Delay(200);
}
throw new TimeoutException($"Condition not met within {timeout}");
}
}
@@ -88,6 +88,58 @@ public sealed class TwoNodeClusterHarness : IAsyncDisposable
return harness;
}
/// <summary>
/// Gracefully shuts down node B via <see cref="WebApplication.DisposeAsync"/>, which runs
/// CoordinatedShutdown → Cluster.Leave. Node A sees the member transition to Removed within
/// a couple of seconds. Use this for failover scenarios; call <see cref="RestartNodeBAsync"/>
/// to bring it back on the same Akka port.
/// </summary>
public async Task StopNodeBAsync()
{
if (NodeB is null) return;
await NodeB.DisposeAsync();
NodeB = null!;
}
/// <summary>
/// Rebuilds node B on the same Akka port + same in-memory ConfigDb and waits for the cluster
/// to re-converge to 2 Up members. Use after <see cref="StopNodeBAsync"/> to test rejoin.
/// </summary>
public async Task RestartNodeBAsync(TimeSpan? formationTimeout = null)
{
NodeB = await BuildNodeAsync(
host: LoopbackHost,
akkaPort: NodeBAkkaPort,
seedHost: LoopbackHost,
seedAkkaPort: NodeAAkkaPort,
dbName: SharedDbName);
await WaitForClusterFormationAsync(
NodeASystem,
NodeBSystem,
formationTimeout ?? TimeSpan.FromSeconds(20));
}
/// <summary>
/// Waits for node A's cluster view to reach <paramref name="expectedUpMembers"/> members in
/// <see cref="MemberStatus.Up"/>. Used for asserting shrink-after-stop or grow-after-restart.
/// </summary>
public async Task WaitForClusterSizeAsync(int expectedUpMembers, TimeSpan timeout)
{
var deadline = DateTime.UtcNow + timeout;
while (DateTime.UtcNow < deadline)
{
var count = Akka.Cluster.Cluster.Get(NodeASystem).State.Members
.Count(m => m.Status == MemberStatus.Up);
if (count == expectedUpMembers) return;
await Task.Delay(200);
}
var actual = Akka.Cluster.Cluster.Get(NodeASystem).State.Members
.Count(m => m.Status == MemberStatus.Up);
throw new TimeoutException(
$"Cluster did not converge to {expectedUpMembers} Up members within {timeout}. Actual={actual}");
}
private static async Task<WebApplication> BuildNodeAsync(
string host, int akkaPort, string seedHost, int seedAkkaPort, string dbName)
{
@@ -1,9 +1,12 @@
using System.Collections.Concurrent;
using System.Net;
using System.Net.Sockets;
using Akka.Actor;
using Shouldly;
using Xunit;
using ZB.MOM.WW.OtOpcUa.Commons.Types;
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
using ZB.MOM.WW.OtOpcUa.Core.AlarmHistorian;
using ZB.MOM.WW.OtOpcUa.Runtime.Health;
using ZB.MOM.WW.OtOpcUa.Runtime.Historian;
using ZB.MOM.WW.OtOpcUa.Runtime.Tests.Harness;
@@ -61,14 +64,63 @@ public sealed class HealthProbeActorTests : RuntimeActorTestBase
}
[Fact]
public void HistorianAdapterActor_buffers_rows()
public void HistorianAdapterActor_forwards_events_to_injected_sink()
{
var actor = Sys.ActorOf(HistorianAdapterActor.Props());
for (var i = 0; i < 5; i++)
actor.Tell(new HistorianAdapterActor.HistoryRow("driver-a", $"tag-{i}", i, DateTime.UtcNow));
var sink = new RecordingSink();
var actor = Sys.ActorOf(HistorianAdapterActor.Props(sink));
ExpectNoMsg(TimeSpan.FromMilliseconds(100));
// No direct readback of the count from a sealed actor — assert by Ask of a self-probe later
// when the engine wiring lands (F11). For now this asserts the actor accepts the contract.
for (var i = 0; i < 5; i++)
actor.Tell(new AlarmHistorianEvent(
AlarmId: $"alm-{i}",
EquipmentPath: "Plant/LineA",
AlarmName: $"Alarm{i}",
AlarmTypeName: "LimitAlarm",
Severity: AlarmSeverity.High,
EventKind: "Activated",
Message: $"Test alarm {i}",
User: "system",
Comment: null,
TimestampUtc: DateTime.UtcNow));
AwaitCondition(() => sink.Enqueued.Count == 5, TimeSpan.FromSeconds(2));
sink.Enqueued.Select(e => e.AlarmId).OrderBy(s => s).ShouldBe(
new[] { "alm-0", "alm-1", "alm-2", "alm-3", "alm-4" });
}
[Fact]
public async Task HistorianAdapterActor_returns_sink_status_via_GetStatus()
{
var sink = new RecordingSink();
var actor = Sys.ActorOf(HistorianAdapterActor.Props(sink));
actor.Tell(new AlarmHistorianEvent(
"alm-x", "Plant/LineB", "OffNormal", "OffNormalAlarm",
AlarmSeverity.Low, "Activated", "msg", "system", null, DateTime.UtcNow));
AwaitCondition(() => sink.Enqueued.Count == 1, TimeSpan.FromSeconds(2));
var status = await actor.Ask<HistorianSinkStatus>(
HistorianAdapterActor.GetStatus.Instance, TimeSpan.FromSeconds(2));
status.QueueDepth.ShouldBe(1);
status.DrainState.ShouldBe(HistorianDrainState.Idle);
}
private sealed class RecordingSink : IAlarmHistorianSink
{
public ConcurrentBag<AlarmHistorianEvent> Enqueued { get; } = [];
public Task EnqueueAsync(AlarmHistorianEvent evt, CancellationToken cancellationToken)
{
Enqueued.Add(evt);
return Task.CompletedTask;
}
public HistorianSinkStatus GetStatus() => new(
QueueDepth: Enqueued.Count,
DeadLetterDepth: 0,
LastDrainUtc: null,
LastSuccessUtc: null,
LastError: null,
DrainState: HistorianDrainState.Idle);
}
}
@@ -46,11 +46,14 @@ public sealed class ServiceCollectionExtensionsTests
{
var driverHost = host.Services.GetRequiredService<IRequiredActor<DriverHostActorKey>>();
var dbHealth = host.Services.GetRequiredService<IRequiredActor<DbHealthProbeActorKey>>();
var historian = host.Services.GetRequiredService<IRequiredActor<HistorianAdapterActorKey>>();
driverHost.ActorRef.ShouldNotBeNull();
dbHealth.ActorRef.ShouldNotBeNull();
historian.ActorRef.ShouldNotBeNull();
driverHost.ActorRef.Path.Name.ShouldBe(ServiceCollectionExtensions.DriverHostActorName);
dbHealth.ActorRef.Path.Name.ShouldBe(ServiceCollectionExtensions.DbHealthProbeActorName);
historian.ActorRef.Path.Name.ShouldBe(ServiceCollectionExtensions.HistorianAdapterActorName);
}
finally
{