feat(historian-sidecar): live aahClientManaged alarm-event write path (C.1)
SdkAlarmHistorianWriteBackend.WriteBatchAsync replaces the RetryPlease placeholder with the real entry point — HistorianAccess.AddStreamedValue(HistorianEvent, out HistorianAccessError) in aahClientManaged, pinned by decompiling the installed SDK. The write path opens its own ReadOnly=false connection because the query-side HistorianDataSource opens ReadOnly sessions, and AddStreamedValue fails on those with WriteToReadOnlyFile. IHistorianConnectionFactory gains a readOnly parameter (default true, so the query path is unchanged); BuildConnectionArgs is extracted as a pure helper. HistorianClusterEndpointPicker is shared for node failover. Connection-class errors abort the batch as RetryPlease and reset the connection; malformed-input codes map to PermanentFail. Tests: connection-unavailable batch deferral, ClassifyOutcome error-code table, BuildConnectionArgs read-vs-write shaping (80 pass, 2 rig-skipped). The Live_* round-trip tests stay Skip-gated until the PR D.1 rollout smoke. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,7 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using ArchestrA;
|
||||
using Shouldly;
|
||||
using Xunit;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend;
|
||||
@@ -11,42 +12,42 @@ using ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Tests
|
||||
{
|
||||
/// <summary>
|
||||
/// PR C.1 — pins the <see cref="SdkAlarmHistorianWriteBackend"/> contract:
|
||||
/// PR C.1 — covers <see cref="SdkAlarmHistorianWriteBackend"/>, the aahClientManaged-bound
|
||||
/// alarm-event writer. The SDK-touching batch loop itself is exercised by the rig-gated
|
||||
/// <c>Live_*</c> tests (D.1); the unit tests below pin the parts that are SDK-type-free:
|
||||
/// <list type="bullet">
|
||||
/// <item><description>
|
||||
/// Unit: the placeholder backend returns <see cref="AlarmHistorianWriteOutcome.RetryPlease"/>
|
||||
/// for every slot so the lmxopcua-side store-and-forward sink retains events rather than
|
||||
/// dropping them while D.1 is unresolved.
|
||||
/// </description></item>
|
||||
/// <item><description>
|
||||
/// Integration (rig-gated): once D.1 pins the live SDK entry point the Skip attribute is
|
||||
/// removed. The live test writes a synthetic batch to a real AVEVA Historian and asserts
|
||||
/// the cluster picker rotates from a broken primary to a healthy secondary.
|
||||
/// </description></item>
|
||||
/// <item><description>connection-unavailable → whole batch deferred as RetryPlease;</description></item>
|
||||
/// <item><description><see cref="SdkAlarmHistorianWriteBackend.ClassifyOutcome"/> error-code mapping;</description></item>
|
||||
/// <item><description><see cref="SdkHistorianConnectionFactory.BuildConnectionArgs"/> read-only-vs-write shaping.</description></item>
|
||||
/// </list>
|
||||
/// </summary>
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class SdkAlarmHistorianWriteBackendTests
|
||||
{
|
||||
// ── Placeholder-mode tests (no rig required) ─────────────────────────
|
||||
// ── Connection-unavailable path (deterministic, no SDK load) ──────────
|
||||
|
||||
[Fact]
|
||||
public async Task Placeholder_returns_RetryPlease_for_every_slot_so_queue_is_preserved()
|
||||
public async Task Empty_batch_returns_empty_array()
|
||||
{
|
||||
// The SDK call-site in SdkAlarmHistorianWriteBackend is not yet pinned (PR D.1).
|
||||
// Until D.1 swaps in the live call, the backend must return RetryPlease for every
|
||||
// event so the lmxopcua-side SqliteStoreAndForwardSink retains the rows instead of
|
||||
// dropping them — same effect as the NullAlarmHistorianSink fallback, but each
|
||||
// slot is individually addressable for the drain worker.
|
||||
var cfg = new HistorianConfiguration { ServerName = "placeholder-test", Enabled = true };
|
||||
var backend = new SdkAlarmHistorianWriteBackend(cfg);
|
||||
var backend = new SdkAlarmHistorianWriteBackend(
|
||||
Config("any"), new ThrowingConnectionFactory());
|
||||
|
||||
var events = new[]
|
||||
{
|
||||
AlarmEvent("E1"),
|
||||
AlarmEvent("E2"),
|
||||
AlarmEvent("E3"),
|
||||
};
|
||||
var outcomes = await backend.WriteBatchAsync(
|
||||
Array.Empty<AlarmHistorianEventDto>(), CancellationToken.None);
|
||||
|
||||
outcomes.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Unreachable_node_defers_whole_batch_as_RetryPlease()
|
||||
{
|
||||
// No node can be connected — the backend must defer every event so the
|
||||
// lmxopcua-side SQLite store-and-forward sink retains the rows rather than
|
||||
// dropping them.
|
||||
var backend = new SdkAlarmHistorianWriteBackend(
|
||||
Config("unreachable"), new ThrowingConnectionFactory());
|
||||
|
||||
var events = new[] { AlarmEvent("E1"), AlarmEvent("E2"), AlarmEvent("E3") };
|
||||
var outcomes = await backend.WriteBatchAsync(events, CancellationToken.None);
|
||||
|
||||
outcomes.Length.ShouldBe(events.Length);
|
||||
@@ -54,23 +55,12 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Tests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Placeholder_returns_empty_array_for_empty_batch()
|
||||
public async Task Unreachable_node_large_batch_returns_one_outcome_per_event()
|
||||
{
|
||||
var cfg = new HistorianConfiguration { ServerName = "placeholder-test", Enabled = true };
|
||||
var backend = new SdkAlarmHistorianWriteBackend(cfg);
|
||||
|
||||
var outcomes = await backend.WriteBatchAsync(Array.Empty<AlarmHistorianEventDto>(), CancellationToken.None);
|
||||
|
||||
outcomes.ShouldBeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Placeholder_returns_same_count_as_input_for_large_batch()
|
||||
{
|
||||
// Guards against an off-by-one error in the placeholder array allocation —
|
||||
// WriteBatchAsync must always return exactly as many outcomes as input events.
|
||||
var cfg = new HistorianConfiguration { ServerName = "placeholder-test", Enabled = true };
|
||||
var backend = new SdkAlarmHistorianWriteBackend(cfg);
|
||||
// Guards the outcome-array allocation: WriteBatchAsync must always return exactly
|
||||
// as many outcomes as input events, even on the whole-batch-deferred path.
|
||||
var backend = new SdkAlarmHistorianWriteBackend(
|
||||
Config("unreachable"), new ThrowingConnectionFactory());
|
||||
|
||||
var batch = Enumerable.Range(0, 1000).Select(i => AlarmEvent($"E{i}")).ToArray();
|
||||
var outcomes = await backend.WriteBatchAsync(batch, CancellationToken.None);
|
||||
@@ -79,22 +69,91 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Tests
|
||||
outcomes.ShouldAllBe(o => o == AlarmHistorianWriteOutcome.RetryPlease);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public async Task Connect_failure_marks_node_failed_in_picker()
|
||||
{
|
||||
// Every connect attempt throws → the picker should record the failure so the
|
||||
// node enters cooldown (cluster-failover plumbing).
|
||||
var cfg = Config("node-a");
|
||||
var picker = new HistorianClusterEndpointPicker(cfg);
|
||||
var backend = new SdkAlarmHistorianWriteBackend(cfg, new ThrowingConnectionFactory(), picker);
|
||||
|
||||
await backend.WriteBatchAsync(new[] { AlarmEvent("E1") }, CancellationToken.None);
|
||||
|
||||
picker.HealthyNodeCount.ShouldBe(0, "the only node failed to connect and is now in cooldown");
|
||||
}
|
||||
|
||||
// ── ClassifyOutcome — error-code → outcome mapping ────────────────────
|
||||
|
||||
[Theory]
|
||||
[InlineData(HistorianAccessError.ErrorValue.Success, AlarmHistorianWriteOutcome.Ack)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.FailedToConnect, AlarmHistorianWriteOutcome.RetryPlease)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.FailedToCreateSession, AlarmHistorianWriteOutcome.RetryPlease)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.NoReply, AlarmHistorianWriteOutcome.RetryPlease)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.NotReady, AlarmHistorianWriteOutcome.RetryPlease)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.Failure, AlarmHistorianWriteOutcome.RetryPlease)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.NoData, AlarmHistorianWriteOutcome.RetryPlease)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.InvalidArgument, AlarmHistorianWriteOutcome.PermanentFail)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.ValidationFailed, AlarmHistorianWriteOutcome.PermanentFail)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.NullPointerArgument, AlarmHistorianWriteOutcome.PermanentFail)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.WriteToReadOnlyFile, AlarmHistorianWriteOutcome.PermanentFail)]
|
||||
[InlineData(HistorianAccessError.ErrorValue.NotImplemented, AlarmHistorianWriteOutcome.PermanentFail)]
|
||||
public void ClassifyOutcome_maps_error_code_to_expected_outcome(
|
||||
HistorianAccessError.ErrorValue code, AlarmHistorianWriteOutcome expected)
|
||||
{
|
||||
SdkAlarmHistorianWriteBackend.ClassifyOutcome(code).ShouldBe(expected);
|
||||
}
|
||||
|
||||
// ── BuildConnectionArgs — read-only vs write shaping ──────────────────
|
||||
|
||||
[Fact]
|
||||
public void BuildConnectionArgs_write_connection_is_not_read_only()
|
||||
{
|
||||
// The alarm-event write path must open ReadOnly=false; AddStreamedValue on a
|
||||
// read-only session fails with WriteToReadOnlyFile.
|
||||
var args = SdkHistorianConnectionFactory.BuildConnectionArgs(
|
||||
Config("h1"), HistorianConnectionType.Event, readOnly: false);
|
||||
|
||||
args.ReadOnly.ShouldBeFalse();
|
||||
args.ConnectionType.ShouldBe(HistorianConnectionType.Event);
|
||||
args.ServerName.ShouldBe("h1");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void BuildConnectionArgs_query_connection_is_read_only()
|
||||
{
|
||||
var args = SdkHistorianConnectionFactory.BuildConnectionArgs(
|
||||
Config("h1"), HistorianConnectionType.Process, readOnly: true);
|
||||
|
||||
args.ReadOnly.ShouldBeTrue();
|
||||
args.ConnectionType.ShouldBe(HistorianConnectionType.Process);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void BuildConnectionArgs_non_integrated_security_carries_credentials()
|
||||
{
|
||||
var cfg = Config("h1");
|
||||
cfg.IntegratedSecurity = false;
|
||||
cfg.UserName = "histuser";
|
||||
cfg.Password = "histpass";
|
||||
|
||||
var args = SdkHistorianConnectionFactory.BuildConnectionArgs(
|
||||
cfg, HistorianConnectionType.Event, readOnly: false);
|
||||
|
||||
args.IntegratedSecurity.ShouldBeFalse();
|
||||
args.UserName.ShouldBe("histuser");
|
||||
args.Password.ShouldBe("histpass");
|
||||
}
|
||||
|
||||
// ── Rig-gated integration tests ───────────────────────────────────────
|
||||
//
|
||||
// The tests below need a live AVEVA Historian install and are gated with
|
||||
// Skip="rig-required". Once PR D.1 pins the SDK entry point, remove the
|
||||
// Skip attribute and add them to the integration test run profile.
|
||||
// The entry point (HistorianAccess.AddStreamedValue) is pinned and implemented;
|
||||
// these need a live AVEVA Historian and are un-skipped during the PR D.1 smoke.
|
||||
|
||||
[Fact(Skip = "rig-required: needs a live AVEVA Historian + aahClientManaged SDK — enable in PR D.1")]
|
||||
[Fact(Skip = "rig-required: needs a live AVEVA Historian — un-skip during the PR D.1 rollout smoke")]
|
||||
public async Task Live_single_event_roundtrip_returns_Ack()
|
||||
{
|
||||
// Spec (PR C.1, Tests): "1 / 100 / 1000 events through a fake aahClientManaged
|
||||
// writer; assert per-row outcome list parallel to input order."
|
||||
//
|
||||
// This slice exercises the *live* SDK path. The fake-backend variant at
|
||||
// AahClientManagedAlarmEventWriterTests covers the same assertion without the rig.
|
||||
var cfg = BuildRigConfig();
|
||||
var backend = new SdkAlarmHistorianWriteBackend(cfg);
|
||||
var backend = new SdkAlarmHistorianWriteBackend(BuildRigConfig());
|
||||
|
||||
var outcomes = await backend.WriteBatchAsync(new[] { AlarmEvent("rig-E1") }, CancellationToken.None);
|
||||
|
||||
@@ -102,19 +161,13 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Tests
|
||||
outcomes[0].ShouldBe(AlarmHistorianWriteOutcome.Ack);
|
||||
}
|
||||
|
||||
[Fact(Skip = "rig-required: needs a live AVEVA Historian cluster (two nodes) — enable in PR D.1")]
|
||||
[Fact(Skip = "rig-required: needs a live AVEVA Historian cluster (two nodes) — un-skip during the PR D.1 rollout smoke")]
|
||||
public async Task Live_cluster_failover_primary_bad_rotates_to_secondary()
|
||||
{
|
||||
// Spec (PR C.1, Tests): "Cluster failover: primary node returns
|
||||
// BadCommunicationError; picker rotates to secondary; assert eventual success."
|
||||
//
|
||||
// Configure the first server name to point at a deliberately unreachable node
|
||||
// and the second to the real Historian; the picker should mark the first node
|
||||
// failed and succeed via the second.
|
||||
var cfg = new HistorianConfiguration
|
||||
{
|
||||
Enabled = true,
|
||||
ServerNames = new System.Collections.Generic.List<string>
|
||||
ServerNames = new List<string>
|
||||
{
|
||||
"invalid-primary-node-deliberately-unreachable",
|
||||
Environment.GetEnvironmentVariable("OTOPCUA_HISTORIAN_SERVER") ?? "localhost",
|
||||
@@ -128,18 +181,29 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Tests
|
||||
|
||||
var outcomes = await backend.WriteBatchAsync(new[] { AlarmEvent("rig-failover-E1") }, CancellationToken.None);
|
||||
|
||||
// The backend must succeed (Ack) via the secondary even though the primary was bad.
|
||||
outcomes.Length.ShouldBe(1);
|
||||
outcomes[0].ShouldBe(AlarmHistorianWriteOutcome.Ack);
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────
|
||||
|
||||
private static HistorianConfiguration Config(string server) => new HistorianConfiguration
|
||||
{
|
||||
Enabled = true,
|
||||
ServerName = server,
|
||||
Port = 32568,
|
||||
IntegratedSecurity = true,
|
||||
CommandTimeoutSeconds = 30,
|
||||
FailureCooldownSeconds = 60,
|
||||
};
|
||||
|
||||
private static AlarmHistorianEventDto AlarmEvent(string id) => new AlarmHistorianEventDto
|
||||
{
|
||||
EventId = id,
|
||||
SourceName = "TestSource",
|
||||
ConditionId = "TestSource.Level.HiHi",
|
||||
AlarmType = "AnalogLimitAlarm.HiHi",
|
||||
Message = "C.1 integration test alarm",
|
||||
Message = "C.1 test alarm",
|
||||
Severity = 500,
|
||||
EventTimeUtcTicks = DateTime.UtcNow.Ticks,
|
||||
AckComment = null,
|
||||
@@ -160,5 +224,16 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Host.Tests
|
||||
var raw = Environment.GetEnvironmentVariable(envName);
|
||||
return int.TryParse(raw, out var parsed) ? parsed : defaultValue;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Fake factory whose every connect attempt throws — drives the
|
||||
/// connection-unavailable path without loading the native SDK.
|
||||
/// </summary>
|
||||
private sealed class ThrowingConnectionFactory : IHistorianConnectionFactory
|
||||
{
|
||||
public HistorianAccess CreateAndConnect(
|
||||
HistorianConfiguration config, HistorianConnectionType type, bool readOnly = true)
|
||||
=> throw new InvalidOperationException($"simulated connect failure to {config.ServerName}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,4 +24,14 @@
|
||||
<ProjectReference Include="..\..\..\src\Drivers\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware\ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.csproj"/>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<!-- Wonderware Historian SDK — SdkAlarmHistorianWriteBackendTests pins the
|
||||
error-code (HistorianAccessError.ErrorValue) and connection-arg shaping;
|
||||
a DLL <Reference> doesn't flow transitively through the ProjectReference. -->
|
||||
<Reference Include="aahClientManaged">
|
||||
<HintPath>..\..\..\lib\aahClientManaged.dll</HintPath>
|
||||
<EmbedInteropTypes>false</EmbedInteropTypes>
|
||||
</Reference>
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
Reference in New Issue
Block a user