diff --git a/docs/stability-review-20260407.md b/docs/stability-review-20260407.md
index a32cb53..e66285c 100644
--- a/docs/stability-review-20260407.md
+++ b/docs/stability-review-20260407.md
@@ -155,6 +155,9 @@ Recommendation:
- Propagate cancellation/timeouts explicitly through the request path.
- Add load/fault tests against the real async MXAccess client behavior, not only synchronous fakes.
+**Status: Resolved (2026-04-07)**
+Fix: Moved subscribe/unsubscribe I/O outside `lock(Lock)` in `SyncAddressSpace` and `TearDownGobjects` — bookkeeping is done under lock, actual MXAccess calls happen after the lock is released. Replaced blocking `ReadAsync` calls for alarm priority/description in the dispatch loop with cached values populated from subscription data changes via new `_alarmPriorityTags`/`_alarmDescTags` reverse lookup dictionaries. Refactored Historian `EnsureConnected`/`EnsureEventConnected` with double-check locking so `WaitForConnection` polling runs outside `_connectionLock`. OPC UA Read/Write/HistoryRead handlers remain synchronously blocking (framework constraint: `CustomNodeManager2` overrides are `void`) but `MxAccessClient.ReadAsync`/`WriteAsync` already enforce configurable timeouts (default 5s).
+
### P3: several background loops can be started multiple times and are not joined on shutdown
Evidence:
@@ -209,11 +212,9 @@ Fix: Added `SanitizeConnectionString` helper using `SqlConnectionStringBuilder`
`FakeMxAccessClient` now supports fault injection via `SubscribeException`, `UnsubscribeException`, `ReadException`, and `WriteException` properties. When set, the corresponding async methods return `Task.FromException`. Three tests in `LmxNodeManagerSubscriptionFaultTests` verify that subscribe/unsubscribe faults are caught and logged instead of silently discarded, and that ref-count bookkeeping survives a transient fault.
-### ~~Historian lifecycle coverage is minimal~~ (Partially resolved)
+### ~~Historian lifecycle coverage is minimal~~ (Resolved)
-Six lifecycle tests added in `HistorianDataSourceLifecycleTests`: post-dispose rejection for all four read methods (`ReadRawAsync`, `ReadAggregateAsync`, `ReadAtTimeAsync`, `ReadEventsAsync`), double-dispose idempotency, and aggregate column mapping.
-
-Remaining: connection timeout, reconnect-after-failure, and query cleanup paths cannot be unit-tested without introducing an abstraction over the `HistorianAccess` SDK class (currently created directly via `new HistorianAccess()` in `EnsureConnected`). Extracting an `IHistorianAccessFactory` seam would make these paths testable.
+Extracted `IHistorianConnectionFactory` abstraction from `HistorianDataSource`, with `SdkHistorianConnectionFactory` as the production implementation and `FakeHistorianConnectionFactory` for tests. Eleven lifecycle tests in `HistorianDataSourceLifecycleTests` now cover: post-dispose rejection for all four read methods, double-dispose idempotency, aggregate column mapping, connection failure (returns empty results), connection timeout (returns empty results), reconnect-after-error (factory called twice), connection failure state resilience, and dispose-after-failure safety.
### ~~Continuation-point expiry is not tested~~ (Resolved)
@@ -234,10 +235,12 @@ Timed out:
## Bottom Line
-The most serious risks are not style issues. They are:
-- work items that can hang forever in the STA bridge,
-- silent loss of live subscriptions because async failures are ignored,
-- request/rebuild paths that block directly on external systems,
-- and a dashboard host that can disappear without surfacing a hard failure.
+All findings have been resolved:
+- StaComThread crash-path faulting prevents callers from hanging forever.
+- Subscription tasks are no longer silently discarded — failures are caught and logged.
+- Subscribe/unsubscribe I/O moved outside `lock(Lock)` in rebuild paths; alarm metadata cached from subscriptions instead of blocking reads; Historian connection polling no longer holds the connection lock.
+- Dashboard binds to localhost and reports startup failures explicitly.
+- Background loops guard against double-start and join on stop.
+- Connection strings are sanitized before logging.
-Those are the first items I would address before depending on this service for long-running production stability.
+Remaining architectural note: OPC UA Read/Write/HistoryRead handlers still use `.GetAwaiter().GetResult()` because `CustomNodeManager2` overrides are synchronous. This is mitigated by the existing configurable timeouts in `MxAccessClient` (default 5s).
diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs
index 30cc1df..dc6ead6 100644
--- a/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs
+++ b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs
@@ -21,6 +21,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian
private readonly HistorianConfiguration _config;
private readonly object _connectionLock = new object();
private readonly object _eventConnectionLock = new object();
+ private readonly IHistorianConnectionFactory _factory;
private HistorianAccess? _connection;
private HistorianAccess? _eventConnection;
private bool _disposed;
@@ -30,8 +31,15 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian
///
/// The Historian SDK connection settings used for runtime history lookups.
public HistorianDataSource(HistorianConfiguration config)
+ : this(config, new SdkHistorianConnectionFactory()) { }
+
+ ///
+ /// Initializes a Historian reader with a custom connection factory for testing.
+ ///
+ internal HistorianDataSource(HistorianConfiguration config, IHistorianConnectionFactory factory)
{
_config = config;
+ _factory = factory;
}
private void EnsureConnected()
@@ -39,33 +47,29 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian
if (_disposed)
throw new ObjectDisposedException(nameof(HistorianDataSource));
+ // Fast path: already connected (no lock needed)
+ if (Volatile.Read(ref _connection) != null)
+ return;
+
+ // Create and wait for connection outside the lock so concurrent history
+ // requests are not serialized behind a slow Historian handshake.
+ var conn = _factory.CreateAndConnect(_config, HistorianConnectionType.Process);
+
lock (_connectionLock)
{
- if (_connection != null)
- return;
-
- var conn = new HistorianAccess();
- var args = new HistorianConnectionArgs
+ if (_disposed)
{
- ServerName = _config.ServerName,
- TcpPort = (ushort)_config.Port,
- IntegratedSecurity = _config.IntegratedSecurity,
- ConnectionType = HistorianConnectionType.Process,
- ReadOnly = true,
- PacketTimeout = (uint)(_config.CommandTimeoutSeconds * 1000)
- };
-
- if (!_config.IntegratedSecurity)
- {
- args.UserName = _config.UserName ?? string.Empty;
- args.Password = _config.Password ?? string.Empty;
+ conn.CloseConnection(out _);
+ conn.Dispose();
+ throw new ObjectDisposedException(nameof(HistorianDataSource));
}
- if (!conn.OpenConnection(args, out var error))
+ if (_connection != null)
{
+ // Another thread connected while we were waiting
+ conn.CloseConnection(out _);
conn.Dispose();
- throw new InvalidOperationException(
- $"Failed to open Historian SDK connection to {_config.ServerName}:{_config.Port}: {error.ErrorCode}");
+ return;
}
_connection = conn;
@@ -100,33 +104,25 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian
if (_disposed)
throw new ObjectDisposedException(nameof(HistorianDataSource));
+ if (Volatile.Read(ref _eventConnection) != null)
+ return;
+
+ var conn = _factory.CreateAndConnect(_config, HistorianConnectionType.Event);
+
lock (_eventConnectionLock)
{
- if (_eventConnection != null)
- return;
-
- var conn = new HistorianAccess();
- var args = new HistorianConnectionArgs
+ if (_disposed)
{
- ServerName = _config.ServerName,
- TcpPort = (ushort)_config.Port,
- IntegratedSecurity = _config.IntegratedSecurity,
- ConnectionType = HistorianConnectionType.Event,
- ReadOnly = true,
- PacketTimeout = (uint)(_config.CommandTimeoutSeconds * 1000)
- };
-
- if (!_config.IntegratedSecurity)
- {
- args.UserName = _config.UserName ?? string.Empty;
- args.Password = _config.Password ?? string.Empty;
+ conn.CloseConnection(out _);
+ conn.Dispose();
+ throw new ObjectDisposedException(nameof(HistorianDataSource));
}
- if (!conn.OpenConnection(args, out var error))
+ if (_eventConnection != null)
{
+ conn.CloseConnection(out _);
conn.Dispose();
- throw new InvalidOperationException(
- $"Failed to open Historian SDK event connection to {_config.ServerName}:{_config.Port}: {error.ErrorCode}");
+ return;
}
_eventConnection = conn;
@@ -157,6 +153,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian
}
}
+
///
/// Reads raw historical values for a tag from the Historian.
///
diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/IHistorianConnectionFactory.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/IHistorianConnectionFactory.cs
new file mode 100644
index 0000000..aea0a69
--- /dev/null
+++ b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/IHistorianConnectionFactory.cs
@@ -0,0 +1,81 @@
+using System;
+using System.Threading;
+using ArchestrA;
+using ZB.MOM.WW.LmxOpcUa.Host.Configuration;
+
+namespace ZB.MOM.WW.LmxOpcUa.Host.Historian
+{
+ ///
+ /// Creates and opens Historian SDK connections. Extracted so tests can inject
+ /// fakes that control connection success, failure, and timeout behavior.
+ ///
+ internal interface IHistorianConnectionFactory
+ {
+ ///
+ /// Creates a new Historian SDK connection, opens it, and waits until it is ready.
+ /// Throws on connection failure or timeout.
+ ///
+ HistorianAccess CreateAndConnect(HistorianConfiguration config, HistorianConnectionType type);
+ }
+
+ ///
+ /// Production implementation that creates real Historian SDK connections.
+ ///
+ internal sealed class SdkHistorianConnectionFactory : IHistorianConnectionFactory
+ {
+ public HistorianAccess CreateAndConnect(HistorianConfiguration config, HistorianConnectionType type)
+ {
+ var conn = new HistorianAccess();
+
+ var args = new HistorianConnectionArgs
+ {
+ ServerName = config.ServerName,
+ TcpPort = (ushort)config.Port,
+ IntegratedSecurity = config.IntegratedSecurity,
+ UseArchestrAUser = config.IntegratedSecurity,
+ ConnectionType = type,
+ ReadOnly = true,
+ PacketTimeout = (uint)(config.CommandTimeoutSeconds * 1000)
+ };
+
+ if (!config.IntegratedSecurity)
+ {
+ args.UserName = config.UserName ?? string.Empty;
+ args.Password = config.Password ?? string.Empty;
+ }
+
+ if (!conn.OpenConnection(args, out var error))
+ {
+ conn.Dispose();
+ throw new InvalidOperationException(
+ $"Failed to open Historian SDK connection to {config.ServerName}:{config.Port}: {error.ErrorCode}");
+ }
+
+ // The SDK connects asynchronously — poll until the connection is ready
+ var timeoutMs = config.CommandTimeoutSeconds * 1000;
+ var elapsed = 0;
+ while (elapsed < timeoutMs)
+ {
+ var status = new HistorianConnectionStatus();
+ conn.GetConnectionStatus(ref status);
+
+ if (status.ConnectedToServer)
+ return conn;
+
+ if (status.ErrorOccurred)
+ {
+ conn.Dispose();
+ throw new InvalidOperationException(
+ $"Historian SDK connection failed: {status.Error}");
+ }
+
+ Thread.Sleep(250);
+ elapsed += 250;
+ }
+
+ conn.Dispose();
+ throw new TimeoutException(
+ $"Historian SDK connection to {config.ServerName}:{config.Port} timed out after {config.CommandTimeoutSeconds}s");
+ }
+ }
+}
diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs
index b114120..f379aa5 100644
--- a/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs
+++ b/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs
@@ -25,6 +25,9 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
// Alarm tracking: maps InAlarm tag reference → alarm source info
private readonly Dictionary _alarmInAlarmTags = new(StringComparer.OrdinalIgnoreCase);
+ // Reverse lookups: priority/description tag reference → alarm info for cache updates
+ private readonly Dictionary _alarmPriorityTags = new(StringComparer.OrdinalIgnoreCase);
+ private readonly Dictionary _alarmDescTags = new(StringComparer.OrdinalIgnoreCase);
private readonly bool _alarmTrackingEnabled;
private readonly bool _anonymousCanWrite;
private readonly AutoResetEvent _dataChangeSignal = new(false);
@@ -172,6 +175,8 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
_tagMetadata.Clear();
_alarmInAlarmTags.Clear();
_alarmAckedTags.Clear();
+ _alarmPriorityTags.Clear();
+ _alarmDescTags.Clear();
_nodeMap.Clear();
_gobjectToTagRefs.Clear();
VariableNodeCount = 0;
@@ -357,6 +362,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
};
_alarmInAlarmTags[inAlarmTagRef] = alarmInfo;
_alarmAckedTags[alarmInfo.AckedTagReference] = alarmInfo;
+ if (!string.IsNullOrEmpty(alarmInfo.PriorityTagReference))
+ _alarmPriorityTags[alarmInfo.PriorityTagReference] = alarmInfo;
+ if (!string.IsNullOrEmpty(alarmInfo.DescAttrNameTagReference))
+ _alarmDescTags[alarmInfo.DescAttrNameTagReference] = alarmInfo;
hasAlarms = true;
}
@@ -530,6 +539,9 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
/// The latest Galaxy attribute snapshot to compare against the currently published variables.
public void SyncAddressSpace(List hierarchy, List attributes)
{
+ var tagsToUnsubscribe = new List();
+ var tagsToResubscribe = new List();
+
lock (Lock)
{
if (_lastHierarchy == null || _lastAttributes == null)
@@ -565,29 +577,22 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
if (_subscriptionRefCounts.TryGetValue(tagRef, out var count))
affectedSubscriptions[tagRef] = count;
- // Tear down changed subtrees
- TearDownGobjects(changedIds);
+ // Tear down changed subtrees (collects tags for deferred unsubscription)
+ TearDownGobjects(changedIds, tagsToUnsubscribe);
// Rebuild changed subtrees from new data
var changedHierarchy = hierarchy.Where(h => changedIds.Contains(h.GobjectId)).ToList();
var changedAttributes = attributes.Where(a => changedIds.Contains(a.GobjectId)).ToList();
BuildSubtree(changedHierarchy, changedAttributes);
- // Restore subscriptions for surviving tags
+ // Restore subscription bookkeeping for surviving tags
foreach (var kvp in affectedSubscriptions)
{
if (!_tagToVariableNode.ContainsKey(kvp.Key))
continue;
- try
- {
- _mxAccessClient.SubscribeAsync(kvp.Key, (_, _) => { }).GetAwaiter().GetResult();
- _subscriptionRefCounts[kvp.Key] = kvp.Value;
- }
- catch (Exception ex)
- {
- Log.Warning(ex, "Failed to restore subscription for {TagRef} after sync", kvp.Key);
- }
+ _subscriptionRefCounts[kvp.Key] = kvp.Value;
+ tagsToResubscribe.Add(kvp.Key);
}
_lastHierarchy = new List(hierarchy);
@@ -596,9 +601,18 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
Log.Information("Incremental sync complete: {Objects} objects, {Variables} variables, {Alarms} alarms",
ObjectNodeCount, VariableNodeCount, _alarmInAlarmTags.Count);
}
+
+ // Perform subscribe/unsubscribe I/O outside Lock so read/write/browse operations are not blocked
+ foreach (var tag in tagsToUnsubscribe)
+ try { _mxAccessClient.UnsubscribeAsync(tag).GetAwaiter().GetResult(); }
+ catch (Exception ex) { Log.Warning(ex, "Failed to unsubscribe {Tag} after sync", tag); }
+
+ foreach (var tag in tagsToResubscribe)
+ try { _mxAccessClient.SubscribeAsync(tag, (_, _) => { }).GetAwaiter().GetResult(); }
+ catch (Exception ex) { Log.Warning(ex, "Failed to restore subscription for {Tag} after sync", tag); }
}
- private void TearDownGobjects(HashSet gobjectIds)
+ private void TearDownGobjects(HashSet gobjectIds, List tagsToUnsubscribe)
{
foreach (var id in gobjectIds)
{
@@ -607,18 +621,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
{
foreach (var tagRef in tagRefs.ToList())
{
- // Unsubscribe if actively subscribed
+ // Defer unsubscribe to outside lock
if (_subscriptionRefCounts.ContainsKey(tagRef))
{
- try
- {
- _mxAccessClient.UnsubscribeAsync(tagRef).GetAwaiter().GetResult();
- }
- catch
- {
- /* ignore */
- }
-
+ tagsToUnsubscribe.Add(tagRef);
_subscriptionRefCounts.Remove(tagRef);
}
@@ -630,20 +636,17 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
foreach (var alarmKey in alarmKeysToRemove)
{
var info = _alarmInAlarmTags[alarmKey];
- // Unsubscribe alarm auto-subscriptions
+ // Defer alarm tag unsubscription to outside lock
foreach (var alarmTag in new[]
{ alarmKey, info.PriorityTagReference, info.DescAttrNameTagReference })
if (!string.IsNullOrEmpty(alarmTag))
- try
- {
- _mxAccessClient.UnsubscribeAsync(alarmTag).GetAwaiter().GetResult();
- }
- catch
- {
- /* ignore */
- }
+ tagsToUnsubscribe.Add(alarmTag);
_alarmInAlarmTags.Remove(alarmKey);
+ if (!string.IsNullOrEmpty(info.PriorityTagReference))
+ _alarmPriorityTags.Remove(info.PriorityTagReference);
+ if (!string.IsNullOrEmpty(info.DescAttrNameTagReference))
+ _alarmDescTags.Remove(info.DescAttrNameTagReference);
if (!string.IsNullOrEmpty(info.AckedTagReference))
_alarmAckedTags.Remove(info.AckedTagReference);
}
@@ -871,6 +874,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
};
_alarmInAlarmTags[inAlarmTagRef] = alarmInfo;
_alarmAckedTags[alarmInfo.AckedTagReference] = alarmInfo;
+ if (!string.IsNullOrEmpty(alarmInfo.PriorityTagReference))
+ _alarmPriorityTags[alarmInfo.PriorityTagReference] = alarmInfo;
+ if (!string.IsNullOrEmpty(alarmInfo.DescAttrNameTagReference))
+ _alarmDescTags[alarmInfo.DescAttrNameTagReference] = alarmInfo;
hasAlarms = true;
}
@@ -2075,6 +2082,23 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
alarmInfo = null;
}
+ // Cache alarm priority/description values as they arrive via subscription
+ if (_alarmPriorityTags.TryGetValue(address, out var priorityInfo))
+ {
+ if (vtq.Value is int ipCache)
+ priorityInfo.CachedSeverity =
+ (ushort)Math.Min(Math.Max(ipCache, 1), 1000);
+ else if (vtq.Value is short spCache)
+ priorityInfo.CachedSeverity =
+ (ushort)Math.Min(Math.Max((int)spCache, 1), 1000);
+ }
+
+ if (_alarmDescTags.TryGetValue(address, out var descInfo))
+ {
+ if (vtq.Value is string descCache && !string.IsNullOrEmpty(descCache))
+ descInfo.CachedMessage = descCache;
+ }
+
// Check for Acked transitions — skip if state hasn't changed
if (_alarmAckedTags.TryGetValue(address, out ackedAlarmInfo))
{
@@ -2095,31 +2119,11 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa
if (newInAlarm)
{
- try
- {
- var pVtq = _mxAccessClient.ReadAsync(alarmInfo.PriorityTagReference).GetAwaiter()
- .GetResult();
- if (pVtq.Value is int ip)
- severity = (ushort)Math.Min(Math.Max(ip, 1), 1000);
- else if (pVtq.Value is short sp)
- severity = (ushort)Math.Min(Math.Max((int)sp, 1), 1000);
- }
- catch
- {
- // Keep the previously cached severity when refresh reads fail.
- }
-
- try
- {
- var dVtq = _mxAccessClient.ReadAsync(alarmInfo.DescAttrNameTagReference).GetAwaiter()
- .GetResult();
- if (dVtq.Value is string desc && !string.IsNullOrEmpty(desc))
- message = desc;
- }
- catch
- {
- // Keep the previously cached message when refresh reads fail.
- }
+ // Use cached values from subscription data changes instead of blocking reads
+ severity = alarmInfo.CachedSeverity > 0 ? alarmInfo.CachedSeverity : (ushort?)null;
+ message = !string.IsNullOrEmpty(alarmInfo.CachedMessage)
+ ? alarmInfo.CachedMessage
+ : null;
}
pendingAlarmEvents.Add((address, alarmInfo, newInAlarm, severity, message));
diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj b/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj
index ce723a2..122a7a8 100644
--- a/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj
+++ b/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj
@@ -53,6 +53,10 @@
..\..\lib\aahClientManaged.dll
false
+
+ ..\..\lib\aahClientCommon.dll
+ false
+
diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Helpers/FakeHistorianConnectionFactory.cs b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Helpers/FakeHistorianConnectionFactory.cs
new file mode 100644
index 0000000..f698f4f
--- /dev/null
+++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Helpers/FakeHistorianConnectionFactory.cs
@@ -0,0 +1,49 @@
+using System;
+using ArchestrA;
+using ZB.MOM.WW.LmxOpcUa.Host.Configuration;
+using ZB.MOM.WW.LmxOpcUa.Host.Historian;
+
+namespace ZB.MOM.WW.LmxOpcUa.Tests.Helpers
+{
+ ///
+ /// Fake Historian connection factory for tests. Controls whether connections
+ /// succeed, fail, or timeout without requiring the real Historian SDK runtime.
+ ///
+ internal sealed class FakeHistorianConnectionFactory : IHistorianConnectionFactory
+ {
+ ///
+ /// When set, throws this exception.
+ ///
+ public Exception? ConnectException { get; set; }
+
+ ///
+ /// Number of times has been called.
+ ///
+ public int ConnectCallCount { get; private set; }
+
+ ///
+ /// When set, called on each to determine behavior.
+ /// Receives the call count (1-based). Return null to succeed, or throw to fail.
+ ///
+ public Action? OnConnect { get; set; }
+
+ public HistorianAccess CreateAndConnect(HistorianConfiguration config, HistorianConnectionType type)
+ {
+ ConnectCallCount++;
+
+ if (OnConnect != null)
+ {
+ OnConnect(ConnectCallCount);
+ }
+ else if (ConnectException != null)
+ {
+ throw ConnectException;
+ }
+
+ // Return a HistorianAccess that is not actually connected.
+ // ReadRawAsync etc. will fail when they try to use it, which exercises
+ // the HandleConnectionError → reconnect path.
+ return new HistorianAccess();
+ }
+ }
+}
diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs
index ee57588..97b9b28 100644
--- a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs
+++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs
@@ -4,12 +4,13 @@ using Shouldly;
using Xunit;
using ZB.MOM.WW.LmxOpcUa.Host.Configuration;
using ZB.MOM.WW.LmxOpcUa.Host.Historian;
+using ZB.MOM.WW.LmxOpcUa.Tests.Helpers;
namespace ZB.MOM.WW.LmxOpcUa.Tests.Historian
{
///
/// Verifies Historian data source lifecycle behavior: dispose safety,
- /// post-dispose rejection, and double-dispose idempotency.
+ /// post-dispose rejection, connection failure handling, and reconnect-after-error.
///
public class HistorianDataSourceLifecycleTests
{
@@ -79,5 +80,100 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Historian
{
HistorianDataSource.MapAggregateToColumn(new Opc.Ua.NodeId(99999)).ShouldBeNull();
}
+
+ [Fact]
+ public void ReadRawAsync_WhenConnectionFails_ReturnsEmptyResults()
+ {
+ var factory = new FakeHistorianConnectionFactory
+ {
+ ConnectException = new InvalidOperationException("Connection refused")
+ };
+ var ds = new HistorianDataSource(DefaultConfig, factory);
+
+ var results = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100)
+ .GetAwaiter().GetResult();
+
+ results.Count.ShouldBe(0);
+ factory.ConnectCallCount.ShouldBe(1);
+ }
+
+ [Fact]
+ public void ReadRawAsync_WhenConnectionTimesOut_ReturnsEmptyResults()
+ {
+ var factory = new FakeHistorianConnectionFactory
+ {
+ ConnectException = new TimeoutException("Connection timed out")
+ };
+ var ds = new HistorianDataSource(DefaultConfig, factory);
+
+ var results = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100)
+ .GetAwaiter().GetResult();
+
+ results.Count.ShouldBe(0);
+ }
+
+ [Fact]
+ public void ReadRawAsync_AfterConnectionError_AttemptsReconnect()
+ {
+ var factory = new FakeHistorianConnectionFactory();
+ var ds = new HistorianDataSource(DefaultConfig, factory);
+
+ // First call: factory returns a HistorianAccess that isn't actually connected,
+ // so the query will fail and HandleConnectionError will reset the connection.
+ ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100)
+ .GetAwaiter().GetResult();
+
+ // Second call: should attempt reconnection via the factory
+ ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100)
+ .GetAwaiter().GetResult();
+
+ // Factory should have been called twice — once for initial connect, once for reconnect
+ factory.ConnectCallCount.ShouldBe(2);
+ }
+
+ [Fact]
+ public void ReadRawAsync_ConnectionFailure_DoesNotCorruptState()
+ {
+ var callCount = 0;
+ var factory = new FakeHistorianConnectionFactory
+ {
+ OnConnect = count =>
+ {
+ callCount = count;
+ if (count == 1)
+ throw new InvalidOperationException("First connection fails");
+ // Second call succeeds (returns unconnected HistorianAccess, but that's OK for lifecycle testing)
+ }
+ };
+ var ds = new HistorianDataSource(DefaultConfig, factory);
+
+ // First read: connection fails
+ var r1 = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100)
+ .GetAwaiter().GetResult();
+ r1.Count.ShouldBe(0);
+
+ // Second read: should attempt new connection without throwing from internal state corruption
+ var r2 = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100)
+ .GetAwaiter().GetResult();
+
+ callCount.ShouldBe(2);
+ }
+
+ [Fact]
+ public void Dispose_DuringConnectionFailure_DoesNotThrow()
+ {
+ var factory = new FakeHistorianConnectionFactory
+ {
+ ConnectException = new InvalidOperationException("Connection refused")
+ };
+ var ds = new HistorianDataSource(DefaultConfig, factory);
+
+ // Trigger a failed connection attempt
+ ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100)
+ .GetAwaiter().GetResult();
+
+ // Dispose should handle the null connection gracefully
+ Should.NotThrow(() => ds.Dispose());
+ }
}
}
diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj b/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj
index fcf7eed..dedc023 100644
--- a/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj
+++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj
@@ -38,6 +38,10 @@
..\..\lib\aahClientManaged.dll
false
+
+ ..\..\lib\aahClientCommon.dll
+ false
+