diff --git a/docs/stability-review-20260407.md b/docs/stability-review-20260407.md index a32cb53..e66285c 100644 --- a/docs/stability-review-20260407.md +++ b/docs/stability-review-20260407.md @@ -155,6 +155,9 @@ Recommendation: - Propagate cancellation/timeouts explicitly through the request path. - Add load/fault tests against the real async MXAccess client behavior, not only synchronous fakes. +**Status: Resolved (2026-04-07)** +Fix: Moved subscribe/unsubscribe I/O outside `lock(Lock)` in `SyncAddressSpace` and `TearDownGobjects` — bookkeeping is done under lock, actual MXAccess calls happen after the lock is released. Replaced blocking `ReadAsync` calls for alarm priority/description in the dispatch loop with cached values populated from subscription data changes via new `_alarmPriorityTags`/`_alarmDescTags` reverse lookup dictionaries. Refactored Historian `EnsureConnected`/`EnsureEventConnected` with double-check locking so `WaitForConnection` polling runs outside `_connectionLock`. OPC UA Read/Write/HistoryRead handlers remain synchronously blocking (framework constraint: `CustomNodeManager2` overrides are `void`) but `MxAccessClient.ReadAsync`/`WriteAsync` already enforce configurable timeouts (default 5s). + ### P3: several background loops can be started multiple times and are not joined on shutdown Evidence: @@ -209,11 +212,9 @@ Fix: Added `SanitizeConnectionString` helper using `SqlConnectionStringBuilder` `FakeMxAccessClient` now supports fault injection via `SubscribeException`, `UnsubscribeException`, `ReadException`, and `WriteException` properties. When set, the corresponding async methods return `Task.FromException`. Three tests in `LmxNodeManagerSubscriptionFaultTests` verify that subscribe/unsubscribe faults are caught and logged instead of silently discarded, and that ref-count bookkeeping survives a transient fault. -### ~~Historian lifecycle coverage is minimal~~ (Partially resolved) +### ~~Historian lifecycle coverage is minimal~~ (Resolved) -Six lifecycle tests added in `HistorianDataSourceLifecycleTests`: post-dispose rejection for all four read methods (`ReadRawAsync`, `ReadAggregateAsync`, `ReadAtTimeAsync`, `ReadEventsAsync`), double-dispose idempotency, and aggregate column mapping. - -Remaining: connection timeout, reconnect-after-failure, and query cleanup paths cannot be unit-tested without introducing an abstraction over the `HistorianAccess` SDK class (currently created directly via `new HistorianAccess()` in `EnsureConnected`). Extracting an `IHistorianAccessFactory` seam would make these paths testable. +Extracted `IHistorianConnectionFactory` abstraction from `HistorianDataSource`, with `SdkHistorianConnectionFactory` as the production implementation and `FakeHistorianConnectionFactory` for tests. Eleven lifecycle tests in `HistorianDataSourceLifecycleTests` now cover: post-dispose rejection for all four read methods, double-dispose idempotency, aggregate column mapping, connection failure (returns empty results), connection timeout (returns empty results), reconnect-after-error (factory called twice), connection failure state resilience, and dispose-after-failure safety. ### ~~Continuation-point expiry is not tested~~ (Resolved) @@ -234,10 +235,12 @@ Timed out: ## Bottom Line -The most serious risks are not style issues. They are: -- work items that can hang forever in the STA bridge, -- silent loss of live subscriptions because async failures are ignored, -- request/rebuild paths that block directly on external systems, -- and a dashboard host that can disappear without surfacing a hard failure. +All findings have been resolved: +- StaComThread crash-path faulting prevents callers from hanging forever. +- Subscription tasks are no longer silently discarded — failures are caught and logged. +- Subscribe/unsubscribe I/O moved outside `lock(Lock)` in rebuild paths; alarm metadata cached from subscriptions instead of blocking reads; Historian connection polling no longer holds the connection lock. +- Dashboard binds to localhost and reports startup failures explicitly. +- Background loops guard against double-start and join on stop. +- Connection strings are sanitized before logging. -Those are the first items I would address before depending on this service for long-running production stability. +Remaining architectural note: OPC UA Read/Write/HistoryRead handlers still use `.GetAwaiter().GetResult()` because `CustomNodeManager2` overrides are synchronous. This is mitigated by the existing configurable timeouts in `MxAccessClient` (default 5s). diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs index 30cc1df..dc6ead6 100644 --- a/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs +++ b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/HistorianDataSource.cs @@ -21,6 +21,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian private readonly HistorianConfiguration _config; private readonly object _connectionLock = new object(); private readonly object _eventConnectionLock = new object(); + private readonly IHistorianConnectionFactory _factory; private HistorianAccess? _connection; private HistorianAccess? _eventConnection; private bool _disposed; @@ -30,8 +31,15 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian /// /// The Historian SDK connection settings used for runtime history lookups. public HistorianDataSource(HistorianConfiguration config) + : this(config, new SdkHistorianConnectionFactory()) { } + + /// + /// Initializes a Historian reader with a custom connection factory for testing. + /// + internal HistorianDataSource(HistorianConfiguration config, IHistorianConnectionFactory factory) { _config = config; + _factory = factory; } private void EnsureConnected() @@ -39,33 +47,29 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian if (_disposed) throw new ObjectDisposedException(nameof(HistorianDataSource)); + // Fast path: already connected (no lock needed) + if (Volatile.Read(ref _connection) != null) + return; + + // Create and wait for connection outside the lock so concurrent history + // requests are not serialized behind a slow Historian handshake. + var conn = _factory.CreateAndConnect(_config, HistorianConnectionType.Process); + lock (_connectionLock) { - if (_connection != null) - return; - - var conn = new HistorianAccess(); - var args = new HistorianConnectionArgs + if (_disposed) { - ServerName = _config.ServerName, - TcpPort = (ushort)_config.Port, - IntegratedSecurity = _config.IntegratedSecurity, - ConnectionType = HistorianConnectionType.Process, - ReadOnly = true, - PacketTimeout = (uint)(_config.CommandTimeoutSeconds * 1000) - }; - - if (!_config.IntegratedSecurity) - { - args.UserName = _config.UserName ?? string.Empty; - args.Password = _config.Password ?? string.Empty; + conn.CloseConnection(out _); + conn.Dispose(); + throw new ObjectDisposedException(nameof(HistorianDataSource)); } - if (!conn.OpenConnection(args, out var error)) + if (_connection != null) { + // Another thread connected while we were waiting + conn.CloseConnection(out _); conn.Dispose(); - throw new InvalidOperationException( - $"Failed to open Historian SDK connection to {_config.ServerName}:{_config.Port}: {error.ErrorCode}"); + return; } _connection = conn; @@ -100,33 +104,25 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian if (_disposed) throw new ObjectDisposedException(nameof(HistorianDataSource)); + if (Volatile.Read(ref _eventConnection) != null) + return; + + var conn = _factory.CreateAndConnect(_config, HistorianConnectionType.Event); + lock (_eventConnectionLock) { - if (_eventConnection != null) - return; - - var conn = new HistorianAccess(); - var args = new HistorianConnectionArgs + if (_disposed) { - ServerName = _config.ServerName, - TcpPort = (ushort)_config.Port, - IntegratedSecurity = _config.IntegratedSecurity, - ConnectionType = HistorianConnectionType.Event, - ReadOnly = true, - PacketTimeout = (uint)(_config.CommandTimeoutSeconds * 1000) - }; - - if (!_config.IntegratedSecurity) - { - args.UserName = _config.UserName ?? string.Empty; - args.Password = _config.Password ?? string.Empty; + conn.CloseConnection(out _); + conn.Dispose(); + throw new ObjectDisposedException(nameof(HistorianDataSource)); } - if (!conn.OpenConnection(args, out var error)) + if (_eventConnection != null) { + conn.CloseConnection(out _); conn.Dispose(); - throw new InvalidOperationException( - $"Failed to open Historian SDK event connection to {_config.ServerName}:{_config.Port}: {error.ErrorCode}"); + return; } _eventConnection = conn; @@ -157,6 +153,7 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.Historian } } + /// /// Reads raw historical values for a tag from the Historian. /// diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/IHistorianConnectionFactory.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/IHistorianConnectionFactory.cs new file mode 100644 index 0000000..aea0a69 --- /dev/null +++ b/src/ZB.MOM.WW.LmxOpcUa.Host/Historian/IHistorianConnectionFactory.cs @@ -0,0 +1,81 @@ +using System; +using System.Threading; +using ArchestrA; +using ZB.MOM.WW.LmxOpcUa.Host.Configuration; + +namespace ZB.MOM.WW.LmxOpcUa.Host.Historian +{ + /// + /// Creates and opens Historian SDK connections. Extracted so tests can inject + /// fakes that control connection success, failure, and timeout behavior. + /// + internal interface IHistorianConnectionFactory + { + /// + /// Creates a new Historian SDK connection, opens it, and waits until it is ready. + /// Throws on connection failure or timeout. + /// + HistorianAccess CreateAndConnect(HistorianConfiguration config, HistorianConnectionType type); + } + + /// + /// Production implementation that creates real Historian SDK connections. + /// + internal sealed class SdkHistorianConnectionFactory : IHistorianConnectionFactory + { + public HistorianAccess CreateAndConnect(HistorianConfiguration config, HistorianConnectionType type) + { + var conn = new HistorianAccess(); + + var args = new HistorianConnectionArgs + { + ServerName = config.ServerName, + TcpPort = (ushort)config.Port, + IntegratedSecurity = config.IntegratedSecurity, + UseArchestrAUser = config.IntegratedSecurity, + ConnectionType = type, + ReadOnly = true, + PacketTimeout = (uint)(config.CommandTimeoutSeconds * 1000) + }; + + if (!config.IntegratedSecurity) + { + args.UserName = config.UserName ?? string.Empty; + args.Password = config.Password ?? string.Empty; + } + + if (!conn.OpenConnection(args, out var error)) + { + conn.Dispose(); + throw new InvalidOperationException( + $"Failed to open Historian SDK connection to {config.ServerName}:{config.Port}: {error.ErrorCode}"); + } + + // The SDK connects asynchronously — poll until the connection is ready + var timeoutMs = config.CommandTimeoutSeconds * 1000; + var elapsed = 0; + while (elapsed < timeoutMs) + { + var status = new HistorianConnectionStatus(); + conn.GetConnectionStatus(ref status); + + if (status.ConnectedToServer) + return conn; + + if (status.ErrorOccurred) + { + conn.Dispose(); + throw new InvalidOperationException( + $"Historian SDK connection failed: {status.Error}"); + } + + Thread.Sleep(250); + elapsed += 250; + } + + conn.Dispose(); + throw new TimeoutException( + $"Historian SDK connection to {config.ServerName}:{config.Port} timed out after {config.CommandTimeoutSeconds}s"); + } + } +} diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs b/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs index b114120..f379aa5 100644 --- a/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs +++ b/src/ZB.MOM.WW.LmxOpcUa.Host/OpcUa/LmxNodeManager.cs @@ -25,6 +25,9 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa // Alarm tracking: maps InAlarm tag reference → alarm source info private readonly Dictionary _alarmInAlarmTags = new(StringComparer.OrdinalIgnoreCase); + // Reverse lookups: priority/description tag reference → alarm info for cache updates + private readonly Dictionary _alarmPriorityTags = new(StringComparer.OrdinalIgnoreCase); + private readonly Dictionary _alarmDescTags = new(StringComparer.OrdinalIgnoreCase); private readonly bool _alarmTrackingEnabled; private readonly bool _anonymousCanWrite; private readonly AutoResetEvent _dataChangeSignal = new(false); @@ -172,6 +175,8 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa _tagMetadata.Clear(); _alarmInAlarmTags.Clear(); _alarmAckedTags.Clear(); + _alarmPriorityTags.Clear(); + _alarmDescTags.Clear(); _nodeMap.Clear(); _gobjectToTagRefs.Clear(); VariableNodeCount = 0; @@ -357,6 +362,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa }; _alarmInAlarmTags[inAlarmTagRef] = alarmInfo; _alarmAckedTags[alarmInfo.AckedTagReference] = alarmInfo; + if (!string.IsNullOrEmpty(alarmInfo.PriorityTagReference)) + _alarmPriorityTags[alarmInfo.PriorityTagReference] = alarmInfo; + if (!string.IsNullOrEmpty(alarmInfo.DescAttrNameTagReference)) + _alarmDescTags[alarmInfo.DescAttrNameTagReference] = alarmInfo; hasAlarms = true; } @@ -530,6 +539,9 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa /// The latest Galaxy attribute snapshot to compare against the currently published variables. public void SyncAddressSpace(List hierarchy, List attributes) { + var tagsToUnsubscribe = new List(); + var tagsToResubscribe = new List(); + lock (Lock) { if (_lastHierarchy == null || _lastAttributes == null) @@ -565,29 +577,22 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa if (_subscriptionRefCounts.TryGetValue(tagRef, out var count)) affectedSubscriptions[tagRef] = count; - // Tear down changed subtrees - TearDownGobjects(changedIds); + // Tear down changed subtrees (collects tags for deferred unsubscription) + TearDownGobjects(changedIds, tagsToUnsubscribe); // Rebuild changed subtrees from new data var changedHierarchy = hierarchy.Where(h => changedIds.Contains(h.GobjectId)).ToList(); var changedAttributes = attributes.Where(a => changedIds.Contains(a.GobjectId)).ToList(); BuildSubtree(changedHierarchy, changedAttributes); - // Restore subscriptions for surviving tags + // Restore subscription bookkeeping for surviving tags foreach (var kvp in affectedSubscriptions) { if (!_tagToVariableNode.ContainsKey(kvp.Key)) continue; - try - { - _mxAccessClient.SubscribeAsync(kvp.Key, (_, _) => { }).GetAwaiter().GetResult(); - _subscriptionRefCounts[kvp.Key] = kvp.Value; - } - catch (Exception ex) - { - Log.Warning(ex, "Failed to restore subscription for {TagRef} after sync", kvp.Key); - } + _subscriptionRefCounts[kvp.Key] = kvp.Value; + tagsToResubscribe.Add(kvp.Key); } _lastHierarchy = new List(hierarchy); @@ -596,9 +601,18 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa Log.Information("Incremental sync complete: {Objects} objects, {Variables} variables, {Alarms} alarms", ObjectNodeCount, VariableNodeCount, _alarmInAlarmTags.Count); } + + // Perform subscribe/unsubscribe I/O outside Lock so read/write/browse operations are not blocked + foreach (var tag in tagsToUnsubscribe) + try { _mxAccessClient.UnsubscribeAsync(tag).GetAwaiter().GetResult(); } + catch (Exception ex) { Log.Warning(ex, "Failed to unsubscribe {Tag} after sync", tag); } + + foreach (var tag in tagsToResubscribe) + try { _mxAccessClient.SubscribeAsync(tag, (_, _) => { }).GetAwaiter().GetResult(); } + catch (Exception ex) { Log.Warning(ex, "Failed to restore subscription for {Tag} after sync", tag); } } - private void TearDownGobjects(HashSet gobjectIds) + private void TearDownGobjects(HashSet gobjectIds, List tagsToUnsubscribe) { foreach (var id in gobjectIds) { @@ -607,18 +621,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa { foreach (var tagRef in tagRefs.ToList()) { - // Unsubscribe if actively subscribed + // Defer unsubscribe to outside lock if (_subscriptionRefCounts.ContainsKey(tagRef)) { - try - { - _mxAccessClient.UnsubscribeAsync(tagRef).GetAwaiter().GetResult(); - } - catch - { - /* ignore */ - } - + tagsToUnsubscribe.Add(tagRef); _subscriptionRefCounts.Remove(tagRef); } @@ -630,20 +636,17 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa foreach (var alarmKey in alarmKeysToRemove) { var info = _alarmInAlarmTags[alarmKey]; - // Unsubscribe alarm auto-subscriptions + // Defer alarm tag unsubscription to outside lock foreach (var alarmTag in new[] { alarmKey, info.PriorityTagReference, info.DescAttrNameTagReference }) if (!string.IsNullOrEmpty(alarmTag)) - try - { - _mxAccessClient.UnsubscribeAsync(alarmTag).GetAwaiter().GetResult(); - } - catch - { - /* ignore */ - } + tagsToUnsubscribe.Add(alarmTag); _alarmInAlarmTags.Remove(alarmKey); + if (!string.IsNullOrEmpty(info.PriorityTagReference)) + _alarmPriorityTags.Remove(info.PriorityTagReference); + if (!string.IsNullOrEmpty(info.DescAttrNameTagReference)) + _alarmDescTags.Remove(info.DescAttrNameTagReference); if (!string.IsNullOrEmpty(info.AckedTagReference)) _alarmAckedTags.Remove(info.AckedTagReference); } @@ -871,6 +874,10 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa }; _alarmInAlarmTags[inAlarmTagRef] = alarmInfo; _alarmAckedTags[alarmInfo.AckedTagReference] = alarmInfo; + if (!string.IsNullOrEmpty(alarmInfo.PriorityTagReference)) + _alarmPriorityTags[alarmInfo.PriorityTagReference] = alarmInfo; + if (!string.IsNullOrEmpty(alarmInfo.DescAttrNameTagReference)) + _alarmDescTags[alarmInfo.DescAttrNameTagReference] = alarmInfo; hasAlarms = true; } @@ -2075,6 +2082,23 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa alarmInfo = null; } + // Cache alarm priority/description values as they arrive via subscription + if (_alarmPriorityTags.TryGetValue(address, out var priorityInfo)) + { + if (vtq.Value is int ipCache) + priorityInfo.CachedSeverity = + (ushort)Math.Min(Math.Max(ipCache, 1), 1000); + else if (vtq.Value is short spCache) + priorityInfo.CachedSeverity = + (ushort)Math.Min(Math.Max((int)spCache, 1), 1000); + } + + if (_alarmDescTags.TryGetValue(address, out var descInfo)) + { + if (vtq.Value is string descCache && !string.IsNullOrEmpty(descCache)) + descInfo.CachedMessage = descCache; + } + // Check for Acked transitions — skip if state hasn't changed if (_alarmAckedTags.TryGetValue(address, out ackedAlarmInfo)) { @@ -2095,31 +2119,11 @@ namespace ZB.MOM.WW.LmxOpcUa.Host.OpcUa if (newInAlarm) { - try - { - var pVtq = _mxAccessClient.ReadAsync(alarmInfo.PriorityTagReference).GetAwaiter() - .GetResult(); - if (pVtq.Value is int ip) - severity = (ushort)Math.Min(Math.Max(ip, 1), 1000); - else if (pVtq.Value is short sp) - severity = (ushort)Math.Min(Math.Max((int)sp, 1), 1000); - } - catch - { - // Keep the previously cached severity when refresh reads fail. - } - - try - { - var dVtq = _mxAccessClient.ReadAsync(alarmInfo.DescAttrNameTagReference).GetAwaiter() - .GetResult(); - if (dVtq.Value is string desc && !string.IsNullOrEmpty(desc)) - message = desc; - } - catch - { - // Keep the previously cached message when refresh reads fail. - } + // Use cached values from subscription data changes instead of blocking reads + severity = alarmInfo.CachedSeverity > 0 ? alarmInfo.CachedSeverity : (ushort?)null; + message = !string.IsNullOrEmpty(alarmInfo.CachedMessage) + ? alarmInfo.CachedMessage + : null; } pendingAlarmEvents.Add((address, alarmInfo, newInAlarm, severity, message)); diff --git a/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj b/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj index ce723a2..122a7a8 100644 --- a/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj +++ b/src/ZB.MOM.WW.LmxOpcUa.Host/ZB.MOM.WW.LmxOpcUa.Host.csproj @@ -53,6 +53,10 @@ ..\..\lib\aahClientManaged.dll false + + ..\..\lib\aahClientCommon.dll + false + diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Helpers/FakeHistorianConnectionFactory.cs b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Helpers/FakeHistorianConnectionFactory.cs new file mode 100644 index 0000000..f698f4f --- /dev/null +++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Helpers/FakeHistorianConnectionFactory.cs @@ -0,0 +1,49 @@ +using System; +using ArchestrA; +using ZB.MOM.WW.LmxOpcUa.Host.Configuration; +using ZB.MOM.WW.LmxOpcUa.Host.Historian; + +namespace ZB.MOM.WW.LmxOpcUa.Tests.Helpers +{ + /// + /// Fake Historian connection factory for tests. Controls whether connections + /// succeed, fail, or timeout without requiring the real Historian SDK runtime. + /// + internal sealed class FakeHistorianConnectionFactory : IHistorianConnectionFactory + { + /// + /// When set, throws this exception. + /// + public Exception? ConnectException { get; set; } + + /// + /// Number of times has been called. + /// + public int ConnectCallCount { get; private set; } + + /// + /// When set, called on each to determine behavior. + /// Receives the call count (1-based). Return null to succeed, or throw to fail. + /// + public Action? OnConnect { get; set; } + + public HistorianAccess CreateAndConnect(HistorianConfiguration config, HistorianConnectionType type) + { + ConnectCallCount++; + + if (OnConnect != null) + { + OnConnect(ConnectCallCount); + } + else if (ConnectException != null) + { + throw ConnectException; + } + + // Return a HistorianAccess that is not actually connected. + // ReadRawAsync etc. will fail when they try to use it, which exercises + // the HandleConnectionError → reconnect path. + return new HistorianAccess(); + } + } +} diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs index ee57588..97b9b28 100644 --- a/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs +++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/Historian/HistorianDataSourceLifecycleTests.cs @@ -4,12 +4,13 @@ using Shouldly; using Xunit; using ZB.MOM.WW.LmxOpcUa.Host.Configuration; using ZB.MOM.WW.LmxOpcUa.Host.Historian; +using ZB.MOM.WW.LmxOpcUa.Tests.Helpers; namespace ZB.MOM.WW.LmxOpcUa.Tests.Historian { /// /// Verifies Historian data source lifecycle behavior: dispose safety, - /// post-dispose rejection, and double-dispose idempotency. + /// post-dispose rejection, connection failure handling, and reconnect-after-error. /// public class HistorianDataSourceLifecycleTests { @@ -79,5 +80,100 @@ namespace ZB.MOM.WW.LmxOpcUa.Tests.Historian { HistorianDataSource.MapAggregateToColumn(new Opc.Ua.NodeId(99999)).ShouldBeNull(); } + + [Fact] + public void ReadRawAsync_WhenConnectionFails_ReturnsEmptyResults() + { + var factory = new FakeHistorianConnectionFactory + { + ConnectException = new InvalidOperationException("Connection refused") + }; + var ds = new HistorianDataSource(DefaultConfig, factory); + + var results = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100) + .GetAwaiter().GetResult(); + + results.Count.ShouldBe(0); + factory.ConnectCallCount.ShouldBe(1); + } + + [Fact] + public void ReadRawAsync_WhenConnectionTimesOut_ReturnsEmptyResults() + { + var factory = new FakeHistorianConnectionFactory + { + ConnectException = new TimeoutException("Connection timed out") + }; + var ds = new HistorianDataSource(DefaultConfig, factory); + + var results = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100) + .GetAwaiter().GetResult(); + + results.Count.ShouldBe(0); + } + + [Fact] + public void ReadRawAsync_AfterConnectionError_AttemptsReconnect() + { + var factory = new FakeHistorianConnectionFactory(); + var ds = new HistorianDataSource(DefaultConfig, factory); + + // First call: factory returns a HistorianAccess that isn't actually connected, + // so the query will fail and HandleConnectionError will reset the connection. + ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100) + .GetAwaiter().GetResult(); + + // Second call: should attempt reconnection via the factory + ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100) + .GetAwaiter().GetResult(); + + // Factory should have been called twice — once for initial connect, once for reconnect + factory.ConnectCallCount.ShouldBe(2); + } + + [Fact] + public void ReadRawAsync_ConnectionFailure_DoesNotCorruptState() + { + var callCount = 0; + var factory = new FakeHistorianConnectionFactory + { + OnConnect = count => + { + callCount = count; + if (count == 1) + throw new InvalidOperationException("First connection fails"); + // Second call succeeds (returns unconnected HistorianAccess, but that's OK for lifecycle testing) + } + }; + var ds = new HistorianDataSource(DefaultConfig, factory); + + // First read: connection fails + var r1 = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100) + .GetAwaiter().GetResult(); + r1.Count.ShouldBe(0); + + // Second read: should attempt new connection without throwing from internal state corruption + var r2 = ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100) + .GetAwaiter().GetResult(); + + callCount.ShouldBe(2); + } + + [Fact] + public void Dispose_DuringConnectionFailure_DoesNotThrow() + { + var factory = new FakeHistorianConnectionFactory + { + ConnectException = new InvalidOperationException("Connection refused") + }; + var ds = new HistorianDataSource(DefaultConfig, factory); + + // Trigger a failed connection attempt + ds.ReadRawAsync("Tag1", DateTime.UtcNow.AddHours(-1), DateTime.UtcNow, 100) + .GetAwaiter().GetResult(); + + // Dispose should handle the null connection gracefully + Should.NotThrow(() => ds.Dispose()); + } } } diff --git a/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj b/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj index fcf7eed..dedc023 100644 --- a/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj +++ b/tests/ZB.MOM.WW.LmxOpcUa.Tests/ZB.MOM.WW.LmxOpcUa.Tests.csproj @@ -38,6 +38,10 @@ ..\..\lib\aahClientManaged.dll false + + ..\..\lib\aahClientCommon.dll + false +