fix(driver-historian-wonderware): resolve Low code-review findings (Driver.Historian.Wonderware-004,005,007,008,010,011,012)
- Driver.Historian.Wonderware-004: ToHistorianEvent synthesises a fresh
Guid when the upstream EventId is unparseable and logs the substitution
instead of writing the historian with Guid.Empty.
- Driver.Historian.Wonderware-005: GetHealthSnapshot derives the
connection-open booleans from the active-node fields so the snapshot
is self-consistent without depending on the secondary lock.
- Driver.Historian.Wonderware-007: SID-mismatch branch in PipeServer now
sends a HelloAck { Accepted=false, RejectReason } so the client sees a
symmetric rejection.
- Driver.Historian.Wonderware-008: classify StartQuery failures —
connection-class codes drop the connection, query-class codes throw
QueryClassStartQueryException so the IPC layer surfaces Success=false.
- Driver.Historian.Wonderware-010: RequestTimeoutSeconds now enforced
via BuildRequestCts linked to the caller's CancellationToken.
- Driver.Historian.Wonderware-011: refreshed XML docs to describe the
current sidecar / named-pipe architecture (Galaxy.Host / Proxy
references reframed as historical context).
- Driver.Historian.Wonderware-012: pinned the previously-uncovered
HistorianDataSource behaviours with five new test files; also removed
the stale empty tests/ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Tests
directory.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+165
-23
@@ -11,7 +11,10 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
/// <summary>
|
||||
/// Reads historical data from the Wonderware Historian via the aahClientManaged SDK.
|
||||
/// OPC-UA-free — emits <see cref="HistorianSample"/>/<see cref="HistorianAggregateSample"/>
|
||||
/// which the Proxy maps to OPC UA <c>DataValue</c> on its side of the IPC.
|
||||
/// which the sidecar serialises onto the named-pipe wire (PR 3.3 contracts) for the
|
||||
/// .NET 10 <c>WonderwareHistorianClient</c> to translate into OPC UA <c>DataValue</c>
|
||||
/// on its side of the IPC. The v1 Galaxy.Host / Proxy architecture this class
|
||||
/// originally lived in retired in PR 7.2.
|
||||
/// </summary>
|
||||
public sealed class HistorianDataSource : IHistorianDataSource
|
||||
{
|
||||
@@ -50,6 +53,51 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
_picker = picker ?? new HistorianClusterEndpointPicker(config);
|
||||
}
|
||||
|
||||
// Error codes that signify the connection or server is the problem rather than the
|
||||
// query itself. A query-class failure (bad tag name, unsupported aggregate, etc.) must
|
||||
// not force us to tear down and re-open the (relatively expensive) historian
|
||||
// connection — that would let a burst of bad-tag queries push an otherwise healthy
|
||||
// cluster node into cooldown. See Driver.Historian.Wonderware-008.
|
||||
private static readonly HashSet<HistorianAccessError.ErrorValue> ConnectionErrorCodes =
|
||||
new HashSet<HistorianAccessError.ErrorValue>
|
||||
{
|
||||
HistorianAccessError.ErrorValue.FailedToConnect,
|
||||
HistorianAccessError.ErrorValue.FailedToCreateSession,
|
||||
HistorianAccessError.ErrorValue.NoReply,
|
||||
HistorianAccessError.ErrorValue.NotReady,
|
||||
HistorianAccessError.ErrorValue.NotInitialized,
|
||||
HistorianAccessError.ErrorValue.Stopping,
|
||||
HistorianAccessError.ErrorValue.Win32Exception,
|
||||
HistorianAccessError.ErrorValue.InvalidResponse,
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Whether an <c>aahClientManaged</c> error code indicates that the
|
||||
/// <em>connection</em> (rather than the query payload) is the problem and the
|
||||
/// shared SDK connection should therefore be reset. Internal for unit testing.
|
||||
/// </summary>
|
||||
internal static bool IsConnectionClassError(HistorianAccessError.ErrorValue code)
|
||||
=> ConnectionErrorCodes.Contains(code);
|
||||
|
||||
/// <summary>
|
||||
/// Builds the per-read <see cref="CancellationTokenSource"/> linked into the
|
||||
/// caller's <paramref name="ct"/> and pre-wired to fire after
|
||||
/// <see cref="HistorianConfiguration.RequestTimeoutSeconds"/> if positive. The
|
||||
/// read paths use the resulting token in their <c>ThrowIfCancellationRequested</c>
|
||||
/// checks so a hung <c>StartQuery</c> or slow <c>MoveNext</c> cannot block the
|
||||
/// single pipe-server connection thread indefinitely. See
|
||||
/// Driver.Historian.Wonderware-010.
|
||||
/// </summary>
|
||||
internal static CancellationTokenSource BuildRequestCts(HistorianConfiguration cfg, CancellationToken ct)
|
||||
{
|
||||
var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
|
||||
if (cfg.RequestTimeoutSeconds > 0)
|
||||
{
|
||||
cts.CancelAfter(TimeSpan.FromSeconds(cfg.RequestTimeoutSeconds));
|
||||
}
|
||||
return cts;
|
||||
}
|
||||
|
||||
private (HistorianAccess Connection, string Node) ConnectToAnyHealthyNode(HistorianConnectionType type)
|
||||
{
|
||||
var candidates = _picker.GetHealthyNodes();
|
||||
@@ -110,6 +158,13 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
foreach (var n in nodeStates)
|
||||
if (n.IsHealthy) healthyCount++;
|
||||
|
||||
// Driver.Historian.Wonderware-005: derive the connection-open booleans from the
|
||||
// active-node strings, both of which live under _healthLock. _connection itself
|
||||
// is published under _connectionLock — reading it here under a different lock
|
||||
// could produce an internally inconsistent snapshot (open with no node, or
|
||||
// closed with a non-null node) at the publish/clear boundary. Treating the
|
||||
// active-node strings as the single source of truth makes the snapshot
|
||||
// self-consistent by construction.
|
||||
lock (_healthLock)
|
||||
{
|
||||
return new HistorianHealthSnapshot
|
||||
@@ -121,8 +176,8 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
LastSuccessTime = _lastSuccessTime,
|
||||
LastFailureTime = _lastFailureTime,
|
||||
LastError = _lastError,
|
||||
ProcessConnectionOpen = Volatile.Read(ref _connection) != null,
|
||||
EventConnectionOpen = Volatile.Read(ref _eventConnection) != null,
|
||||
ProcessConnectionOpen = _activeProcessNode != null,
|
||||
EventConnectionOpen = _activeEventNode != null,
|
||||
ActiveProcessNode = _activeProcessNode,
|
||||
ActiveEventNode = _activeEventNode,
|
||||
NodeCount = nodeStates.Count,
|
||||
@@ -245,6 +300,59 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Internal exception signalling that <c>StartQuery</c> returned an SDK error
|
||||
/// whose code is <em>query-class</em> (bad tag name, unsupported aggregate, etc.)
|
||||
/// and the shared SDK connection therefore must NOT be reset. The outer catch
|
||||
/// re-throws this so the IPC frame handler surfaces <c>Success=false</c> without
|
||||
/// touching the connection. See Driver.Historian.Wonderware-008.
|
||||
/// </summary>
|
||||
internal sealed class QueryClassStartQueryException : InvalidOperationException
|
||||
{
|
||||
public HistorianAccessError.ErrorValue Code { get; }
|
||||
public QueryClassStartQueryException(string message, HistorianAccessError.ErrorValue code)
|
||||
: base(message)
|
||||
{
|
||||
Code = code;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Centralised <c>StartQuery</c>-failure handler. Throws so the caller surfaces
|
||||
/// <c>Success=false</c> in the IPC reply (the previous return-empty-with-success
|
||||
/// behaviour made an SDK error look like "no data in range" to the client). The
|
||||
/// connection is only reset when the error code is connection-class —
|
||||
/// query-class failures (bad tag name, unsupported aggregate, etc.) must leave
|
||||
/// the shared SDK connection intact, otherwise a burst of bad-tag queries cycles
|
||||
/// the connection and pushes a healthy cluster node into cooldown.
|
||||
/// See Driver.Historian.Wonderware-008.
|
||||
/// </summary>
|
||||
private void HandleStartQueryFailure(
|
||||
string operation, HistorianAccessError error, bool isEventConnection)
|
||||
{
|
||||
var code = error?.ErrorCode ?? HistorianAccessError.ErrorValue.Failure;
|
||||
var description = error?.ErrorDescription ?? string.Empty;
|
||||
var connectionClass = IsConnectionClassError(code);
|
||||
|
||||
Log.Warning(
|
||||
"Historian SDK StartQuery failed: {Operation} -> {Code} ({Desc}) [{Kind}]",
|
||||
operation, code, description,
|
||||
connectionClass ? "connection-class" : "query-class");
|
||||
RecordFailure($"{operation}: {code}");
|
||||
|
||||
var message = $"Historian SDK StartQuery failed for {operation}: {code} ({description})";
|
||||
|
||||
if (connectionClass)
|
||||
{
|
||||
if (isEventConnection) HandleEventConnectionError();
|
||||
else HandleConnectionError();
|
||||
throw new InvalidOperationException(message);
|
||||
}
|
||||
|
||||
// Query-class — the outer catch block must NOT call HandleConnectionError on this.
|
||||
throw new QueryClassStartQueryException(message, code);
|
||||
}
|
||||
|
||||
private void HandleEventConnectionError(Exception? ex = null)
|
||||
{
|
||||
lock (_eventConnectionLock)
|
||||
@@ -280,6 +388,11 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
var results = new List<HistorianSample>();
|
||||
|
||||
// Driver.Historian.Wonderware-010: wire RequestTimeoutSeconds into the read path
|
||||
// so a hung StartQuery / slow MoveNext can't block the connection thread forever.
|
||||
using var requestCts = BuildRequestCts(_config, ct);
|
||||
var token = requestCts.Token;
|
||||
|
||||
try
|
||||
{
|
||||
EnsureConnected();
|
||||
@@ -300,10 +413,8 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
|
||||
if (!query.StartQuery(args, out var error))
|
||||
{
|
||||
Log.Warning("Historian SDK raw query start failed for {Tag}: {Error}", tagName, error.ErrorCode);
|
||||
RecordFailure($"raw StartQuery: {error.ErrorCode}");
|
||||
HandleConnectionError();
|
||||
return Task.FromResult(results);
|
||||
HandleStartQueryFailure(
|
||||
$"raw query for tag '{tagName}'", error, isEventConnection: false);
|
||||
}
|
||||
|
||||
var count = 0;
|
||||
@@ -311,7 +422,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
|
||||
while (query.MoveNext(out error))
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
token.ThrowIfCancellationRequested();
|
||||
|
||||
var result = query.QueryResult;
|
||||
var timestamp = DateTime.SpecifyKind(result.StartDateTime, DateTimeKind.Utc);
|
||||
@@ -332,11 +443,20 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
catch (ObjectDisposedException) { throw; }
|
||||
catch (QueryClassStartQueryException)
|
||||
{
|
||||
// Query-class StartQuery failure — HandleStartQueryFailure already logged
|
||||
// and recorded. Re-throw so the IPC layer surfaces Success=false instead of
|
||||
// returning an empty list (which would look like "no data in range"). The
|
||||
// connection is deliberately NOT reset. See Driver.Historian.Wonderware-008.
|
||||
throw;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Log.Warning(ex, "HistoryRead raw failed for {Tag}", tagName);
|
||||
RecordFailure($"raw: {ex.Message}");
|
||||
HandleConnectionError(ex);
|
||||
throw;
|
||||
}
|
||||
|
||||
Log.Debug("HistoryRead raw: {Tag} returned {Count} values ({Start} to {End})",
|
||||
@@ -352,6 +472,10 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
var results = new List<HistorianAggregateSample>();
|
||||
|
||||
// Driver.Historian.Wonderware-010: outer safety timeout — see ReadRawAsync.
|
||||
using var requestCts = BuildRequestCts(_config, ct);
|
||||
var token = requestCts.Token;
|
||||
|
||||
try
|
||||
{
|
||||
EnsureConnected();
|
||||
@@ -367,10 +491,8 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
|
||||
if (!query.StartQuery(args, out var error))
|
||||
{
|
||||
Log.Warning("Historian SDK aggregate query start failed for {Tag}: {Error}", tagName, error.ErrorCode);
|
||||
RecordFailure($"aggregate StartQuery: {error.ErrorCode}");
|
||||
HandleConnectionError();
|
||||
return Task.FromResult(results);
|
||||
HandleStartQueryFailure(
|
||||
$"aggregate query for tag '{tagName}'", error, isEventConnection: false);
|
||||
}
|
||||
|
||||
// Apply the same bucket cap as the raw-read path so a wide time range with a
|
||||
@@ -381,7 +503,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
|
||||
while (query.MoveNext(out error))
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
token.ThrowIfCancellationRequested();
|
||||
|
||||
var result = query.QueryResult;
|
||||
var timestamp = DateTime.SpecifyKind(result.StartDateTime, DateTimeKind.Utc);
|
||||
@@ -408,11 +530,13 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
catch (ObjectDisposedException) { throw; }
|
||||
catch (QueryClassStartQueryException) { throw; } // see ReadRawAsync — keep connection
|
||||
catch (Exception ex)
|
||||
{
|
||||
Log.Warning(ex, "HistoryRead aggregate failed for {Tag}", tagName);
|
||||
RecordFailure($"aggregate: {ex.Message}");
|
||||
HandleConnectionError(ex);
|
||||
throw;
|
||||
}
|
||||
|
||||
Log.Debug("HistoryRead aggregate ({Aggregate}): {Tag} returned {Count} values",
|
||||
@@ -430,13 +554,17 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
if (timestamps == null || timestamps.Length == 0)
|
||||
return Task.FromResult(results);
|
||||
|
||||
// Driver.Historian.Wonderware-010: outer safety timeout — see ReadRawAsync.
|
||||
using var requestCts = BuildRequestCts(_config, ct);
|
||||
var token = requestCts.Token;
|
||||
|
||||
try
|
||||
{
|
||||
EnsureConnected();
|
||||
|
||||
foreach (var timestamp in timestamps)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
token.ThrowIfCancellationRequested();
|
||||
|
||||
using var query = _connection!.CreateHistoryQuery();
|
||||
var args = new HistoryQueryArgs
|
||||
@@ -490,6 +618,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
Log.Warning(ex, "HistoryRead at-time failed for {Tag}", tagName);
|
||||
RecordFailure($"at-time: {ex.Message}");
|
||||
HandleConnectionError(ex);
|
||||
throw;
|
||||
}
|
||||
|
||||
Log.Debug("HistoryRead at-time: {Tag} returned {Count} values for {Timestamps} timestamps",
|
||||
@@ -504,6 +633,10 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
var results = new List<HistorianEventDto>();
|
||||
|
||||
// Driver.Historian.Wonderware-010: outer safety timeout — see ReadRawAsync.
|
||||
using var requestCts = BuildRequestCts(_config, ct);
|
||||
var token = requestCts.Token;
|
||||
|
||||
try
|
||||
{
|
||||
EnsureEventConnected();
|
||||
@@ -525,16 +658,14 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
|
||||
if (!query.StartQuery(args, out var error))
|
||||
{
|
||||
Log.Warning("Historian SDK event query start failed: {Error}", error.ErrorCode);
|
||||
RecordFailure($"events StartQuery: {error.ErrorCode}");
|
||||
HandleEventConnectionError();
|
||||
return Task.FromResult(results);
|
||||
HandleStartQueryFailure(
|
||||
$"event query for source '{sourceName ?? "(all)"}'", error, isEventConnection: true);
|
||||
}
|
||||
|
||||
var count = 0;
|
||||
while (query.MoveNext(out error))
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
token.ThrowIfCancellationRequested();
|
||||
results.Add(ToDto(query.QueryResult));
|
||||
count++;
|
||||
if (maxEvents > 0 && count >= maxEvents) break;
|
||||
@@ -545,11 +676,13 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
}
|
||||
catch (OperationCanceledException) { throw; }
|
||||
catch (ObjectDisposedException) { throw; }
|
||||
catch (QueryClassStartQueryException) { throw; } // see ReadRawAsync — keep connection
|
||||
catch (Exception ex)
|
||||
{
|
||||
Log.Warning(ex, "HistoryRead events failed for source {Source}", sourceName ?? "(all)");
|
||||
RecordFailure($"events: {ex.Message}");
|
||||
HandleEventConnectionError(ex);
|
||||
throw;
|
||||
}
|
||||
|
||||
Log.Debug("HistoryRead events: source={Source} returned {Count} events ({Start} to {End})",
|
||||
@@ -593,11 +726,20 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
/// as a string; this is a known edge case of the SDK binding.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
private static object? SelectValue(HistoryQueryResult result)
|
||||
internal static object? SelectValue(HistoryQueryResult result)
|
||||
=> SelectValueFromPair(result.Value, result.StringValue);
|
||||
|
||||
/// <summary>
|
||||
/// SDK-independent overload of the string-vs-numeric heuristic. Exposed so unit
|
||||
/// tests can pin the logic without having to instantiate the SDK
|
||||
/// <see cref="HistoryQueryResult"/> (whose internal property initialisers make
|
||||
/// it impractical to fake). See Driver.Historian.Wonderware-012.
|
||||
/// </summary>
|
||||
internal static object? SelectValueFromPair(double value, string? stringValue)
|
||||
{
|
||||
if (!string.IsNullOrEmpty(result.StringValue) && result.Value == 0)
|
||||
return result.StringValue;
|
||||
return result.Value;
|
||||
if (!string.IsNullOrEmpty(stringValue) && value == 0)
|
||||
return stringValue;
|
||||
return value;
|
||||
}
|
||||
|
||||
internal static double? ExtractAggregateValue(AnalogSummaryQueryResult result, string column)
|
||||
|
||||
Reference in New Issue
Block a user