review(Driver.Historian.Wonderware): AtTime fails over on connection-class errors
Re-review at 7286d320. -014 (Medium): ReadAtTimeAsync didn't classify StartQuery failures,
so a connection-class failure left a dead connection, re-failed every timestamp, and returned
Success=true with all-Bad (no failover); now resets+fails over via a shared classifier + tests.
-015: refresh stale named-pipe comments to TCP (no wire change). -013 (silent cap truncation,
ties OpcUaServer-002/Core.Abstractions-009) deferred cross-module. NOTE: the SDK-touching tests
are net48 + native aahClientManaged and run only on Windows; macOS verifies build + the SDK-free
subset only.
This commit is contained in:
+1
-1
@@ -6,7 +6,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
/// Wonderware Historian SDK configuration. Populated from environment variables at
|
||||
/// sidecar startup (see <c>Program.cs</c>): the supervisor (lmxopcua-side
|
||||
/// <c>WonderwareHistorianClient</c>) spawns the sidecar with these env vars; UA
|
||||
/// translation lives on the client side of the named-pipe IPC, so this surface is
|
||||
/// translation lives on the client side of the TCP IPC, so this surface is
|
||||
/// kept OPC-UA-free. The legacy v1 Galaxy.Host / Proxy host this lived in was retired
|
||||
/// in PR 7.2.
|
||||
/// </summary>
|
||||
|
||||
+33
-3
@@ -11,7 +11,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
/// <summary>
|
||||
/// Reads historical data from the Wonderware Historian via the aahClientManaged SDK.
|
||||
/// OPC-UA-free — emits <see cref="HistorianSample"/>/<see cref="HistorianAggregateSample"/>
|
||||
/// which the sidecar serialises onto the named-pipe wire (PR 3.3 contracts) for the
|
||||
/// which the sidecar serialises onto the TCP wire (PR 3.3 contracts) for the
|
||||
/// .NET 10 <c>WonderwareHistorianClient</c> to translate into OPC UA <c>DataValue</c>
|
||||
/// on its side of the IPC. The v1 Galaxy.Host / Proxy architecture this class
|
||||
/// originally lived in was retired in PR 7.2.
|
||||
@@ -86,13 +86,27 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
internal static bool IsConnectionClassError(HistorianAccessError.ErrorValue code)
{
    // A code is connection-class exactly when it is listed in ConnectionErrorCodes.
    return ConnectionErrorCodes.Contains(code);
}
|
||||
|
||||
/// <summary>
|
||||
/// Whether a failed <c>StartQuery</c> in the per-timestamp at-time loop should reset
|
||||
/// the shared SDK connection (and abort the read) rather than record a per-timestamp
|
||||
/// Bad sample and continue. Returns <c>true</c> only for connection-class error
|
||||
/// codes; query-class / no-data codes (and a missing error) return <c>false</c> so
|
||||
/// a single bad/empty timestamp does not tear down a connection that is still serving
|
||||
/// the other timestamps. The <c>HistoryQuery</c> SDK type is non-virtual and has no
|
||||
/// interface, so the at-time loop can't be driven offline — this pure helper is the
|
||||
/// unit-testable seam for the classification. See Driver.Historian.Wonderware-014.
|
||||
/// </summary>
|
||||
/// <param name="error">The SDK error returned by the failed <c>StartQuery</c>.</param>
|
||||
internal static bool ShouldResetConnectionForStartQueryFailure(HistorianAccessError? error)
{
    // A missing error gives no evidence that the shared connection is dead, so it must
    // never trigger a reset. Short-circuit here rather than substituting a fallback
    // error code and relying on that code being absent from ConnectionErrorCodes.
    if (error == null)
    {
        return false;
    }

    // Only connection-class codes abort the at-time read; query-class / no-data codes
    // leave the connection in place for the remaining timestamps.
    return IsConnectionClassError(error.ErrorCode);
}
|
||||
|
||||
/// <summary>
|
||||
/// Builds the per-read <see cref="CancellationTokenSource"/> linked into the
|
||||
/// caller's <paramref name="ct"/> and pre-wired to fire after
|
||||
/// <see cref="HistorianConfiguration.RequestTimeoutSeconds"/> if positive. The
|
||||
/// read paths use the resulting token in their <c>ThrowIfCancellationRequested</c>
|
||||
/// checks so a hung <c>StartQuery</c> or slow <c>MoveNext</c> cannot block the
|
||||
/// single pipe-server connection thread indefinitely. See
|
||||
/// single TCP-server connection thread indefinitely. See
|
||||
/// Driver.Historian.Wonderware-010.
|
||||
/// </summary>
|
||||
/// <param name="cfg">The historian configuration.</param>
|
||||
@@ -409,7 +423,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
var results = new List<HistorianSample>();
|
||||
|
||||
// Driver.Historian.Wonderware-010: wire RequestTimeoutSeconds into the read path
|
||||
// so a hung StartQuery / slow MoveNext can't block the connection thread forever.
|
||||
// so a hung StartQuery / slow MoveNext can't block the TCP connection thread forever.
|
||||
using var requestCts = BuildRequestCts(_config, ct);
|
||||
var token = requestCts.Token;
|
||||
|
||||
@@ -609,6 +623,22 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
|
||||
if (!query.StartQuery(args, out var error))
|
||||
{
|
||||
// Driver.Historian.Wonderware-014: classify the failure like the raw /
|
||||
// aggregate / event paths. A connection-class code means the shared
|
||||
// connection is dead — throw so the whole at-time read aborts and the IPC
|
||||
// layer surfaces Success=false (the outer catch resets the connection and
|
||||
// marks the node failed). Without this, every remaining timestamp would
|
||||
// re-fail StartQuery on the dead connection and the method would still
|
||||
// report Success=true with an all-Bad result, never failing over. A
|
||||
// query-class / no-data code keeps the connection and records a Bad sample
|
||||
// for just this timestamp.
|
||||
if (ShouldResetConnectionForStartQueryFailure(error))
|
||||
{
|
||||
var code = error?.ErrorCode ?? HistorianAccessError.ErrorValue.Failure;
|
||||
throw new InvalidOperationException(
|
||||
$"Historian SDK StartQuery failed for at-time query of tag '{tagName}': {code} ({error?.ErrorDescription})");
|
||||
}
|
||||
|
||||
results.Add(new HistorianSample
|
||||
{
|
||||
Value = null,
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
{
|
||||
/// <summary>
|
||||
/// OPC-UA-free representation of a single historical data point. The sidecar serialises
|
||||
/// these onto the named-pipe wire (<c>HistorianSampleDto</c>) for the .NET 10
|
||||
/// these onto the TCP wire (<c>HistorianSampleDto</c>) for the .NET 10
|
||||
/// <c>WonderwareHistorianClient</c>, which maps quality and value into OPC UA
|
||||
/// <c>DataValue</c> on its side. Raw OPC DA quality byte is preserved so the client
|
||||
/// can reuse the same quality mapper it already uses for live reads.
|
||||
|
||||
+1
-1
@@ -8,7 +8,7 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Backend
|
||||
/// <summary>
|
||||
/// OPC-UA-free surface for the Wonderware Historian subsystem inside the historian
|
||||
/// sidecar process. Implementations read via the aahClient* SDK; the .NET 10
|
||||
/// <c>WonderwareHistorianClient</c> on the other side of the named-pipe IPC maps
|
||||
/// <c>WonderwareHistorianClient</c> on the other side of the TCP IPC maps
|
||||
/// returned samples to OPC UA <c>DataValue</c>. The v1 Galaxy.Host / Proxy hosts
|
||||
/// this lived in was retired in PR 7.2.
|
||||
/// </summary>
|
||||
|
||||
@@ -4,7 +4,7 @@ using MessagePack;
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
// ============================================================================
|
||||
// Wire DTOs for the sidecar pipe protocol. The sidecar speaks its own legacy
|
||||
// Wire DTOs for the sidecar TCP protocol. The sidecar speaks its own legacy
|
||||
// shape (List<HistorianSample> etc.) — the .NET 10 client (PR 3.4) translates
|
||||
// to / from Core.Abstractions.DataValueSnapshot + HistoricalEvent.
|
||||
//
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Ipc;
|
||||
|
||||
/// <summary>
|
||||
/// Length-prefixed framing constants for the Wonderware historian sidecar pipe protocol.
|
||||
/// Length-prefixed framing constants for the Wonderware historian sidecar TCP protocol.
|
||||
/// Each frame on the wire is:
|
||||
/// <c>[4-byte big-endian length][1-byte message kind][MessagePack body]</c>.
|
||||
/// Length is the body size only; the kind byte is not part of the prefixed length.
|
||||
|
||||
Reference in New Issue
Block a user