fix(driver-historian-wonderware-client): resolve Low code-review findings (Driver.Historian.Wonderware.Client-003,004,006,008,010)
- Driver.Historian.Wonderware.Client-003: replaced the mixed Interlocked + healthLock counters with RecordOutcome that touches _totalQueries and exactly one of _totalSuccesses / _totalFailures under one acquisition. - Driver.Historian.Wonderware.Client-004: InvokeAndClassifyAsync routes transport + sidecar classification through a single RecordOutcome call; the legacy ReclassifySuccessAsFailure two-step is gone. - Driver.Historian.Wonderware.Client-006: removed the dead ReconnectInitialBackoff / ReconnectMaxBackoff options and added a doc <remarks> stating the channel performs a single in-place reconnect; retry/backoff stays with the caller. - Driver.Historian.Wonderware.Client-008: the audit-suppression comment block now records advisory titles, why neither applies, and the revisit trigger. - Driver.Historian.Wonderware.Client-010: reworded Dispose() to claim deadlock-safety and added a GetHealthSnapshot summary documenting the single-channel collapse + counter invariant. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -72,8 +72,9 @@ public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHist
|
||||
MaxValues = (int)Math.Min(maxValuesPerNode, int.MaxValue),
|
||||
CorrelationId = Guid.NewGuid().ToString("N"),
|
||||
};
|
||||
var reply = await Invoke<ReadRawRequest, ReadRawReply>(MessageKind.ReadRawRequest, MessageKind.ReadRawReply, req, cancellationToken).ConfigureAwait(false);
|
||||
ThrowIfFailed(reply.Success, reply.Error, "ReadRaw");
|
||||
var reply = await InvokeAndClassifyAsync<ReadRawRequest, ReadRawReply>(
|
||||
MessageKind.ReadRawRequest, MessageKind.ReadRawReply, req,
|
||||
r => (r.Success, r.Error), "ReadRaw", cancellationToken).ConfigureAwait(false);
|
||||
return new HistoryReadResult(ToSnapshots(reply.Samples), ContinuationPoint: null);
|
||||
}
|
||||
|
||||
@@ -90,8 +91,9 @@ public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHist
|
||||
AggregateColumn = MapAggregate(aggregate),
|
||||
CorrelationId = Guid.NewGuid().ToString("N"),
|
||||
};
|
||||
var reply = await Invoke<ReadProcessedRequest, ReadProcessedReply>(MessageKind.ReadProcessedRequest, MessageKind.ReadProcessedReply, req, cancellationToken).ConfigureAwait(false);
|
||||
ThrowIfFailed(reply.Success, reply.Error, "ReadProcessed");
|
||||
var reply = await InvokeAndClassifyAsync<ReadProcessedRequest, ReadProcessedReply>(
|
||||
MessageKind.ReadProcessedRequest, MessageKind.ReadProcessedReply, req,
|
||||
r => (r.Success, r.Error), "ReadProcessed", cancellationToken).ConfigureAwait(false);
|
||||
return new HistoryReadResult(ToAggregateSnapshots(reply.Buckets), ContinuationPoint: null);
|
||||
}
|
||||
|
||||
@@ -107,8 +109,9 @@ public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHist
|
||||
TimestampsUtcTicks = ticks,
|
||||
CorrelationId = Guid.NewGuid().ToString("N"),
|
||||
};
|
||||
var reply = await Invoke<ReadAtTimeRequest, ReadAtTimeReply>(MessageKind.ReadAtTimeRequest, MessageKind.ReadAtTimeReply, req, cancellationToken).ConfigureAwait(false);
|
||||
ThrowIfFailed(reply.Success, reply.Error, "ReadAtTime");
|
||||
var reply = await InvokeAndClassifyAsync<ReadAtTimeRequest, ReadAtTimeReply>(
|
||||
MessageKind.ReadAtTimeRequest, MessageKind.ReadAtTimeReply, req,
|
||||
r => (r.Success, r.Error), "ReadAtTime", cancellationToken).ConfigureAwait(false);
|
||||
return new HistoryReadResult(AlignAtTimeSnapshots(timestampsUtc, reply.Samples), ContinuationPoint: null);
|
||||
}
|
||||
|
||||
@@ -167,11 +170,34 @@ public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHist
|
||||
MaxEvents = maxEvents,
|
||||
CorrelationId = Guid.NewGuid().ToString("N"),
|
||||
};
|
||||
var reply = await Invoke<ReadEventsRequest, ReadEventsReply>(MessageKind.ReadEventsRequest, MessageKind.ReadEventsReply, req, cancellationToken).ConfigureAwait(false);
|
||||
ThrowIfFailed(reply.Success, reply.Error, "ReadEvents");
|
||||
var reply = await InvokeAndClassifyAsync<ReadEventsRequest, ReadEventsReply>(
|
||||
MessageKind.ReadEventsRequest, MessageKind.ReadEventsReply, req,
|
||||
r => (r.Success, r.Error), "ReadEvents", cancellationToken).ConfigureAwait(false);
|
||||
return new HistoricalEventsResult(ToHistoricalEvents(reply.Events), ContinuationPoint: null);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns a snapshot of operation counters and the single pipe channel's connection
|
||||
/// state.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// This client owns one duplex named-pipe channel to the sidecar — it has no notion of
|
||||
/// separate process / event connections and no per-node telemetry. The single channel's
|
||||
/// connected state is reported for both <see cref="HistorianHealthSnapshot.ProcessConnectionOpen"/>
|
||||
/// and <see cref="HistorianHealthSnapshot.EventConnectionOpen"/>, and
|
||||
/// <see cref="HistorianHealthSnapshot.ActiveProcessNode"/> /
|
||||
/// <see cref="HistorianHealthSnapshot.ActiveEventNode"/> /
|
||||
/// <see cref="HistorianHealthSnapshot.Nodes"/> are intentionally null/empty. Consumers
|
||||
/// that need to distinguish two connections must use a different driver. (Finding 010.)
|
||||
/// <para>
|
||||
/// All seven health fields (TotalQueries, TotalSuccesses, TotalFailures,
|
||||
/// ConsecutiveFailures, LastSuccessTime, LastFailureTime, LastError) are mutated
|
||||
/// exclusively under <c>_healthLock</c>, so the snapshot is internally consistent —
|
||||
/// in particular <c>TotalSuccesses + TotalFailures == TotalQueries</c> at every
|
||||
/// observed snapshot (a call that has started but not yet completed has not
|
||||
/// incremented any counter). (Finding 003 / 004.)
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public HistorianHealthSnapshot GetHealthSnapshot()
|
||||
{
|
||||
lock (_healthLock)
|
||||
@@ -233,8 +259,9 @@ public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHist
|
||||
|
||||
try
|
||||
{
|
||||
var reply = await Invoke<WriteAlarmEventsRequest, WriteAlarmEventsReply>(
|
||||
MessageKind.WriteAlarmEventsRequest, MessageKind.WriteAlarmEventsReply, req, cancellationToken).ConfigureAwait(false);
|
||||
var reply = await InvokeAsync<WriteAlarmEventsRequest, WriteAlarmEventsReply>(
|
||||
MessageKind.WriteAlarmEventsRequest, MessageKind.WriteAlarmEventsReply, req,
|
||||
r => (r.Success, r.Error), cancellationToken).ConfigureAwait(false);
|
||||
|
||||
// Whole-call failure → transient retry for every event in the batch.
|
||||
if (!reply.Success)
|
||||
@@ -279,69 +306,79 @@ public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHist
|
||||
|
||||
// ===== Helpers =====
|
||||
|
||||
private async Task<TReply> Invoke<TRequest, TReply>(
|
||||
MessageKind requestKind, MessageKind expectedReplyKind, TRequest request, CancellationToken ct)
|
||||
/// <summary>
|
||||
/// Sends one request through the channel, classifies the reply with
|
||||
/// <paramref name="evaluate"/>, and records the outcome (success, sidecar-reported
|
||||
/// failure, or transport exception) under a single <c>_healthLock</c> acquisition that
|
||||
/// also bumps <c>_totalQueries</c>. This method does not throw on a sidecar-reported
|
||||
/// failure — <see cref="InvokeAndClassifyAsync"/> layers that on top. (Finding
|
||||
/// 003 / 004: all health counter fields share one synchronization mechanism so a snapshot
|
||||
/// can never observe a torn state.)
|
||||
/// </summary>
|
||||
private async Task<TReply> InvokeAsync<TRequest, TReply>(
|
||||
MessageKind requestKind, MessageKind expectedReplyKind, TRequest request,
|
||||
Func<TReply, (bool ok, string? error)> evaluate, CancellationToken ct)
|
||||
where TReply : class
|
||||
{
|
||||
Interlocked.Increment(ref _totalQueries);
|
||||
try
|
||||
{
|
||||
var reply = await _channel.InvokeAsync<TRequest, TReply>(requestKind, expectedReplyKind, request, ct).ConfigureAwait(false);
|
||||
RecordSuccess();
|
||||
// Classify transport+sidecar in one lock so TotalQueries/TotalSuccesses/
|
||||
// TotalFailures move together and no intermediate "success-then-undo" state is
|
||||
// visible to a concurrent GetHealthSnapshot.
|
||||
var (ok, error) = evaluate(reply);
|
||||
RecordOutcome(ok, error);
|
||||
return reply;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
RecordFailure(ex.Message);
|
||||
RecordOutcome(success: false, ex.Message);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
private void RecordSuccess()
|
||||
/// <summary>
|
||||
/// Convenience wrapper around <see cref="InvokeAsync"/> that throws
|
||||
/// <see cref="InvalidOperationException"/> on a sidecar-reported failure. Used by the
|
||||
/// <see cref="IHistorianDataSource"/> read methods.
|
||||
/// </summary>
|
||||
private async Task<TReply> InvokeAndClassifyAsync<TRequest, TReply>(
|
||||
MessageKind requestKind, MessageKind expectedReplyKind, TRequest request,
|
||||
Func<TReply, (bool ok, string? error)> evaluate, string op, CancellationToken ct)
|
||||
where TReply : class
|
||||
{
|
||||
lock (_healthLock)
|
||||
var reply = await InvokeAsync<TRequest, TReply>(requestKind, expectedReplyKind, request, evaluate, ct).ConfigureAwait(false);
|
||||
var (ok, error) = evaluate(reply);
|
||||
if (!ok)
|
||||
{
|
||||
_totalSuccesses++;
|
||||
_consecutiveFailures = 0;
|
||||
_lastSuccessUtc = DateTime.UtcNow;
|
||||
}
|
||||
}
|
||||
|
||||
private void RecordFailure(string message)
|
||||
{
|
||||
lock (_healthLock)
|
||||
{
|
||||
_totalFailures++;
|
||||
_consecutiveFailures++;
|
||||
_lastFailureUtc = DateTime.UtcNow;
|
||||
_lastError = message;
|
||||
}
|
||||
}
|
||||
|
||||
private void ThrowIfFailed(bool success, string? error, string op)
|
||||
{
|
||||
if (!success)
|
||||
{
|
||||
// Sidecar-reported failure counts as an operation failure even though the
|
||||
// transport delivered a reply. The Invoke wrapper already recorded transport
|
||||
// success — undo that and record the failure so the health snapshot reflects
|
||||
// operation-level success rates rather than just connectivity.
|
||||
ReclassifySuccessAsFailure(error);
|
||||
throw new InvalidOperationException(
|
||||
$"Sidecar {op} failed: {error ?? "<no message>"}.");
|
||||
}
|
||||
return reply;
|
||||
}
|
||||
|
||||
private void ReclassifySuccessAsFailure(string? message)
|
||||
/// <summary>
|
||||
/// Records the outcome of a single call — increments <c>_totalQueries</c> and exactly
|
||||
/// one of <c>_totalSuccesses</c> / <c>_totalFailures</c> under a single
|
||||
/// <c>_healthLock</c> acquisition. (Findings 003 + 004.)
|
||||
/// </summary>
|
||||
private void RecordOutcome(bool success, string? error)
|
||||
{
|
||||
lock (_healthLock)
|
||||
{
|
||||
// Transport-level RecordSuccess happened a moment ago; reverse it.
|
||||
_totalSuccesses--;
|
||||
_totalFailures++;
|
||||
_consecutiveFailures++;
|
||||
_lastFailureUtc = DateTime.UtcNow;
|
||||
_lastError = message;
|
||||
_totalQueries++;
|
||||
if (success)
|
||||
{
|
||||
_totalSuccesses++;
|
||||
_consecutiveFailures = 0;
|
||||
_lastSuccessUtc = DateTime.UtcNow;
|
||||
}
|
||||
else
|
||||
{
|
||||
_totalFailures++;
|
||||
_consecutiveFailures++;
|
||||
_lastFailureUtc = DateTime.UtcNow;
|
||||
_lastError = error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -452,9 +489,12 @@ public sealed class WonderwareHistorianClient : IHistorianDataSource, IAlarmHist
|
||||
|
||||
/// <summary>
|
||||
/// Synchronous dispose required by <see cref="IDisposable"/> on
|
||||
/// <see cref="IHistorianDataSource"/>. The underlying channel's async cleanup is
|
||||
/// non-blocking (just resets transport state + disposes streams), so the
|
||||
/// GetAwaiter()/GetResult() bridge is safe.
|
||||
/// <see cref="IHistorianDataSource"/>. The underlying channel's async cleanup runs
|
||||
/// <see cref="System.IO.Pipes.NamedPipeClientStream"/> teardown, which can block briefly
|
||||
/// on OS handle release — strictly speaking it is not non-blocking — but the
|
||||
/// <c>GetAwaiter()/GetResult()</c> bridge is deadlock-safe because the cleanup never
|
||||
/// resumes on a captured <see cref="System.Threading.SynchronizationContext"/> and never takes any
|
||||
/// lock that the caller could hold. (Finding 010.)
|
||||
/// </summary>
|
||||
public void Dispose() => _channel.DisposeAsync().AsTask().GetAwaiter().GetResult();
|
||||
}
|
||||
|
||||
@@ -3,24 +3,28 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Historian.Wonderware.Client;
|
||||
/// <summary>
|
||||
/// Connection options for <see cref="WonderwareHistorianClient"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// <b>Retry / backoff ownership (finding 006):</b> this module performs exactly one
|
||||
/// in-place transport reconnect inside <c>PipeChannel.InvokeAsync</c> with no delay,
|
||||
/// and does NOT implement exponential reconnect backoff. Broader retry/backoff is the
|
||||
/// caller's responsibility — the alarm drain worker
|
||||
/// (<c>Core.AlarmHistorian.SqliteStoreAndForwardSink</c>) and the read-side
|
||||
/// history router are expected to layer their own backoff on top.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
/// <param name="PipeName">Named-pipe name the sidecar listens on (matches the sidecar's <c>OTOPCUA_HISTORIAN_PIPE</c>).</param>
|
||||
/// <param name="SharedSecret">Per-process shared secret the sidecar will verify in the Hello frame.</param>
|
||||
/// <param name="PeerName">Diagnostic peer identifier sent in Hello — typically the OtOpcUa instance id.</param>
|
||||
/// <param name="ConnectTimeout">Cap on the named-pipe connect + Hello round trip on each (re)connect.</param>
|
||||
/// <param name="CallTimeout">Cap on a single read/write call once connected.</param>
|
||||
/// <param name="ReconnectInitialBackoff">Backoff between the first failed reconnect attempts.</param>
|
||||
/// <param name="ReconnectMaxBackoff">Upper bound on the exponential backoff between reconnects.</param>
|
||||
public sealed record WonderwareHistorianClientOptions(
|
||||
string PipeName,
|
||||
string SharedSecret,
|
||||
string PeerName = "OtOpcUa",
|
||||
TimeSpan? ConnectTimeout = null,
|
||||
TimeSpan? CallTimeout = null,
|
||||
TimeSpan? ReconnectInitialBackoff = null,
|
||||
TimeSpan? ReconnectMaxBackoff = null)
|
||||
TimeSpan? CallTimeout = null)
|
||||
{
|
||||
public TimeSpan EffectiveConnectTimeout => ConnectTimeout ?? TimeSpan.FromSeconds(10);
|
||||
public TimeSpan EffectiveCallTimeout => CallTimeout ?? TimeSpan.FromSeconds(30);
|
||||
public TimeSpan EffectiveReconnectInitialBackoff => ReconnectInitialBackoff ?? TimeSpan.FromMilliseconds(500);
|
||||
public TimeSpan EffectiveReconnectMaxBackoff => ReconnectMaxBackoff ?? TimeSpan.FromSeconds(30);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user