fix(core-abstractions): resolve Low code-review findings (Core.Abstractions-004,005,006,007,008)

- Core.Abstractions-004: guard DriverTypeRegistry.Register with a Lock so
  concurrent registrations are atomic.
- Core.Abstractions-005: narrow PollGroupEngine catch blocks to non-fatal
  exceptions, add optional onError callback, tolerate disposed-CTS races.
- Core.Abstractions-006: document the deliberate int-vs-uint asymmetry on
  IHistoryProvider.ReadEventsAsync / IHistorianDataSource.ReadEventsAsync.
- Core.Abstractions-007: pin the gaps with PollGroupEngine + DriverHealth
  contract tests.
- Core.Abstractions-008: correct XML docs on DriverHealth.LastError and
  the optional / required asymmetry on the history-read surfaces.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Joseph Doherty
2026-05-23 05:37:54 -04:00
parent a02c0ffe36
commit ff2e75ab98
10 changed files with 422 additions and 33 deletions

View File

@@ -6,7 +6,15 @@ namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
/// </summary>
/// <param name="State">Current driver-instance state.</param>
/// <param name="LastSuccessfulRead">Timestamp of the most recent successful equipment read; null if never.</param>
/// <param name="LastError">Most recent error message; null when state is Healthy.</param>
/// <param name="LastError">
/// Most recent error message; null when no error has been recorded. The type makes no
/// guarantee about correlation with <paramref name="State"/> — a driver in
/// <see cref="DriverState.Healthy"/> may legitimately retain the last error from a recovered
/// failure (useful for diagnostics), and <see cref="DriverState.Degraded"/> /
/// <see cref="DriverState.Reconnecting"/> / <see cref="DriverState.Faulted"/> states may all
/// carry a non-null message. Callers must not key behaviour on the LastError-null ↔ Healthy
/// pairing (Core.Abstractions-008).
/// </param>
public sealed record DriverHealth(
DriverState State,
DateTime? LastSuccessfulRead,

View File

@@ -10,33 +10,46 @@ namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
/// and #111 (driver type → namespace kind mapping enforced by sp_ValidateDraft).
/// The registry is the source of truth for both checks.
///
/// Thread-safety: registration happens at startup (single thread); lookups happen on every
/// config-apply (multi-threaded). The internal dictionary is replaced atomically via
/// <see cref="System.Threading.Interlocked"/> on register; readers see a stable snapshot.
/// Thread-safety: registration is typically single-threaded at startup; lookups happen on
/// every config-apply (multi-threaded). The check-then-act inside <see cref="Register"/> is
/// guarded by a private lock so concurrent registrations are atomic — the "registered only
/// once per process" guarantee holds even if two callers race. Readers operate against the
/// copy-on-write snapshot reference published by the last successful <see cref="Register"/>
/// (the field itself is not declared <c>volatile</c>) and never block.
/// </remarks>
public sealed class DriverTypeRegistry
{
private readonly Lock _writeLock = new();
private IReadOnlyDictionary<string, DriverTypeMetadata> _types =
new Dictionary<string, DriverTypeMetadata>(StringComparer.OrdinalIgnoreCase);
/// <summary>Register a driver type. Throws if the type name is already registered.</summary>
/// <remarks>
/// The duplicate check, the copy of the current snapshot and the swap all run under
/// <see cref="_writeLock"/>, so two racing <see cref="Register"/> calls cannot both copy the
/// same snapshot and overwrite each other's entry — see Core.Abstractions-004. The new
/// snapshot is published with <see cref="Volatile.Write{T}(ref T, T)"/> because lookups read
/// the field without taking the lock.
/// </remarks>
/// <param name="metadata">Metadata describing the driver type to add.</param>
/// <exception cref="ArgumentNullException"><paramref name="metadata"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// A driver type with the same <c>TypeName</c> is already present in the current snapshot.
/// </exception>
public void Register(DriverTypeMetadata metadata)
{
    ArgumentNullException.ThrowIfNull(metadata);

    lock (_writeLock)
    {
        var snapshot = _types;
        if (snapshot.ContainsKey(metadata.TypeName))
        {
            throw new InvalidOperationException(
                $"Driver type '{metadata.TypeName}' is already registered. " +
                $"Each driver type may be registered only once per process.");
        }

        // Copy-on-write: the published dictionary is never mutated, only replaced.
        var next = new Dictionary<string, DriverTypeMetadata>(snapshot, StringComparer.OrdinalIgnoreCase)
        {
            [metadata.TypeName] = metadata,
        };

        // Explicit release-store so lock-free readers observe a fully built dictionary.
        Volatile.Write(ref _types, next);
    }
}
/// <summary>Look up a driver type by name. Throws if unknown.</summary>

View File

@@ -59,6 +59,21 @@ public interface IHistorianDataSource : IDisposable
/// Distinct from any live event stream; sources here come from the historian's
/// event log. <paramref name="sourceName"/> is null to return all sources.
/// </summary>
/// <remarks>
/// Note on parameter types — <paramref name="maxEvents"/> is <see cref="int"/> (not
/// <see cref="uint"/>) so callers can pass <c>0</c> or a negative value as a "use the
/// backend's default cap" sentinel; see <c>WonderwareHistorianClient</c> /
/// <c>HistorianDataSource</c> and Core.Abstractions-006 for the rationale. The sibling
/// <see cref="ReadRawAsync"/> / <see cref="ReadProcessedAsync"/> use
/// <c>uint maxValuesPerNode</c> because their OPC UA HistoryRead surface has no
/// equivalent "use default" sentinel.
///
/// This surface declares <see cref="ReadAtTimeAsync"/> and <see cref="ReadEventsAsync"/>
/// as required members — a server-side historian owns the full read surface, unlike
/// <see cref="IHistoryProvider"/> where the same two methods are optional default-impl
/// methods so legacy drivers can stay raw-only. The asymmetry is intentional
/// (Core.Abstractions-008).
/// </remarks>
Task<HistoricalEventsResult> ReadEventsAsync(
string? sourceName,
DateTime startUtc,

View File

@@ -6,6 +6,14 @@ namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
/// Galaxy (Wonderware Historian via the optional plugin), OPC UA Client (forward
/// to upstream server).
/// </summary>
/// <remarks>
/// <see cref="ReadAtTimeAsync"/> and <see cref="ReadEventsAsync"/> are C# default interface
/// methods that throw <see cref="NotSupportedException"/> — drivers opt in by overriding so
/// a raw-only driver compiles without forcing it to provide at-time / event surfaces it
/// has no backend for. The sibling server-side surface, <see cref="IHistorianDataSource"/>,
/// declares both methods as required because a registered historian owns the full read
/// surface; the asymmetry is intentional (Core.Abstractions-008).
/// </remarks>
public interface IHistoryProvider
{
/// <summary>
@@ -60,12 +68,24 @@ public interface IHistoryProvider
/// </param>
/// <param name="startUtc">Inclusive lower bound on <c>EventTimeUtc</c>.</param>
/// <param name="endUtc">Exclusive upper bound on <c>EventTimeUtc</c>.</param>
/// <param name="maxEvents">Upper cap on returned events — the driver's backend enforces this.</param>
/// <param name="maxEvents">
/// Upper cap on returned events — the driver's backend enforces this. The type is
/// <see cref="int"/> rather than <see cref="uint"/> (which the sibling raw / processed
/// reads use for <c>maxValuesPerNode</c>) because callers and downstream historian
/// adapters historically treat <c>maxEvents &lt;= 0</c> as a sentinel meaning
/// "use the backend's default cap" (see <c>WonderwareHistorianClient</c> /
/// <c>HistorianDataSource</c>). The asymmetry is intentional — Core.Abstractions-006.
/// </param>
/// <param name="cancellationToken">Request cancellation.</param>
/// <remarks>
/// Default implementation throws. Only drivers with an event historian (Galaxy via the
/// Wonderware Alarm &amp; Events log) override. Modbus / the OPC UA Client driver stay
/// with the default and let callers see <c>BadHistoryOperationUnsupported</c>.
///
/// Note the type asymmetry with <see cref="ReadRawAsync"/> /
/// <see cref="ReadProcessedAsync"/> (both use <c>uint maxValuesPerNode</c>): event
/// readers accept a signed <c>int maxEvents</c> so callers can pass 0 / negative as a
/// "use default cap" sentinel without an extra parameter or overload.
/// </remarks>
Task<HistoricalEventsResult> ReadEventsAsync(
string? sourceName,

View File

@@ -19,14 +19,21 @@ namespace ZB.MOM.WW.OtOpcUa.Core.Abstractions;
/// from the previously-seen snapshot.</para>
///
/// <para>Exceptions thrown by the reader on the initial poll or any subsequent poll are
/// swallowed — the loop continues on the next tick. The driver's own health surface is
/// where transient poll failures should be reported; the engine intentionally does not
/// double-book that responsibility.</para>
/// caught — the loop continues on the next tick. When an <c>onError</c> callback is supplied
/// to the constructor the caught exception is routed to it so the driver's health surface
/// can record the failure. Without an <c>onError</c> callback the exception is silently
/// swallowed (preserves the original behaviour for drivers that have not opted in yet).</para>
///
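/// <para>Process-fatal exceptions (<see cref="OutOfMemoryException"/>,
/// <see cref="ThreadAbortException"/>, <see cref="StackOverflowException"/>,
/// <see cref="AccessViolationException"/>) are NOT caught — they propagate and tear the poll
/// loop down rather than spin a silently-broken subscription. Apart from
/// <see cref="OperationCanceledException"/>, which ends the loop, every other exception type —
/// including programmer errors such as <see cref="ArgumentException"/> — is treated as a
/// transient poll failure and handled as described above.</para>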
/// </remarks>
public sealed class PollGroupEngine : IAsyncDisposable
{
private readonly Func<IReadOnlyList<string>, CancellationToken, Task<IReadOnlyList<DataValueSnapshot>>> _reader;
private readonly Action<ISubscriptionHandle, string, DataValueSnapshot> _onChange;
private readonly Action<Exception>? _onError;
private readonly TimeSpan _minInterval;
private readonly ConcurrentDictionary<long, SubscriptionState> _subscriptions = new();
private long _nextId;
@@ -40,15 +47,21 @@ public sealed class PollGroupEngine : IAsyncDisposable
/// <see cref="ISubscribable.OnDataChange"/> event.</param>
/// <param name="minInterval">Interval floor; anything below is clamped. Defaults to 100 ms
/// per <see cref="DefaultMinInterval"/>.</param>
/// <param name="onError">Optional error sink — invoked once per caught reader exception (or
/// internal contract-violation throw) so the owning driver can route the failure to its
/// health surface (Core.Abstractions-005). Defensive: an <c>onError</c> handler that
/// itself throws is silently absorbed so a buggy forwarder cannot crash the poll loop.</param>
public PollGroupEngine(
    Func<IReadOnlyList<string>, CancellationToken, Task<IReadOnlyList<DataValueSnapshot>>> reader,
    Action<ISubscriptionHandle, string, DataValueSnapshot> onChange,
    TimeSpan? minInterval = null,
    Action<Exception>? onError = null)
{
    // reader and onChange are mandatory; fail fast instead of faulting inside a poll loop.
    ArgumentNullException.ThrowIfNull(reader);
    ArgumentNullException.ThrowIfNull(onChange);

    _reader = reader;
    _onChange = onChange;

    // Optional sink: when null, ReportError drops caught poll exceptions.
    _onError = onError;

    // Fall back to the engine-wide default floor when the caller supplies none.
    _minInterval = minInterval ?? DefaultMinInterval;
}
@@ -102,19 +115,54 @@ public sealed class PollGroupEngine : IAsyncDisposable
// whether it has changed, satisfying OPC UA Part 4 initial-value semantics.
try { await PollOnceAsync(state, forceRaise: true, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
catch { /* first-read error tolerated — loop continues */ }
catch (Exception ex) when (!IsFatal(ex))
{
// first-read error tolerated — loop continues; forward to driver health surface.
ReportError(ex);
}
while (!ct.IsCancellationRequested)
{
try { await Task.Delay(state.Interval, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
// Defensive: the CTS may be disposed by Unsubscribe/DisposeAsync between the
// cancellation check above and the Task.Delay touching the token. Treat that race
// as a normal cancellation rather than a fatal exception.
catch (ObjectDisposedException) { return; }
try { await PollOnceAsync(state, forceRaise: false, ct).ConfigureAwait(false); }
catch (OperationCanceledException) { return; }
catch { /* transient poll error — loop continues, driver health surface logs it */ }
catch (Exception ex) when (!IsFatal(ex))
{
// transient poll error — loop continues, driver health surface logs it
// via the supplied onError callback (Core.Abstractions-005).
ReportError(ex);
}
}
}
/// <summary>
/// Classifies an exception as process-fatal. The poll loop's exception filters use this to
/// let such exceptions escape instead of swallowing them and retrying on the next tick.
/// </summary>
/// <param name="ex">The exception to classify.</param>
/// <returns>
/// <c>true</c> for <see cref="OutOfMemoryException"/>, <see cref="StackOverflowException"/>,
/// <see cref="AccessViolationException"/> and <see cref="ThreadAbortException"/>;
/// <c>false</c> for everything else.
/// </returns>
private static bool IsFatal(Exception ex) => ex switch
{
    OutOfMemoryException => true,
    StackOverflowException => true,
    AccessViolationException => true,
    ThreadAbortException => true,
    _ => false,
};
/// <summary>
/// Forwards a caught poll exception to the optional <c>onError</c> sink. Does nothing when
/// no sink was supplied to the constructor.
/// </summary>
/// <remarks>
/// Non-fatal exceptions thrown by the sink itself are absorbed so a faulty forwarder cannot
/// stop the poll loop. Exceptions that <see cref="IsFatal"/> classifies as process-fatal are
/// allowed to escape, matching how the poll loop treats exceptions thrown by the reader.
/// </remarks>
/// <param name="ex">The exception caught by the poll loop.</param>
private void ReportError(Exception ex)
{
    if (_onError is null)
    {
        return;
    }

    try
    {
        _onError(ex);
    }
    catch (Exception sinkFailure) when (!IsFatal(sinkFailure))
    {
        // Never let a buggy error sink stop the poll loop.
    }
}
private async Task PollOnceAsync(SubscriptionState state, bool forceRaise, CancellationToken ct)
{
var snapshots = await _reader(state.TagReferences, ct).ConfigureAwait(false);