chore: organize solution into module folders (Core/Server/Drivers/Client/Tooling)
Group all 69 projects into category subfolders under src/ and tests/ so the Rider Solution Explorer mirrors the module structure. Folders: Core, Server, Drivers (with a nested Driver CLIs subfolder), Client, Tooling. - Move every project folder on disk with git mv (history preserved as renames). - Recompute relative paths in 57 .csproj files: cross-category ProjectReferences, the lib/ HintPath+None refs in Driver.Historian.Wonderware, and the external mxaccessgw refs in Driver.Galaxy and its test project. - Rebuild ZB.MOM.WW.OtOpcUa.slnx with nested solution folders. - Re-prefix project paths in functional scripts (e2e, compliance, smoke SQL, integration, install). Build green (0 errors); unit tests pass. Docs left for a separate pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,313 @@
|
||||
using System.Diagnostics.Metrics;
|
||||
using System.Threading.Channels;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Long-running consumer of <see cref="IGalaxySubscriber.StreamEventsAsync"/>. Translates
|
||||
/// each <see cref="MxEvent"/> with family <see cref="MxEventFamily.OnDataChange"/> into
|
||||
/// <see cref="DataChangeEventArgs"/> and dispatches one event per registered driver
|
||||
/// subscription that includes the changed item handle (fan-out via
|
||||
/// <see cref="SubscriptionRegistry.ResolveSubscribers"/>).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// One pump per connected <see cref="GalaxyMxSession"/>. Reconnect lives in PR 4.5's
|
||||
/// supervisor; on transport failure here we log + propagate so the supervisor can
|
||||
/// decide whether to restart.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// PR 6.2 — the network-read loop and the listener-fanout loop are decoupled by a
|
||||
/// bounded <see cref="Channel{T}"/>. When a listener is slow enough to fill the
|
||||
/// channel, new events are dropped (newest-dropped semantics: producer's
|
||||
/// <c>TryWrite</c> fails) rather than back-pressuring the gw stream. Three counters
|
||||
/// on the <c>ZB.MOM.WW.OtOpcUa.Driver.Galaxy</c> meter expose received / dispatched
|
||||
/// / dropped totals so ops sees pressure before it manifests as user-visible loss.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
internal sealed class EventPump : IAsyncDisposable
|
||||
{
|
||||
public const string MeterName = "ZB.MOM.WW.OtOpcUa.Driver.Galaxy";
|
||||
private const int DefaultChannelCapacity = 50_000;
|
||||
|
||||
// Single static meter so a host-level MeterListener catches all pump instances.
|
||||
private static readonly Meter Meter = new(MeterName);
|
||||
private static readonly Counter<long> EventsReceived =
|
||||
Meter.CreateCounter<long>("galaxy.events.received", unit: "{event}",
|
||||
description: "MxEvents read from the gateway StreamEvents stream.");
|
||||
private static readonly Counter<long> EventsDispatched =
|
||||
Meter.CreateCounter<long>("galaxy.events.dispatched", unit: "{event}",
|
||||
description: "MxEvents passed through the bounded channel and into OnDataChange.");
|
||||
private static readonly Counter<long> EventsDropped =
|
||||
Meter.CreateCounter<long>("galaxy.events.dropped", unit: "{event}",
|
||||
description: "MxEvents dropped because the bounded channel was full (newest-dropped).");
|
||||
private static readonly Counter<long> AlarmTransitionsReceived =
|
||||
Meter.CreateCounter<long>("galaxy.alarm_transitions.received", unit: "{event}",
|
||||
description: "OnAlarmTransition events decoded and forwarded to driver-level handlers.");
|
||||
private static readonly Counter<long> AlarmTransitionsDecodingFailures =
|
||||
Meter.CreateCounter<long>("galaxy.alarm_transitions.decoding_failures", unit: "{event}",
|
||||
description: "OnAlarmTransition events that arrived without a populated body or with an unspecified transition kind.");
|
||||
|
||||
private readonly IGalaxySubscriber _subscriber;
|
||||
private readonly SubscriptionRegistry _registry;
|
||||
private readonly ILogger _logger;
|
||||
private readonly Func<long, ISubscriptionHandle> _handleFactory;
|
||||
private readonly Channel<MxEvent> _channel;
|
||||
private readonly KeyValuePair<string, object?> _clientTag;
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
|
||||
private Task? _loop;
|
||||
private Task? _dispatchLoop;
|
||||
private bool _disposed;
|
||||
|
||||
public event EventHandler<DataChangeEventArgs>? OnDataChange;
|
||||
|
||||
/// <summary>
|
||||
/// Fires for every <see cref="MxEventFamily.OnAlarmTransition"/> event the
|
||||
/// gateway forwards. Decoded into a <see cref="GalaxyAlarmTransition"/> with
|
||||
/// the OPC UA severity bucket already mapped via
|
||||
/// <see cref="MxAccessSeverityMapper"/>. The driver wraps this onto
|
||||
/// <c>IAlarmSource.OnAlarmEvent</c> in PR B.2.
|
||||
/// </summary>
|
||||
internal event EventHandler<GalaxyAlarmTransition>? OnAlarmTransition;
|
||||
|
||||
public EventPump(
|
||||
IGalaxySubscriber subscriber,
|
||||
SubscriptionRegistry registry,
|
||||
ILogger? logger = null,
|
||||
Func<long, ISubscriptionHandle>? handleFactory = null,
|
||||
int channelCapacity = DefaultChannelCapacity,
|
||||
string? clientName = null)
|
||||
{
|
||||
_subscriber = subscriber ?? throw new ArgumentNullException(nameof(subscriber));
|
||||
_registry = registry ?? throw new ArgumentNullException(nameof(registry));
|
||||
_logger = logger ?? NullLogger.Instance;
|
||||
_handleFactory = handleFactory ?? (id => new GalaxySubscriptionHandle(id));
|
||||
|
||||
if (channelCapacity < 1)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(channelCapacity),
|
||||
"channelCapacity must be >= 1; recommended 50_000 for 50k-tag deployments.");
|
||||
}
|
||||
_channel = Channel.CreateBounded<MxEvent>(new BoundedChannelOptions(channelCapacity)
|
||||
{
|
||||
// Newest-dropped policy: when full, the producer's TryWrite returns false
|
||||
// and we account for the drop. We do this manually rather than relying on
|
||||
// BoundedChannelFullMode.DropWrite so we can count drops without polling.
|
||||
FullMode = BoundedChannelFullMode.Wait,
|
||||
SingleReader = true,
|
||||
SingleWriter = true,
|
||||
});
|
||||
_clientTag = new KeyValuePair<string, object?>("galaxy.client", clientName ?? "<unknown>");
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Start consuming the event stream on a background task. Idempotent — second
|
||||
/// calls are no-ops while the loop is running.
|
||||
/// </summary>
|
||||
public void Start()
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
if (_loop is not null) return;
|
||||
_loop = Task.Run(() => RunAsync(_cts.Token));
|
||||
_dispatchLoop = Task.Run(() => DispatchLoopAsync(_cts.Token));
|
||||
}
|
||||
|
||||
private async Task RunAsync(CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
await foreach (var ev in _subscriber.StreamEventsAsync(ct).WithCancellation(ct).ConfigureAwait(false))
|
||||
{
|
||||
if (ct.IsCancellationRequested) break;
|
||||
EventsReceived.Add(1, _clientTag);
|
||||
|
||||
// Newest-dropped: TryWrite fast-paths the common case (channel has room).
|
||||
// When full we count the drop and continue reading the gw stream so
|
||||
// back-pressure doesn't propagate upstream.
|
||||
if (!_channel.Writer.TryWrite(ev))
|
||||
{
|
||||
EventsDropped.Add(1, _clientTag);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested)
|
||||
{
|
||||
// Clean shutdown — no log.
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex,
|
||||
"Galaxy EventPump loop ended with an exception — reconnect supervisor (PR 4.5) handles restart.");
|
||||
}
|
||||
finally
|
||||
{
|
||||
// Tell the dispatch loop the producer is done so it drains and exits.
|
||||
_channel.Writer.TryComplete();
|
||||
}
|
||||
}
|
||||
|
||||
private async Task DispatchLoopAsync(CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
await foreach (var ev in _channel.Reader.ReadAllAsync(ct).ConfigureAwait(false))
|
||||
{
|
||||
Dispatch(ev);
|
||||
EventsDispatched.Add(1, _clientTag);
|
||||
}
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested)
|
||||
{
|
||||
// Clean shutdown.
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex,
|
||||
"Galaxy EventPump dispatch loop ended with an exception — events past this point will be lost until restart.");
|
||||
}
|
||||
}
|
||||
|
||||
private void Dispatch(MxEvent ev)
|
||||
{
|
||||
switch (ev.Family)
|
||||
{
|
||||
case MxEventFamily.OnDataChange:
|
||||
DispatchDataChange(ev);
|
||||
break;
|
||||
case MxEventFamily.OnAlarmTransition:
|
||||
DispatchAlarmTransition(ev);
|
||||
break;
|
||||
default:
|
||||
// OnWriteComplete / OperationComplete / OnBufferedDataChange are filtered
|
||||
// out — write callers get their reply via the InvokeAsync round-trip, not
|
||||
// via the event stream.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
private void DispatchDataChange(MxEvent ev)
|
||||
{
|
||||
var subscribers = _registry.ResolveSubscribers(ev.ItemHandle);
|
||||
if (subscribers.Count == 0) return; // stale event after unsubscribe — drop quietly
|
||||
|
||||
var snapshot = ToSnapshot(ev);
|
||||
foreach (var (subscriptionId, fullReference) in subscribers)
|
||||
{
|
||||
var handle = _handleFactory(subscriptionId);
|
||||
try
|
||||
{
|
||||
OnDataChange?.Invoke(this, new DataChangeEventArgs(handle, fullReference, snapshot));
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex,
|
||||
"Galaxy OnDataChange handler threw for {FullRef} subscription {SubscriptionId} — continuing fan-out.",
|
||||
fullReference, subscriptionId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void DispatchAlarmTransition(MxEvent ev)
|
||||
{
|
||||
// Body absent (e.g. malformed gateway event or worker version skew) — count and
|
||||
// drop. The Part 9 sub-attribute fallback path keeps an alarm functional even
|
||||
// when the rich payload disappears.
|
||||
if (ev.OnAlarmTransition is not { } body)
|
||||
{
|
||||
AlarmTransitionsDecodingFailures.Add(1, _clientTag);
|
||||
_logger.LogDebug(
|
||||
"Galaxy OnAlarmTransition event arrived without a populated body (sequence={Sequence}); ignoring.",
|
||||
ev.WorkerSequence);
|
||||
return;
|
||||
}
|
||||
if (body.TransitionKind == AlarmTransitionKind.Unspecified)
|
||||
{
|
||||
AlarmTransitionsDecodingFailures.Add(1, _clientTag);
|
||||
_logger.LogDebug(
|
||||
"Galaxy OnAlarmTransition for {AlarmRef} has unspecified transition kind; ignoring.",
|
||||
body.AlarmFullReference);
|
||||
return;
|
||||
}
|
||||
|
||||
var (bucket, opcUaSeverity) = MxAccessSeverityMapper.Map(body.Severity);
|
||||
var transitionTimestamp = body.TransitionTimestamp is { } tts
|
||||
? tts.ToDateTime()
|
||||
: DateTime.UtcNow;
|
||||
DateTime? originalRaiseTimestamp = body.OriginalRaiseTimestamp is { } orts
|
||||
? orts.ToDateTime()
|
||||
: null;
|
||||
|
||||
var transition = new GalaxyAlarmTransition(
|
||||
AlarmFullReference: body.AlarmFullReference,
|
||||
SourceObjectReference: body.SourceObjectReference,
|
||||
AlarmTypeName: body.AlarmTypeName,
|
||||
TransitionKind: MapTransitionKind(body.TransitionKind),
|
||||
SeverityBucket: bucket,
|
||||
OpcUaSeverity: opcUaSeverity,
|
||||
RawMxAccessSeverity: body.Severity,
|
||||
OriginalRaiseTimestampUtc: originalRaiseTimestamp,
|
||||
TransitionTimestampUtc: transitionTimestamp,
|
||||
OperatorUser: body.OperatorUser,
|
||||
OperatorComment: body.OperatorComment,
|
||||
Category: body.Category,
|
||||
Description: body.Description);
|
||||
|
||||
AlarmTransitionsReceived.Add(1, _clientTag);
|
||||
try
|
||||
{
|
||||
OnAlarmTransition?.Invoke(this, transition);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex,
|
||||
"Galaxy OnAlarmTransition handler threw for {AlarmRef} — continuing.",
|
||||
transition.AlarmFullReference);
|
||||
}
|
||||
}
|
||||
|
||||
private static GalaxyAlarmTransitionKind MapTransitionKind(AlarmTransitionKind kind) => kind switch
|
||||
{
|
||||
AlarmTransitionKind.Raise => GalaxyAlarmTransitionKind.Raise,
|
||||
AlarmTransitionKind.Acknowledge => GalaxyAlarmTransitionKind.Acknowledge,
|
||||
AlarmTransitionKind.Clear => GalaxyAlarmTransitionKind.Clear,
|
||||
AlarmTransitionKind.Retrigger => GalaxyAlarmTransitionKind.Retrigger,
|
||||
_ => GalaxyAlarmTransitionKind.Unspecified,
|
||||
};
|
||||
|
||||
private DataValueSnapshot ToSnapshot(MxEvent ev)
|
||||
{
|
||||
var value = MxValueDecoder.Decode(ev.Value);
|
||||
var statusCode = ev.Statuses.Count > 0
|
||||
? StatusCodeMap.FromMxStatus(ev.Statuses[0], _logger)
|
||||
: StatusCodeMap.FromQualityByte((byte)(ev.Quality & 0xFF), _logger);
|
||||
|
||||
DateTime? sourceTimestamp = ev.SourceTimestamp is { } ts ? ts.ToDateTime() : null;
|
||||
return new DataValueSnapshot(
|
||||
Value: value,
|
||||
StatusCode: statusCode,
|
||||
SourceTimestampUtc: sourceTimestamp,
|
||||
ServerTimestampUtc: DateTime.UtcNow);
|
||||
}
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
{
|
||||
if (_disposed) return;
|
||||
_disposed = true;
|
||||
_cts.Cancel();
|
||||
_channel.Writer.TryComplete();
|
||||
if (_loop is not null)
|
||||
{
|
||||
try { await _loop.ConfigureAwait(false); } catch { /* shutdown */ }
|
||||
}
|
||||
if (_dispatchLoop is not null)
|
||||
{
|
||||
try { await _dispatchLoop.ConfigureAwait(false); } catch { /* shutdown */ }
|
||||
}
|
||||
_cts.Dispose();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-side handle returned by <see cref="GalaxyDriver.SubscribeAlarmsAsync"/>.
|
||||
/// The driver doesn't multiplex alarm transitions per handle — every active handle
|
||||
/// observes the gateway's alarm-event stream — but the handle is needed for
|
||||
/// symmetric Unsubscribe and for the server-side AlarmConditionService to
|
||||
/// correlate transitions with the originating subscription.
|
||||
/// </summary>
|
||||
internal sealed class GalaxyAlarmSubscriptionHandle : IAlarmSubscriptionHandle
|
||||
{
|
||||
public GalaxyAlarmSubscriptionHandle(string diagnosticId)
|
||||
{
|
||||
DiagnosticId = diagnosticId;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public string DiagnosticId { get; }
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Decoded MXAccess alarm transition surfaced by <see cref="EventPump"/>.
|
||||
/// The driver wraps this into <see cref="AlarmEventArgs"/> on the
|
||||
/// <see cref="IAlarmSource.OnAlarmEvent"/> path; the richer fields
|
||||
/// (operator user/comment, original raise time, category) become available
|
||||
/// on the OPC UA Part 9 condition once <c>AlarmEventArgs</c> is extended in
|
||||
/// the client-surface refresh PR (E.7).
|
||||
/// </summary>
|
||||
internal sealed record GalaxyAlarmTransition(
|
||||
string AlarmFullReference,
|
||||
string SourceObjectReference,
|
||||
string AlarmTypeName,
|
||||
GalaxyAlarmTransitionKind TransitionKind,
|
||||
AlarmSeverity SeverityBucket,
|
||||
int OpcUaSeverity,
|
||||
int RawMxAccessSeverity,
|
||||
DateTime? OriginalRaiseTimestampUtc,
|
||||
DateTime TransitionTimestampUtc,
|
||||
string OperatorUser,
|
||||
string OperatorComment,
|
||||
string Category,
|
||||
string Description);
|
||||
|
||||
/// <summary>Kind of alarm state change observed by <see cref="EventPump"/>.</summary>
|
||||
internal enum GalaxyAlarmTransitionKind
|
||||
{
|
||||
Unspecified = 0,
|
||||
Raise = 1,
|
||||
Acknowledge = 2,
|
||||
Clear = 3,
|
||||
Retrigger = 4,
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Client;
|
||||
using ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Config;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-side wrapper around the gateway's <see cref="MxGatewaySession"/>. Owns the
|
||||
/// MXAccess <c>Register</c> handle, caches the per-tag item handles AddItem returns,
|
||||
/// and coordinates the read / write / subscribe call paths. PRs 4.2-4.5 fill this in
|
||||
/// incrementally:
|
||||
/// <list type="bullet">
|
||||
/// <item>PR 4.2 (this PR) — skeleton + lifecycle wiring.</item>
|
||||
/// <item>PR 4.3 — write path.</item>
|
||||
/// <item>PR 4.4 — subscription registry + event pump + the production
|
||||
/// <see cref="IGalaxyDataReader"/> implementation that drives the read path.</item>
|
||||
/// <item>PR 4.5 — reconnect supervisor.</item>
|
||||
/// </list>
|
||||
/// </summary>
|
||||
public sealed class GalaxyMxSession : IAsyncDisposable
|
||||
{
|
||||
private readonly GalaxyMxAccessOptions _options;
|
||||
private readonly ILogger _logger;
|
||||
|
||||
// Owned gateway client + session — populated when ConnectAsync runs. Tests can leave
|
||||
// them null and exercise the surface via injected IGalaxyDataReader fakes.
|
||||
private MxGatewayClient? _ownedClient;
|
||||
private MxGatewaySession? _session;
|
||||
private int _serverHandle;
|
||||
private bool _disposed;
|
||||
|
||||
public GalaxyMxSession(GalaxyMxAccessOptions options, ILogger? logger = null)
|
||||
{
|
||||
_options = options ?? throw new ArgumentNullException(nameof(options));
|
||||
_logger = logger ?? NullLogger.Instance;
|
||||
}
|
||||
|
||||
public bool IsConnected => _session is not null;
|
||||
|
||||
/// <summary>
|
||||
/// Server-side handle returned by MXAccess <c>Register</c>. Zero before
|
||||
/// <see cref="ConnectAsync"/> opens the session.
|
||||
/// </summary>
|
||||
public int ServerHandle => _serverHandle;
|
||||
|
||||
/// <summary>
|
||||
/// Connect the underlying gateway client + open an MXAccess session + register the
|
||||
/// configured client name. Idempotent — second calls are no-ops while
|
||||
/// <see cref="IsConnected"/> is true.
|
||||
/// </summary>
|
||||
public async Task ConnectAsync(MxGatewayClientOptions clientOptions, CancellationToken cancellationToken)
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
if (_session is not null) return;
|
||||
|
||||
_ownedClient = MxGatewayClient.Create(clientOptions);
|
||||
_session = await _ownedClient.OpenSessionAsync(cancellationToken: cancellationToken).ConfigureAwait(false);
|
||||
_serverHandle = await _session.RegisterAsync(_options.ClientName, cancellationToken).ConfigureAwait(false);
|
||||
_logger.LogInformation(
|
||||
"GalaxyMxSession connected — clientName={ClientName} serverHandle={Handle}",
|
||||
_options.ClientName, _serverHandle);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Test seam — attach a session opened externally (e.g. against an in-process gw
|
||||
/// fake). Skips the gateway-client construction so tests can drive the session
|
||||
/// surface without spinning a real gRPC channel. Caller retains client ownership.
|
||||
/// </summary>
|
||||
internal void AttachForTests(MxGatewaySession session, int serverHandle)
|
||||
{
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
_session = session ?? throw new ArgumentNullException(nameof(session));
|
||||
_serverHandle = serverHandle;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns the underlying gateway session. Null until <see cref="ConnectAsync"/> or
|
||||
/// <see cref="AttachForTests"/> runs. PR 4.3 / 4.4 use this to issue commands.
|
||||
/// </summary>
|
||||
public MxGatewaySession? Session => _session;
|
||||
|
||||
public async ValueTask DisposeAsync()
|
||||
{
|
||||
if (_disposed) return;
|
||||
_disposed = true;
|
||||
|
||||
if (_session is not null)
|
||||
{
|
||||
try { await _session.DisposeAsync().ConfigureAwait(false); }
|
||||
catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession session dispose failed (best-effort)"); }
|
||||
}
|
||||
_session = null;
|
||||
|
||||
if (_ownedClient is not null)
|
||||
{
|
||||
try { await _ownedClient.DisposeAsync().ConfigureAwait(false); }
|
||||
catch (Exception ex) { _logger.LogWarning(ex, "GalaxyMxSession client dispose failed (best-effort)"); }
|
||||
}
|
||||
_ownedClient = null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-internal subscription identity. The numeric id is allocated monotonically per
|
||||
/// driver; the diagnostic string carries the same id prefixed for log cross-referencing.
|
||||
/// </summary>
|
||||
internal sealed record GalaxySubscriptionHandle(long SubscriptionId) : ISubscriptionHandle
|
||||
{
|
||||
public string DiagnosticId => $"galaxy-sub-{SubscriptionId}";
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// PR 6.1 — In-box <see cref="ActivitySource"/> wired around every gw call the
|
||||
/// driver makes (Subscribe/Unsubscribe, Write/WriteSecured, GetHierarchy). The
|
||||
/// decorators in this folder produce one span per call, tagged with the inputs
|
||||
/// ops needs to triage a slow or failing operation:
|
||||
/// <c>galaxy.tag_count</c>, <c>galaxy.success_count</c>, <c>galaxy.client</c>.
|
||||
/// <para>
|
||||
/// The driver itself doesn't take a dependency on the OpenTelemetry packages —
|
||||
/// <c>System.Diagnostics.ActivitySource</c> is in the BCL. The host process
|
||||
/// decides which listener (OTLP exporter, Application Insights, dotnet-trace)
|
||||
/// subscribes to <see cref="ActivitySourceName"/>.
|
||||
/// </para>
|
||||
/// </summary>
|
||||
internal static class GalaxyTelemetry
|
||||
{
|
||||
public const string ActivitySourceName = "ZB.MOM.WW.OtOpcUa.Driver.Galaxy";
|
||||
|
||||
public static readonly ActivitySource ActivitySource = new(ActivitySourceName);
|
||||
|
||||
/// <summary>
|
||||
/// Tag a span with a failure reason and set its status to <c>Error</c>. Helper
|
||||
/// so the decorators don't repeat the four-line idiom on every catch block.
|
||||
/// </summary>
|
||||
public static void RecordError(this Activity? activity, Exception ex)
|
||||
{
|
||||
if (activity is null) return;
|
||||
activity.SetStatus(ActivityStatusCode.Error, ex.Message);
|
||||
activity.SetTag("exception.type", ex.GetType().FullName);
|
||||
activity.SetTag("exception.message", ex.Message);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Production <see cref="IGalaxyAlarmAcknowledger"/> backed by the
|
||||
/// <c>MxGatewayClient.AcknowledgeAlarmAsync</c> RPC (PR E.2). Maps the
|
||||
/// reply's protocol status into a thrown exception when the gateway
|
||||
/// reports a non-OK condition; native MxStatus failures inside the reply
|
||||
/// surface as a logged warning so operator workflows aren't blocked by a
|
||||
/// transient MxAccess hiccup.
|
||||
/// </summary>
|
||||
internal sealed class GatewayGalaxyAlarmAcknowledger : IGalaxyAlarmAcknowledger
|
||||
{
|
||||
private readonly MxGatewayClient _client;
|
||||
private readonly GalaxyMxSession _session;
|
||||
private readonly ILogger _logger;
|
||||
|
||||
public GatewayGalaxyAlarmAcknowledger(
|
||||
MxGatewayClient client,
|
||||
GalaxyMxSession session,
|
||||
ILogger logger)
|
||||
{
|
||||
_client = client ?? throw new ArgumentNullException(nameof(client));
|
||||
_session = session ?? throw new ArgumentNullException(nameof(session));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
public async Task AcknowledgeAsync(
|
||||
string alarmFullReference,
|
||||
string comment,
|
||||
string operatorUser,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrEmpty(alarmFullReference);
|
||||
|
||||
var session = _session.Session
|
||||
?? throw new InvalidOperationException(
|
||||
"GatewayGalaxyAlarmAcknowledger requires a connected GalaxyMxSession; underlying gateway session is null.");
|
||||
var sessionId = session.SessionId;
|
||||
|
||||
var reply = await _client.AcknowledgeAlarmAsync(
|
||||
new AcknowledgeAlarmRequest
|
||||
{
|
||||
SessionId = sessionId,
|
||||
ClientCorrelationId = Guid.NewGuid().ToString("N"),
|
||||
AlarmFullReference = alarmFullReference,
|
||||
Comment = comment ?? string.Empty,
|
||||
OperatorUser = operatorUser ?? string.Empty,
|
||||
},
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (reply.Status is { Success: 0 } status)
|
||||
{
|
||||
// Native MxAccess rejected the ack — log but don't throw. Treat as a
|
||||
// best-effort operator workflow; the operator can retry via the OPC UA
|
||||
// session if necessary.
|
||||
_logger.LogWarning(
|
||||
"Galaxy AcknowledgeAlarm for {AlarmRef} returned MxStatus failure: category={Category} detail={Detail} text={Text}",
|
||||
alarmFullReference, status.Category, status.Detail, status.DiagnosticText);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,162 @@
|
||||
using System.Collections.Concurrent;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto;
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Production <see cref="IGalaxyDataWriter"/> over <see cref="GalaxyMxSession"/>.
|
||||
/// For each batch entry: lazy-AddItem to obtain the MXAccess item handle, encode
|
||||
/// the value via <see cref="MxValueEncoder"/>, route through Write or WriteSecured
|
||||
/// based on the per-tag <see cref="SecurityClassification"/>, and translate the
|
||||
/// reply's <c>MxStatusProxy</c> into an OPC UA <see cref="WriteResult"/>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Item handle cache survives across writes — repeated writes to the same tag avoid
|
||||
/// re-AddItem. Per-tag failures are isolated: one bad write doesn't fail the batch.
|
||||
/// PR 4.4 will share this cache with the subscription registry; for now it lives
|
||||
/// here so the writer is independently testable.
|
||||
/// </remarks>
|
||||
public sealed class GatewayGalaxyDataWriter : IGalaxyDataWriter
|
||||
{
|
||||
private readonly GalaxyMxSession _session;
|
||||
private readonly int _writeUserId;
|
||||
private readonly ILogger _logger;
|
||||
private readonly ConcurrentDictionary<string, int> _itemHandles =
|
||||
new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
public GatewayGalaxyDataWriter(GalaxyMxSession session, int writeUserId, ILogger? logger = null)
|
||||
{
|
||||
_session = session ?? throw new ArgumentNullException(nameof(session));
|
||||
_writeUserId = writeUserId;
|
||||
_logger = logger ?? NullLogger.Instance;
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<WriteResult>> WriteAsync(
|
||||
IReadOnlyList<WriteRequest> writes,
|
||||
Func<string, SecurityClassification> securityResolver,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(writes);
|
||||
ArgumentNullException.ThrowIfNull(securityResolver);
|
||||
|
||||
var session = _session.Session
|
||||
?? throw new InvalidOperationException(
|
||||
"GalaxyMxSession is not connected. Call ConnectAsync before issuing writes.");
|
||||
var serverHandle = _session.ServerHandle;
|
||||
|
||||
var results = new WriteResult[writes.Count];
|
||||
for (var i = 0; i < writes.Count; i++)
|
||||
{
|
||||
results[i] = await WriteOneAsync(session, serverHandle, writes[i],
|
||||
securityResolver(writes[i].FullReference), cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private async Task<WriteResult> WriteOneAsync(
|
||||
MxGatewaySession session, int serverHandle, WriteRequest request,
|
||||
SecurityClassification classification, CancellationToken ct)
|
||||
{
|
||||
try
|
||||
{
|
||||
var itemHandle = await EnsureItemHandleAsync(session, serverHandle, request.FullReference, ct)
|
||||
.ConfigureAwait(false);
|
||||
var mxValue = MxValueEncoder.Encode(request.Value);
|
||||
|
||||
var reply = NeedsSecuredWrite(classification)
|
||||
? await InvokeWriteSecuredAsync(session, serverHandle, itemHandle, mxValue, ct).ConfigureAwait(false)
|
||||
: await session.WriteRawAsync(serverHandle, itemHandle, mxValue, _writeUserId, ct).ConfigureAwait(false);
|
||||
|
||||
return TranslateReply(reply, request.FullReference);
|
||||
}
|
||||
catch (ArgumentException ex)
|
||||
{
|
||||
// Bad value type — caller passed a CLR type the encoder can't render.
|
||||
_logger.LogWarning(ex,
|
||||
"GalaxyDriver write rejected — unsupported value type for {FullRef}", request.FullReference);
|
||||
return new WriteResult(StatusCodeMap.BadInternalError);
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested) { throw; }
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "GalaxyDriver write failed for {FullRef}", request.FullReference);
|
||||
return new WriteResult(StatusCodeMap.BadCommunicationError);
|
||||
}
|
||||
}
|
||||
|
||||
private static bool NeedsSecuredWrite(SecurityClassification classification) =>
|
||||
classification is SecurityClassification.SecuredWrite or SecurityClassification.VerifiedWrite;
|
||||
|
||||
private async Task<int> EnsureItemHandleAsync(
|
||||
MxGatewaySession session, int serverHandle, string fullRef, CancellationToken ct)
|
||||
{
|
||||
if (_itemHandles.TryGetValue(fullRef, out var existing)) return existing;
|
||||
var handle = await session.AddItemAsync(serverHandle, fullRef, ct).ConfigureAwait(false);
|
||||
_itemHandles[fullRef] = handle;
|
||||
return handle;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Issue a WriteSecured command. The high-level session client doesn't expose
|
||||
/// <c>WriteSecuredAsync</c> as a typed method — we build the <see cref="MxCommand"/>
|
||||
/// directly and route through <c>InvokeAsync</c>. Verifier user is left at zero
|
||||
/// for SecuredWrite; VerifiedWrite uses the same path because the gw's worker
|
||||
/// interprets the underlying MXAccess command kind.
|
||||
/// </summary>
|
||||
private static Task<MxCommandReply> InvokeWriteSecuredAsync(
|
||||
MxGatewaySession session, int serverHandle, int itemHandle, MxValue value, CancellationToken ct)
|
||||
{
|
||||
var command = new MxCommand
|
||||
{
|
||||
Kind = MxCommandKind.WriteSecured,
|
||||
WriteSecured = new WriteSecuredCommand
|
||||
{
|
||||
ServerHandle = serverHandle,
|
||||
ItemHandle = itemHandle,
|
||||
Value = value,
|
||||
CurrentUserId = 0,
|
||||
VerifierUserId = 0,
|
||||
},
|
||||
};
|
||||
var request = new MxCommandRequest
|
||||
{
|
||||
SessionId = session.SessionId,
|
||||
ClientCorrelationId = Guid.NewGuid().ToString("N"),
|
||||
Command = command,
|
||||
};
|
||||
return session.InvokeAsync(request, ct);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Translate a gateway <see cref="MxCommandReply"/> into an OPC UA
|
||||
/// <see cref="WriteResult"/>. Honours the protocol-level Status field first
|
||||
/// (transport / dispatch failures), then the first MXAccess status row.
|
||||
/// </summary>
|
||||
private WriteResult TranslateReply(MxCommandReply reply, string fullRef)
|
||||
{
|
||||
// Protocol status — wraps transport / worker-side failures that happen before
|
||||
// MXAccess saw the command.
|
||||
if (reply.ProtocolStatus is { } proto && proto.Code != ProtocolStatusCode.Ok)
|
||||
{
|
||||
_logger.LogWarning(
|
||||
"GalaxyDriver write protocol failure {Code} for {FullRef}: {Message}",
|
||||
proto.Code, fullRef, proto.Message);
|
||||
return new WriteResult(StatusCodeMap.BadCommunicationError);
|
||||
}
|
||||
|
||||
// MX-side status — the worker's WriteCompleteEvent rolls into the reply's
|
||||
// statuses array. Use the first row (single-write contract).
|
||||
if (reply.Statuses.Count > 0)
|
||||
{
|
||||
var status = reply.Statuses[0];
|
||||
return new WriteResult(StatusCodeMap.FromMxStatus(status, _logger));
|
||||
}
|
||||
|
||||
return new WriteResult(StatusCodeMap.Good);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
using MxGateway.Client;
|
||||
using MxGateway.Contracts.Proto;
|
||||
// Use the generated nested status enum for the SetBufferedUpdateInterval reply check.
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Production <see cref="IGalaxySubscriber"/> over a connected
|
||||
/// <see cref="GalaxyMxSession"/>. Forwards SubscribeBulk / UnsubscribeBulk to the
|
||||
/// gateway and streams MxEvents via the gw's bidirectional events RPC.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// PR 6.3 wired the per-call <c>buffered_update_interval_ms</c> through
|
||||
/// <see cref="SubscribeBulkAsync"/>. The gw's contract is session-level
|
||||
/// (<c>SetBufferedUpdateInterval</c> applies to all buffered subscriptions on the
|
||||
/// server handle), so we cache the last-applied value and skip redundant calls.
|
||||
/// </remarks>
|
||||
public sealed class GatewayGalaxySubscriber : IGalaxySubscriber
|
||||
{
|
||||
private readonly GalaxyMxSession _session;
|
||||
private readonly Lock _intervalLock = new();
|
||||
private int _lastAppliedIntervalMs = -1; // -1 = never applied; 0 = explicit "use gw default"
|
||||
|
||||
public GatewayGalaxySubscriber(GalaxyMxSession session)
|
||||
{
|
||||
_session = session ?? throw new ArgumentNullException(nameof(session));
|
||||
}
|
||||
|
||||
public async Task<IReadOnlyList<SubscribeResult>> SubscribeBulkAsync(
|
||||
IReadOnlyList<string> fullReferences, int bufferedUpdateIntervalMs, CancellationToken cancellationToken)
|
||||
{
|
||||
var session = _session.Session
|
||||
?? throw new InvalidOperationException(
|
||||
"GalaxyMxSession is not connected. Call ConnectAsync before subscribing.");
|
||||
var serverHandle = _session.ServerHandle;
|
||||
|
||||
// The gw's SubscribeBulk RPC doesn't carry a per-call interval — buffered cadence
|
||||
// is session-level, set via SetBufferedUpdateInterval. Apply it before the
|
||||
// SubscribeBulk so the very first events on the new handles publish at the
|
||||
// requested cadence. Skip when the last-applied value already matches.
|
||||
if (bufferedUpdateIntervalMs > 0)
|
||||
{
|
||||
await EnsureSessionIntervalAsync(session, serverHandle, bufferedUpdateIntervalMs, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
return await session.SubscribeBulkAsync(serverHandle, fullReferences, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Apply the gateway's session-level <c>SetBufferedUpdateInterval</c> command. The
|
||||
/// gw's contract is "for this server handle, every buffered subscription publishes
|
||||
/// at this cadence" — there's no per-handle granularity, so we cache the last
|
||||
/// applied value and skip redundant calls.
|
||||
/// </summary>
|
||||
private async Task EnsureSessionIntervalAsync(
|
||||
MxGateway.Client.MxGatewaySession session, int serverHandle, int intervalMs, CancellationToken cancellationToken)
|
||||
{
|
||||
lock (_intervalLock)
|
||||
{
|
||||
if (_lastAppliedIntervalMs == intervalMs) return;
|
||||
}
|
||||
|
||||
var reply = await session.InvokeAsync(
|
||||
new MxCommandRequest
|
||||
{
|
||||
SessionId = session.SessionId,
|
||||
ClientCorrelationId = Guid.NewGuid().ToString("N"),
|
||||
Command = new MxCommand
|
||||
{
|
||||
Kind = MxCommandKind.SetBufferedUpdateInterval,
|
||||
SetBufferedUpdateInterval = new SetBufferedUpdateIntervalCommand
|
||||
{
|
||||
ServerHandle = serverHandle,
|
||||
UpdateIntervalMilliseconds = intervalMs,
|
||||
},
|
||||
},
|
||||
},
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (reply.ProtocolStatus?.Code is not (ProtocolStatusCode.Ok or ProtocolStatusCode.MxaccessFailure))
|
||||
{
|
||||
// Don't throw on a soft failure — the SubscribeBulk will still succeed at the
|
||||
// gw's default cadence, which is functional just not the requested cadence.
|
||||
// The trace span (PR 6.1) plus the warning here gives ops the signal.
|
||||
return;
|
||||
}
|
||||
|
||||
lock (_intervalLock)
|
||||
{
|
||||
_lastAppliedIntervalMs = intervalMs;
|
||||
}
|
||||
}
|
||||
|
||||
public async Task UnsubscribeBulkAsync(IReadOnlyList<int> itemHandles, CancellationToken cancellationToken)
|
||||
{
|
||||
if (itemHandles.Count == 0) return;
|
||||
|
||||
var session = _session.Session
|
||||
?? throw new InvalidOperationException(
|
||||
"GalaxyMxSession is not connected. UnsubscribeBulk called after disconnect.");
|
||||
var serverHandle = _session.ServerHandle;
|
||||
|
||||
await session.UnsubscribeBulkAsync(serverHandle, itemHandles, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
}
|
||||
|
||||
public IAsyncEnumerable<MxEvent> StreamEventsAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
var session = _session.Session
|
||||
?? throw new InvalidOperationException(
|
||||
"GalaxyMxSession is not connected. StreamEventsAsync called before ConnectAsync.");
|
||||
return session.StreamEventsAsync(afterWorkerSequence: 0, cancellationToken);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Test seam for the gateway-side Acknowledge call. Production wraps the
|
||||
/// <c>MxGatewayClient.AcknowledgeAlarmAsync</c> RPC; tests substitute a fake
|
||||
/// so <see cref="GalaxyDriver.AcknowledgeAsync"/> can be exercised without a
|
||||
/// running gateway.
|
||||
/// </summary>
|
||||
internal interface IGalaxyAlarmAcknowledger
|
||||
{
|
||||
/// <summary>
|
||||
/// Forward a single alarm acknowledgement to the gateway. The gateway
|
||||
/// translates this to an MxAccess Acknowledge call against the worker's
|
||||
/// session and returns the native MxStatus on the reply.
|
||||
/// </summary>
|
||||
/// <param name="alarmFullReference">
|
||||
/// Fully-qualified alarm reference (e.g. <c>"Tank01.Level.HiHi"</c>).
|
||||
/// </param>
|
||||
/// <param name="comment">Operator-supplied comment forwarded to MxAccess.</param>
|
||||
/// <param name="operatorUser">
|
||||
/// Operator principal performing the acknowledgement. Resolved from the
|
||||
/// OPC UA session by the server-side ACL layer before reaching the driver.
|
||||
/// </param>
|
||||
/// <param name="cancellationToken">Cancels the gateway RPC.</param>
|
||||
Task AcknowledgeAsync(
|
||||
string alarmFullReference,
|
||||
string comment,
|
||||
string operatorUser,
|
||||
CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-side seam for one-shot reads. Production implementation (PR 4.4) wraps
|
||||
/// <c>MxGatewaySession</c>'s SubscribeBulk + StreamEvents path to obtain values; tests
|
||||
/// substitute a fake returning canned snapshots.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// The interface is deliberately minimal — no per-tag overload, no continuation
|
||||
/// points. The driver-side <c>IReadable.ReadAsync</c> contract guarantees a value per
|
||||
/// requested tag in input order, with status codes carrying the per-tag failure mode
|
||||
/// (e.g. BadInternalError for transport failure on a single tag, BadOutOfService for
|
||||
/// a tag the gateway didn't recognise).
|
||||
/// </remarks>
|
||||
public interface IGalaxyDataReader
|
||||
{
|
||||
/// <summary>
|
||||
/// Read each <paramref name="fullReferences"/> entry once and return one
|
||||
/// <see cref="DataValueSnapshot"/> per request entry, in input order.
|
||||
/// Implementations MUST return the same length as the input — partial-tag
|
||||
/// failures are encoded as Bad-quality snapshots, not omitted.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<DataValueSnapshot>> ReadAsync(
|
||||
IReadOnlyList<string> fullReferences, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-side seam for batched writes. Production implementation routes by
|
||||
/// <see cref="SecurityClassification"/>: SecuredWrite / VerifiedWrite go through
|
||||
/// <c>MxCommandKind.WriteSecured</c>, everything else through
|
||||
/// <c>MxGatewaySession.WriteAsync</c>. Tests substitute a fake to record routing
|
||||
/// decisions without touching real gw infrastructure.
|
||||
/// </summary>
|
||||
public interface IGalaxyDataWriter
|
||||
{
|
||||
/// <summary>
|
||||
/// Write each <paramref name="writes"/> entry; return one
|
||||
/// <see cref="WriteResult"/> per request entry, in input order. Implementations
|
||||
/// MUST return the same length as the input — partial-tag failures are encoded
|
||||
/// as Bad-status results, not omitted.
|
||||
/// </summary>
|
||||
/// <param name="writes">Pairs of full reference + value to write.</param>
|
||||
/// <param name="securityResolver">
|
||||
/// Maps a full reference to its discovered <see cref="SecurityClassification"/>
|
||||
/// so the writer can route SecuredWrite / VerifiedWrite tags through the
|
||||
/// <c>WriteSecured</c> command instead of <c>Write</c>. Returns
|
||||
/// <see cref="SecurityClassification.FreeAccess"/> when the tag isn't tracked
|
||||
/// (the safest default — non-secured Write).
|
||||
/// </param>
|
||||
/// <param name="cancellationToken">Aborts the in-flight batch.</param>
|
||||
Task<IReadOnlyList<WriteResult>> WriteAsync(
|
||||
IReadOnlyList<WriteRequest> writes,
|
||||
Func<string, SecurityClassification> securityResolver,
|
||||
CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Driver-side seam for subscription lifecycle + the inbound event stream. Production
|
||||
/// wraps <c>MxGatewaySession.SubscribeBulkAsync</c>, <c>UnsubscribeBulkAsync</c>, and
|
||||
/// <c>StreamEventsAsync</c>; tests substitute a fake to drive synthetic events through
|
||||
/// the <see cref="EventPump"/> without a real gw.
|
||||
/// </summary>
|
||||
public interface IGalaxySubscriber
|
||||
{
|
||||
/// <summary>
|
||||
/// Subscribe a batch of tag full references. Returns one
|
||||
/// <see cref="SubscribeResult"/> per request entry, in input order. Failed tags
|
||||
/// (gateway rejection) carry a non-zero status and an item handle of zero or
|
||||
/// negative — the caller treats those as per-tag failures rather than a whole-call
|
||||
/// failure.
|
||||
/// </summary>
|
||||
Task<IReadOnlyList<SubscribeResult>> SubscribeBulkAsync(
|
||||
IReadOnlyList<string> fullReferences, int bufferedUpdateIntervalMs, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>Unsubscribe a batch of item handles obtained from <see cref="SubscribeBulkAsync"/>.</summary>
|
||||
Task UnsubscribeBulkAsync(IReadOnlyList<int> itemHandles, CancellationToken cancellationToken);
|
||||
|
||||
/// <summary>
|
||||
/// Long-running consumer of the gateway's <c>StreamEvents</c> RPC. Each emitted
|
||||
/// <see cref="MxEvent"/> carries the gw item handle the caller correlates against
|
||||
/// its <see cref="SubscriptionRegistry"/>.
|
||||
/// </summary>
|
||||
IAsyncEnumerable<MxEvent> StreamEventsAsync(CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Maps a raw MXAccess alarm severity (0-999, MXAccess scale) onto the
|
||||
/// <see cref="AlarmSeverity"/> ladder + an OPC UA Part 9 numeric severity (1-1000).
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The four-bucket OPC UA ladder (250 / 500 / 750 / 1000 — Low / Medium / High /
|
||||
/// Critical) is the same ladder v1's <c>GalaxyAlarmTracker</c> exposed (per
|
||||
/// <c>docs/v1/AlarmTracking.md</c>). Galaxy templates assign severity values
|
||||
/// 0-999; the bucket boundaries below match v1 so customers see no
|
||||
/// surprise re-classification when the v2 path takes over.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Out-of-range inputs (negative or >= 1000) are clamped into the nearest
|
||||
/// bucket rather than rejected. MXAccess occasionally surfaces slightly
|
||||
/// out-of-range severities for legacy alarm types and we want them to flow
|
||||
/// through the alarm path rather than disappear at the mapper.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
internal static class MxAccessSeverityMapper
|
||||
{
|
||||
/// <summary>OPC UA Part 9 numeric severity for the Low bucket (0-249 MxAccess).</summary>
|
||||
public const int OpcUaSeverityLow = 250;
|
||||
/// <summary>OPC UA Part 9 numeric severity for the Medium bucket (250-499 MxAccess).</summary>
|
||||
public const int OpcUaSeverityMedium = 500;
|
||||
/// <summary>OPC UA Part 9 numeric severity for the High bucket (500-749 MxAccess).</summary>
|
||||
public const int OpcUaSeverityHigh = 750;
|
||||
/// <summary>OPC UA Part 9 numeric severity for the Critical bucket (750+ MxAccess).</summary>
|
||||
public const int OpcUaSeverityCritical = 1000;
|
||||
|
||||
/// <summary>
|
||||
/// Translate a raw MXAccess severity into the four-bucket
|
||||
/// <see cref="AlarmSeverity"/> + OPC UA Part 9 numeric severity tuple.
|
||||
/// </summary>
|
||||
public static (AlarmSeverity Bucket, int OpcUaSeverity) Map(int rawMxAccessSeverity)
|
||||
{
|
||||
if (rawMxAccessSeverity < 250)
|
||||
{
|
||||
return (AlarmSeverity.Low, OpcUaSeverityLow);
|
||||
}
|
||||
if (rawMxAccessSeverity < 500)
|
||||
{
|
||||
return (AlarmSeverity.Medium, OpcUaSeverityMedium);
|
||||
}
|
||||
if (rawMxAccessSeverity < 750)
|
||||
{
|
||||
return (AlarmSeverity.High, OpcUaSeverityHigh);
|
||||
}
|
||||
return (AlarmSeverity.Critical, OpcUaSeverityCritical);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Translates gateway-side <see cref="MxValue"/> instances into the boxed CLR objects
|
||||
/// <c>DataValueSnapshot.Value</c> carries. Mirrors the seven Galaxy data types in
|
||||
/// <c>DataTypeMap</c> (Boolean, Int32, Int64, Float32, Float64, String, DateTime), plus
|
||||
/// the array variants exposed by <see cref="MxArray"/>. Unknown / awkward values fall
|
||||
/// back to the <c>raw_value</c> bytes so a forward-compatible MXAccess deployment
|
||||
/// doesn't lose data on the wire — the consumer can opt to deserialise the bytes.
|
||||
/// </summary>
|
||||
internal static class MxValueDecoder
|
||||
{
|
||||
public static object? Decode(MxValue? value)
|
||||
{
|
||||
if (value is null) return null;
|
||||
if (value.IsNull) return null;
|
||||
|
||||
return value.KindCase switch
|
||||
{
|
||||
MxValue.KindOneofCase.BoolValue => value.BoolValue,
|
||||
MxValue.KindOneofCase.Int32Value => value.Int32Value,
|
||||
MxValue.KindOneofCase.Int64Value => value.Int64Value,
|
||||
MxValue.KindOneofCase.FloatValue => value.FloatValue,
|
||||
MxValue.KindOneofCase.DoubleValue => value.DoubleValue,
|
||||
MxValue.KindOneofCase.StringValue => value.StringValue,
|
||||
MxValue.KindOneofCase.TimestampValue => DecodeTimestamp(value.TimestampValue),
|
||||
MxValue.KindOneofCase.ArrayValue => DecodeArray(value.ArrayValue),
|
||||
MxValue.KindOneofCase.RawValue => value.RawValue.ToByteArray(),
|
||||
_ => null,
|
||||
};
|
||||
}
|
||||
|
||||
private static DateTime? DecodeTimestamp(Timestamp? ts) => ts?.ToDateTime();
|
||||
|
||||
private static object? DecodeArray(MxArray? array)
|
||||
{
|
||||
if (array is null) return null;
|
||||
|
||||
return array.ValuesCase switch
|
||||
{
|
||||
MxArray.ValuesOneofCase.BoolValues => array.BoolValues.Values.ToArray(),
|
||||
MxArray.ValuesOneofCase.Int32Values => array.Int32Values.Values.ToArray(),
|
||||
MxArray.ValuesOneofCase.Int64Values => array.Int64Values.Values.ToArray(),
|
||||
MxArray.ValuesOneofCase.FloatValues => array.FloatValues.Values.ToArray(),
|
||||
MxArray.ValuesOneofCase.DoubleValues => array.DoubleValues.Values.ToArray(),
|
||||
MxArray.ValuesOneofCase.StringValues => array.StringValues.Values.ToArray(),
|
||||
MxArray.ValuesOneofCase.TimestampValues => array.TimestampValues.Values.Select(t => t.ToDateTime()).ToArray(),
|
||||
_ => null,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
using Google.Protobuf.WellKnownTypes;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Translates boxed CLR values from <c>WriteRequest.Value</c> into gateway-side
|
||||
/// <see cref="MxValue"/> instances. Inverse of <see cref="MxValueDecoder"/>.
|
||||
/// Handles the seven Galaxy data types — Boolean, Int32, Int64, Float32, Float64,
|
||||
/// String, DateTime — and their array variants. Null + unsupported types throw
|
||||
/// <see cref="ArgumentException"/> so the IWritable caller can fail the write with a
|
||||
/// clear status code rather than silently mis-typing the wire payload.
|
||||
/// </summary>
|
||||
internal static class MxValueEncoder
|
||||
{
|
||||
public static MxValue Encode(object? value)
|
||||
{
|
||||
if (value is null) return new MxValue { IsNull = true };
|
||||
|
||||
switch (value)
|
||||
{
|
||||
case bool b: return new MxValue { BoolValue = b };
|
||||
case sbyte i8: return new MxValue { Int32Value = i8 };
|
||||
case short i16: return new MxValue { Int32Value = i16 };
|
||||
case int i32: return new MxValue { Int32Value = i32 };
|
||||
case byte u8: return new MxValue { Int32Value = u8 };
|
||||
case ushort u16: return new MxValue { Int32Value = u16 };
|
||||
case uint u32 when u32 <= int.MaxValue: return new MxValue { Int32Value = (int)u32 };
|
||||
case long i64: return new MxValue { Int64Value = i64 };
|
||||
case ulong u64 when u64 <= long.MaxValue: return new MxValue { Int64Value = (long)u64 };
|
||||
case float f32: return new MxValue { FloatValue = f32 };
|
||||
case double f64: return new MxValue { DoubleValue = f64 };
|
||||
case string s: return new MxValue { StringValue = s };
|
||||
case DateTime dt: return new MxValue { TimestampValue = Timestamp.FromDateTime(EnsureUtc(dt)) };
|
||||
case DateTimeOffset dto: return new MxValue { TimestampValue = Timestamp.FromDateTimeOffset(dto) };
|
||||
|
||||
case bool[] arr: return EncodeArray(arr, (mx, vs) => mx.BoolValues = ToBoolArray(vs));
|
||||
case int[] arr: return EncodeArray(arr, (mx, vs) => mx.Int32Values = ToInt32Array(vs));
|
||||
case long[] arr: return EncodeArray(arr, (mx, vs) => mx.Int64Values = ToInt64Array(vs));
|
||||
case float[] arr: return EncodeArray(arr, (mx, vs) => mx.FloatValues = ToFloatArray(vs));
|
||||
case double[] arr: return EncodeArray(arr, (mx, vs) => mx.DoubleValues = ToDoubleArray(vs));
|
||||
case string[] arr: return EncodeArray(arr, (mx, vs) => mx.StringValues = ToStringArray(vs));
|
||||
case DateTime[] arr: return EncodeArray(arr, (mx, vs) => mx.TimestampValues = ToTimestampArray(vs));
|
||||
|
||||
default:
|
||||
throw new ArgumentException(
|
||||
$"Cannot encode value of type {value.GetType()} as MxValue. Supported: " +
|
||||
"bool, int / long (and their unsigned variants), float, double, string, DateTime, " +
|
||||
"and their 1-D array variants.",
|
||||
nameof(value));
|
||||
}
|
||||
}
|
||||
|
||||
private static MxValue EncodeArray<T>(T[] values, Action<MxArray, T[]> populate)
|
||||
{
|
||||
var array = new MxArray();
|
||||
populate(array, values);
|
||||
array.Dimensions.Add((uint)values.Length);
|
||||
return new MxValue { ArrayValue = array };
|
||||
}
|
||||
|
||||
private static BoolArray ToBoolArray(bool[] vs) { var a = new BoolArray(); a.Values.AddRange(vs); return a; }
|
||||
private static Int32Array ToInt32Array(int[] vs) { var a = new Int32Array(); a.Values.AddRange(vs); return a; }
|
||||
private static Int64Array ToInt64Array(long[] vs) { var a = new Int64Array(); a.Values.AddRange(vs); return a; }
|
||||
private static FloatArray ToFloatArray(float[] vs) { var a = new FloatArray(); a.Values.AddRange(vs); return a; }
|
||||
private static DoubleArray ToDoubleArray(double[] vs) { var a = new DoubleArray(); a.Values.AddRange(vs); return a; }
|
||||
private static StringArray ToStringArray(string[] vs) { var a = new StringArray(); a.Values.AddRange(vs); return a; }
|
||||
private static TimestampArray ToTimestampArray(DateTime[] vs)
|
||||
{
|
||||
var a = new TimestampArray();
|
||||
foreach (var dt in vs) a.Values.Add(Timestamp.FromDateTime(EnsureUtc(dt)));
|
||||
return a;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// <see cref="Timestamp.FromDateTime"/> requires UTC. Convert non-UTC inputs
|
||||
/// explicitly so a caller passing local time gets predictable wire bytes.
|
||||
/// </summary>
|
||||
private static DateTime EnsureUtc(DateTime dt) => dt.Kind switch
|
||||
{
|
||||
DateTimeKind.Utc => dt,
|
||||
DateTimeKind.Local => dt.ToUniversalTime(),
|
||||
_ => DateTime.SpecifyKind(dt, DateTimeKind.Utc),
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,268 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Coordinates GalaxyDriver's recovery from gateway transport failure. Drives a
|
||||
/// state machine — <c>Healthy → TransportLost → Reopening → Replaying → Healthy</c>
|
||||
/// — and exposes the current state through a snapshot + change event so the
|
||||
/// driver's <c>DriverHealth</c> reflects <c>Degraded</c> while we're not in
|
||||
/// <c>Healthy</c>.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// The supervisor doesn't own the session, the subscription registry, or the
|
||||
/// event pump. It receives transport-failure signals from the rest of the
|
||||
/// driver (EventPump throws, a gw RPC raises, the heartbeat times out), runs
|
||||
/// a one-attempt-at-a-time recovery loop, and lets the rest of the driver
|
||||
/// continue serving cached state during recovery.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <b>Reopen</b>: caller-supplied callback that re-opens the gw session +
|
||||
/// re-Registers the MXAccess client. Throws on failure.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// <b>Replay</b>: caller-supplied callback that re-establishes every active
|
||||
/// subscription. Production wraps gw's <c>ReplaySubscriptionsCommand</c>
|
||||
/// (mxaccessgw issue #0.3); when that's not available, the callback falls
|
||||
/// back to walking the SubscriptionRegistry and re-issuing SubscribeBulk for
|
||||
/// every tracked tag.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// Backoff is capped exponential — first retry after
|
||||
/// <see cref="ReconnectOptions.InitialBackoff"/>, doubled per failed attempt,
|
||||
/// capped at <see cref="ReconnectOptions.MaxBackoff"/>. Persistent failures
|
||||
/// hold the supervisor in <c>Reopening</c> indefinitely; the supervisor never
|
||||
/// gives up on its own — operators / Phase 6.4 soak handle that policy.
|
||||
/// </para>
|
||||
/// </remarks>
|
||||
public sealed class ReconnectSupervisor : IDisposable
|
||||
{
|
||||
/// <summary>Recovery state machine.</summary>
|
||||
public enum State
|
||||
{
|
||||
Healthy,
|
||||
TransportLost,
|
||||
Reopening,
|
||||
Replaying,
|
||||
}
|
||||
|
||||
private readonly Func<CancellationToken, Task> _reopen;
|
||||
private readonly Func<CancellationToken, Task> _replay;
|
||||
private readonly ReconnectOptions _options;
|
||||
private readonly ILogger _logger;
|
||||
private readonly Func<int, TimeSpan, TimeSpan, TimeSpan>? _backoffDelay;
|
||||
|
||||
private readonly Lock _stateLock = new();
|
||||
private State _state = State.Healthy;
|
||||
private string? _lastError;
|
||||
private DateTime? _lastTransitionUtc;
|
||||
|
||||
private Task? _recoveryLoop;
|
||||
private CancellationTokenSource? _loopCts;
|
||||
private bool _disposed;
|
||||
|
||||
/// <summary>Fires after every state transition.</summary>
|
||||
public event EventHandler<StateTransition>? StateChanged;
|
||||
|
||||
public ReconnectSupervisor(
|
||||
Func<CancellationToken, Task> reopen,
|
||||
Func<CancellationToken, Task> replay,
|
||||
ReconnectOptions? options = null,
|
||||
ILogger? logger = null,
|
||||
Func<int, TimeSpan, TimeSpan, TimeSpan>? backoffDelay = null)
|
||||
{
|
||||
_reopen = reopen ?? throw new ArgumentNullException(nameof(reopen));
|
||||
_replay = replay ?? throw new ArgumentNullException(nameof(replay));
|
||||
_options = options ?? new ReconnectOptions();
|
||||
_logger = logger ?? NullLogger.Instance;
|
||||
_backoffDelay = backoffDelay;
|
||||
}
|
||||
|
||||
/// <summary>Current state. Healthy = fully recovered + subscriptions live.</summary>
|
||||
public State CurrentState
|
||||
{
|
||||
get { lock (_stateLock) return _state; }
|
||||
}
|
||||
|
||||
/// <summary>True when CurrentState != Healthy. Drivers map this to DriverState.Degraded.</summary>
|
||||
public bool IsDegraded
|
||||
{
|
||||
get { lock (_stateLock) return _state != State.Healthy; }
|
||||
}
|
||||
|
||||
public string? LastError
|
||||
{
|
||||
get { lock (_stateLock) return _lastError; }
|
||||
}
|
||||
|
||||
public DateTime? LastTransitionUtc
|
||||
{
|
||||
get { lock (_stateLock) return _lastTransitionUtc; }
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Notify the supervisor that a gw transport failure has been observed. Idempotent —
|
||||
/// repeated calls during an in-flight recovery do not start a parallel loop. The
|
||||
/// first call spawns a background task that drives reopen → replay until it
|
||||
/// succeeds or <see cref="Dispose"/> cancels it.
|
||||
/// </summary>
|
||||
public void ReportTransportFailure(Exception cause)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(cause);
|
||||
ObjectDisposedException.ThrowIf(_disposed, this);
|
||||
|
||||
lock (_stateLock)
|
||||
{
|
||||
_lastError = cause.Message;
|
||||
if (_state != State.Healthy)
|
||||
{
|
||||
// Already recovering — nothing else to do.
|
||||
_logger.LogDebug("Transport failure reported during {State}: {Message}", _state, cause.Message);
|
||||
return;
|
||||
}
|
||||
|
||||
TransitionLocked(State.TransportLost, cause.Message);
|
||||
|
||||
_loopCts = new CancellationTokenSource();
|
||||
_recoveryLoop = Task.Run(() => RecoveryLoopAsync(_loopCts.Token));
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Wait until the current recovery cycle reaches Healthy or the supplied token
|
||||
/// is cancelled. Returns immediately when already Healthy. Useful for tests
|
||||
/// and for orchestration that wants to gate calls on recovery completing.
|
||||
/// </summary>
|
||||
public async Task WaitForHealthyAsync(CancellationToken cancellationToken)
|
||||
{
|
||||
while (!cancellationToken.IsCancellationRequested && IsDegraded)
|
||||
{
|
||||
await Task.Delay(50, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
|
||||
private async Task RecoveryLoopAsync(CancellationToken ct)
|
||||
{
|
||||
var attempt = 0;
|
||||
while (!ct.IsCancellationRequested)
|
||||
{
|
||||
attempt++;
|
||||
if (attempt > 1)
|
||||
{
|
||||
var delay = ComputeBackoff(attempt);
|
||||
_logger.LogInformation(
|
||||
"Galaxy reconnect attempt {Attempt} — waiting {Delay} before retry", attempt, delay);
|
||||
try { await Task.Delay(delay, ct).ConfigureAwait(false); }
|
||||
catch (OperationCanceledException) { return; }
|
||||
}
|
||||
|
||||
// === Reopening phase ===
|
||||
lock (_stateLock) TransitionLocked(State.Reopening, _lastError);
|
||||
|
||||
try
|
||||
{
|
||||
await _reopen(ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Galaxy reopen failed (attempt {Attempt})", attempt);
|
||||
lock (_stateLock) { _lastError = ex.Message; }
|
||||
continue; // back to backoff + retry
|
||||
}
|
||||
|
||||
// === Replaying phase ===
|
||||
lock (_stateLock) TransitionLocked(State.Replaying, _lastError);
|
||||
|
||||
try
|
||||
{
|
||||
await _replay(ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested) { return; }
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex, "Galaxy replay failed (attempt {Attempt})", attempt);
|
||||
lock (_stateLock) { _lastError = ex.Message; }
|
||||
continue; // back to backoff + retry
|
||||
}
|
||||
|
||||
// === Done ===
|
||||
lock (_stateLock)
|
||||
{
|
||||
_lastError = null;
|
||||
TransitionLocked(State.Healthy, null);
|
||||
}
|
||||
_logger.LogInformation("Galaxy reconnect succeeded after {Attempt} attempt(s)", attempt);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
private TimeSpan ComputeBackoff(int attempt)
|
||||
{
|
||||
if (_backoffDelay is not null)
|
||||
return _backoffDelay(attempt, _options.InitialBackoff, _options.MaxBackoff);
|
||||
|
||||
// Standard capped exponential — InitialBackoff * 2^(attempt-2), capped at MaxBackoff.
|
||||
// Attempt 2 → InitialBackoff, attempt 3 → 2x, attempt 4 → 4x, etc.
|
||||
var multiplier = Math.Min(1L << Math.Max(0, attempt - 2), int.MaxValue);
|
||||
var ticks = _options.InitialBackoff.Ticks * multiplier;
|
||||
if (ticks <= 0 || ticks > _options.MaxBackoff.Ticks) ticks = _options.MaxBackoff.Ticks;
|
||||
return TimeSpan.FromTicks(ticks);
|
||||
}
|
||||
|
||||
private void TransitionLocked(State next, string? cause)
|
||||
{
|
||||
if (next == _state) return;
|
||||
var previous = _state;
|
||||
_state = next;
|
||||
_lastTransitionUtc = DateTime.UtcNow;
|
||||
var transition = new StateTransition(previous, next, cause, _lastTransitionUtc.Value);
|
||||
try { StateChanged?.Invoke(this, transition); }
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex,
|
||||
"Galaxy reconnect StateChanged handler threw — continuing.");
|
||||
}
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
if (_disposed) return;
|
||||
_disposed = true;
|
||||
|
||||
CancellationTokenSource? cts;
|
||||
Task? loop;
|
||||
lock (_stateLock) { cts = _loopCts; loop = _recoveryLoop; _loopCts = null; _recoveryLoop = null; }
|
||||
|
||||
cts?.Cancel();
|
||||
if (loop is not null)
|
||||
{
|
||||
try { loop.GetAwaiter().GetResult(); } catch { /* shutdown */ }
|
||||
}
|
||||
cts?.Dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// One state transition observed by the supervisor.
|
||||
/// </summary>
|
||||
public sealed record StateTransition(
|
||||
ReconnectSupervisor.State Previous,
|
||||
ReconnectSupervisor.State Next,
|
||||
string? Cause,
|
||||
DateTime AtUtc);
|
||||
|
||||
/// <summary>
|
||||
/// Knobs for the supervisor's backoff. <see cref="ReconnectOptions"/> on the driver
|
||||
/// options record (PR 4.0) maps onto this — they're separate types so the supervisor
|
||||
/// can be exercised in tests without the full driver options surface.
|
||||
/// </summary>
|
||||
public sealed record ReconnectOptions(
|
||||
TimeSpan? InitialBackoffOverride = null,
|
||||
TimeSpan? MaxBackoffOverride = null)
|
||||
{
|
||||
public TimeSpan InitialBackoff => InitialBackoffOverride ?? TimeSpan.FromMilliseconds(500);
|
||||
public TimeSpan MaxBackoff => MaxBackoffOverride ?? TimeSpan.FromSeconds(30);
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
using Microsoft.Extensions.Logging;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Maps the gateway's <see cref="MxStatusProxy"/> (raw MXAccess HRESULT + category bits)
|
||||
/// to OPC UA <c>StatusCode</c> uints. Replaces the legacy
|
||||
/// <c>MxAccessGalaxyBackend.ToWire</c> heuristic (Quality >= 192 → Good, else Uncertain)
|
||||
/// with an explicit table that preserves specific codes (BadNotConnected, OutOfService,
|
||||
/// UncertainSubNormal, etc.) instead of collapsing to category buckets.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// OPC DA quality bytes are 16-bit values arranged as <c>[QQSSSSSSLLNNNN]</c>:
|
||||
/// Q = quality category (Bad/Uncertain/Good = 0/1/3), S = substatus, L = limit, N = vendor.
|
||||
/// This mapper consumes the LOW byte (where the Q+S bits live) — the same byte the legacy
|
||||
/// Wonderware Historian SDK exposed as the raw quality byte. Category-only fallback paths
|
||||
/// handle deployment versions of MXAccess that surface unfamiliar substatuses.
|
||||
///
|
||||
/// Unknown substatus values fall back to the matching category bucket (<c>Good</c>,
|
||||
/// <c>Uncertain</c>, <c>Bad</c>) and emit a single diagnostic log line per session via
|
||||
/// the supplied logger so field captures can extend the table.
|
||||
/// </remarks>
|
||||
internal static class StatusCodeMap
|
||||
{
|
||||
// OPC UA Part 4 standard StatusCodes — top-byte categories are 0x00 (Good),
|
||||
// 0x40 (Uncertain), 0x80 (Bad). Specific codes layer onto the category byte.
|
||||
|
||||
public const uint Good = 0x00000000u;
|
||||
public const uint GoodLocalOverride = 0x00D80000u;
|
||||
public const uint Uncertain = 0x40000000u;
|
||||
public const uint UncertainLastUsableValue = 0x40A40000u;
|
||||
public const uint UncertainSensorNotAccurate = 0x408D0000u;
|
||||
public const uint UncertainEngineeringUnitsExceeded = 0x408E0000u;
|
||||
public const uint UncertainSubNormal = 0x408F0000u;
|
||||
public const uint Bad = 0x80000000u;
|
||||
public const uint BadConfigurationError = 0x80890000u;
|
||||
public const uint BadNotConnected = 0x808A0000u;
|
||||
public const uint BadDeviceFailure = 0x808B0000u;
|
||||
public const uint BadSensorFailure = 0x808C0000u;
|
||||
public const uint BadCommunicationError = 0x80050000u;
|
||||
public const uint BadOutOfService = 0x808D0000u;
|
||||
public const uint BadWaitingForInitialData = 0x80320000u;
|
||||
public const uint BadInternalError = 0x80020000u;
|
||||
|
||||
/// <summary>
|
||||
/// Map a raw OPC DA quality byte (the low byte of an OPC DA <c>OpcQuality</c> ushort,
|
||||
/// which is what Wonderware Historian + MXAccess surface as <c>OPCITEMSTATE.qLong</c>'s
|
||||
/// low byte) to the OPC UA StatusCode uint.
|
||||
/// </summary>
|
||||
public static uint FromQualityByte(byte q, ILogger? logger = null) => q switch
|
||||
{
|
||||
// Good family — top two bits 11b (192-255).
|
||||
192 => Good,
|
||||
216 => GoodLocalOverride,
|
||||
|
||||
// Uncertain family — top two bits 01b (64-127).
|
||||
64 => Uncertain,
|
||||
68 => UncertainLastUsableValue,
|
||||
80 => UncertainSensorNotAccurate,
|
||||
84 => UncertainEngineeringUnitsExceeded,
|
||||
88 => UncertainSubNormal,
|
||||
|
||||
// Bad family — top two bits 00b (0-63).
|
||||
0 => Bad,
|
||||
4 => BadConfigurationError,
|
||||
8 => BadNotConnected,
|
||||
12 => BadDeviceFailure,
|
||||
16 => BadSensorFailure,
|
||||
20 => BadCommunicationError,
|
||||
24 => BadOutOfService,
|
||||
32 => BadWaitingForInitialData,
|
||||
|
||||
_ => Categorize(q, logger),
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// Map a gateway-reported <see cref="MxStatusProxy"/> to OPC UA StatusCode. Honors
|
||||
/// the success flag, then the detail byte (treated as a quality substatus), with a
|
||||
/// transport-error fallback for status rows whose detected_by indicates the failure
|
||||
/// happened before the MXAccess call ran.
|
||||
/// </summary>
|
||||
public static uint FromMxStatus(MxStatusProxy? status, ILogger? logger = null)
|
||||
{
|
||||
if (status is null) return Good;
|
||||
if (status.Success != 0) return Good;
|
||||
|
||||
// Detail field carries the substatus when the worker translated MX-style codes;
|
||||
// when zero, infer from category + detected_by.
|
||||
var detail = (byte)(status.Detail & 0xFF);
|
||||
if (detail != 0) return FromQualityByte(detail, logger);
|
||||
|
||||
// detected_by != Mxaccess (raw_detected_by != the MXAccess source enum) implies
|
||||
// the failure happened pre-call (gateway, worker, transport) — surface as a
|
||||
// communication error rather than a generic Bad.
|
||||
if (status.RawDetectedBy != 0) return BadCommunicationError;
|
||||
|
||||
return Bad;
|
||||
}
|
||||
|
||||
private static uint Categorize(byte q, ILogger? logger)
|
||||
{
|
||||
if (q >= 192) { Log(logger, q, "Good"); return Good; }
|
||||
if (q >= 64) { Log(logger, q, "Uncertain"); return Uncertain; }
|
||||
Log(logger, q, "Bad");
|
||||
return Bad;
|
||||
}
|
||||
|
||||
private static void Log(ILogger? logger, byte q, string bucket)
|
||||
{
|
||||
// Best-effort diagnostic so field captures can extend the table — once per bucket
|
||||
// per session is plenty (the LogWarning level is rate-limited by Serilog filters
|
||||
// in production).
|
||||
logger?.LogWarning(
|
||||
"Unrecognised MXAccess quality byte 0x{Q:X2} — falling back to {Bucket} category. " +
|
||||
"Field capture welcome — extend StatusCodeMap.FromQualityByte.", q, bucket);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
using System.Collections.Concurrent;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Bookkeeping for live subscriptions. Maps each driver-issued <c>SubscriptionId</c> to the
|
||||
/// set of (full-reference, gw item-handle) pairs the gateway returned, and maintains the
|
||||
/// reverse map (item-handle → set of driver subscriptions) so the
|
||||
/// <see cref="EventPump"/> can fan out a single OnDataChange event to every driver
|
||||
/// subscription that includes the changed tag.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// A tag may legitimately appear in multiple driver subscriptions (separate clients or
|
||||
/// OPC UA monitored items observing the same Galaxy attribute). Using a single shared
|
||||
/// gw subscription per session and fanning out on the driver side keeps the gateway's
|
||||
/// work bounded; the reverse map is the fan-out index.
|
||||
/// </remarks>
|
||||
internal sealed class SubscriptionRegistry
|
||||
{
|
||||
private readonly ConcurrentDictionary<long, SubscriptionEntry> _bySubscriptionId = new();
|
||||
private readonly ConcurrentDictionary<int, ConcurrentBag<long>> _subscribersByItemHandle = new();
|
||||
private long _nextSubscriptionId;
|
||||
|
||||
public int TrackedSubscriptionCount => _bySubscriptionId.Count;
|
||||
public int TrackedItemHandleCount => _subscribersByItemHandle.Count;
|
||||
|
||||
/// <summary>Allocate a fresh subscription id. Monotonic; unique per registry lifetime.</summary>
|
||||
public long NextSubscriptionId() => Interlocked.Increment(ref _nextSubscriptionId);
|
||||
|
||||
/// <summary>
|
||||
/// Register a subscription and the per-tag item handles the gateway returned for it.
|
||||
/// Failed tags (item handle = 0 or negative) are stored anyway so unsubscribe can
|
||||
/// emit per-tag UnsubscribeBulk for the ones that did succeed.
|
||||
/// </summary>
|
||||
public void Register(long subscriptionId, IReadOnlyList<TagBinding> bindings)
|
||||
{
|
||||
var entry = new SubscriptionEntry(subscriptionId, bindings);
|
||||
_bySubscriptionId[subscriptionId] = entry;
|
||||
foreach (var binding in bindings)
|
||||
{
|
||||
if (binding.ItemHandle <= 0) continue; // failed gw subscribe — no events expected
|
||||
_subscribersByItemHandle.AddOrUpdate(
|
||||
binding.ItemHandle,
|
||||
_ => [subscriptionId],
|
||||
(_, bag) => { bag.Add(subscriptionId); return bag; });
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Remove a subscription. Returns the bindings the caller should pass to
|
||||
/// <c>UnsubscribeBulkAsync</c>; null when the id was never registered.
|
||||
/// </summary>
|
||||
public IReadOnlyList<TagBinding>? Remove(long subscriptionId)
|
||||
{
|
||||
if (!_bySubscriptionId.TryRemove(subscriptionId, out var entry)) return null;
|
||||
|
||||
foreach (var binding in entry.Bindings)
|
||||
{
|
||||
if (binding.ItemHandle <= 0) continue;
|
||||
if (!_subscribersByItemHandle.TryGetValue(binding.ItemHandle, out var bag)) continue;
|
||||
|
||||
// Filter the bag to drop this subscription id. ConcurrentBag has no Remove —
|
||||
// rebuild it from the remaining entries. The contention here is bounded by
|
||||
// the number of tags in the dropped subscription.
|
||||
var remaining = new ConcurrentBag<long>(bag.Where(id => id != subscriptionId));
|
||||
if (remaining.IsEmpty) _subscribersByItemHandle.TryRemove(binding.ItemHandle, out _);
|
||||
else _subscribersByItemHandle[binding.ItemHandle] = remaining;
|
||||
}
|
||||
|
||||
return entry.Bindings;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Look up the (subscription id, full reference) pairs that should receive an
|
||||
/// OnDataChange for the given gw item handle. Returns empty when nobody subscribes.
|
||||
/// </summary>
|
||||
public IReadOnlyList<(long SubscriptionId, string FullReference)> ResolveSubscribers(int itemHandle)
|
||||
{
|
||||
if (!_subscribersByItemHandle.TryGetValue(itemHandle, out var bag)) return [];
|
||||
|
||||
// Each subscription may include the tag once. Walk every active subscription that
|
||||
// claims this handle and pull the full ref from its binding list.
|
||||
var result = new List<(long, string)>();
|
||||
foreach (var subId in bag.Distinct())
|
||||
{
|
||||
if (!_bySubscriptionId.TryGetValue(subId, out var entry)) continue;
|
||||
var binding = entry.Bindings.FirstOrDefault(b => b.ItemHandle == itemHandle);
|
||||
if (binding is { FullReference: { } fullRef })
|
||||
result.Add((subId, fullRef));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// <summary>Snapshot every active binding for diagnostic output.</summary>
|
||||
public IReadOnlyList<TagBinding> SnapshotAllBindings() =>
|
||||
[.. _bySubscriptionId.Values.SelectMany(entry => entry.Bindings)];
|
||||
|
||||
private sealed record SubscriptionEntry(long SubscriptionId, IReadOnlyList<TagBinding> Bindings);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// One (full reference, gw item handle) pair returned by SubscribeBulk. Item handle is
|
||||
/// zero or negative when the gateway rejected this individual tag (bad name, duplicate);
|
||||
/// the registry keeps the binding so the caller can surface a per-tag failure status.
|
||||
/// </summary>
|
||||
internal sealed record TagBinding(string FullReference, int ItemHandle);
|
||||
@@ -0,0 +1,54 @@
|
||||
using ZB.MOM.WW.OtOpcUa.Core.Abstractions;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// PR 6.1 — Decorator that emits one <see cref="System.Diagnostics.Activity"/> span
|
||||
/// per gw write batch. Tags secured-write counts so ops can see the routing-by-
|
||||
/// classification split (FreeAccess/Operate vs Tune/Configure) without re-reading
|
||||
/// the discovery dictionary.
|
||||
/// </summary>
|
||||
internal sealed class TracedGalaxyDataWriter(IGalaxyDataWriter inner, string clientName) : IGalaxyDataWriter
|
||||
{
|
||||
public async Task<IReadOnlyList<WriteResult>> WriteAsync(
|
||||
IReadOnlyList<WriteRequest> writes,
|
||||
Func<string, SecurityClassification> securityResolver,
|
||||
CancellationToken cancellationToken)
|
||||
{
|
||||
using var activity = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.write");
|
||||
activity?.SetTag("galaxy.client", clientName);
|
||||
activity?.SetTag("galaxy.tag_count", writes.Count);
|
||||
|
||||
if (activity is { IsAllDataRequested: true })
|
||||
{
|
||||
// Counting the secured-write split is cheap (one resolver call per request)
|
||||
// and only happens when a tracing listener is actively recording — keeps the
|
||||
// hot path free when no one's listening.
|
||||
var securedCount = 0;
|
||||
foreach (var w in writes)
|
||||
{
|
||||
var sc = securityResolver(w.FullReference);
|
||||
if (sc is SecurityClassification.Tune
|
||||
or SecurityClassification.Configure
|
||||
or SecurityClassification.VerifiedWrite)
|
||||
{
|
||||
securedCount++;
|
||||
}
|
||||
}
|
||||
activity.SetTag("galaxy.secured_write_count", securedCount);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var results = await inner.WriteAsync(writes, securityResolver, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
activity?.SetTag("galaxy.success_count", results.Count(r => r.StatusCode < 0x80000000u));
|
||||
return results;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
activity.RecordError(ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
using System.Runtime.CompilerServices;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// PR 6.1 — Decorator that emits one <see cref="System.Diagnostics.Activity"/> span
|
||||
/// per gw subscription RPC. Wraps the production <see cref="GatewayGalaxySubscriber"/>;
|
||||
/// tests substitute a fake at the same seam without taking the tracing overhead.
|
||||
/// </summary>
|
||||
internal sealed class TracedGalaxySubscriber(IGalaxySubscriber inner, string clientName) : IGalaxySubscriber
|
||||
{
|
||||
public async Task<IReadOnlyList<SubscribeResult>> SubscribeBulkAsync(
|
||||
IReadOnlyList<string> fullReferences, int bufferedUpdateIntervalMs, CancellationToken cancellationToken)
|
||||
{
|
||||
using var activity = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.subscribe_bulk");
|
||||
activity?.SetTag("galaxy.client", clientName);
|
||||
activity?.SetTag("galaxy.tag_count", fullReferences.Count);
|
||||
activity?.SetTag("galaxy.buffered_interval_ms", bufferedUpdateIntervalMs);
|
||||
try
|
||||
{
|
||||
var results = await inner.SubscribeBulkAsync(fullReferences, bufferedUpdateIntervalMs, cancellationToken)
|
||||
.ConfigureAwait(false);
|
||||
activity?.SetTag("galaxy.success_count", results.Count(r => r.WasSuccessful));
|
||||
return results;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
activity.RecordError(ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
public async Task UnsubscribeBulkAsync(IReadOnlyList<int> itemHandles, CancellationToken cancellationToken)
|
||||
{
|
||||
using var activity = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.unsubscribe_bulk");
|
||||
activity?.SetTag("galaxy.client", clientName);
|
||||
activity?.SetTag("galaxy.tag_count", itemHandles.Count);
|
||||
try
|
||||
{
|
||||
await inner.UnsubscribeBulkAsync(itemHandles, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
activity.RecordError(ex);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Streaming RPC — one parent span covers the entire stream lifetime. Per-event
|
||||
/// spans would dominate the trace volume at 50k tags / 1Hz; ops gets per-event
|
||||
/// visibility through <see cref="EventPump"/>'s metrics in PR 6.2 instead.
|
||||
/// </summary>
|
||||
public async IAsyncEnumerable<MxEvent> StreamEventsAsync(
|
||||
[EnumeratorCancellation] CancellationToken cancellationToken)
|
||||
{
|
||||
using var activity = GalaxyTelemetry.ActivitySource.StartActivity("galaxy.stream_events");
|
||||
activity?.SetTag("galaxy.client", clientName);
|
||||
|
||||
IAsyncEnumerator<MxEvent>? enumerator = null;
|
||||
try
|
||||
{
|
||||
enumerator = inner.StreamEventsAsync(cancellationToken).GetAsyncEnumerator(cancellationToken);
|
||||
var eventCount = 0L;
|
||||
while (true)
|
||||
{
|
||||
bool moveNext;
|
||||
try
|
||||
{
|
||||
moveNext = await enumerator.MoveNextAsync().ConfigureAwait(false);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
activity.RecordError(ex);
|
||||
activity?.SetTag("galaxy.event_count", eventCount);
|
||||
throw;
|
||||
}
|
||||
|
||||
if (!moveNext) break;
|
||||
eventCount++;
|
||||
yield return enumerator.Current;
|
||||
}
|
||||
activity?.SetTag("galaxy.event_count", eventCount);
|
||||
}
|
||||
finally
|
||||
{
|
||||
if (enumerator is not null) await enumerator.DisposeAsync().ConfigureAwait(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user