feat(driver-galaxy): consume the gateway's session-less alarm model
The mxaccessgw updated alarms to a session-less central monitor: AcknowledgeAlarm dropped SessionId, and alarm transitions now come from the session-less StreamAlarms feed instead of the per-session worker StreamEvents stream. The GalaxyDriver no longer compiled against the updated client.

- GatewayGalaxyAlarmAcknowledger: session-less rewrite — no GalaxyMxSession; outcome read from ProtocolStatus (throw) and Hresult (warn).
- New IGalaxyAlarmFeed seam + GatewayGalaxyAlarmFeed: background consumer of StreamAlarms that decodes the active-alarm snapshot plus live transitions into GalaxyAlarmTransition and reopens the stream on transport faults.
- EventPump: drop the dead per-session OnAlarmTransition path; the per-session stream no longer carries alarms.
- GalaxyDriver: bridge the feed onto IAlarmSource.OnAlarmEvent; the feed starts on SubscribeAlarmsAsync, independent of data subscriptions.
- Tests: replace EventPumpAlarmTests with GatewayGalaxyAlarmFeedTests; move the driver alarm-source tests onto the IGalaxyAlarmFeed seam.

Browse needed no change — GatewayGalaxyHierarchySource consumes the unchanged DiscoverHierarchy contract.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -63,14 +63,18 @@ public sealed class GalaxyDriver
|
||||
private EventPump? _eventPump;
|
||||
private readonly Lock _pumpLock = new();
|
||||
|
||||
// PR B.2 — IAlarmSource implementation. Production-side acks route through
|
||||
// GatewayGalaxyAlarmAcknowledger which calls MxGatewayClient.AcknowledgeAlarmAsync
|
||||
// (PR E.2 SDK). Tests inject IGalaxyAlarmAcknowledger via the internal ctor to
|
||||
// exercise the wiring without a running gateway. The alarm event stream is
|
||||
// delivered by EventPump.OnAlarmTransition (PR B.1) — this driver is the
|
||||
// consumer that bridges it onto IAlarmSource.OnAlarmEvent.
|
||||
// IAlarmSource implementation. Production-side acks route through
|
||||
// GatewayGalaxyAlarmAcknowledger which calls the session-less
|
||||
// MxGatewayClient.AcknowledgeAlarmAsync RPC; alarm transitions arrive on the
|
||||
// gateway's session-less StreamAlarms feed via GatewayGalaxyAlarmFeed. Tests inject
|
||||
// IGalaxyAlarmAcknowledger + IGalaxyAlarmFeed via the internal ctor to exercise the
|
||||
// wiring without a running gateway. This driver bridges the feed's OnAlarmTransition
|
||||
// onto IAlarmSource.OnAlarmEvent.
|
||||
private IGalaxyAlarmAcknowledger? _alarmAcknowledger;
|
||||
private IGalaxyAlarmFeed? _alarmFeed;
|
||||
private readonly Lock _alarmHandlersLock = new();
|
||||
private readonly Lock _alarmFeedLock = new();
|
||||
private bool _alarmFeedWired;
|
||||
private readonly HashSet<GalaxyAlarmSubscriptionHandle> _alarmSubscriptions = new();
|
||||
|
||||
// PR 4.W — production runtime owned by InitializeAsync. The driver builds these
|
||||
@@ -118,7 +122,7 @@ public sealed class GalaxyDriver
|
||||
ILogger<GalaxyDriver>? logger = null)
|
||||
: this(driverInstanceId, options,
|
||||
hierarchySource: null, dataReader: null, dataWriter: null, subscriber: null,
|
||||
alarmAcknowledger: null, logger)
|
||||
alarmAcknowledger: null, alarmFeed: null, logger)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -136,6 +140,7 @@ public sealed class GalaxyDriver
|
||||
IGalaxyDataWriter? dataWriter = null,
|
||||
IGalaxySubscriber? subscriber = null,
|
||||
IGalaxyAlarmAcknowledger? alarmAcknowledger = null,
|
||||
IGalaxyAlarmFeed? alarmFeed = null,
|
||||
ILogger<GalaxyDriver>? logger = null)
|
||||
{
|
||||
_driverInstanceId = !string.IsNullOrWhiteSpace(driverInstanceId)
|
||||
@@ -148,6 +153,7 @@ public sealed class GalaxyDriver
|
||||
_dataWriter = dataWriter;
|
||||
_subscriber = subscriber;
|
||||
_alarmAcknowledger = alarmAcknowledger;
|
||||
_alarmFeed = alarmFeed;
|
||||
|
||||
// Forward the aggregator's transitions through IHostConnectivityProbe.
|
||||
_hostStatuses.OnHostStatusChanged += (_, args) => OnHostStatusChanged?.Invoke(this, args);
|
||||
@@ -230,8 +236,12 @@ public sealed class GalaxyDriver
|
||||
_subscriber, _hostStatuses, _logger,
|
||||
bufferedUpdateIntervalMs: _options.MxAccess.PublishingIntervalMs);
|
||||
|
||||
// PR B.2 — wire the alarm acknowledger to the live gateway client.
|
||||
_alarmAcknowledger ??= new GatewayGalaxyAlarmAcknowledger(_ownedMxClient, _ownedMxSession, _logger);
|
||||
// Wire the alarm acknowledger + feed to the live gateway client. Both are
|
||||
// session-less — the gateway serves alarms from an always-on central monitor —
|
||||
// so they hang off the owned MxGatewayClient, not the worker session.
|
||||
_alarmAcknowledger ??= new GatewayGalaxyAlarmAcknowledger(_ownedMxClient, _logger);
|
||||
_alarmFeed ??= new GatewayGalaxyAlarmFeed(
|
||||
_ownedMxClient.StreamAlarmsAsync, _logger, _options.MxAccess.ClientName);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -724,13 +734,34 @@ public sealed class GalaxyDriver
|
||||
channelCapacity: _options.MxAccess.EventPumpChannelCapacity,
|
||||
clientName: _options.MxAccess.ClientName);
|
||||
_eventPump.OnDataChange += OnPumpDataChange;
|
||||
_eventPump.OnAlarmTransition += OnPumpAlarmTransition;
|
||||
_eventPump.Start();
|
||||
return _eventPump;
|
||||
}
|
||||
}
|
||||
|
||||
// ===== IAlarmSource (PR B.2) =====
|
||||
// ===== IAlarmSource =====
|
||||
|
||||
/// <summary>
/// Start the gateway alarm feed (idempotent) and wire its transitions onto this
/// driver's <see cref="OnAlarmEvent"/> bridge. The feed is session-less — it does
/// not depend on a data subscription or the <see cref="EventPump"/>.
/// </summary>
/// <remarks>
/// Attaching the handler and starting the feed are treated as one unit under
/// <c>_alarmFeedLock</c>. If <c>Start()</c> throws, the handler is detached again
/// before the exception propagates, so a later retry cannot leave
/// <c>OnAlarmFeedTransition</c> attached to the feed twice.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// No alarm feed is available (neither created by initialization nor injected).
/// </exception>
private void EnsureAlarmFeedStarted()
{
    lock (_alarmFeedLock)
    {
        // The null check intentionally precedes the "already wired" check so a
        // missing feed is always reported, matching the original ordering.
        var feed = _alarmFeed ?? throw new InvalidOperationException(
            "GalaxyDriver alarm feed is not wired. InitializeAsync must run (or a feed " +
            "seam must be injected via the internal ctor) before subscribing to alarms.");

        if (_alarmFeedWired)
        {
            return;
        }

        feed.OnAlarmTransition += OnAlarmFeedTransition;
        try
        {
            feed.Start();
        }
        catch
        {
            // Roll back the subscription: _alarmFeedWired stays false, so the next
            // call will attach the handler again — it must not already be attached.
            feed.OnAlarmTransition -= OnAlarmFeedTransition;
            throw;
        }

        _alarmFeedWired = true;
    }
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public Task<IAlarmSubscriptionHandle> SubscribeAlarmsAsync(
|
||||
@@ -740,12 +771,11 @@ public sealed class GalaxyDriver
|
||||
ArgumentNullException.ThrowIfNull(sourceNodeIds);
|
||||
|
||||
// The driver doesn't multiplex alarm subscriptions per source-node-id today —
|
||||
// alarm events arrive on the same gateway StreamEvents channel as data-change
|
||||
// events once the gateway emits the new family (PRs A.2 + A.3). The
|
||||
// subscription handle is a sentinel the server uses for symmetric Unsubscribe;
|
||||
// every active handle receives every alarm transition, and the server filters
|
||||
// by source node before raising Part 9 conditions. Same shape AbCip uses.
|
||||
EnsureEventPumpStarted();
|
||||
// every active handle receives every transition off the gateway's session-less
|
||||
// StreamAlarms feed, and the server filters by source node before raising Part 9
|
||||
// conditions. The subscription handle is a sentinel the server uses for
|
||||
// symmetric Unsubscribe. Same shape AbCip uses.
|
||||
EnsureAlarmFeedStarted();
|
||||
var handle = new GalaxyAlarmSubscriptionHandle(Guid.NewGuid().ToString("N"));
|
||||
lock (_alarmHandlersLock)
|
||||
{
|
||||
@@ -809,13 +839,13 @@ public sealed class GalaxyDriver
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Receives <see cref="GalaxyAlarmTransition"/> events from the EventPump and
|
||||
/// reshapes them into <see cref="AlarmEventArgs"/> for OPC UA-side consumers.
|
||||
/// Fires <see cref="OnAlarmEvent"/> only when at least one alarm subscription is
|
||||
/// active so a server that hasn't called <see cref="SubscribeAlarmsAsync"/> yet
|
||||
/// doesn't surface untracked transitions.
|
||||
/// Receives <see cref="GalaxyAlarmTransition"/> events from the gateway alarm
|
||||
/// feed and reshapes them into <see cref="AlarmEventArgs"/> for OPC UA-side
|
||||
/// consumers. Fires <see cref="OnAlarmEvent"/> only when at least one alarm
|
||||
/// subscription is active so a server that hasn't called
|
||||
/// <see cref="SubscribeAlarmsAsync"/> yet doesn't surface untracked transitions.
|
||||
/// </summary>
|
||||
private void OnPumpAlarmTransition(object? sender, GalaxyAlarmTransition transition)
|
||||
private void OnAlarmFeedTransition(object? sender, GalaxyAlarmTransition transition)
|
||||
{
|
||||
GalaxyAlarmSubscriptionHandle? handle;
|
||||
lock (_alarmHandlersLock)
|
||||
@@ -921,6 +951,11 @@ public sealed class GalaxyDriver
|
||||
lock (_pumpLock) { pump = _eventPump; _eventPump = null; }
|
||||
pump?.DisposeAsync().AsTask().GetAwaiter().GetResult();
|
||||
|
||||
IGalaxyAlarmFeed? alarmFeed;
|
||||
lock (_alarmFeedLock) { alarmFeed = _alarmFeed; _alarmFeed = null; }
|
||||
try { alarmFeed?.DisposeAsync().AsTask().GetAwaiter().GetResult(); }
|
||||
catch (Exception ex) { _logger.LogWarning(ex, "Alarm feed dispose failed"); }
|
||||
|
||||
_ownedMxSession?.DisposeAsync().AsTask().GetAwaiter().GetResult();
|
||||
_ownedMxSession = null;
|
||||
|
||||
|
||||
@@ -45,12 +45,6 @@ internal sealed class EventPump : IAsyncDisposable
|
||||
private static readonly Counter<long> EventsDropped =
|
||||
Meter.CreateCounter<long>("galaxy.events.dropped", unit: "{event}",
|
||||
description: "MxEvents dropped because the bounded channel was full (newest-dropped).");
|
||||
private static readonly Counter<long> AlarmTransitionsReceived =
|
||||
Meter.CreateCounter<long>("galaxy.alarm_transitions.received", unit: "{event}",
|
||||
description: "OnAlarmTransition events decoded and forwarded to driver-level handlers.");
|
||||
private static readonly Counter<long> AlarmTransitionsDecodingFailures =
|
||||
Meter.CreateCounter<long>("galaxy.alarm_transitions.decoding_failures", unit: "{event}",
|
||||
description: "OnAlarmTransition events that arrived without a populated body or with an unspecified transition kind.");
|
||||
|
||||
private readonly IGalaxySubscriber _subscriber;
|
||||
private readonly SubscriptionRegistry _registry;
|
||||
@@ -66,15 +60,6 @@ internal sealed class EventPump : IAsyncDisposable
|
||||
|
||||
public event EventHandler<DataChangeEventArgs>? OnDataChange;
|
||||
|
||||
/// <summary>
|
||||
/// Fires for every <see cref="MxEventFamily.OnAlarmTransition"/> event the
|
||||
/// gateway forwards. Decoded into a <see cref="GalaxyAlarmTransition"/> with
|
||||
/// the OPC UA severity bucket already mapped via
|
||||
/// <see cref="MxAccessSeverityMapper"/>. The driver wraps this onto
|
||||
/// <c>IAlarmSource.OnAlarmEvent</c> in PR B.2.
|
||||
/// </summary>
|
||||
internal event EventHandler<GalaxyAlarmTransition>? OnAlarmTransition;
|
||||
|
||||
public EventPump(
|
||||
IGalaxySubscriber subscriber,
|
||||
SubscriptionRegistry registry,
|
||||
@@ -179,13 +164,12 @@ internal sealed class EventPump : IAsyncDisposable
|
||||
case MxEventFamily.OnDataChange:
|
||||
DispatchDataChange(ev);
|
||||
break;
|
||||
case MxEventFamily.OnAlarmTransition:
|
||||
DispatchAlarmTransition(ev);
|
||||
break;
|
||||
default:
|
||||
// OnWriteComplete / OperationComplete / OnBufferedDataChange are filtered
|
||||
// out — write callers get their reply via the InvokeAsync round-trip, not
|
||||
// via the event stream.
|
||||
// OnAlarmTransition is no longer carried on the per-session event stream
|
||||
// — alarms come from the gateway's session-less StreamAlarms feed
|
||||
// (GatewayGalaxyAlarmFeed). OnWriteComplete / OperationComplete /
|
||||
// OnBufferedDataChange are filtered out: write callers get their reply
|
||||
// via the InvokeAsync round-trip, not via the event stream.
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -212,73 +196,6 @@ internal sealed class EventPump : IAsyncDisposable
|
||||
}
|
||||
}
|
||||
|
||||
private void DispatchAlarmTransition(MxEvent ev)
|
||||
{
|
||||
// Body absent (e.g. malformed gateway event or worker version skew) — count and
|
||||
// drop. The Part 9 sub-attribute fallback path keeps an alarm functional even
|
||||
// when the rich payload disappears.
|
||||
if (ev.OnAlarmTransition is not { } body)
|
||||
{
|
||||
AlarmTransitionsDecodingFailures.Add(1, _clientTag);
|
||||
_logger.LogDebug(
|
||||
"Galaxy OnAlarmTransition event arrived without a populated body (sequence={Sequence}); ignoring.",
|
||||
ev.WorkerSequence);
|
||||
return;
|
||||
}
|
||||
if (body.TransitionKind == AlarmTransitionKind.Unspecified)
|
||||
{
|
||||
AlarmTransitionsDecodingFailures.Add(1, _clientTag);
|
||||
_logger.LogDebug(
|
||||
"Galaxy OnAlarmTransition for {AlarmRef} has unspecified transition kind; ignoring.",
|
||||
body.AlarmFullReference);
|
||||
return;
|
||||
}
|
||||
|
||||
var (bucket, opcUaSeverity) = MxAccessSeverityMapper.Map(body.Severity);
|
||||
var transitionTimestamp = body.TransitionTimestamp is { } tts
|
||||
? tts.ToDateTime()
|
||||
: DateTime.UtcNow;
|
||||
DateTime? originalRaiseTimestamp = body.OriginalRaiseTimestamp is { } orts
|
||||
? orts.ToDateTime()
|
||||
: null;
|
||||
|
||||
var transition = new GalaxyAlarmTransition(
|
||||
AlarmFullReference: body.AlarmFullReference,
|
||||
SourceObjectReference: body.SourceObjectReference,
|
||||
AlarmTypeName: body.AlarmTypeName,
|
||||
TransitionKind: MapTransitionKind(body.TransitionKind),
|
||||
SeverityBucket: bucket,
|
||||
OpcUaSeverity: opcUaSeverity,
|
||||
RawMxAccessSeverity: body.Severity,
|
||||
OriginalRaiseTimestampUtc: originalRaiseTimestamp,
|
||||
TransitionTimestampUtc: transitionTimestamp,
|
||||
OperatorUser: body.OperatorUser,
|
||||
OperatorComment: body.OperatorComment,
|
||||
Category: body.Category,
|
||||
Description: body.Description);
|
||||
|
||||
AlarmTransitionsReceived.Add(1, _clientTag);
|
||||
try
|
||||
{
|
||||
OnAlarmTransition?.Invoke(this, transition);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex,
|
||||
"Galaxy OnAlarmTransition handler threw for {AlarmRef} — continuing.",
|
||||
transition.AlarmFullReference);
|
||||
}
|
||||
}
|
||||
|
||||
private static GalaxyAlarmTransitionKind MapTransitionKind(AlarmTransitionKind kind) => kind switch
|
||||
{
|
||||
AlarmTransitionKind.Raise => GalaxyAlarmTransitionKind.Raise,
|
||||
AlarmTransitionKind.Acknowledge => GalaxyAlarmTransitionKind.Acknowledge,
|
||||
AlarmTransitionKind.Clear => GalaxyAlarmTransitionKind.Clear,
|
||||
AlarmTransitionKind.Retrigger => GalaxyAlarmTransitionKind.Retrigger,
|
||||
_ => GalaxyAlarmTransitionKind.Unspecified,
|
||||
};
|
||||
|
||||
private DataValueSnapshot ToSnapshot(MxEvent ev)
|
||||
{
|
||||
var value = MxValueDecoder.Decode(ev.Value);
|
||||
|
||||
@@ -5,26 +5,27 @@ using MxGateway.Contracts.Proto;
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
|
||||
/// Production <see cref="IGalaxyAlarmAcknowledger"/> backed by the
|
||||
/// <c>MxGatewayClient.AcknowledgeAlarmAsync</c> RPC (PR E.2). Maps the
|
||||
/// reply's protocol status into a thrown exception when the gateway
|
||||
/// reports a non-OK condition; native MxStatus failures inside the reply
|
||||
/// surface as a logged warning so operator workflows aren't blocked by a
|
||||
/// transient MxAccess hiccup.
|
||||
/// Production <see cref="IGalaxyAlarmAcknowledger"/> backed by the session-less
|
||||
/// <c>MxGatewayClient.AcknowledgeAlarmAsync</c> RPC. The updated gateway routes
|
||||
/// acknowledgement through its always-on central alarm monitor, so no worker
|
||||
/// session is involved — the driver supplies only the alarm reference, comment,
|
||||
/// and operator principal.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// A non-OK <see cref="ProtocolStatus"/> means the gateway never reached MXAccess
|
||||
/// (transport / dispatch failure) and is surfaced as a thrown exception. A non-zero
|
||||
/// native ack return code (<c>hresult</c>) means MXAccess itself rejected the ack;
|
||||
/// that is logged as a warning rather than thrown so a transient MXAccess hiccup
|
||||
/// doesn't block the operator workflow — the operator can retry.
|
||||
/// </remarks>
|
||||
internal sealed class GatewayGalaxyAlarmAcknowledger : IGalaxyAlarmAcknowledger
|
||||
{
|
||||
private readonly MxGatewayClient _client;
|
||||
private readonly GalaxyMxSession _session;
|
||||
private readonly ILogger _logger;
|
||||
|
||||
public GatewayGalaxyAlarmAcknowledger(
|
||||
MxGatewayClient client,
|
||||
GalaxyMxSession session,
|
||||
ILogger logger)
|
||||
public GatewayGalaxyAlarmAcknowledger(MxGatewayClient client, ILogger logger)
|
||||
{
|
||||
_client = client ?? throw new ArgumentNullException(nameof(client));
|
||||
_session = session ?? throw new ArgumentNullException(nameof(session));
|
||||
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
|
||||
}
|
||||
|
||||
@@ -36,15 +37,9 @@ internal sealed class GatewayGalaxyAlarmAcknowledger : IGalaxyAlarmAcknowledger
|
||||
{
|
||||
ArgumentException.ThrowIfNullOrEmpty(alarmFullReference);
|
||||
|
||||
var session = _session.Session
|
||||
?? throw new InvalidOperationException(
|
||||
"GatewayGalaxyAlarmAcknowledger requires a connected GalaxyMxSession; underlying gateway session is null.");
|
||||
var sessionId = session.SessionId;
|
||||
|
||||
var reply = await _client.AcknowledgeAlarmAsync(
|
||||
new AcknowledgeAlarmRequest
|
||||
{
|
||||
SessionId = sessionId,
|
||||
ClientCorrelationId = Guid.NewGuid().ToString("N"),
|
||||
AlarmFullReference = alarmFullReference,
|
||||
Comment = comment ?? string.Empty,
|
||||
@@ -52,14 +47,23 @@ internal sealed class GatewayGalaxyAlarmAcknowledger : IGalaxyAlarmAcknowledger
|
||||
},
|
||||
cancellationToken).ConfigureAwait(false);
|
||||
|
||||
if (reply.Status is { Success: 0 } status)
|
||||
// Protocol status — the gateway failed before MXAccess saw the ack. This is a
|
||||
// hard failure: the operator's request was not delivered at all.
|
||||
if (reply.ProtocolStatus is { } proto && proto.Code != ProtocolStatusCode.Ok)
|
||||
{
|
||||
throw new InvalidOperationException(
|
||||
$"Galaxy AcknowledgeAlarm for '{alarmFullReference}' failed at the gateway: "
|
||||
+ $"{proto.Code} {proto.Message}");
|
||||
}
|
||||
|
||||
// hresult is the authoritative native ack return code (0 = success). It is
|
||||
// absent only on a worker protocol violation; with an OK protocol status a
|
||||
// missing value is treated as success.
|
||||
if (reply.HasHresult && reply.Hresult != 0)
|
||||
{
|
||||
// Native MxAccess rejected the ack — log but don't throw. Treat as a
|
||||
// best-effort operator workflow; the operator can retry via the OPC UA
|
||||
// session if necessary.
|
||||
_logger.LogWarning(
|
||||
"Galaxy AcknowledgeAlarm for {AlarmRef} returned MxStatus failure: category={Category} detail={Detail} text={Text}",
|
||||
alarmFullReference, status.Category, status.Detail, status.DiagnosticText);
|
||||
"Galaxy AcknowledgeAlarm for {AlarmRef} returned native ack failure code {Hresult}.",
|
||||
alarmFullReference, reply.Hresult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,264 @@
|
||||
using System.Diagnostics.Metrics;
|
||||
using Microsoft.Extensions.Logging;
|
||||
using Microsoft.Extensions.Logging.Abstractions;
|
||||
using MxGateway.Contracts.Proto;
|
||||
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Production <see cref="IGalaxyAlarmFeed"/> over the gateway's session-less
/// <c>StreamAlarms</c> RPC. The stream opens with one <see cref="ActiveAlarmSnapshot"/>
/// per currently-active alarm (the ConditionRefresh snapshot), then a
/// <c>snapshot_complete</c> sentinel, then a live <see cref="OnAlarmTransitionEvent"/>
/// for every subsequent raise / acknowledge / clear. Each message is decoded into a
/// <see cref="GalaxyAlarmTransition"/> (severity already bucketed via
/// <see cref="MxAccessSeverityMapper"/>) and surfaced on <see cref="OnAlarmTransition"/>.
/// </summary>
/// <remarks>
/// <para>
/// The feed is independent of any worker session — the gateway's always-on central
/// alarm monitor owns the AVEVA subscription. The driver previously decoded alarm
/// transitions off the per-session <c>StreamEvents</c> stream (<see cref="EventPump"/>);
/// that path was retired when the gateway moved to the session-less alarm model.
/// </para>
/// <para>
/// The stream is supplied as a factory delegate (production passes
/// <c>MxGatewayClient.StreamAlarmsAsync</c>) so tests can drive synthetic feeds.
/// Streaming RPCs are not covered by the client's unary retry pipeline, so the feed
/// owns its reconnect: on any non-cancellation stream fault it logs, waits
/// <c>reconnectDelay</c>, and re-opens. The gateway re-sends the active-alarm
/// snapshot on every re-open, so the OPC UA condition layer sees current state
/// after a reconnect.
/// </para>
/// <para>
/// A failure while decoding a single message is counted as a decoding failure and
/// the message is dropped; it does not tear down the stream. Only faults raised by
/// the stream itself trigger the reconnect path.
/// </para>
/// </remarks>
internal sealed class GatewayGalaxyAlarmFeed : IGalaxyAlarmFeed
{
    /// <summary>
    /// Opens a <c>StreamAlarms</c> feed. Matches the method group
    /// <c>MxGatewayClient.StreamAlarmsAsync</c>.
    /// </summary>
    internal delegate IAsyncEnumerable<AlarmFeedMessage> AlarmStreamFactory(
        StreamAlarmsRequest request, CancellationToken cancellationToken);

    private static readonly TimeSpan DefaultReconnectDelay = TimeSpan.FromSeconds(5);

    // Shares the driver meter name so a host-level MeterListener catches feed counters
    // alongside the EventPump's. Distinct Meter instance — same name is intentional.
    private static readonly Meter Meter = new(EventPump.MeterName);
    private static readonly Counter<long> AlarmTransitionsReceived =
        Meter.CreateCounter<long>("galaxy.alarm_feed.transitions.received", unit: "{event}",
            description: "Alarm feed messages decoded and forwarded to driver-level handlers.");
    private static readonly Counter<long> AlarmTransitionsDecodingFailures =
        Meter.CreateCounter<long>("galaxy.alarm_feed.transitions.decoding_failures", unit: "{event}",
            description: "Alarm feed messages dropped for a missing body or unspecified transition kind.");
    private static readonly Counter<long> AlarmFeedReconnects =
        Meter.CreateCounter<long>("galaxy.alarm_feed.reconnects", unit: "{reconnect}",
            description: "Times the alarm feed re-opened its StreamAlarms stream after a transport fault.");

    private readonly AlarmStreamFactory _streamFactory;
    private readonly ILogger _logger;
    private readonly string _alarmFilterPrefix;
    private readonly TimeSpan _reconnectDelay;
    private readonly KeyValuePair<string, object?> _clientTag;
    private readonly CancellationTokenSource _cts = new();

    private Task? _loop;
    private bool _disposed;

    /// <summary>
    /// Raised once per decoded snapshot entry or live transition.
    /// </summary>
    public event EventHandler<GalaxyAlarmTransition>? OnAlarmTransition;

    /// <summary>
    /// Creates a feed over the supplied stream factory.
    /// </summary>
    /// <param name="streamFactory">Opens one <c>StreamAlarms</c> stream per (re)connect attempt.</param>
    /// <param name="logger">Logger; a no-op logger is used when <c>null</c>.</param>
    /// <param name="clientName">Value of the <c>galaxy.client</c> metric tag; <c>&lt;unknown&gt;</c> when <c>null</c>.</param>
    /// <param name="alarmFilterPrefix">Sent as the request's alarm filter prefix; empty when <c>null</c>.</param>
    /// <param name="reconnectDelay">Wait between stream attempts; five seconds when <c>null</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="streamFactory"/> is <c>null</c>.</exception>
    public GatewayGalaxyAlarmFeed(
        AlarmStreamFactory streamFactory,
        ILogger? logger = null,
        string? clientName = null,
        string? alarmFilterPrefix = null,
        TimeSpan? reconnectDelay = null)
    {
        _streamFactory = streamFactory ?? throw new ArgumentNullException(nameof(streamFactory));
        _logger = logger ?? NullLogger.Instance;
        _alarmFilterPrefix = alarmFilterPrefix ?? string.Empty;
        _reconnectDelay = reconnectDelay ?? DefaultReconnectDelay;
        _clientTag = new KeyValuePair<string, object?>("galaxy.client", clientName ?? "<unknown>");
    }

    /// <summary>
    /// Starts the background consume loop. Calls after the first are no-ops.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The feed has been disposed.</exception>
    public void Start()
    {
        ObjectDisposedException.ThrowIf(_disposed, this);
        if (_loop is not null) return;
        _loop = Task.Run(() => RunAsync(_cts.Token));
    }

    /// <summary>
    /// Consume loop: open the stream, dispatch every message, and after the stream
    /// ends or faults wait <c>_reconnectDelay</c> and open it again until cancelled.
    /// </summary>
    private async Task RunAsync(CancellationToken ct)
    {
        var firstAttempt = true;
        while (!ct.IsCancellationRequested)
        {
            // Every attempt after the first counts as a reconnect.
            if (!firstAttempt)
            {
                AlarmFeedReconnects.Add(1, _clientTag);
            }
            firstAttempt = false;

            try
            {
                var request = new StreamAlarmsRequest
                {
                    ClientCorrelationId = Guid.NewGuid().ToString("N"),
                    AlarmFilterPrefix = _alarmFilterPrefix,
                };

                await foreach (var message in _streamFactory(request, ct)
                    .WithCancellation(ct).ConfigureAwait(false))
                {
                    if (ct.IsCancellationRequested) break;
                    Dispatch(message);
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                return; // clean shutdown
            }
            catch (Exception ex)
            {
                _logger.LogWarning(ex,
                    "Galaxy alarm feed stream faulted — reopening in {DelaySeconds}s.",
                    _reconnectDelay.TotalSeconds);
            }

            try
            {
                await Task.Delay(_reconnectDelay, ct).ConfigureAwait(false);
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                return;
            }
        }
    }

    /// <summary>
    /// Routes one feed message by payload kind. Any exception thrown while decoding
    /// (for example a protobuf timestamp that cannot be converted) is counted as a
    /// decoding failure and logged, and the message is dropped, so one bad message
    /// does not fault the stream and force a reconnect.
    /// </summary>
    private void Dispatch(AlarmFeedMessage message)
    {
        try
        {
            switch (message.PayloadCase)
            {
                case AlarmFeedMessage.PayloadOneofCase.ActiveAlarm:
                    DispatchSnapshotEntry(message.ActiveAlarm);
                    break;
                case AlarmFeedMessage.PayloadOneofCase.Transition:
                    DispatchTransition(message.Transition);
                    break;
                case AlarmFeedMessage.PayloadOneofCase.SnapshotComplete:
                    _logger.LogDebug("Galaxy alarm feed active-alarm snapshot complete.");
                    break;
                default:
                    // Empty oneof — worker / gateway version skew. Count and drop.
                    AlarmTransitionsDecodingFailures.Add(1, _clientTag);
                    break;
            }
        }
        catch (Exception ex)
        {
            // Handler exceptions are already contained inside Raise, so anything
            // reaching this point came from decoding the message itself.
            AlarmTransitionsDecodingFailures.Add(1, _clientTag);
            _logger.LogWarning(ex,
                "Galaxy alarm feed message ({PayloadCase}) could not be decoded; dropping it and keeping the stream open.",
                message?.PayloadCase);
        }
    }

    /// <summary>
    /// Decode one entry of the initial active-alarm snapshot. Each entry is surfaced
    /// as a transition so the OPC UA Part 9 condition layer sees the alarm's present
    /// state on (re)connect: <c>Active</c> becomes
    /// <see cref="GalaxyAlarmTransitionKind.Raise"/>, <c>ActiveAcked</c> becomes
    /// <see cref="GalaxyAlarmTransitionKind.Acknowledge"/>, and <c>Inactive</c>
    /// becomes <see cref="GalaxyAlarmTransitionKind.Clear"/>. Any other state is
    /// counted as a decoding failure and dropped.
    /// </summary>
    private void DispatchSnapshotEntry(ActiveAlarmSnapshot snapshot)
    {
        var kind = snapshot.CurrentState switch
        {
            AlarmConditionState.Active => GalaxyAlarmTransitionKind.Raise,
            AlarmConditionState.ActiveAcked => GalaxyAlarmTransitionKind.Acknowledge,
            AlarmConditionState.Inactive => GalaxyAlarmTransitionKind.Clear,
            _ => GalaxyAlarmTransitionKind.Unspecified,
        };
        if (kind == GalaxyAlarmTransitionKind.Unspecified)
        {
            AlarmTransitionsDecodingFailures.Add(1, _clientTag);
            _logger.LogDebug(
                "Galaxy alarm feed snapshot entry for {AlarmRef} has unspecified condition state; ignoring.",
                snapshot.AlarmFullReference);
            return;
        }

        var (bucket, opcUaSeverity) = MxAccessSeverityMapper.Map(snapshot.Severity);
        Raise(new GalaxyAlarmTransition(
            AlarmFullReference: snapshot.AlarmFullReference,
            SourceObjectReference: snapshot.SourceObjectReference,
            AlarmTypeName: snapshot.AlarmTypeName,
            TransitionKind: kind,
            SeverityBucket: bucket,
            OpcUaSeverity: opcUaSeverity,
            RawMxAccessSeverity: snapshot.Severity,
            OriginalRaiseTimestampUtc: snapshot.OriginalRaiseTimestamp?.ToDateTime(),
            // Fall back to "now" when the gateway did not supply a transition time.
            TransitionTimestampUtc: snapshot.LastTransitionTimestamp?.ToDateTime() ?? DateTime.UtcNow,
            OperatorUser: snapshot.OperatorUser,
            OperatorComment: snapshot.OperatorComment,
            Category: snapshot.Category,
            Description: snapshot.Description));
    }

    /// <summary>
    /// Decode one live transition. Transitions with an unspecified kind are counted
    /// as decoding failures and dropped.
    /// </summary>
    private void DispatchTransition(OnAlarmTransitionEvent body)
    {
        if (body.TransitionKind == AlarmTransitionKind.Unspecified)
        {
            AlarmTransitionsDecodingFailures.Add(1, _clientTag);
            _logger.LogDebug(
                "Galaxy alarm feed transition for {AlarmRef} has unspecified transition kind; ignoring.",
                body.AlarmFullReference);
            return;
        }

        var (bucket, opcUaSeverity) = MxAccessSeverityMapper.Map(body.Severity);
        Raise(new GalaxyAlarmTransition(
            AlarmFullReference: body.AlarmFullReference,
            SourceObjectReference: body.SourceObjectReference,
            AlarmTypeName: body.AlarmTypeName,
            TransitionKind: MapTransitionKind(body.TransitionKind),
            SeverityBucket: bucket,
            OpcUaSeverity: opcUaSeverity,
            RawMxAccessSeverity: body.Severity,
            OriginalRaiseTimestampUtc: body.OriginalRaiseTimestamp?.ToDateTime(),
            // Fall back to "now" when the gateway did not supply a transition time.
            TransitionTimestampUtc: body.TransitionTimestamp?.ToDateTime() ?? DateTime.UtcNow,
            OperatorUser: body.OperatorUser,
            OperatorComment: body.OperatorComment,
            Category: body.Category,
            Description: body.Description));
    }

    /// <summary>
    /// Count the transition and hand it to subscribers. A throwing subscriber is
    /// logged and swallowed so it cannot stop the consume loop.
    /// </summary>
    private void Raise(GalaxyAlarmTransition transition)
    {
        AlarmTransitionsReceived.Add(1, _clientTag);
        try
        {
            OnAlarmTransition?.Invoke(this, transition);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex,
                "Galaxy alarm feed OnAlarmTransition handler threw for {AlarmRef} — continuing.",
                transition.AlarmFullReference);
        }
    }

    /// <summary>
    /// Maps the wire transition kind onto the driver-side enum; unknown values map
    /// to <see cref="GalaxyAlarmTransitionKind.Unspecified"/>.
    /// </summary>
    private static GalaxyAlarmTransitionKind MapTransitionKind(AlarmTransitionKind kind) => kind switch
    {
        AlarmTransitionKind.Raise => GalaxyAlarmTransitionKind.Raise,
        AlarmTransitionKind.Acknowledge => GalaxyAlarmTransitionKind.Acknowledge,
        AlarmTransitionKind.Clear => GalaxyAlarmTransitionKind.Clear,
        AlarmTransitionKind.Retrigger => GalaxyAlarmTransitionKind.Retrigger,
        _ => GalaxyAlarmTransitionKind.Unspecified,
    };

    /// <summary>
    /// Cancels the consume loop, waits for it to finish, and releases the
    /// cancellation source. Subsequent calls are no-ops.
    /// </summary>
    public async ValueTask DisposeAsync()
    {
        if (_disposed) return;
        _disposed = true;
        _cts.Cancel();
        if (_loop is not null)
        {
            try { await _loop.ConfigureAwait(false); } catch { /* shutdown */ }
        }
        _cts.Dispose();
    }
}
|
||||
@@ -0,0 +1,29 @@
|
||||
namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
|
||||
/// <summary>
/// Abstraction the driver uses to consume the gateway's session-less alarm feed.
/// The production implementation is <see cref="GatewayGalaxyAlarmFeed"/>, which
/// wraps <c>MxGatewayClient.StreamAlarmsAsync</c>. Tests supply a fake so that
/// synthetic <see cref="GalaxyAlarmTransition"/> events can be pushed through
/// <see cref="GalaxyDriver"/>'s <c>IAlarmSource</c> bridge with no gateway running.
/// </summary>
/// <remarks>
/// No worker session is involved: the updated gateway serves alarms from an
/// always-on central monitor. The feed therefore outlives subscription churn and
/// is responsible for reconnecting its own stream after transient transport
/// failures.
/// </remarks>
internal interface IGalaxyAlarmFeed : IAsyncDisposable
{
    /// <summary>
    /// Raised for each alarm transition delivered by the gateway feed. This covers
    /// the entries of the initial active-alarm snapshot as well as every live
    /// raise / acknowledge / clear that follows. The OPC UA severity bucket has
    /// already been mapped on the event payload.
    /// </summary>
    event EventHandler<GalaxyAlarmTransition>? OnAlarmTransition;

    /// <summary>
    /// Begin consuming the alarm feed on a background task. Idempotent: calling it
    /// again while the loop is running does nothing.
    /// </summary>
    void Start();
}
|
||||
Reference in New Issue
Block a user