fix(driver-galaxy): wire event-stream faults to the reconnect supervisor (Driver.Galaxy-001)
The ReconnectSupervisor was constructed, but its trigger, ReportTransportFailure, was never called. When the gateway StreamEvents stream faulted, EventPump just logged and exited — the supervisor was never notified, so a transient gateway drop permanently stopped data-change notifications while GetHealth() still reported Healthy. EventPump gains an optional onStreamFault callback invoked from its stream-fault catch block (not on clean shutdown). GalaxyDriver wires it to ReconnectSupervisor.ReportTransportFailure so a transport drop drives reopen → replay. This is the minimal fix for Driver.Galaxy-001; the pump-restart-on-reopen gap remains tracked as Driver.Galaxy-008. Regression tests cover the callback firing on a stream fault, the end-to-end supervisor reopen/replay, and the callback not firing on a clean shutdown. Driver.Galaxy suite: 206/206 pass. Resolves code-review finding Driver.Galaxy-001 (Critical). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,9 +16,11 @@ namespace ZB.MOM.WW.OtOpcUa.Driver.Galaxy.Runtime;
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// One pump per connected <see cref="GalaxyMxSession"/>. Reconnect lives in PR 4.5's
|
||||
/// supervisor; on transport failure here we log + propagate so the supervisor can
|
||||
/// decide whether to restart.
|
||||
/// One pump per connected <see cref="GalaxyMxSession"/>. Reconnect lives in the
|
||||
/// <see cref="ReconnectSupervisor"/>; on transport failure here we log and invoke
|
||||
/// the optional <c>onStreamFault</c> callback so the owner (GalaxyDriver) can
|
||||
/// forward the fault to <see cref="ReconnectSupervisor.ReportTransportFailure"/>
|
||||
/// and the supervisor can drive reopen → replay.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// PR 6.2 — the network-read loop and the listener-fanout loop are decoupled by a
|
||||
@@ -50,6 +52,7 @@ internal sealed class EventPump : IAsyncDisposable
|
||||
private readonly SubscriptionRegistry _registry;
|
||||
private readonly ILogger _logger;
|
||||
private readonly Func<long, ISubscriptionHandle> _handleFactory;
|
||||
private readonly Action<Exception>? _onStreamFault;
|
||||
private readonly Channel<MxEvent> _channel;
|
||||
private readonly KeyValuePair<string, object?> _clientTag;
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
@@ -66,12 +69,14 @@ internal sealed class EventPump : IAsyncDisposable
|
||||
ILogger? logger = null,
|
||||
Func<long, ISubscriptionHandle>? handleFactory = null,
|
||||
int channelCapacity = DefaultChannelCapacity,
|
||||
string? clientName = null)
|
||||
string? clientName = null,
|
||||
Action<Exception>? onStreamFault = null)
|
||||
{
|
||||
_subscriber = subscriber ?? throw new ArgumentNullException(nameof(subscriber));
|
||||
_registry = registry ?? throw new ArgumentNullException(nameof(registry));
|
||||
_logger = logger ?? NullLogger.Instance;
|
||||
_handleFactory = handleFactory ?? (id => new GalaxySubscriptionHandle(id));
|
||||
_onStreamFault = onStreamFault;
|
||||
|
||||
if (channelCapacity < 1)
|
||||
{
|
||||
@@ -127,7 +132,20 @@ internal sealed class EventPump : IAsyncDisposable
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.LogWarning(ex,
|
||||
"Galaxy EventPump loop ended with an exception — reconnect supervisor (PR 4.5) handles restart.");
|
||||
"Galaxy EventPump loop ended with an exception — notifying reconnect supervisor.");
|
||||
|
||||
// The gw StreamEvents stream faulted. Signal the reconnect supervisor so it
|
||||
// drives reopen → replay. Without this the stream silently dies and a
|
||||
// transient gateway drop permanently stops data-change notifications.
|
||||
if (_onStreamFault is not null)
|
||||
{
|
||||
try { _onStreamFault(ex); }
|
||||
catch (Exception cbEx)
|
||||
{
|
||||
_logger.LogWarning(cbEx,
|
||||
"Galaxy EventPump stream-fault callback threw — supervisor may not have been notified.");
|
||||
}
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user