diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs b/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs index 07a05250..468b2502 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/Drivers/DriverInstanceActor.cs @@ -288,6 +288,9 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers // own thread); drop it — the feed re-delivers active alarms once Connected. Trace only. Receive(_ => _log.Debug("DriverInstance {Id}: native alarm arrived during connect — dropped (feed re-delivers)", _driverInstanceId)); + // A SubscribeAlarms self-tell (from Connected) can be overtaken by an already-queued disconnect into + // this state; swallow it so it doesn't dead-letter — the next Connected entry re-subscribes. + Receive(_ => { }); Receive(_ => PublishHealthSnapshot()); } @@ -321,7 +324,10 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers // Native-alarm analogue: un-gate the IAlarmSource feed when alarm tags are (now) present. The // common live path — a deploy delivers SetDesiredSubscriptions while the driver is already // Connected — flows through HERE, so the alarm subscribe must happen on this message, not only - // on Connected entry. + // on Connected entry. Unlike the value path above, there is deliberately no unsubscribe-on-empty: + // a removed alarm tag's condition node is torn down on the address-space rebuild and + // DriverHostActor.ForwardNativeAlarm drops any transition whose ConditionId no longer maps, so a + // lingering session-less alarm subscription can never surface a removed alarm. SubscribeDesiredAlarms(); }); ReceiveAsync(HandleSubscribeAlarmsAsync); @@ -378,6 +384,9 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers // own thread); drop it — the feed re-delivers active alarms once Connected. Trace only. Receive(_ => _log.Debug("DriverInstance {Id}: native alarm arrived during reconnect — dropped (feed re-delivers)", _driverInstanceId)); + // A SubscribeAlarms self-tell (from Connected) can be overtaken by an already-queued disconnect into + // this state; swallow it so it doesn't dead-letter — the next Connected entry re-subscribes. + Receive(_ => { }); Receive(_ => PublishHealthSnapshot()); Timers.StartPeriodicTimer("retry-connect", RetryConnect.Instance, _reconnectInterval); } @@ -558,8 +567,13 @@ public sealed class DriverInstanceActor : ReceiveActor, IWithTimers if (_driver is IAlarmSource src && _alarmEventHandler is not null) src.OnAlarmEvent -= _alarmEventHandler; _alarmEventHandler = null; - // Drop the handle so the next Connected entry re-subscribes against the freshly re-initialised - // driver (its old alarm-subscription set was cleared on reconnect). The desired alarm refs persist. + // Drop our handle so the next Connected entry re-subscribes; the desired alarm refs persist across + // reconnects. NOTE: this does NOT tear down the driver-side subscription. For a session-bound + // IAlarmSource the old subscription dies with the session (no accumulation). For a session-less feed + // (GalaxyDriver's always-on central monitor) it survives an in-place reconnect, so the re-subscribe + // is additive — harmless because the gate only checks Count > 0 and the feed fans out once + // regardless of handle count, but it does slowly accumulate handles across many reconnects (a minor + // leak tracked as a follow-up; the correct cleanup is a driver-side reset on re-init). _alarmSubscriptionHandle = null; }