From a9ff1a64b23823126b8e13ae81b801bb6b6b0010 Mon Sep 17 00:00:00 2001 From: Joseph Doherty Date: Mon, 15 Jun 2026 13:00:25 -0400 Subject: [PATCH] fix(redundancy): always publish first ServiceLevel (even 0) + log SafeSelfStatus failures (code-review) --- .../OpcUa/OpcUaPublishActor.cs | 10 ++++++-- .../OpcUa/OpcUaPublishActorTests.cs | 25 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs b/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs index 9336560a..2231e997 100644 --- a/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs +++ b/src/Server/ZB.MOM.WW.OtOpcUa.Runtime/OpcUa/OpcUaPublishActor.cs @@ -64,6 +64,7 @@ public sealed class OpcUaPublishActor : ReceiveActor private int _writes; private byte _lastServiceLevel; + private bool _publishedAtLeastOnce; private DbHealthProbeActor.DbHealthStatus? _lastDbHealth; private RedundancyStateChanged? _lastSnapshot; private Phase7CompositionResult _lastApplied = new( @@ -339,11 +340,15 @@ public sealed class OpcUaPublishActor : ReceiveActor private void HandleServiceLevelChanged(ServiceLevelChanged msg) { - if (msg.ServiceLevel == _lastServiceLevel) return; + // Always publish the FIRST computed level, even if it equals the byte-default 0. Otherwise a + // node starting Detached/role-less (first level = 0) would be dedup'd away, leaving the SDK's + // built-in default (255 = full service) standing — a degraded node wrongly advertising 255. + if (_publishedAtLeastOnce && msg.ServiceLevel == _lastServiceLevel) return; _lastServiceLevel = msg.ServiceLevel; try { _serviceLevel.Publish(msg.ServiceLevel); + _publishedAtLeastOnce = true; OtOpcUaTelemetry.ServiceLevelChange.Add(1, new KeyValuePair("level", msg.ServiceLevel)); _log.Debug("OpcUaPublish: ServiceLevel={Level}", msg.ServiceLevel); @@ -436,8 +441,9 @@ public sealed class OpcUaPublishActor : ReceiveActor { return _cluster.SelfMember.Status; } - catch + catch (Exception ex) { + _log.Debug(ex, "OpcUaPublish: SelfMember status unavailable; treating as Removed (ServiceLevel→0)"); return MemberStatus.Removed; } } diff --git a/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs b/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs index 1dcd9ddd..cce7bc6f 100644 --- a/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs +++ b/tests/Server/ZB.MOM.WW.OtOpcUa.Runtime.Tests/OpcUa/OpcUaPublishActorTests.cs @@ -113,6 +113,31 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase duration: TimeSpan.FromMilliseconds(500)); } + /// Verifies that the very first computed ServiceLevel is always published even when it is + /// 0 — a node starting Detached (or role-less) must publish 0 rather than let the SDK default + /// (255 = full service) stand. The dedup against the byte-default 0 must not swallow + /// the first publish. Drives a Detached local entry (first computed level = 0) with no DB-health + /// probe and asserts the publisher saw exactly [0]. + [Fact] + public void First_service_level_zero_is_published_not_deduped() + { + var publisher = new RecordingPublisher(); + var local = NodeId.Parse("detached-node"); + var actor = Sys.ActorOf(OpcUaPublishActor.PropsForTests( + serviceLevel: publisher, localNode: local)); + + actor.Tell(new RedundancyStateChanged( + Nodes: new[] + { + new NodeRedundancyState(local, RedundancyRole.Detached, + IsClusterLeader: false, IsRoleLeaderForDriver: false, DateTime.UtcNow), + }, + CorrelationId.NewId())); + + AwaitAssert(() => publisher.Levels.ShouldBe(new byte[] { 0 }), + duration: TimeSpan.FromMilliseconds(500)); + } + /// Verifies that RedundancyStateChanged drives local ServiceLevel publish for primary leader. [Fact] public void RedundancyStateChanged_drives_local_ServiceLevel_publish_for_primary_leader()