fix(redundancy): always publish first ServiceLevel (even 0) + log SafeSelfStatus failures (code-review)

This commit is contained in:
Joseph Doherty
2026-06-15 13:00:25 -04:00
parent 3e609a2b19
commit a9ff1a64b2
2 changed files with 33 additions and 2 deletions
@@ -64,6 +64,7 @@ public sealed class OpcUaPublishActor : ReceiveActor
private int _writes;
// Last ServiceLevel accepted by HandleServiceLevelChanged. Starts at the byte default (0), so on
// its own it cannot distinguish "never published" from "already published 0".
private byte _lastServiceLevel;
// Set to true after the first _serviceLevel.Publish call returns; while false, the dedup check in
// HandleServiceLevelChanged is bypassed so the first computed level is always published.
private bool _publishedAtLeastOnce;
// NOTE(review): presumably the most recent DB-health probe result (null until one arrives) — confirm.
private DbHealthProbeActor.DbHealthStatus? _lastDbHealth;
// NOTE(review): presumably the most recent redundancy snapshot received (null until one arrives) — confirm.
private RedundancyStateChanged? _lastSnapshot;
private Phase7CompositionResult _lastApplied = new(
@@ -339,11 +340,15 @@ public sealed class OpcUaPublishActor : ReceiveActor
private void HandleServiceLevelChanged(ServiceLevelChanged msg)
{
if (msg.ServiceLevel == _lastServiceLevel) return;
// Always publish the FIRST computed level, even if it equals the byte-default 0. Otherwise a
// node starting Detached/role-less (first level = 0) would be dedup'd away, leaving the SDK's
// built-in default (255 = full service) standing — a degraded node wrongly advertising 255.
if (_publishedAtLeastOnce && msg.ServiceLevel == _lastServiceLevel) return;
_lastServiceLevel = msg.ServiceLevel;
try
{
_serviceLevel.Publish(msg.ServiceLevel);
_publishedAtLeastOnce = true;
OtOpcUaTelemetry.ServiceLevelChange.Add(1,
new KeyValuePair<string, object?>("level", msg.ServiceLevel));
_log.Debug("OpcUaPublish: ServiceLevel={Level}", msg.ServiceLevel);
@@ -436,8 +441,9 @@ public sealed class OpcUaPublishActor : ReceiveActor
{
return _cluster.SelfMember.Status;
}
catch
catch (Exception ex)
{
_log.Debug(ex, "OpcUaPublish: SelfMember status unavailable; treating as Removed (ServiceLevel→0)");
return MemberStatus.Removed;
}
}
@@ -113,6 +113,31 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase
duration: TimeSpan.FromMilliseconds(500));
}
/// <summary>
/// Regression guard for the first-publish dedup: when the first computed ServiceLevel is 0 it must
/// still reach the publisher instead of being treated as "unchanged" against the <c>byte</c>
/// default of 0. Without that publish the SDK default (255 = full service) would remain advertised
/// by a node that is actually Detached or role-less.
/// </summary>
/// <remarks>
/// Sends a single <c>RedundancyStateChanged</c> whose only entry is the local node in the Detached
/// role (first computed level = 0), passes no DB-health probe to the test props, and expects the
/// recording publisher to have observed exactly <c>[0]</c>.
/// </remarks>
[Fact]
public void First_service_level_zero_is_published_not_deduped()
{
    // Arrange: an actor whose local node is known only as Detached.
    var recorder = new RecordingPublisher();
    var detachedNode = NodeId.Parse("detached-node");
    var props = OpcUaPublishActor.PropsForTests(serviceLevel: recorder, localNode: detachedNode);
    var publishActor = Sys.ActorOf(props);

    var detachedState = new NodeRedundancyState(detachedNode, RedundancyRole.Detached,
        IsClusterLeader: false, IsRoleLeaderForDriver: false, DateTime.UtcNow);
    var snapshot = new RedundancyStateChanged(
        Nodes: new[] { detachedState },
        CorrelationId.NewId());

    // Act: deliver the one and only redundancy snapshot.
    publishActor.Tell(snapshot);

    // Assert: the zero level was published exactly once rather than swallowed by the dedup.
    AwaitAssert(() => recorder.Levels.ShouldBe(new byte[] { 0 }),
        duration: TimeSpan.FromMilliseconds(500));
}
/// <summary>Verifies that RedundancyStateChanged drives local ServiceLevel publish for primary leader.</summary>
[Fact]
public void RedundancyStateChanged_drives_local_ServiceLevel_publish_for_primary_leader()