fix(redundancy): always publish first ServiceLevel (even 0) + log SafeSelfStatus failures (code-review)
This commit is contained in:
@@ -64,6 +64,7 @@ public sealed class OpcUaPublishActor : ReceiveActor
|
||||
|
||||
private int _writes;
|
||||
private byte _lastServiceLevel;
|
||||
private bool _publishedAtLeastOnce;
|
||||
private DbHealthProbeActor.DbHealthStatus? _lastDbHealth;
|
||||
private RedundancyStateChanged? _lastSnapshot;
|
||||
private Phase7CompositionResult _lastApplied = new(
|
||||
@@ -339,11 +340,15 @@ public sealed class OpcUaPublishActor : ReceiveActor
|
||||
|
||||
private void HandleServiceLevelChanged(ServiceLevelChanged msg)
|
||||
{
|
||||
if (msg.ServiceLevel == _lastServiceLevel) return;
|
||||
// Always publish the FIRST computed level, even if it equals the byte-default 0. Otherwise a
|
||||
// node starting Detached/role-less (first level = 0) would be dedup'd away, leaving the SDK's
|
||||
// built-in default (255 = full service) standing — a degraded node wrongly advertising 255.
|
||||
if (_publishedAtLeastOnce && msg.ServiceLevel == _lastServiceLevel) return;
|
||||
_lastServiceLevel = msg.ServiceLevel;
|
||||
try
|
||||
{
|
||||
_serviceLevel.Publish(msg.ServiceLevel);
|
||||
_publishedAtLeastOnce = true;
|
||||
OtOpcUaTelemetry.ServiceLevelChange.Add(1,
|
||||
new KeyValuePair<string, object?>("level", msg.ServiceLevel));
|
||||
_log.Debug("OpcUaPublish: ServiceLevel={Level}", msg.ServiceLevel);
|
||||
@@ -436,8 +441,9 @@ public sealed class OpcUaPublishActor : ReceiveActor
|
||||
{
|
||||
return _cluster.SelfMember.Status;
|
||||
}
|
||||
catch
|
||||
catch (Exception ex)
|
||||
{
|
||||
_log.Debug(ex, "OpcUaPublish: SelfMember status unavailable; treating as Removed (ServiceLevel→0)");
|
||||
return MemberStatus.Removed;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,6 +113,31 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase
|
||||
duration: TimeSpan.FromMilliseconds(500));
|
||||
}
|
||||
|
||||
/// <summary>
/// Guards against the first-publish dedup trap: the initial computed ServiceLevel must reach the
/// publisher even when it is 0, which is also the <c>byte</c> default the actor's last-level field
/// starts with. A node that starts Detached (or role-less) has to advertise 0 instead of leaving
/// the SDK default (255 = full service) in place. The test sends a single Detached entry for the
/// local node, configures no DB-health probe, and expects the recorded levels to be exactly [0].
/// </summary>
[Fact]
public void First_service_level_zero_is_published_not_deduped()
{
    // Arrange: an actor wired to a recording publisher, identified as the detached node.
    var recorder = new RecordingPublisher();
    var selfNode = NodeId.Parse("detached-node");
    var props = OpcUaPublishActor.PropsForTests(serviceLevel: recorder, localNode: selfNode);
    var publishActor = Sys.ActorOf(props);

    // Act: the only redundancy entry is the local node in the Detached role, leader of nothing.
    var detachedSelf = new NodeRedundancyState(
        selfNode,
        RedundancyRole.Detached,
        IsClusterLeader: false,
        IsRoleLeaderForDriver: false,
        DateTime.UtcNow);
    var snapshot = new RedundancyStateChanged(
        Nodes: new[] { detachedSelf },
        CorrelationId.NewId());
    publishActor.Tell(snapshot);

    // Assert: exactly one publish was recorded, carrying level 0.
    var timeout = TimeSpan.FromMilliseconds(500);
    AwaitAssert(
        () => recorder.Levels.ShouldBe(new byte[] { 0 }),
        duration: timeout);
}
|
||||
|
||||
/// <summary>Verifies that RedundancyStateChanged drives local ServiceLevel publish for primary leader.</summary>
|
||||
[Fact]
|
||||
public void RedundancyStateChanged_drives_local_ServiceLevel_publish_for_primary_leader()
|
||||
|
||||
Reference in New Issue
Block a user