fix(redundancy): always publish first ServiceLevel (even 0) + log SafeSelfStatus failures (code-review)
This commit is contained in:
@@ -64,6 +64,7 @@ public sealed class OpcUaPublishActor : ReceiveActor
|
|||||||
|
|
||||||
private int _writes;
|
private int _writes;
|
||||||
private byte _lastServiceLevel;
|
private byte _lastServiceLevel;
|
||||||
|
private bool _publishedAtLeastOnce;
|
||||||
private DbHealthProbeActor.DbHealthStatus? _lastDbHealth;
|
private DbHealthProbeActor.DbHealthStatus? _lastDbHealth;
|
||||||
private RedundancyStateChanged? _lastSnapshot;
|
private RedundancyStateChanged? _lastSnapshot;
|
||||||
private Phase7CompositionResult _lastApplied = new(
|
private Phase7CompositionResult _lastApplied = new(
|
||||||
@@ -339,11 +340,15 @@ public sealed class OpcUaPublishActor : ReceiveActor
|
|||||||
|
|
||||||
private void HandleServiceLevelChanged(ServiceLevelChanged msg)
|
private void HandleServiceLevelChanged(ServiceLevelChanged msg)
|
||||||
{
|
{
|
||||||
if (msg.ServiceLevel == _lastServiceLevel) return;
|
// Always publish the FIRST computed level, even if it equals the byte-default 0. Otherwise a
|
||||||
|
// node starting Detached/role-less would have its first level (0) dedup'd away, leaving the SDK's
|
||||||
|
// built-in default (255 = full service) standing — a degraded node wrongly advertising 255.
|
||||||
|
if (_publishedAtLeastOnce && msg.ServiceLevel == _lastServiceLevel) return;
|
||||||
_lastServiceLevel = msg.ServiceLevel;
|
_lastServiceLevel = msg.ServiceLevel;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
_serviceLevel.Publish(msg.ServiceLevel);
|
_serviceLevel.Publish(msg.ServiceLevel);
|
||||||
|
_publishedAtLeastOnce = true;
|
||||||
OtOpcUaTelemetry.ServiceLevelChange.Add(1,
|
OtOpcUaTelemetry.ServiceLevelChange.Add(1,
|
||||||
new KeyValuePair<string, object?>("level", msg.ServiceLevel));
|
new KeyValuePair<string, object?>("level", msg.ServiceLevel));
|
||||||
_log.Debug("OpcUaPublish: ServiceLevel={Level}", msg.ServiceLevel);
|
_log.Debug("OpcUaPublish: ServiceLevel={Level}", msg.ServiceLevel);
|
||||||
@@ -436,8 +441,9 @@ public sealed class OpcUaPublishActor : ReceiveActor
|
|||||||
{
|
{
|
||||||
return _cluster.SelfMember.Status;
|
return _cluster.SelfMember.Status;
|
||||||
}
|
}
|
||||||
catch
|
catch (Exception ex)
|
||||||
{
|
{
|
||||||
|
_log.Debug(ex, "OpcUaPublish: SelfMember status unavailable; treating as Removed (ServiceLevel→0)");
|
||||||
return MemberStatus.Removed;
|
return MemberStatus.Removed;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -113,6 +113,31 @@ public sealed class OpcUaPublishActorTests : RuntimeActorTestBase
|
|||||||
duration: TimeSpan.FromMilliseconds(500));
|
duration: TimeSpan.FromMilliseconds(500));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>Verifies that the very first computed ServiceLevel is always published even when it is
|
||||||
|
/// 0 — a node starting Detached (or role-less) must publish 0 rather than let the SDK default
|
||||||
|
/// (255 = full service) stand. The dedup against the <c>byte</c>-default <c>0</c> must not swallow
|
||||||
|
/// the first publish. Drives a Detached local entry (first computed level = 0) with no DB-health
|
||||||
|
/// probe and asserts the publisher saw exactly [0].</summary>
|
||||||
|
[Fact]
|
||||||
|
public void First_service_level_zero_is_published_not_deduped()
|
||||||
|
{
|
||||||
|
var publisher = new RecordingPublisher();
|
||||||
|
var local = NodeId.Parse("detached-node");
|
||||||
|
var actor = Sys.ActorOf(OpcUaPublishActor.PropsForTests(
|
||||||
|
serviceLevel: publisher, localNode: local));
|
||||||
|
|
||||||
|
actor.Tell(new RedundancyStateChanged(
|
||||||
|
Nodes: new[]
|
||||||
|
{
|
||||||
|
new NodeRedundancyState(local, RedundancyRole.Detached,
|
||||||
|
IsClusterLeader: false, IsRoleLeaderForDriver: false, DateTime.UtcNow),
|
||||||
|
},
|
||||||
|
CorrelationId.NewId()));
|
||||||
|
|
||||||
|
AwaitAssert(() => publisher.Levels.ShouldBe(new byte[] { 0 }),
|
||||||
|
duration: TimeSpan.FromMilliseconds(500));
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>Verifies that RedundancyStateChanged drives local ServiceLevel publish for primary leader.</summary>
|
/// <summary>Verifies that RedundancyStateChanged drives local ServiceLevel publish for primary leader.</summary>
|
||||||
[Fact]
|
[Fact]
|
||||||
public void RedundancyStateChanged_drives_local_ServiceLevel_publish_for_primary_leader()
|
public void RedundancyStateChanged_drives_local_ServiceLevel_publish_for_primary_leader()
|
||||||
|
|||||||
Reference in New Issue
Block a user